In [20]:
pip install yfinance

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [21]:
pip install tensorflow

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [22]:
import numpy as np
import pandas as pd
import requests
import warnings
warnings.filterwarnings('ignore')

from collections import deque
import random
from datetime import datetime, timedelta
import json

# Try to import optional dependencies
try:
    import yfinance as yf
    HAS_YFINANCE = True
except ImportError:
    HAS_YFINANCE = False
    print("yfinance not available. Install with: pip install yfinance")

try:
    import tensorflow as tf
    from tensorflow import keras
    from tensorflow.keras import layers
    HAS_TENSORFLOW = True
except ImportError:
    HAS_TENSORFLOW = False
    print("tensorflow not available. Install with: pip install tensorflow")

try:
    import matplotlib.pyplot as plt
    HAS_MATPLOTLIB = True
except ImportError:
    HAS_MATPLOTLIB = False
    print("matplotlib not available. Install with: pip install matplotlib")

# Simple gym-like environment base classes (no external dependency)
class Space:
    """Marker base type shared by the observation and action space classes.

    It carries no state and no behaviour of its own; the concrete spaces
    subclass it so they share a common ancestor.
    """

class Discrete(Space):
    """Finite space whose members are the integers ``0`` to ``n - 1``."""

    def __init__(self, n):
        # Number of distinct values the space contains
        self.n = n

    def sample(self):
        """Draw one member uniformly at random using NumPy's global RNG."""
        return np.random.randint(self.n)

class Box(Space):
    """Bounded continuous space described by low/high limits, a shape and a dtype."""

    def __init__(self, low, high, shape, dtype=np.float32):
        self.low, self.high = low, high
        self.shape, self.dtype = shape, dtype

    def sample(self):
        """Draw a uniform sample of ``shape`` between ``low`` and ``high``.

        The draw comes from NumPy's global RNG and is cast to ``dtype``.
        """
        draw = np.random.uniform(low=self.low, high=self.high, size=self.shape)
        return draw.astype(self.dtype)

# ===============================
# Financial Data API Integration
# ===============================

class FinancialDataProvider:
    """Unified interface for financial data from multiple sources"""
    
    def __init__(self, alpha_vantage_key=None):
        """Create a provider, optionally configured for Alpha Vantage.

        Args:
            alpha_vantage_key: API key sent as ``apikey`` by
                ``get_alpha_vantage_data``. When it is falsy that method
                returns None without issuing a request. The Yahoo and
                CoinGecko helpers do not read it.
        """
        self.alpha_vantage_key = alpha_vantage_key
    
    def get_yahoo_data(self, symbol, period="1y", interval="1d"):
        """Get data from Yahoo Finance"""
        if not HAS_YFINANCE:
            print("yfinance not installed. Using sample data.")
            return self._generate_sample_data()
        
        try:
            ticker = yf.Ticker(symbol)
            data = ticker.history(period=period, interval=interval)
            return data
        except Exception as e:
            print(f"Error fetching Yahoo data: {e}")
            print("Using sample data instead.")
            return self._generate_sample_data()
    
    def _generate_sample_data(self):
        """Generate sample stock data for testing"""
        np.random.seed(42)
        dates = pd.date_range(start='2022-01-01', end='2024-01-01', freq='D')
        n_days = len(dates)
        
        # Generate realistic stock price data using geometric Brownian motion
        initial_price = 100
        drift = 0.0005  # Daily drift
        volatility = 0.02  # Daily volatility
        
        returns = np.random.normal(drift, volatility, n_days)
        prices = [initial_price]
        
        for i in range(1, n_days):
            price = prices[-1] * (1 + returns[i])
            prices.append(price)
        
        # Create OHLCV data
        data = pd.DataFrame(index=dates)
        data['Close'] = prices
        data['Open'] = data['Close'].shift(1) * (1 + np.random.normal(0, 0.005, n_days))
        data['High'] = data[['Open', 'Close']].max(axis=1) * (1 + np.abs(np.random.normal(0, 0.01, n_days)))
        data['Low'] = data[['Open', 'Close']].min(axis=1) * (1 - np.abs(np.random.normal(0, 0.01, n_days)))
        data['Volume'] = np.random.lognormal(10, 1, n_days)
        
        data = data.dropna()
        return data
    
    def get_alpha_vantage_data(self, symbol, function="TIME_SERIES_DAILY"):
        """Get data from Alpha Vantage API"""
        if not self.alpha_vantage_key:
            print("Alpha Vantage API key not provided")
            return None
        
        url = f'https://www.alphavantage.co/query'
        params = {
            'function': function,
            'symbol': symbol,
            'apikey': self.alpha_vantage_key,
            'outputsize': 'full'
        }
        
        try:
            response = requests.get(url, params=params)
            data = response.json()
            
            if "Time Series (Daily)" in data:
                df = pd.DataFrame(data["Time Series (Daily)"]).T
                df.index = pd.to_datetime(df.index)
                df = df.astype(float)
                df.columns = ['Open', 'High', 'Low', 'Close', 'Volume']
                return df
            else:
                print("Error in API response:", data.get('Error Message', 'Unknown error'))
                return None
        except Exception as e:
            print(f"Error fetching Alpha Vantage data: {e}")
            return None
    
    def get_crypto_data(self, symbol, vs_currency="usd", days=365):
        """Get cryptocurrency data from CoinGecko API"""
        url = f"https://api.coingecko.com/api/v3/coins/{symbol}/market_chart"
        params = {
            'vs_currency': vs_currency,
            'days': days,
            'interval': 'daily'
        }
        
        try:
            response = requests.get(url, params=params)
            data = response.json()
            
            prices = data['prices']
            volumes = data['total_volumes']
            
            df = pd.DataFrame(prices, columns=['timestamp', 'price'])
            df['volume'] = [vol[1] for vol in volumes]
            df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
            df.set_index('timestamp', inplace=True)
            
            return df
        except Exception as e:
            print(f"Error fetching crypto data: {e}")
            return None

# ===============================
# Technical Indicators
# ===============================

class TechnicalIndicators:
    """Stateless collection of indicator formulas built on pandas rolling/ewm windows."""

    @staticmethod
    def sma(data, window):
        """Simple moving average: mean over a trailing window of ``window`` points."""
        roller = data.rolling(window=window)
        return roller.mean()

    @staticmethod
    def ema(data, window):
        """Exponential moving average using ``span=window``."""
        smoother = data.ewm(span=window)
        return smoother.mean()

    @staticmethod
    def rsi(data, window=14):
        """Relative Strength Index from rolling-mean gains and losses.

        Returns ``100 - 100 / (1 + avg_gain / avg_loss)`` where both averages
        are plain rolling means over ``window`` points.
        """
        change = data.diff()
        # where(cond, 0) zeroes every entry failing the test, including the
        # leading NaN produced by diff()
        ups = change.where(change > 0, 0)
        downs = -change.where(change < 0, 0)
        avg_up = ups.rolling(window=window).mean()
        avg_down = downs.rolling(window=window).mean()
        strength = avg_up / avg_down
        return 100 - (100 / (1 + strength))

    @staticmethod
    def bollinger_bands(data, window=20, num_std=2):
        """Bollinger Bands as ``(upper, middle, lower)``.

        The middle band is the rolling mean; the outer bands sit ``num_std``
        rolling standard deviations above and below it.
        """
        roller = data.rolling(window=window)
        middle = roller.mean()
        spread = roller.std() * num_std
        return middle + spread, middle, middle - spread

    @staticmethod
    def macd(data, fast=12, slow=26, signal=9):
        """MACD as ``(macd_line, signal_line, histogram)``.

        ``macd_line`` is the fast EMA minus the slow EMA, ``signal_line`` is
        an EMA of ``macd_line`` and ``histogram`` is their difference.
        """
        fast_ema = data.ewm(span=fast).mean()
        slow_ema = data.ewm(span=slow).mean()
        macd_line = fast_ema - slow_ema
        signal_line = macd_line.ewm(span=signal).mean()
        return macd_line, signal_line, macd_line - signal_line

# ===============================
# Custom Trading Environment
# ===============================

class TradingEnvironment:
    """Custom trading environment for RL agents (no external gym dependency)"""
    
    def __init__(self, data, initial_balance=10000, transaction_cost=0.001, 
                 max_position=1.0, lookback_window=20):
        
        self.data = data.reset_index(drop=True)
        self.initial_balance = initial_balance
        self.transaction_cost = transaction_cost
        self.max_position = max_position
        self.lookback_window = lookback_window
        
        # Action space: 0=Hold, 1=Buy, 2=Sell
        self.action_space = Discrete(3)
        
        # Observation space: price data + technical indicators + portfolio state
        self.observation_space = Box(
            low=-np.inf, high=np.inf, 
            shape=(self.lookback_window * 8 + 3,), 
            dtype=np.float32
        )
        
        self.reset()
    
    def reset(self, seed=None):
        """Reset the environment"""
        if seed is not None:
            np.random.seed(seed)
            random.seed(seed)
        
        self.current_step = self.lookback_window
        self.balance = self.initial_balance
        self.position = 0.0  # -1 to 1 (short to long)
        self.total_profit = 0.0
        self.trades = []
        self.portfolio_values = []
        
        return self._get_observation(), {}
    
    def _get_observation(self):
        """Get current observation state"""
        # Get lookback window of data
        start_idx = max(0, self.current_step - self.lookback_window)
        end_idx = self.current_step
        
        window_data = self.data.iloc[start_idx:end_idx]
        
        # Price features (normalized)
        prices = window_data['Close'].values
        price_returns = np.diff(np.log(prices + 1e-8))
        price_returns = np.pad(price_returns, (1, 0), 'constant', constant_values=0)
        
        volumes = window_data['Volume'].values
        volume_ma = np.mean(volumes)
        volume_norm = volumes / (volume_ma + 1e-8)
        
        # Technical indicators
        ti = TechnicalIndicators()
        
        # Ensure we have enough data for indicators
        if len(window_data) >= 20:
            rsi = ti.rsi(window_data['Close'], 14).fillna(50).values
            macd, signal, _ = ti.macd(window_data['Close'])
            macd = macd.fillna(0).values
            signal = signal.fillna(0).values
            
            sma_20 = ti.sma(window_data['Close'], 20).fillna(method='ffill').fillna(window_data['Close'].iloc[0]).values
            price_to_sma = prices / (sma_20 + 1e-8)
            
            upper_bb, middle_bb, lower_bb = ti.bollinger_bands(window_data['Close'], 20)
            # Fix the fillna issue by using scalar values
            upper_bb_filled = upper_bb.fillna(method='ffill').fillna(window_data['Close'].iloc[-1])
            lower_bb_filled = lower_bb.fillna(method='ffill').fillna(window_data['Close'].iloc[-1])
            
            # Convert to numpy arrays for calculation
            upper_bb_vals = upper_bb_filled.values
            lower_bb_vals = lower_bb_filled.values
            
            bb_position = (prices - lower_bb_vals) / (upper_bb_vals - lower_bb_vals + 1e-8)
            # Handle any remaining NaN values
            bb_position = np.nan_to_num(bb_position, nan=0.5)
        else:
            # Fill with neutral values if not enough data
            rsi = np.full(len(window_data), 50)
            macd = np.zeros(len(window_data))
            signal = np.zeros(len(window_data))
            price_to_sma = np.ones(len(window_data))
            bb_position = np.full(len(window_data), 0.5)
        
        # Pad arrays to consistent length
        target_length = self.lookback_window
        arrays_to_pad = [price_returns, volume_norm, rsi/100, macd, signal, 
                        price_to_sma, bb_position, prices/np.mean(prices)]
        
        padded_arrays = []
        for arr in arrays_to_pad:
            if len(arr) < target_length:
                padded = np.pad(arr, (target_length - len(arr), 0), 'constant', 
                               constant_values=arr[0] if len(arr) > 0 else 0)
            else:
                padded = arr[-target_length:]
            padded_arrays.append(padded)
        
        # Flatten all features
        features = np.concatenate(padded_arrays)
        
        # Portfolio state
        portfolio_state = np.array([
            self.position,
            self.balance / self.initial_balance,
            self.total_profit / self.initial_balance
        ])
        
        observation = np.concatenate([features, portfolio_state]).astype(np.float32)
        return observation
    
    def step(self, action):
        """Execute one step in the environment"""
        current_price = self.data.iloc[self.current_step]['Close']
        
        # Execute action
        reward = 0
        old_position = self.position
        
        if action == 1:  # Buy
            if self.position < self.max_position:
                trade_amount = min(0.1, self.max_position - self.position)
                cost = trade_amount * current_price * (1 + self.transaction_cost)
                if cost <= self.balance:
                    self.position += trade_amount
                    self.balance -= cost
                    self.trades.append(('BUY', self.current_step, current_price, trade_amount))
        
        elif action == 2:  # Sell
            if self.position > -self.max_position:
                trade_amount = min(0.1, self.position + self.max_position)
                if trade_amount > 0:
                    proceeds = trade_amount * current_price * (1 - self.transaction_cost)
                    self.position -= trade_amount
                    self.balance += proceeds
                    self.trades.append(('SELL', self.current_step, current_price, trade_amount))
        
        # Calculate portfolio value and reward
        portfolio_value = self.balance + self.position * current_price
        self.portfolio_values.append(portfolio_value)
        
        # Reward calculation
        if len(self.portfolio_values) > 1:
            portfolio_return = (portfolio_value - self.portfolio_values[-2]) / self.portfolio_values[-2]
            market_return = 0
            if self.current_step > 0:
                prev_price = self.data.iloc[self.current_step - 1]['Close']
                market_return = (current_price - prev_price) / prev_price
            
            # Reward based on portfolio performance vs market
            reward = portfolio_return - market_return
            
            # Penalty for excessive trading
            if action != 0:  # If not holding
                reward -= 0.001
        
        self.current_step += 1
        self.total_profit = portfolio_value - self.initial_balance
        
        # Check if episode is done
        done = self.current_step >= len(self.data) - 1
        
        info = {
            'portfolio_value': portfolio_value,
            'position': self.position,
            'balance': self.balance,
            'total_return': (portfolio_value - self.initial_balance) / self.initial_balance
        }
        
        return self._get_observation(), reward, done, False, info
    
    def render(self):
        """Render the environment (optional)"""
        if len(self.portfolio_values) > 0:
            print(f"Step: {self.current_step}, Portfolio Value: {self.portfolio_values[-1]:.2f}, "
                  f"Position: {self.position:.2f}, Balance: {self.balance:.2f}")

# ===============================
# Deep Q-Network Agent
# ===============================

class DQNAgent:
    """Deep Q-Network agent for trading (with fallback to simple Q-learning)"""
    
    def __init__(self, state_size, action_size, learning_rate=0.001):
        self.state_size = state_size
        self.action_size = action_size
        self.learning_rate = learning_rate
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.memory = deque(maxlen=2000)
        self.batch_size = 32
        
        # Initialize use_neural_network first
        self.use_neural_network = HAS_TENSORFLOW
        
        if HAS_TENSORFLOW:
            self.q_network = self._build_model()
            self.target_network = self._build_model()
            self.update_target_network()
        else:
            # Fallback to simple Q-table with state discretization
            print("TensorFlow not available. Using simplified Q-learning with state discretization.")
            self.q_table = {}
            self.q_network = None
            self.target_network = None
    
    def _build_model(self):
        """Build the neural network model"""
        if not HAS_TENSORFLOW:
            return None
            
        model = keras.Sequential([
            layers.Dense(128, input_shape=(self.state_size,), activation='relu'),
            layers.Dropout(0.2),
            layers.Dense(64, activation='relu'),
            layers.Dropout(0.2),
            layers.Dense(32, activation='relu'),
            layers.Dense(self.action_size, activation='linear')
        ])
        model.compile(optimizer=keras.optimizers.Adam(learning_rate=self.learning_rate),
                     loss='mse')
        return model
    
    def _discretize_state(self, state):
        """Discretize continuous state for Q-table"""
        # Simple discretization - bin key features
        price_change = int(state[0] * 100) // 5  # Group price changes into bins
        rsi = int(state[self.state_size//4] * 20)  # RSI discretized
        position = int(state[-3] * 10)  # Position discretized
        
        return (price_change, rsi, position)
    
    def remember(self, state, action, reward, next_state, done):
        """Store experience in replay buffer"""
        if self.use_neural_network:
            self.memory.append((state, action, reward, next_state, done))
        else:
            # For Q-table, update immediately
            self._update_q_table(state, action, reward, next_state, done)
    
    def _update_q_table(self, state, action, reward, next_state, done):
        """Update Q-table (fallback method)"""
        state_key = self._discretize_state(state)
        next_state_key = self._discretize_state(next_state)
        
        if state_key not in self.q_table:
            self.q_table[state_key] = np.zeros(self.action_size)
        
        if next_state_key not in self.q_table:
            self.q_table[next_state_key] = np.zeros(self.action_size)
        
        # Q-learning update
        if done:
            target = reward
        else:
            target = reward + 0.95 * np.max(self.q_table[next_state_key])
        
        self.q_table[state_key][action] += self.learning_rate * (target - self.q_table[state_key][action])
    
    def act(self, state):
        """Choose action using epsilon-greedy policy"""
        if np.random.random() <= self.epsilon:
            return random.randrange(self.action_size)
        
        if self.use_neural_network:
            q_values = self.q_network.predict(state.reshape(1, -1), verbose=0)
            return np.argmax(q_values[0])
        else:
            # Use Q-table
            state_key = self._discretize_state(state)
            if state_key in self.q_table:
                return np.argmax(self.q_table[state_key])
            else:
                return random.randrange(self.action_size)
    
    def replay(self):
        """Train the model on a batch of experiences"""
        if not self.use_neural_network:
            # Q-table updates happen immediately in remember()
            if self.epsilon > self.epsilon_min:
                self.epsilon *= self.epsilon_decay
            return
            
        if len(self.memory) < self.batch_size:
            return
        
        batch = random.sample(self.memory, self.batch_size)
        states = np.array([e[0] for e in batch])
        actions = np.array([e[1] for e in batch])
        rewards = np.array([e[2] for e in batch])
        next_states = np.array([e[3] for e in batch])
        dones = np.array([e[4] for e in batch])
        
        current_q_values = self.q_network.predict(states, verbose=0)
        next_q_values = self.target_network.predict(next_states, verbose=0)
        
        for i in range(self.batch_size):
            if dones[i]:
                current_q_values[i][actions[i]] = rewards[i]
            else:
                current_q_values[i][actions[i]] = rewards[i] + 0.95 * np.max(next_q_values[i])
        
        self.q_network.fit(states, current_q_values, epochs=1, verbose=0)
        
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay
    
    def update_target_network(self):
        """Update target network weights"""
        if self.use_neural_network and self.target_network is not None:
            self.target_network.set_weights(self.q_network.get_weights())

# ===============================
# Training and Evaluation
# ===============================

class TradingAgentTrainer:
    """Trainer for the trading agent"""
    
    def __init__(self, symbol="AAPL", data_provider=None):
        self.symbol = symbol
        self.data_provider = data_provider or FinancialDataProvider()
        
    def prepare_data(self, train_split=0.8):
        """Prepare training and testing data"""
        # Try Yahoo Finance first
        data = self.data_provider.get_yahoo_data(self.symbol, period="2y", interval="1d")
        
        if data is None or data.empty:
            print(f"Could not fetch data for {self.symbol}")
            return None, None
        
        # Clean data
        data = data.dropna()
        data.reset_index(drop=True, inplace=True)
        
        # Split data
        split_index = int(len(data) * train_split)
        train_data = data.iloc[:split_index].copy()
        test_data = data.iloc[split_index:].copy()
        
        return train_data, test_data
    
    def train_agent(self, episodes=100):
        """Train the DQN agent"""
        train_data, test_data = self.prepare_data()
        
        if train_data is None:
            return None, None
        
        # Create environment
        env = TradingEnvironment(train_data)
        state_size = env.observation_space.shape[0]
        action_size = env.action_space.n
        
        # Create agent
        agent = DQNAgent(state_size, action_size)
        
        # Training loop
        scores = []
        portfolio_values = []
        
        for episode in range(episodes):
            state, _ = env.reset()
            total_reward = 0
            
            while True:
                action = agent.act(state)
                next_state, reward, done, truncated, info = env.step(action)
                agent.remember(state, action, reward, next_state, done)
                
                state = next_state
                total_reward += reward
                
                if done:
                    break
            
            agent.replay()
            if episode % 10 == 0:
                agent.update_target_network()
            
            scores.append(total_reward)
            portfolio_values.append(info['portfolio_value'])
            
            if episode % 20 == 0:
                print(f"Episode {episode}/{episodes}, Score: {total_reward:.2f}, "
                      f"Portfolio Value: {info['portfolio_value']:.2f}, "
                      f"Epsilon: {agent.epsilon:.2f}")
        
        return agent, env
    
    def evaluate_agent(self, agent, test_data):
        """Evaluate the trained agent"""
        test_env = TradingEnvironment(test_data)
        state, _ = test_env.reset()
        
        total_reward = 0
        portfolio_values = []
        actions_taken = []
        
        while True:
            action = agent.act(state)
            state, reward, done, truncated, info = test_env.step(action)
            
            total_reward += reward
            portfolio_values.append(info['portfolio_value'])
            actions_taken.append(action)
            
            if done:
                break
        
        # Calculate metrics
        final_value = portfolio_values[-1]
        total_return = (final_value - test_env.initial_balance) / test_env.initial_balance
        
        # Calculate market return for comparison
        start_price = test_data.iloc[0]['Close']
        end_price = test_data.iloc[-1]['Close']
        market_return = (end_price - start_price) / start_price
        
        print("\n=== Evaluation Results ===")
        print(f"Initial Balance: ${test_env.initial_balance:,.2f}")
        print(f"Final Portfolio Value: ${final_value:,.2f}")
        print(f"Total Return: {total_return:.2%}")
        print(f"Market Return: {market_return:.2%}")
        print(f"Alpha (Excess Return): {(total_return - market_return):.2%}")
        print(f"Number of Trades: {len(test_env.trades)}")
        
        return {
            'portfolio_values': portfolio_values,
            'actions': actions_taken,
            'total_return': total_return,
            'market_return': market_return,
            'final_value': final_value,
            'trades': test_env.trades
        }

# ===============================
# Example Usage
# ===============================

def main():
    """Train a trading agent for AAPL, evaluate it and print a summary.

    Steps: report which optional dependencies are available, train (50
    episodes with TensorFlow, 100 with the Q-table fallback), fetch the data
    again to obtain the test split, evaluate, and, when matplotlib is not
    installed, print a text summary of portfolio values and action counts.

    Returns early with a message when training data or test data cannot be
    obtained.
    """

    print("=== AI Trading Agent with Reinforcement Learning ===")
    print("\nChecking dependencies...")
    print(f"NumPy/Pandas: Available")
    print(f"yfinance: {'Available' if HAS_YFINANCE else 'Not Available (will use sample data)'}")
    print(f"TensorFlow: {'Available' if HAS_TENSORFLOW else 'Not Available (will use Q-table)'}")
    print(f"Matplotlib: {'Available' if HAS_MATPLOTLIB else 'Not Available (no plots)'}")

    # Initialize trainer
    trainer = TradingAgentTrainer(symbol="AAPL")

    print(f"\nTraining AI Trading Agent for {trainer.symbol}...")
    agent, train_env = trainer.train_agent(episodes=50 if HAS_TENSORFLOW else 100)

    if agent is None:
        print("Training failed - could not fetch or generate data")
        return

    # Get test data for evaluation. This is a second fetch and can fail
    # independently of the one made during training.
    _, test_data = trainer.prepare_data()
    if test_data is None or test_data.empty:
        print("Evaluation skipped - could not fetch test data")
        return

    print("\nEvaluating trained agent...")
    results = trainer.evaluate_agent(agent, test_data)

    # Simple text-based visualization if no matplotlib
    if not HAS_MATPLOTLIB:
        print("\n=== Portfolio Performance ===")
        portfolio_values = results['portfolio_values']
        print(f"Starting Value: ${portfolio_values[0]:,.2f}")
        print(f"Ending Value: ${portfolio_values[-1]:,.2f}")
        print(f"Peak Value: ${max(portfolio_values):,.2f}")
        print(f"Lowest Value: ${min(portfolio_values):,.2f}")

        # Show some key trading points
        actions = results['actions']
        buy_count = sum(1 for a in actions if a == 1)
        sell_count = sum(1 for a in actions if a == 2)
        hold_count = sum(1 for a in actions if a == 0)

        print(f"\n=== Trading Activity ===")
        print(f"Buy signals: {buy_count}")
        print(f"Sell signals: {sell_count}")
        print(f"Hold periods: {hold_count}")

    print("\nTraining completed successfully!")

    # Only suggest installing packages when at least one is actually missing
    if not (HAS_YFINANCE and HAS_TENSORFLOW and HAS_MATPLOTLIB):
        print("\nTo install optional dependencies:")
        print("pip install yfinance tensorflow matplotlib")

if __name__ == "__main__":
    main()

=== AI Trading Agent with Reinforcement Learning ===

Checking dependencies...
NumPy/Pandas: Available
yfinance: Available
TensorFlow: Available
Matplotlib: Available

Training AI Trading Agent for AAPL...
Episode 0/50, Score: -0.64, Portfolio Value: 9983.71, Epsilon: 0.99
Episode 20/50, Score: -0.60, Portfolio Value: 9984.81, Epsilon: 0.90
Episode 40/50, Score: -0.66, Portfolio Value: 9949.01, Epsilon: 0.81

Evaluating trained agent...

=== Evaluation Results ===
Initial Balance: $10,000.00
Final Portfolio Value: $9,985.25
Total Return: -0.15%
Market Return: -14.42%
Alpha (Excess Return): 14.28%
Number of Trades: 38

Training completed successfully!

To install optional dependencies:
pip install yfinance tensorflow matplotlib
