<a href="https://colab.research.google.com/github/BlackPhosphorus/MDP-trading-algo/blob/main/MDP_algo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install robin_stocks
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import robin_stocks as r # pip install robin_stocks
import time
import random
from collections import deque
import tensorflow as tf
import logging
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam



In [None]:
def get_market_data(ticker, period='1y', interval='1d'):
    """Download price history for ``ticker`` through ``yf.download``.

    Args:
        ticker: Symbol forwarded to ``yf.download``.
        period: Look-back window, forwarded as ``period``.
        interval: Bar size, forwarded as ``interval``.

    Returns:
        The downloaded frame, or ``None`` when the download raised or came
        back empty. Failures are printed rather than re-raised.
    """
    try:
        history = yf.download(ticker, period=period, interval=interval)
        if not history.empty:
            return history
        # An empty result is reported through the same path as a failed
        # download so callers only ever have to check for ``None``.
        raise ValueError(f"No data found for ticker {ticker}")
    except Exception as exc:
        print(f"Error fetching data for {ticker}: {exc}")
    return None

In [None]:
def calculate_indicators(data):
    """Add technical-indicator columns computed from ``data['Close']``.

    The frame is modified in place and also returned. Columns added, in
    order: ``MA20``, ``MA50``, ``MA200``, ``RSI``, ``MACD``,
    ``MACD_Signal``, ``MACD_Hist``, ``Bollinger_Up``, ``Bollinger_Down``,
    ``Volatility``, ``Momentum``, ``SMA``, ``EMA``, ``Returns`` and
    ``Cumulative_Returns``.

    Args:
        data: Frame with a ``Close`` column.

    Returns:
        The same ``data`` object with the indicator columns attached.
    """
    close = data['Close']

    # Rolling statistics that feed more than one column.
    window20 = close.rolling(window=20)
    mean20 = window20.mean()
    std20 = window20.std()
    mean50 = close.rolling(window=50).mean()
    band_width = std20 * 2

    def gain_loss_ratio(window):
        # Sum of positive changes over the magnitude of summed negative
        # changes within one 14-row window.
        gains = window[window > 0].sum()
        losses = window[window < 0].sum()
        return gains / abs(losses)

    relative_strength = close.pct_change().rolling(window=14).apply(gain_loss_ratio)

    fast_ema = close.ewm(span=12, adjust=False).mean()
    slow_ema = close.ewm(span=26, adjust=False).mean()
    macd = fast_ema - slow_ema
    macd_signal = macd.ewm(span=9, adjust=False).mean()

    daily_returns = close.pct_change()

    # Assign in a fixed order so the resulting column layout is stable.
    data['MA20'] = mean20
    data['MA50'] = mean50
    data['MA200'] = close.rolling(window=200).mean()
    data['RSI'] = 100 - (100 / (1 + relative_strength))
    data['MACD'] = macd
    data['MACD_Signal'] = macd_signal
    data['MACD_Hist'] = macd - macd_signal
    data['Bollinger_Up'] = mean20 + band_width
    data['Bollinger_Down'] = mean20 - band_width
    data['Volatility'] = std20
    data['Momentum'] = close - close.shift(10)
    data['SMA'] = mean50
    data['EMA'] = close.ewm(span=50, adjust=False).mean()
    data['Returns'] = daily_returns
    data['Cumulative_Returns'] = (1 + daily_returns).cumprod()
    return data

In [None]:
def define_states(data):
    """Label every row of ``data`` with a single market-state string.

    Rules are evaluated in priority order and the first rule that matches a
    row wins (``np.select`` semantics); rows matching no rule are labelled
    ``'Neutral'``. Narrow, multi-condition rules are listed before the broad
    single-comparison rules (``MACD_*``, ``SMA_*``): a broad rule matches
    nearly every row, so any rule placed after it could almost never fire.

    Args:
        data: Frame containing the columns ``Close``, ``MA20``, ``MA50``,
            ``MA200``, ``RSI``, ``MACD``, ``MACD_Signal``, ``MACD_Hist``,
            ``Bollinger_Up``, ``Bollinger_Down``, ``Volatility``,
            ``Momentum``, ``SMA`` and ``EMA``.

    Returns:
        The same ``data`` object with a ``State`` column added in place.
    """
    uptrend = data['MA50'] > data['MA200']
    downtrend = data['MA50'] < data['MA200']
    rsi_low = data['RSI'] < 30
    rsi_high = data['RSI'] > 70
    below_lower_band = data['Close'] < data['Bollinger_Down']
    above_upper_band = data['Close'] > data['Bollinger_Up']

    # "High volatility" means more than one standard deviation above the
    # column's own mean; the threshold is computed once for both rules.
    volatility = data['Volatility']
    high_volatility = volatility > volatility.mean() + volatility.std()

    # NOTE(review): the label strings are kept exactly as they were. Some
    # read oddly against their conditions (e.g. 'Oversold' is assigned when
    # RSI > 70) -- confirm the intended naming before relying on them.
    rules = [
        ('Bullish', uptrend & rsi_low & below_lower_band),
        ('Overbought', uptrend & rsi_high & above_upper_band),
        ('Bearish', downtrend & rsi_low & below_lower_band),
        ('Oversold', downtrend & rsi_high & above_upper_band),
        ('High_Vol_Momentum_Up', high_volatility & (data['Momentum'] > 0)),
        ('High_Vol_Momentum_Down', high_volatility & (data['Momentum'] < 0)),
        ('Strong_Bullish', (data['MA20'] > data['MA50']) & uptrend),
        ('Strong_Bearish', (data['MA20'] < data['MA50']) & downtrend),
        ('RSI_Bullish', rsi_low & (data['MACD_Hist'] > 0)),
        ('RSI_Bearish', rsi_high & (data['MACD_Hist'] < 0)),
        # Broad catch-all rules go last so they cannot shadow the ones above.
        ('MACD_Positive', data['MACD'] > data['MACD_Signal']),
        ('MACD_Negative', data['MACD'] < data['MACD_Signal']),
        ('SMA_Above_EMA', data['SMA'] > data['EMA']),
        ('SMA_Below_EMA', data['SMA'] < data['EMA']),
    ]
    labels = [label for label, _ in rules]
    masks = [mask for _, mask in rules]

    data['State'] = np.select(masks, labels, default='Neutral')
    return data

In [None]:
# class QLearningTrader:
#     def __init__(self, states, actions, learning_rate=0.01, discount_factor=0.9, exploration_rate=1.0, exploration_decay=0.995):
#         self.q_table = pd.DataFrame(0.0, index=states, columns=actions)  # Initialize with float values
#         self.learning_rate = learning_rate
#         self.discount_factor = discount_factor
#         self.exploration_rate = exploration_rate
#         self.exploration_decay = exploration_decay
#         self.actions = actions

#     def choose_action(self, state):
#         if np.random.rand() < self.exploration_rate:
#             return random.choice(self.actions)
#         else:
#             return self.q_table.loc[state].idxmax()

#     def learn(self, state, action, reward, next_state):
#         predict = self.q_table.loc[state, action]
#         target = reward + self.discount_factor * self.q_table.loc[next_state].max()
#         self.q_table.loc[state, action] += self.learning_rate * (target - predict)
#         self.exploration_rate *= self.exploration_decay
## Outdated: tabular Q-learning trader, kept commented out for reference only.

In [None]:
class DQNTrader:
    """Deep Q-network agent with epsilon-greedy exploration and replay memory.

    Args:
        state_size: Number of input features per state.
        action_size: Number of discrete actions (network outputs).
        learning_rate: Learning rate passed to the Adam optimizer.
        discount_factor: Weight applied to the best next-state Q-value.
        exploration_rate: Initial probability of picking a random action.
        exploration_decay: Multiplier applied to ``exploration_rate`` after
            each training step.
        min_exploration_rate: Floor that decay never goes below.
        batch_size: Number of transitions sampled per training step.
        memory_size: Maximum number of transitions kept in replay memory.
    """

    def __init__(self, state_size, action_size, learning_rate=0.001, discount_factor=0.95, exploration_rate=1.0, exploration_decay=0.995, min_exploration_rate=0.01, batch_size=32, memory_size=2000):
        self.state_size = state_size
        self.action_size = action_size
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.exploration_rate = exploration_rate
        self.exploration_decay = exploration_decay
        self.min_exploration_rate = min_exploration_rate
        self.batch_size = batch_size
        # Bounded FIFO: once full, the oldest transitions are discarded.
        self.memory = deque(maxlen=memory_size)
        self.model = self._build_model()

    def _build_model(self):
        """Build and compile the Q-network (two 24-unit ReLU layers, linear head)."""
        model = Sequential()
        model.add(Dense(24, input_dim=self.state_size, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(optimizer=Adam(learning_rate=self.learning_rate), loss='mse')
        return model

    def remember(self, state, action, reward, next_state, done):
        """Store one ``(state, action, reward, next_state, done)`` transition."""
        self.memory.append((state, action, reward, next_state, done))

    def choose_action(self, state):
        """Return an action index for ``state`` using an epsilon-greedy policy.

        With probability ``exploration_rate`` a uniformly random index in
        ``range(action_size)`` is returned; otherwise the index of the
        largest predicted Q-value for the first row of ``state``.
        """
        if np.random.rand() <= self.exploration_rate:
            return random.randrange(self.action_size)
        q_values = self.model.predict(state, verbose=0)
        return int(np.argmax(q_values[0]))

    def replay(self):
        """Train on one random minibatch drawn from replay memory.

        Does nothing until at least ``batch_size`` transitions are stored.
        The whole minibatch is processed with two ``predict`` calls and a
        single ``fit`` call (one gradient step over the batch) instead of
        per-transition calls, which dominate run time when invoked on every
        environment step. Afterwards ``exploration_rate`` is decayed, never
        dropping below ``min_exploration_rate``.
        """
        if len(self.memory) < self.batch_size:
            return

        minibatch = random.sample(self.memory, self.batch_size)
        states, actions, rewards, next_states, dones = zip(*minibatch)

        # Each stored state is one row (or a flat vector); stack them into a
        # (batch, features) array.
        state_batch = np.vstack(states)
        next_state_batch = np.vstack(next_states)
        action_idx = np.asarray(actions, dtype=int)
        reward_arr = np.asarray(rewards, dtype=float)
        done_mask = np.asarray(dones, dtype=bool)

        best_next_q = np.amax(self.model.predict(next_state_batch, verbose=0), axis=1)
        # Terminal transitions have no future value: the target is the reward.
        targets = np.where(done_mask, reward_arr, reward_arr + self.discount_factor * best_next_q)

        # Only the Q-value of the action actually taken is moved toward its
        # target; the other outputs keep their current predictions.
        q_targets = self.model.predict(state_batch, verbose=0)
        q_targets[np.arange(len(minibatch)), action_idx] = targets

        self.model.fit(state_batch, q_targets, epochs=1, verbose=0, batch_size=self.batch_size)

        if self.exploration_rate > self.min_exploration_rate:
            self.exploration_rate = max(
                self.min_exploration_rate,
                self.exploration_rate * self.exploration_decay,
            )

In [None]:
def get_reward(action, next_state, data):
    """Return the reward for taking ``action`` at row ``next_state``.

    The reward is based on the fractional change in ``data['Close']`` from
    row ``next_state`` to the row after it, computed directly from those two
    prices.

    Args:
        action: ``1`` earns that return, ``-1`` earns its negation, any other
            value earns ``0``.
        next_state: Integer row position in ``data`` (negative positions
            count from the end).
        data: Frame with a ``Close`` column.

    Returns:
        The signed return, ``0`` for a non-trading action, or ``0.0`` when
        ``next_state`` is the last row and no following price exists.

    Raises:
        IndexError: If ``next_state`` is outside the rows of ``data``.
    """
    if action == 1:
        direction = 1
    elif action == -1:
        direction = -1
    else:
        return 0

    close = data['Close']
    n_rows = len(close)
    position = next_state + n_rows if next_state < 0 else next_state
    current_price = close.iloc[position]

    # The last row has no following bar; report a neutral reward rather than
    # NaN so it cannot contaminate training targets or return statistics.
    if position + 1 >= n_rows:
        return 0.0

    following_price = close.iloc[position + 1]
    return direction * (following_price / current_price - 1)

In [None]:
# def login_robinhood(username, password):
#     r.login(username, password)

# def get_portfolio():
#     return r.account.build_holdings()

# def place_order(action, symbol, quantity):
#     if action == 1:
#         r.orders.order_buy_market(symbol, quantity)
#     elif action == -1:
#         r.orders.order_sell_market(symbol, quantity)

In [None]:
# def monitor_market(ticker, username, password):
#     login_robinhood(username, password)
#     trader = QLearningTrader(states=['Bullish', 'Overbought', 'Bearish', 'Oversold', 'High_Vol_Momentum_Up', 'High_Vol_Momentum_Down', 'Neutral'], actions=[0, 1, -1])
#     while True:
#         data = get_market_data(ticker)
#         data = calculate_indicators(data)
#         data = define_states(data)
#         portfolio = get_portfolio()
#         holdings = portfolio.get(ticker, {})
#         quantity = int(holdings.get('quantity', 0))
#         latest_state = data['State'].iloc[-1]
#         action = trader.choose_action(latest_state)
#         next_state = data['State'].shift(-1).iloc[-1]
#         reward = get_reward(action, next_state, data)
#         trader.learn(latest_state, action, reward, next_state)
#         if action == 1:
#             quantity_to_trade = 1
#         elif action == -1 and quantity > 0:
#             quantity_to_trade = quantity  # SELL ALL
#         else:
#             quantity_to_trade = 0  # No action or insufficient holdings to sell
#         if quantity_to_trade > 0:
#             place_order(action, ticker, quantity_to_trade)
#         time.sleep(60)  # Check every minute

  ## OUTDATED: will be updated if/when real-time trading becomes feasible.

In [None]:
def get_sector_tickers(sector):
    """Return the ticker symbols registered for ``sector``.

    The lookup is case-insensitive. Known sectors are ``technology``,
    ``healthcare`` and ``financials``; any other name yields an empty list.
    """
    tickers_by_sector = dict(
        technology=['AAPL', 'MSFT', 'GOOGL', 'AMZN', 'META', 'NVDA'],
        healthcare=['JNJ', 'PFE', 'MRK', 'ABBV', 'TMO'],
        financials=['JPM', 'BAC', 'WFC', 'C', 'GS'],
    )
    key = sector.lower()
    if key in tickers_by_sector:
        return tickers_by_sector[key]
    return []

In [None]:
# Trade signal passed to get_reward for each DQN output index:
# index 0 -> 0 (no position), index 1 -> 1 (long), index 2 -> -1 (short).
_ACTION_SIGNALS = (0, 1, -1)


def _simulate_ticker(trader, data, feature_columns, learn):
    """Step ``trader`` through ``data`` row by row and collect its rewards.

    Args:
        trader: Agent exposing ``choose_action``, ``remember`` and ``replay``.
        data: Indicator frame with no missing values in ``feature_columns``.
        feature_columns: Columns forming the state vector.
        learn: When true, each transition is stored and the agent is trained
            after every step; when false the agent is only queried.

    Returns:
        Series of per-step rewards indexed by the first ``len(data) - 1``
        index labels of ``data``.
    """
    # Convert once instead of slicing the frame on every step.
    features = data[feature_columns].to_numpy(dtype=float)
    n_steps = len(data) - 1
    rewards = []

    for i in range(n_steps):
        state = features[i:i + 1]
        action = trader.choose_action(state)
        reward = get_reward(_ACTION_SIGNALS[action], i + 1, data)
        # The final step has no following price; a NaN reward would turn the
        # training target (and then the network weights) into NaN.
        if pd.isna(reward):
            reward = 0.0

        if learn:
            next_state = features[i + 1:i + 2]
            done = i == n_steps - 1
            trader.remember(state, action, reward, next_state, done)
            trader.replay()

        rewards.append(reward)

    return pd.Series(rewards, index=data.index[:len(rewards)], dtype=float)


def backtest_strategy_for_sectors(sectors, train_model=True):
    """Backtest one DQN trader per ticker and aggregate returns by sector.

    For every ticker of every sector, five years of daily data are
    downloaded, indicators and states are computed, and a ``DQNTrader`` is
    either trained while trading (``train_model=True``, model saved to
    ``dqn_trader_model_{sector}_{ticker}.h5``) or loaded from that file and
    evaluated greedily (``train_model=False``). Ticker returns are averaged
    per sector, sector returns are averaged overall, and the cumulative
    curves are plotted.

    Args:
        sectors: Sector names understood by ``get_sector_tickers``.
        train_model: Train and save models when true; load and evaluate
            previously saved models when false.

    Returns:
        ``(sharpe_ratio, max_drawdown)`` of the overall strategy, or
        ``(None, None)`` when no returns could be produced. The drawdown is
        the largest gap between the running peak of the cumulative return
        curve and the curve itself, in cumulative-return units.
    """
    combined_sector_returns = pd.DataFrame()

    for sector in sectors:
        sector_returns = pd.DataFrame()

        for ticker in get_sector_tickers(sector):
            data = get_market_data(ticker, period='5y', interval='1d')
            if data is None:
                continue
            data = calculate_indicators(data)
            data = define_states(data)

            feature_columns = data.columns.drop(['State'])

            # Rolling indicators are NaN during their warm-up window; a
            # single NaN input makes every network output and gradient NaN,
            # so those rows are removed before the agent sees them.
            data = data.dropna(subset=list(feature_columns))
            if len(data) < 2:
                print(f"Not enough complete rows to backtest {ticker}; skipping.")
                continue

            trader = DQNTrader(len(feature_columns), len(_ACTION_SIGNALS))
            model_path = f'dqn_trader_model_{sector}_{ticker}.h5'

            if train_model:
                strategy_returns = _simulate_ticker(trader, data, feature_columns, learn=True)
                trader.model.save(model_path)
            else:
                trader.model = load_model(model_path)
                # Evaluate the loaded policy itself: a fresh trader starts
                # fully exploratory and would otherwise act at random.
                trader.exploration_rate = 0.0
                strategy_returns = _simulate_ticker(trader, data, feature_columns, learn=False)

            # Reindex so the column spans every row of this ticker's data.
            sector_returns[ticker] = strategy_returns.reindex(data.index)

        if not sector_returns.empty:
            sector_returns['Sector_Strategy_Return'] = sector_returns.mean(axis=1)
            combined_sector_returns[sector] = sector_returns['Sector_Strategy_Return']

    if combined_sector_returns.empty:
        print("No valid sector returns were generated.")
        return None, None

    combined_sector_returns['Overall_Strategy_Return'] = combined_sector_returns.mean(axis=1)
    combined_sector_returns['Cumulative_Overall_Strategy_Return'] = (1 + combined_sector_returns['Overall_Strategy_Return']).cumprod()

    overall = combined_sector_returns['Overall_Strategy_Return']
    cumulative = combined_sector_returns['Cumulative_Overall_Strategy_Return']

    plt.figure(figsize=(14, 7))
    for sector in sectors:
        if sector in combined_sector_returns:
            plt.plot((1 + combined_sector_returns[sector]).cumprod(), label=f'{sector} Strategy Return')
    plt.plot(cumulative, label='Overall Strategy Return', linewidth=2, linestyle='--')
    plt.legend()
    plt.show()

    # Annualised with 252 periods per year (daily bars).
    sharpe_ratio = np.mean(overall) / np.std(overall) * np.sqrt(252)
    max_drawdown = (cumulative.cummax() - cumulative).max()
    return sharpe_ratio, max_drawdown

# Run the backtest twice over the same sectors: first training (and saving)
# the models, then re-loading the saved models for evaluation.
sectors = ['technology', 'healthcare', 'financials']
for should_train in (True, False):
    sharpe_ratio, max_drawdown = backtest_strategy_for_sectors(sectors, train_model=should_train)
    if sharpe_ratio is None or max_drawdown is None:
        print("no results.")
    else:
        print(f'Sharpe Ratio: {sharpe_ratio}, Max Drawdown: {max_drawdown}')


[*********************100%%**********************]  1 of 1 completed
Exception ignored in: <function WeakKeyDictionary.__init__.<locals>.remove at 0x7c0a5aeda560>
Traceback (most recent call last):
  File "/usr/lib/python3.10/weakref.py", line 370, in remove
    def remove(k, selfref=ref(self)):
KeyboardInterrupt: 


KeyboardInterrupt: 

In [None]:
# ticker = 'AAPL'
# username = 'your_username'
# password = 'your_password'
# monitor_market(ticker, username, password)