## Summary
In this notebook, I use technical indicators to predict the move in the price nof n-days ahead

## Imports

In [84]:
from IPython.display import clear_output, display
!pip install yfinance==0.1.70
clear_output()
import yfinance as yf

In [85]:
import warnings
warnings.filterwarnings('ignore')

import os
import datetime
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import KNeighborsClassifier as KNN
from sklearn.model_selection import train_test_split
from sklearn.metrics import *
from sklearn.linear_model import LogisticRegression as LR
from sklearn.tree import DecisionTreeClassifier as DT
from sklearn.ensemble import RandomForestClassifier as RF
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

import plotly.io as pio
import cufflinks as cf
import plotly.express as px

pd.options.display.max_columns=None
pd.options.display.max_rows=100
pd.set_option('display.precision', 2)
pd.set_option('display.float_format', lambda x: '%.3f' % x)
pd.options.plotting.backend = 'plotly'

pio.templates.default = "plotly"
mpl.rcParams['figure.figsize'] = (8, 6)
mpl.rcParams['axes.grid'] = False
cf.go_offline()

In [86]:
tickers = [
'AAPL', 'MSFT', 'GOOGL', 'AMZN', 'JNJ', 'XOM', 'V', 'WMT', 'NVDA', 'PG',
'LLY', 'CVX', 'MA', 'HD', 'PFE', 'ABBV', 'MRK', 'KO', 'PEP', 'AVGO', 'ORCL',
'TMO', 'AZN', 'CSCO', 'DHR', 'MCD', 'ABT', 'TMUS', 'ACN', 'NEE', 'VZ', 'TTE',
'LIN', 'DIS', 'PM', 'BMY', 'CMCSA', 'SCHW', 'UPS', 'TXN', 'RTX', 'COP'
]

## Transform data

In [87]:
def transform_data(df, ticker):
    df = df.copy()
    window = 0
    cols = df.columns
    for column in cols[:-1]:
        for i in range(1, window):
            df[f'{column}_t-{i}'] = df[column].shift(i)

    df = df.dropna(how='any')

    return df

## Data input

In [88]:
def read_data(tickers, trade_length, transform=True):
    train_dfs = []
    test_dfs = {}

    
    for ticker in tickers:
        path = f'/kaggle/input/us-stocks-price-prediction/Data/train/Technicals/{ticker}_technicals_train.csv'
        df = pd.read_csv(path, parse_dates=['Date'])#.drop('Date', axis=1)
        df.set_index('Date', inplace=True)
        df['label'] = df['close'].diff(trade_length).shift(-trade_length).map(lambda x: np.sign(x)).map({-1:0, 1:1})
        df.drop(['EP', 'rsi_oversold'], axis=1, inplace=True)

        try:
            df.drop(['rsi_overbought'], axis=1, inplace=True)
        except Exception as e:
            pass

        close = df.pop('close')

        if transform:
            df = transform_data(df, ticker)
        
        df['close'] = close
        train_size = int(len(df)*0.8)

        train_df = df[:train_size]
        test_df = df[train_size:]

        train_dfs.append(train_df)
        test_dfs[ticker] = test_df
    
    train_df = pd.concat(train_dfs)
    
    return train_df, test_dfs

## Model training

In [89]:
models_per_trade_length = {}

for trade_length in range(5, 31, 5):
    
    print(f'trade length = {trade_length}')

    df, test_dfs = read_data(tickers[:], trade_length)

    x = df.drop(['label', 'close'], axis=1)
    y = df[['label']]

    x.isna().sum().to_frame().sort_values(by=0, ascending=False).sum()

    ## Split data and prepare models and metrics

    shuffle = True
    train_size = int(len(x)*0.8)

    if shuffle:
        x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
    else:
        x_train, x_test, y_train, y_test = x[:train_size], x[train_size:], y[:train_size], y[train_size:]

    lr = LR()
    dt = DT()
    rf = RF()
    svr = SVC(probability=True)
    knn = KNN(10)

    models = [dt, rf, knn]
    models_names = ['Descision Tree', 'Random Forest','KNN']
    metrics = ['accuracy_score', 'precision_score', 'recall_score', 'confusion_matrix']
    train_scores = {}
    test_scores = {}
    model_preds = {}
    test_df = y_test.copy()
    train_df = y_train.copy()

    for model, model_name in zip(models[:], models_names[:]):
        model.fit(x_train, y_train)

        test_df[model_name] = model.predict(x_test).flatten()
        train_df[model_name] = model.predict(x_train).flatten()

        train_scores[model_name] = []
        test_scores[model_name] = []

        for metric in metrics:
            train_scores[model_name].append(eval(metric)(train_df[model_name], y_train))
            test_scores[model_name].append(eval(metric)(test_df[model_name], y_test))
    
    test_df.sort_index(inplace=True)
    train_df.sort_index(inplace=True)

    test_scores = pd.DataFrame(test_scores, index = metrics)
    train_scores = pd.DataFrame(train_scores, index = metrics)
    
#     display(train_scores)
    display(test_scores)
    
    models_per_trade_length[trade_length] = models

trade length = 5


Unnamed: 0,Descision Tree,Random Forest,KNN
accuracy_score,0.641,0.728,0.640
precision_score,0.679,0.863,0.654
recall_score,0.681,0.713,0.690
confusion_matrix,"[[4331, 3004], [2966, 6344]]","[[4054, 1285], [3243, 8063]]","[[4550, 3239], [2747, 6109]]"


trade length = 10


Unnamed: 0,Descision Tree,Random Forest,KNN
accuracy_score,0.726,0.791,0.704
precision_score,0.764,0.913,0.729
recall_score,0.769,0.773,0.758
confusion_matrix,"[[4603, 2309], [2239, 7470]]","[[4221, 847], [2621, 8932]]","[[4568, 2647], [2274, 7132]]"


trade length = 15


Unnamed: 0,Descision Tree,Random Forest,KNN
accuracy_score,0.769,0.820,0.730
precision_score,0.804,0.939,0.763
recall_score,0.817,0.801,0.789
confusion_matrix,"[[4576, 1993], [1837, 8190]]","[[4043, 620], [2370, 9563]]","[[4337, 2413], [2076, 7770]]"


trade length = 20


Unnamed: 0,Descision Tree,Random Forest,KNN
accuracy_score,0.790,0.835,0.751
precision_score,0.828,0.954,0.789
recall_score,0.830,0.812,0.803
confusion_matrix,"[[4618, 1753], [1730, 8468]]","[[4086, 474], [2262, 9747]]","[[4375, 2160], [1973, 8061]]"


trade length = 25


Unnamed: 0,Descision Tree,Random Forest,KNN
accuracy_score,0.814,0.837,0.778
precision_score,0.861,0.968,0.819
recall_score,0.843,0.808,0.823
confusion_matrix,"[[4622, 1426], [1649, 8842]]","[[3907, 333], [2364, 9935]]","[[4463, 1858], [1808, 8410]]"


trade length = 30


Unnamed: 0,Descision Tree,Random Forest,KNN
accuracy_score,0.832,0.851,0.789
precision_score,0.869,0.969,0.828
recall_score,0.868,0.827,0.840
confusion_matrix,"[[4550, 1380], [1401, 9178]]","[[3811, 325], [2140, 10233]]","[[4285, 1819], [1666, 8739]]"


## Calculate Returns

***Buy if pred = 1 which means the price is predicted to up after 5 days <br>
After 5 days check if the buy signal is still valid <br>
if yes keep holding your position <br>
if no sell <br>
    ***

In [93]:
def calculate_returns(models, models_names, test_dfs, trade_length):
    
    returns_per_model = {}
    
    for model, model_name in zip(models, models_names):

        returns = {}

        for ticker, test_df in test_dfs.items():
            p = 10000 # $10000

            preds = model.predict(test_df.drop(['label', 'close'], axis=1))
            signals = test_df[['close', 'label']]
            signals['pred'] = preds
            signals = signals.replace(0, -1)
            signals.reset_index(inplace=True)

            signals['holding'] = np.zeros(len(signals)).astype(bool)
            signals['buy'] = np.zeros(len(signals)).astype(bool)
            signals['sell'] = np.zeros(len(signals)).astype(bool)

            sell = False
            buy = False
            holding = False
            counter = 1

            PnL = []

            for i, row in signals.iterrows():

                signals.holding[i] = holding 
                if counter % trade_length == 0 and counter != 0 and holding:

                    sell = True
                    buy = False
                    holding = False
                    counter = 1


                if holding:
                    counter+=1
                    continue

                if row.pred == 1:
                    buy = True
                    n_shares = p/row.close
                    holding = True

                signals.buy[i] = buy
                signals.sell[i] = sell

            for i, row in signals.iterrows():

                if row.buy and not row.holding:

                    buying_price = row.close
                    n_shares = np.floor(p/buying_price)

                if row.holding and not row.buy and row.sell:

                    selling_price = row.close
                    profit = (selling_price - buying_price) * n_shares
                    PnL.append(profit)

                    p = row.close * n_shares

                returns[ticker] = [sum(PnL), (signals.close.iloc[-1] - signals.close.iloc[0]) * 10000/signals.close.iloc[0]]

        returns = pd.DataFrame(returns).T
        returns.columns = ['predicted_returns', 'buy_and_hold_returns']
        returns = returns.astype(int)
        returns['difference'] = returns.predicted_returns - returns.buy_and_hold_returns
        returns_per_model[model_name] = returns
    
    returns_df = []
    for model_name in models_names:
        returns_df.append((returns_per_model[model_name].sum()/100/len(returns_per_model[model_name])).to_frame())

    returns_df = pd.concat(returns_df, axis=1).round(2)
    returns_df.columns = models_names
    
    return returns_df

In [94]:
try:
    ticker_obj = yf.Ticker('SPY')
    history = ticker_obj.history(start='2017-12-26', end='2019-12-20')
    spx_return = (history.Close.iloc[-1] / history.Close.iloc[0] - 1) * 100
except:
    pass

## Returns for each model for each trade length 

In [96]:
print(f'Returns of the SPX for the testing period = {round(spx_return, 2)}')

for trade_length, models_list in models_per_trade_length.items():
    print(f'trade_length = {trade_length}')
    display(calculate_returns(models_list, models_names, test_dfs, trade_length))

Returns of the SPX for the testing period = 24.09
trade_length = 5


Unnamed: 0,Descision Tree,Random Forest,KNN
predicted_returns,23.25,30.55,18.06
buy_and_hold_returns,31.76,31.76,31.76
difference,-8.51,-1.2,-13.7


trade_length = 10


Unnamed: 0,Descision Tree,Random Forest,KNN
predicted_returns,24.88,27.08,21.87
buy_and_hold_returns,31.76,31.76,31.76
difference,-6.88,-4.68,-9.88


trade_length = 15


Unnamed: 0,Descision Tree,Random Forest,KNN
predicted_returns,27.92,26.2,20.39
buy_and_hold_returns,31.76,31.76,31.76
difference,-3.84,-5.55,-11.37


trade_length = 20


Unnamed: 0,Descision Tree,Random Forest,KNN
predicted_returns,22.89,21.84,20.75
buy_and_hold_returns,31.76,31.76,31.76
difference,-8.86,-9.92,-11.0


trade_length = 25


Unnamed: 0,Descision Tree,Random Forest,KNN
predicted_returns,21.4,20.43,19.7
buy_and_hold_returns,31.76,31.76,31.76
difference,-10.35,-11.33,-12.06


trade_length = 30


Unnamed: 0,Descision Tree,Random Forest,KNN
predicted_returns,23.61,12.95,24.53
buy_and_hold_returns,31.76,31.76,31.76
difference,-8.15,-18.81,-7.22
