# Importing Libraries

In [1]:
import yfinance as yf
import pandas as pd
import talib as ta
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Getting the dataset and Preprocessing it

In [2]:
def load_stock_data(ticker, start_date, end_date):
    df = yf.download(ticker, start=start_date, end=end_date)
    df.dropna(inplace=True)    
    return df

In [3]:
dfg = load_stock_data('GOOGL', '2000-01-01', '2023-03-26')

[*********************100%***********************]  1 of 1 completed


In [4]:
dfg

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2004-08-19,2.502503,2.604104,2.401401,2.511011,2.511011,893181924
2004-08-20,2.527778,2.729730,2.515015,2.710460,2.710460,456686856
2004-08-23,2.771522,2.839840,2.728979,2.737738,2.737738,365122512
2004-08-24,2.783784,2.792793,2.591842,2.624374,2.624374,304946748
2004-08-25,2.626627,2.702703,2.599600,2.652653,2.652653,183772044
...,...,...,...,...,...,...
2023-03-20,100.120003,101.750000,99.870003,101.220001,101.220001,32960400
2023-03-21,101.250000,105.099998,101.220001,104.919998,104.919998,42110300
2023-03-22,104.269997,106.589996,103.330002,103.370003,103.370003,43427400
2023-03-23,105.059998,106.300003,104.459999,105.599998,105.599998,40797800


In [5]:
def data_preprocess(df):
    df['RSI'] = ta.RSI(df['Close'], timeperiod=14)    
    df['EMA50'] = ta.EMA(df['Close'], timeperiod=50)
    df['EMA200'] = ta.EMA(df['Close'], timeperiod=200)
    df['EMA13'] = ta.EMA(df['Close'], timeperiod=13)
    df['EMA26'] = ta.EMA(df['Close'], timeperiod=26)
    df['MACD'], _, _ = ta.MACD(df['Close'], fastperiod=12, slowperiod=26, signalperiod=9)
    df['Signal'] = ''
    df.loc[(df['RSI'] > 60) | (df['EMA200'] > df['EMA50']) | (df['EMA26'] > df['EMA13']) | (df['MACD'] < 0), 'Signal'] = 'SELL'
    df.loc[(df['RSI'] > 30) & (df['RSI'] < 60) & (df['EMA50'] > df['EMA200']) & (df['EMA26'] < df['EMA13']) & (df['MACD'] > 0), 'Signal'] = 'BUY'
    
    df.dropna(inplace=True)    
    return df

In [6]:
dfg_pre = data_preprocess(dfg)

In [7]:
dfg_pre

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,RSI,EMA50,EMA200,EMA13,EMA26,MACD,Signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2005-06-03,7.176927,7.239740,6.942192,7.013514,7.013514,750540708,79.570203,5.738345,4.490811,6.599478,6.188008,0.451730,SELL
2005-06-06,7.066817,7.351101,7.052803,7.280781,7.280781,900134964,82.771387,5.798833,4.518572,6.696807,6.268955,0.469406,SELL
2005-06-07,7.434935,7.497247,7.264765,7.335335,7.335335,971947080,83.345049,5.859088,4.546599,6.788025,6.347946,0.482257,SELL
2005-06-08,7.328579,7.337087,6.956957,6.995996,6.995996,1027007964,68.145385,5.903673,4.570971,6.817735,6.395949,0.459760,SELL
2005-06-09,7.125125,7.219720,7.021021,7.164915,7.164915,656986356,70.982305,5.953133,4.596782,6.867332,6.452910,0.450369,SELL
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-03-20,100.120003,101.750000,99.870003,101.220001,101.220001,32960400,63.879496,94.949889,102.049693,96.150838,95.193775,1.140843,SELL
2023-03-21,101.250000,105.099998,101.220001,104.919998,104.919998,42110300,69.118412,95.340873,102.078254,97.403575,95.914236,1.741210,SELL
2023-03-22,104.269997,106.589996,103.330002,103.370003,103.370003,43427400,64.873500,95.655741,102.091107,98.255922,96.466515,2.068093,SELL
2023-03-23,105.059998,106.300003,104.459999,105.599998,105.599998,40797800,67.925555,96.045712,102.126021,99.305076,97.143069,2.478522,SELL


In [8]:
dfam = load_stock_data('AMZN', '2000-01-01', '2023-03-26')

[*********************100%***********************]  1 of 1 completed


In [9]:
dfam

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2000-01-03,4.075000,4.478125,3.952344,4.468750,4.468750,322352000
2000-01-04,4.268750,4.575000,4.087500,4.096875,4.096875,349748000
2000-01-05,3.525000,3.756250,3.400000,3.487500,3.487500,769148000
2000-01-06,3.565625,3.634375,3.200000,3.278125,3.278125,375040000
2000-01-07,3.350000,3.525000,3.309375,3.478125,3.478125,210108000
...,...,...,...,...,...,...
2023-03-20,98.410004,98.480003,95.699997,97.709999,97.709999,62388900
2023-03-21,98.139999,100.849998,98.000000,100.610001,100.610001,58597300
2023-03-22,100.449997,102.099998,98.610001,98.699997,98.699997,57475400
2023-03-23,100.430000,101.059998,97.620003,98.709999,98.709999,57559300


In [10]:
dfam_pre = data_preprocess(dfam)

In [11]:
dfam_pre

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,RSI,EMA50,EMA200,EMA13,EMA26,MACD,Signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2000-10-16,1.406250,1.415625,1.206250,1.215625,1.215625,142954000,29.847250,1.837001,2.583973,1.553552,1.716049,-0.182109,SELL
2000-10-17,1.243750,1.259375,1.043750,1.096875,1.096875,302810000,27.277272,1.807977,2.569176,1.488312,1.670184,-0.203485,SELL
2000-10-18,0.993750,1.268750,0.968750,1.256250,1.256250,375774000,35.325979,1.786340,2.556112,1.455161,1.639523,-0.205200,SELL
2000-10-19,1.328125,1.406250,1.209375,1.387500,1.387500,217498000,41.106743,1.770700,2.544484,1.445495,1.620854,-0.193735,SELL
2000-10-20,1.384375,1.562500,1.356250,1.540625,1.540625,232438000,47.052805,1.761677,2.534495,1.459085,1.614911,-0.170330,SELL
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-03-20,98.410004,98.480003,95.699997,97.709999,97.709999,62388900,54.506122,95.994143,108.698671,95.870113,95.782249,0.151208,SELL
2023-03-21,98.139999,100.849998,98.000000,100.610001,100.610001,58597300,59.944940,96.175157,108.618187,96.547240,96.139861,0.513065,SELL
2023-03-22,100.449997,102.099998,98.610001,98.699997,98.699997,57475400,55.259213,96.274171,108.519498,96.854777,96.329500,0.638359,SELL
2023-03-23,100.430000,101.059998,97.620003,98.709999,98.709999,57559300,55.278928,96.369693,108.421892,97.119808,96.505834,0.730048,SELL


In [12]:
dfm = load_stock_data('MSFT', '2000-01-01', '2023-03-26')

[*********************100%***********************]  1 of 1 completed


In [13]:
dfm

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2000-01-03,58.687500,59.312500,56.000000,58.281250,36.361572,53228400
2000-01-04,56.781250,58.562500,56.125000,56.312500,35.133266,54119000
2000-01-05,55.562500,58.187500,54.687500,56.906250,35.503716,64059600
2000-01-06,56.093750,56.937500,54.187500,55.000000,34.314400,54976600
2000-01-07,54.312500,56.125000,53.656250,55.718750,34.762836,62013600
...,...,...,...,...,...,...
2023-03-20,276.980011,277.480011,269.850006,272.230011,272.230011,43466600
2023-03-21,274.880005,275.000000,269.519989,273.779999,273.779999,34558700
2023-03-22,273.399994,281.040009,272.179993,272.290009,272.290009,34873300
2023-03-23,277.940002,281.059998,275.200012,277.660004,277.660004,36610900


In [14]:
dfm_pre = data_preprocess(dfm)

In [15]:
dfm_pre

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,RSI,EMA50,EMA200,EMA13,EMA26,MACD,Signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2000-10-16,26.750000,26.906250,24.781250,25.187500,15.714438,119759000,19.789613,31.728628,40.175312,27.943053,29.678459,-1.891401,SELL
2000-10-17,25.937500,26.218750,25.125000,25.218750,15.733933,81276600,20.117376,31.473339,40.026491,27.553867,29.348111,-1.956176,SELL
2000-10-18,24.812500,26.625000,24.218750,25.875000,16.143370,110536400,26.875079,31.253796,39.885680,27.314029,29.090843,-1.932283,SELL
2000-10-19,29.218750,31.093750,29.000000,30.937500,19.301855,256993200,57.055886,31.241392,39.796644,27.831668,29.227633,-1.487697,SELL
2000-10-20,30.656250,33.062500,30.562500,32.593750,20.335186,160378600,62.507832,31.294426,39.724973,28.511965,29.476975,-0.990299,SELL
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-03-20,276.980011,277.480011,269.850006,272.230011,272.230011,43466600,61.893345,254.671187,255.848215,263.232717,259.001024,4.772832,SELL
2023-03-21,274.880005,275.000000,269.519989,273.779999,273.779999,34558700,62.909061,255.420552,256.026640,264.739472,260.095763,5.217500,SELL
2023-03-22,273.399994,281.040009,272.179993,272.290009,272.290009,34873300,61.219775,256.082099,256.188465,265.818120,260.999041,5.387568,SELL
2023-03-23,277.940002,281.059998,275.200012,277.660004,277.660004,36610900,64.880092,256.928291,256.402112,267.509817,262.233186,5.887791,SELL


In [16]:
dfme = load_stock_data('META', '2000-01-01', '2023-03-26')

[*********************100%***********************]  1 of 1 completed


In [17]:
dfme

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2012-05-18,42.049999,45.000000,38.000000,38.230000,38.230000,573576400
2012-05-21,36.529999,36.660000,33.000000,34.029999,34.029999,168192700
2012-05-22,32.610001,33.590000,30.940001,31.000000,31.000000,101786600
2012-05-23,31.370001,32.500000,31.360001,32.000000,32.000000,73600000
2012-05-24,32.950001,33.209999,31.770000,33.029999,33.029999,50237200
...,...,...,...,...,...,...
2023-03-20,198.479996,199.360001,193.639999,197.809998,197.809998,25186300
2023-03-21,203.199997,203.550003,197.949997,202.160004,202.160004,31827000
2023-03-22,202.500000,207.369995,199.669998,199.809998,199.809998,28477800
2023-03-23,202.839996,207.880005,202.149994,204.279999,204.279999,27389700


In [18]:
dfme_pre = data_preprocess(dfme)

In [19]:
dfme_pre

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,RSI,EMA50,EMA200,EMA13,EMA26,MACD,Signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2013-03-07,27.570000,28.680000,27.469999,28.580000,28.580000,74540200,53.299947,28.005706,25.670100,27.833552,28.148382,-0.337753,SELL
2013-03-08,28.430000,28.469999,27.730000,27.959999,27.959999,44198900,48.552036,28.003914,25.692885,27.851616,28.134428,-0.300819,SELL
2013-03-11,28.010000,28.639999,27.830000,28.139999,28.139999,35642100,49.946089,28.009250,25.717234,27.892813,28.134841,-0.254094,SELL
2013-03-12,28.100000,28.320000,27.600000,27.830000,27.830000,27569600,47.556132,28.002221,25.738257,27.883840,28.112260,-0.239321,SELL
2013-03-13,27.620001,27.650000,26.920000,27.080000,27.080000,39619500,42.284394,27.966055,25.751608,27.769006,28.035796,-0.284848,SELL
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-03-20,198.479996,199.360001,193.639999,197.809998,197.809998,25186300,65.972865,168.992707,164.982213,189.365669,181.436091,8.620685,SELL
2023-03-21,203.199997,203.550003,197.949997,202.160004,202.160004,31827000,68.550291,170.293385,165.352142,191.193431,182.971195,8.947616,SELL
2023-03-22,202.500000,207.369995,199.669998,199.809998,199.809998,28477800,65.656914,171.450899,165.695006,192.424369,184.218514,8.914326,SELL
2023-03-23,202.839996,207.880005,202.149994,204.279999,204.279999,27389700,68.389944,172.738315,166.078936,194.118030,185.704550,9.143237,SELL


In [20]:
dfn = load_stock_data('NFLX', '2000-01-01', '2023-03-26')

[*********************100%***********************]  1 of 1 completed


In [21]:
dfn

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2002-05-23,1.156429,1.242857,1.145714,1.196429,1.196429,104790000
2002-05-24,1.214286,1.225000,1.197143,1.210000,1.210000,11104800
2002-05-28,1.213571,1.232143,1.157143,1.157143,1.157143,6609400
2002-05-29,1.164286,1.164286,1.085714,1.103571,1.103571,6757800
2002-05-30,1.107857,1.107857,1.071429,1.071429,1.071429,10154200
...,...,...,...,...,...,...
2023-03-20,299.790009,307.500000,296.000000,305.130005,305.130005,5113400
2023-03-21,306.320007,307.920013,300.429993,305.790009,305.790009,4886300
2023-03-22,306.309998,306.450012,293.540009,293.899994,293.899994,5808000
2023-03-23,304.679993,322.779999,304.140015,320.369995,320.369995,15653300


In [22]:
dfn_pre = data_preprocess(dfn)

In [23]:
dfn_pre

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,RSI,EMA50,EMA200,EMA13,EMA26,MACD,Signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2003-03-10,1.141429,1.164286,1.122143,1.123571,1.123571,3424400,58.984931,0.967998,0.872775,1.111655,1.048159,0.068681,BUY
2003-03-11,1.123571,1.125000,1.071429,1.107143,1.107143,5531400,56.877317,0.973454,0.875107,1.111010,1.052528,0.062820,BUY
2003-03-12,1.103571,1.107857,1.064286,1.075714,1.075714,5751200,52.977259,0.977464,0.877103,1.105968,1.054246,0.055005,BUY
2003-03-13,1.097143,1.183571,1.089286,1.160714,1.160714,7560000,60.804980,0.984651,0.879925,1.113789,1.062132,0.055036,SELL
2003-03-14,1.185714,1.210714,1.170714,1.182143,1.182143,6129200,62.499813,0.992395,0.882932,1.123554,1.071022,0.056142,SELL
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-03-20,299.790009,307.500000,296.000000,305.130005,305.130005,5113400,41.293028,320.039576,308.123378,307.594139,316.530490,-9.709873,SELL
2023-03-21,306.320007,307.920013,300.429993,305.790009,305.790009,4886300,41.856482,319.480769,308.100161,307.336406,315.734899,-9.072837,SELL
2023-03-22,306.309998,306.450012,293.540009,293.899994,293.899994,5808000,35.286062,318.477602,307.958866,305.416919,314.117499,-9.418832,SELL
2023-03-23,304.679993,322.779999,304.140015,320.369995,320.369995,15653300,52.981327,318.551813,308.082359,307.553073,314.580647,-7.471006,SELL


In [24]:
df = pd.concat([dfam_pre, dfg_pre, dfm_pre, dfme_pre, dfn_pre])

In [25]:
df

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,RSI,EMA50,EMA200,EMA13,EMA26,MACD,Signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2000-10-16,1.406250,1.415625,1.206250,1.215625,1.215625,142954000,29.847250,1.837001,2.583973,1.553552,1.716049,-0.182109,SELL
2000-10-17,1.243750,1.259375,1.043750,1.096875,1.096875,302810000,27.277272,1.807977,2.569176,1.488312,1.670184,-0.203485,SELL
2000-10-18,0.993750,1.268750,0.968750,1.256250,1.256250,375774000,35.325979,1.786340,2.556112,1.455161,1.639523,-0.205200,SELL
2000-10-19,1.328125,1.406250,1.209375,1.387500,1.387500,217498000,41.106743,1.770700,2.544484,1.445495,1.620854,-0.193735,SELL
2000-10-20,1.384375,1.562500,1.356250,1.540625,1.540625,232438000,47.052805,1.761677,2.534495,1.459085,1.614911,-0.170330,SELL
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-03-20,299.790009,307.500000,296.000000,305.130005,305.130005,5113400,41.293028,320.039576,308.123378,307.594139,316.530490,-9.709873,SELL
2023-03-21,306.320007,307.920013,300.429993,305.790009,305.790009,4886300,41.856482,319.480769,308.100161,307.336406,315.734899,-9.072837,SELL
2023-03-22,306.309998,306.450012,293.540009,293.899994,293.899994,5808000,35.286062,318.477602,307.958866,305.416919,314.117499,-9.418832,SELL
2023-03-23,304.679993,322.779999,304.140015,320.369995,320.369995,15653300,52.981327,318.551813,308.082359,307.553073,314.580647,-7.471006,SELL


# Model Building

In [26]:
from sklearn.tree import DecisionTreeClassifier
# Step 3: Feature Selection
features = ['RSI', 'EMA50', 'EMA200', 'MACD', 'EMA13', 'EMA26']
X = df[features].values

y = df['Signal'].apply(lambda x: 1 if x == 'BUY' else 0).values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = DecisionTreeClassifier()
model.fit(X_train, y_train)

DecisionTreeClassifier()

In [27]:
# Evaluate the model on the test data
y_pred = model.predict(X_test)

# Calculate the accuracy score
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy:', accuracy)

Accuracy: 0.9965746092913723


In [28]:
from sklearn.ensemble import RandomForestClassifier
# Step 3: Feature Selection
features = ['RSI', 'EMA50', 'EMA200', 'MACD', 'EMA13', 'EMA26']
X = df[features].values

y = df['Signal'].apply(lambda x: 1 if x == 'BUY' else 0).values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
modelR = RandomForestClassifier()
modelR.fit(X_train, y_train)

RandomForestClassifier()

In [29]:
# Evaluate the model on the test data
y_pred = modelR.predict(X_test)

# Calculate the accuracy score
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy:', accuracy)

Accuracy: 0.9970027831299507


In [30]:
from xgboost import XGBClassifier
features = ['RSI', 'EMA50', 'EMA200', 'MACD', 'EMA13', 'EMA26']
X = df[features].values

y = df['Signal'].apply(lambda x: 1 if x == 'BUY' else 0).values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
modelX = XGBClassifier()
modelX.fit(X_train, y_train)

XGBClassifier(base_score=0.5, booster='gbtree', callbacks=None,
              colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1,
              early_stopping_rounds=None, enable_categorical=False,
              eval_metric=None, feature_types=None, gamma=0, gpu_id=-1,
              grow_policy='depthwise', importance_type=None,
              interaction_constraints='', learning_rate=0.300000012,
              max_bin=256, max_cat_threshold=64, max_cat_to_onehot=4,
              max_delta_step=0, max_depth=6, max_leaves=0, min_child_weight=1,
              missing=nan, monotone_constraints='()', n_estimators=100,
              n_jobs=0, num_parallel_tree=1, predictor='auto', random_state=0, ...)

In [31]:
# Evaluate the model on the test data
y_pred = modelX.predict(X_test)

# Calculate the accuracy score
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy:', accuracy)

Accuracy: 0.9976450438878185


# Model Testing using AAPL stocks

In [32]:
dfap = load_stock_data('AAPL', '2000-01-01', '2023-03-26')

[*********************100%***********************]  1 of 1 completed


In [33]:
dfap_pre = data_preprocess(dfap)

In [34]:
features = ['RSI', 'EMA50', 'EMA200', 'MACD', 'EMA13', 'EMA26']
X = dfap_pre[features].values

y = dfap_pre['Signal'].apply(lambda x: 1 if x == 'BUY' else 0).values

In [35]:
def backtest_strategy(df, investment):
    df['Return'] = 0.0
    shares = 0

    for i in range(1, len(df)):
        if df['y_pred'][i] == 1 and df['y_pred'][i-1] == 0:  # Buy signal
            shares = investment / df['Close'][i]
            investment = 0
        elif df['y_pred'][i] == 0 and df['y_pred'][i-1] == 1:  # Sell signal
            investment = shares * df['Close'][i]
            shares = 0
        elif df['y_pred'][i] == 1 and df['y_pred'][i-1] == 1:  # Hold
            pass
        elif df['y_pred'][i] == 0 and df['y_pred'][i-1] == 0:  # Hold
            pass
        df['Return'][i] = (investment + shares * df['Close'][i]) / investment

    total_return = df['Return'][-1] * investment
    
    return total_return

In [36]:
dfap_pre['y_pred'] = model.predict(X)
total_return1 = backtest_strategy(dfap_pre, 100000)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Return'][i] = (investment + shares * df['Close'][i]) / investment
  df['Return'][i] = (investment + shares * df['Close'][i]) / investment


In [39]:
total_return1

254587.41791144398

In [40]:
dfap_pre['y_pred'] = modelR.predict(X)
total_return2 = backtest_strategy(dfap_pre, 100000)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Return'][i] = (investment + shares * df['Close'][i]) / investment
  df['Return'][i] = (investment + shares * df['Close'][i]) / investment


In [41]:
total_return2

242696.73598748195

In [None]:
dfap_pre['y_pred'] = modelX.predict(X)
total_return3 = backtest_strategy(dfap_pre, 100000)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Return'][i] = (investment + shares * df['Close'][i]) / investment
  df['Return'][i] = (investment + shares * df['Close'][i]) / investment


In [None]:
total_return3