In [27]:
import pandas as pd
import pandas_ta as ta

# STEP 1: Get the dataframe

In [28]:
# Location of the AEX price CSV that seeds the whole notebook.
# NOTE(review): absolute, machine-specific path — consider a repo-relative path.
CSV_PATH = '/Users/omarhamdi/code/Stanislas-Motte/MLStocks4Everyone/AEX.csv'
df = pd.read_csv(CSV_PATH)

In [29]:
# Display the frame exactly as loaded from the CSV.
df

Unnamed: 0,DATETIME,price
0,2009-08-18 23:00:00,121.97
1,2009-08-19 23:00:00,121.67
2,2009-08-20 23:00:00,125.77
3,2009-08-21 23:00:00,132.17
4,2009-08-24 23:00:00,135.37
...,...,...
63132,2021-03-08 17:00:00,667.00
63133,2021-03-08 17:35:00,666.60
63134,2021-03-08 18:00:00,663.70
63135,2021-03-08 19:00:00,663.65


In [30]:
# Summarise dtypes and non-null counts before any conversion.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 63137 entries, 0 to 63136
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   DATETIME  63137 non-null  object 
 1   price     60928 non-null  float64
dtypes: float64(1), object(1)
memory usage: 986.6+ KB


In [31]:
# Parse the DATETIME strings into pandas timestamps and store them back
# in the same column.
parsed_timestamps = pd.to_datetime(df['DATETIME'])
df['DATETIME'] = parsed_timestamps

In [32]:
# Re-check dtypes after the datetime conversion.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 63137 entries, 0 to 63136
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   DATETIME  63137 non-null  datetime64[ns]
 1   price     60928 non-null  float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 986.6 KB


In [33]:
# Make the timestamp column the index so later cells work on a
# time-indexed frame.
# Guarded and rebinding (instead of inplace=True) so that re-running this
# cell is a no-op rather than a KeyError once DATETIME is already the index.
if 'DATETIME' in df.columns:
    df = df.set_index('DATETIME')

In [34]:
# Display the frame now that DATETIME is the index.
df

Unnamed: 0_level_0,price
DATETIME,Unnamed: 1_level_1
2009-08-18 23:00:00,121.97
2009-08-19 23:00:00,121.67
2009-08-20 23:00:00,125.77
2009-08-21 23:00:00,132.17
2009-08-24 23:00:00,135.37
...,...
2021-03-08 17:00:00,667.00
2021-03-08 17:35:00,666.60
2021-03-08 18:00:00,663.70
2021-03-08 19:00:00,663.65


# STEP 2: Create the Features

In [35]:
# Print the list of all indicators available through the pandas_ta
# DataFrame accessor (`df.ta`).
df.ta.indicators()

Pandas TA - Technical Analysis Indicators - v0.3.14b0
Total Indicators & Utilities: 205
Abbreviations:
    aberration, above, above_value, accbands, ad, adosc, adx, alma, amat, ao, aobv, apo, aroon, atr, bbands, below, below_value, bias, bop, brar, cci, cdl_pattern, cdl_z, cfo, cg, chop, cksp, cmf, cmo, coppock, cross, cross_value, cti, decay, decreasing, dema, dm, donchian, dpo, ebsw, efi, ema, entropy, eom, er, eri, fisher, fwma, ha, hilo, hl2, hlc3, hma, hwc, hwma, ichimoku, increasing, inertia, jma, kama, kc, kdj, kst, kurtosis, kvo, linreg, log_return, long_run, macd, mad, massi, mcgd, median, mfi, midpoint, midprice, mom, natr, nvi, obv, ohlc4, pdist, percent_return, pgo, ppo, psar, psl, pvi, pvo, pvol, pvr, pvt, pwma, qqe, qstick, quantile, rma, roc, rsi, rsx, rvgi, rvi, short_run, sinwma, skew, slope, sma, smi, squeeze, squeeze_pro, ssf, stc, stdev, stoch, stochrsi, supertrend, swma, t3, td_seq, tema, thermo, tos_stdevall, trima, trix, true_range, tsi, tsignals, ttm_trend, ui, 

### Moving average

In [36]:
# Simple moving average of price over the trailing 7 rows
# (rows without a full 7-row window are NaN).
df['AVG_PRICE'] = df['price'].rolling(window=7).mean()

### Exponential Moving Average (EMA)

In [37]:
# Exponential moving average of price with length 7, computed by pandas_ta.
df['EMA'] = ta.ema(df['price'], length=7)

### Stochastic oscillator

In [38]:
# Stochastic oscillator over a 14-row window: where the current price sits
# between the window's lowest and highest price, expressed in percent.
price_window = df['price'].rolling(window=14)
window_low = price_window.min()
window_high = price_window.max()

# NOTE: when the window is flat (high == low) the ratio is 0/0 and yields NaN.
df['stochastic_oscillator'] = (df['price'] - window_low) / (window_high - window_low) * 100

In [39]:
# Inspect the first 28 rows to see where each rolling feature starts
# producing non-NaN values.
df.head(28)

Unnamed: 0_level_0,price,AVG_PRICE,EMA,stochastic_oscillator
DATETIME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2009-08-18 23:00:00,121.97,,,
2009-08-19 23:00:00,121.67,,,
2009-08-20 23:00:00,125.77,,,
2009-08-21 23:00:00,132.17,,,
2009-08-24 23:00:00,135.37,,,
2009-08-25 23:00:00,137.57,,,
2009-08-26 23:00:00,136.27,130.112857,130.112857,
2009-08-27 23:00:00,134.52,131.905714,131.214643,
2009-08-28 23:00:00,137.87,134.22,132.878482,
2009-08-31 23:00:00,134.27,135.434286,133.226362,


### Relative Strength Index (RSI)

In [40]:
# Relative Strength Index of price via pandas_ta.
# No length is passed, so the library default applies
# (14 per the pandas_ta docs — confirm for the installed version).
df['RSI'] = ta.rsi(df['price'])

In [41]:
# Display the frame with the RSI column added.
df

Unnamed: 0_level_0,price,AVG_PRICE,EMA,stochastic_oscillator,RSI
DATETIME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2009-08-18 23:00:00,121.97,,,,
2009-08-19 23:00:00,121.67,,,,
2009-08-20 23:00:00,125.77,,,,
2009-08-21 23:00:00,132.17,,,,
2009-08-24 23:00:00,135.37,,,,
...,...,...,...,...,...
2021-03-08 17:00:00,667.00,663.621429,664.031350,100.000000,63.893826
2021-03-08 17:35:00,666.60,664.371429,664.673513,96.190476,62.810624
2021-03-08 18:00:00,663.70,664.907143,664.430135,68.571429,55.468530
2021-03-08 19:00:00,663.65,665.078571,664.235101,68.095238,55.348401


### Fibonacci 🤌 retracement

In [42]:
## df['fibonacci_retracement'] = df['price'] - df['AVG_PRICE'] ?????????

### Ichimoku cloud 💩☁️

In [43]:
# Ichimoku-style features derived from the single `price` column.
price_series = df['price']
win_9 = price_series.rolling(window=9)
win_26 = price_series.rolling(window=26)
win_52 = price_series.rolling(window=52)

# Midpoint between the rolling max and min over 9, 26 and 52 rows.
df['Tenkan_Sen'] = (win_9.max() + win_9.min()) / 2
df['Kijun_Sen'] = (win_26.max() + win_26.min()) / 2
# Average of the two lines above (not shifted forward here).
df['Senkou_Span_A'] = (df['Tenkan_Sen'] + df['Kijun_Sen']) / 2
df['Senkou_Span_B'] = (win_52.max() + win_52.min()) / 2

# Price from 26 rows *after* the current row.
# NOTE(review): at row t this column holds information from t+26; if it is
# kept as a model input it is look-ahead leakage — confirm this is intended.
df['Chikou_Span'] = price_series.shift(-26)

In [44]:
# Display the frame with the Ichimoku columns added.
df

Unnamed: 0_level_0,price,AVG_PRICE,EMA,stochastic_oscillator,RSI,Tenkan_Sen,Kijun_Sen,Senkou_Span_A,Senkou_Span_B,Chikou_Span
DATETIME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2009-08-18 23:00:00,121.97,,,,,,,,,150.92
2009-08-19 23:00:00,121.67,,,,,,,,,146.27
2009-08-20 23:00:00,125.77,,,,,,,,,143.22
2009-08-21 23:00:00,132.17,,,,,,,,,149.77
2009-08-24 23:00:00,135.37,,,,,,,,,149.82
...,...,...,...,...,...,...,...,...,...,...
2021-03-08 17:00:00,667.00,663.621429,664.031350,100.000000,63.893826,661.750,659.425,660.5875,656.35,
2021-03-08 17:35:00,666.60,664.371429,664.673513,96.190476,62.810624,663.000,660.150,661.5750,656.35,
2021-03-08 18:00:00,663.70,664.907143,664.430135,68.571429,55.468530,663.475,660.150,661.8125,656.35,
2021-03-08 19:00:00,663.65,665.078571,664.235101,68.095238,55.348401,663.475,661.500,662.4875,656.35,


### Average directional index (ADX)

In [45]:
#df['ADX'] = ta.adx(df['High'], df['Low'], df['Close'], length=14)

### Bollinger Bands

In [46]:
# train = df[df['DATETIME'] < '2020-01-01']
# test = df[df['DATETIME'] >= '2020-01-01']

# Compute standard deviation of price over past 20 days and add to price

# for i in range(1,11):
#     df[f'BB_SellSignal {i} %'] = df['price'] > (df['price'] + df['price'].rolling(window=20).std() * i/10)
#     df[f'BB_BuySignal {i} %'] = df['price'] < (df['price'] - df['price'].rolling(window=20).std() * i/10)

In [47]:
# df['HowMuchAbove3dAverage'] = df['price'] - df['AVG_PRICE']

In [48]:
# df

# Train test split

In [49]:
# Display the full feature frame before rows containing NaN are removed.
df

Unnamed: 0_level_0,price,AVG_PRICE,EMA,stochastic_oscillator,RSI,Tenkan_Sen,Kijun_Sen,Senkou_Span_A,Senkou_Span_B,Chikou_Span
DATETIME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2009-08-18 23:00:00,121.97,,,,,,,,,150.92
2009-08-19 23:00:00,121.67,,,,,,,,,146.27
2009-08-20 23:00:00,125.77,,,,,,,,,143.22
2009-08-21 23:00:00,132.17,,,,,,,,,149.77
2009-08-24 23:00:00,135.37,,,,,,,,,149.82
...,...,...,...,...,...,...,...,...,...,...
2021-03-08 17:00:00,667.00,663.621429,664.031350,100.000000,63.893826,661.750,659.425,660.5875,656.35,
2021-03-08 17:35:00,666.60,664.371429,664.673513,96.190476,62.810624,663.000,660.150,661.5750,656.35,
2021-03-08 18:00:00,663.70,664.907143,664.430135,68.571429,55.468530,663.475,660.150,661.8125,656.35,
2021-03-08 19:00:00,663.65,665.078571,664.235101,68.095238,55.348401,663.475,661.500,662.4875,656.35,


In [50]:
# Keep only the rows in which every column has a value; rows with a NaN in
# any column (missing price, incomplete rolling window, shifted-out
# Chikou_Span) are discarded.
df = df.dropna()

In [51]:
# Display the frame after dropping rows that contain NaN.
df

Unnamed: 0_level_0,price,AVG_PRICE,EMA,stochastic_oscillator,RSI,Tenkan_Sen,Kijun_Sen,Senkou_Span_A,Senkou_Span_B,Chikou_Span
DATETIME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2009-10-28 23:00:00,142.96,156.524286,153.447676,0.000000,38.800816,154.760,151.715,153.2375,144.115,158.16
2009-10-29 23:00:00,149.16,154.310000,152.375757,26.271186,46.649908,154.760,151.715,153.2375,144.115,161.61
2009-10-30 23:00:00,142.11,151.045714,149.809318,0.000000,40.317744,153.535,151.715,152.6250,146.165,160.86
2009-11-02 23:00:00,142.16,148.438571,147.896988,0.204499,40.379555,153.535,151.715,152.6250,146.615,156.16
2009-11-03 23:00:00,139.71,145.581429,145.850241,0.000000,38.287127,150.060,151.715,150.8875,146.615,153.31
...,...,...,...,...,...,...,...,...,...,...
2021-03-04 20:00:00,651.25,653.571429,652.723193,41.263941,38.932811,652.425,656.100,654.2625,656.475,667.00
2021-03-05 07:00:00,651.85,652.635714,652.504894,45.724907,40.260472,652.150,655.475,653.8125,656.475,666.60
2021-03-05 08:00:00,653.95,651.971429,652.866171,61.338290,44.785152,652.150,654.100,653.1250,656.475,663.70
2021-03-05 09:00:00,653.30,651.585714,652.974628,56.505576,43.682317,652.150,653.950,653.0500,656.475,663.65


In [52]:
# Feature matrix: every column except the raw price.
X = df.drop(columns=['price'])
# Target: the price column, kept as a one-column DataFrame rather than a Series.
y = df['price'].to_frame()

In [53]:
# Display the feature matrix (all columns except price).
X

Unnamed: 0_level_0,AVG_PRICE,EMA,stochastic_oscillator,RSI,Tenkan_Sen,Kijun_Sen,Senkou_Span_A,Senkou_Span_B,Chikou_Span
DATETIME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2009-10-28 23:00:00,156.524286,153.447676,0.000000,38.800816,154.760,151.715,153.2375,144.115,158.16
2009-10-29 23:00:00,154.310000,152.375757,26.271186,46.649908,154.760,151.715,153.2375,144.115,161.61
2009-10-30 23:00:00,151.045714,149.809318,0.000000,40.317744,153.535,151.715,152.6250,146.165,160.86
2009-11-02 23:00:00,148.438571,147.896988,0.204499,40.379555,153.535,151.715,152.6250,146.615,156.16
2009-11-03 23:00:00,145.581429,145.850241,0.000000,38.287127,150.060,151.715,150.8875,146.615,153.31
...,...,...,...,...,...,...,...,...,...
2021-03-04 20:00:00,653.571429,652.723193,41.263941,38.932811,652.425,656.100,654.2625,656.475,667.00
2021-03-05 07:00:00,652.635714,652.504894,45.724907,40.260472,652.150,655.475,653.8125,656.475,666.60
2021-03-05 08:00:00,651.971429,652.866171,61.338290,44.785152,652.150,654.100,653.1250,656.475,663.70
2021-03-05 09:00:00,651.585714,652.974628,56.505576,43.682317,652.150,653.950,653.0500,656.475,663.65


In [55]:
# Row position at which the frame is cut: rows before it make up 70% of the data.
TRAIN_FRACTION = 0.7
split_index = int(TRAIN_FRACTION * len(df))

In [56]:
# Positional split that preserves row order (no shuffling): rows before
# split_index go to train, the rest to test. Both halves still contain the
# 'price' column at this point.
X_train, X_test = df.iloc[:split_index], df.iloc[split_index:]

In [57]:
# Targets: the 'price' column as one-column DataFrames, cut at split_index.
# Taken from df rather than from X_train/X_test so this cell keeps working
# after 'price' has been dropped from the feature frames (safe to re-run).
price_target = df[['price']]
y_train = price_target.iloc[:split_index]
y_test = price_target.iloc[split_index:]

In [58]:
# Feature frames: every column except 'price', cut at split_index.
# Built from df instead of reassigning X_train/X_test from themselves, so
# re-running the cell does not raise KeyError on an already-dropped column.
# NOTE(review): Chikou_Span (price shifted by -26 rows) is still among these
# features — confirm that using a future price as an input is intended.
feature_frame = df.drop(columns=['price'])
X_train = feature_frame.iloc[:split_index]
X_test = feature_frame.iloc[split_index:]

In [59]:
# Display the training targets.
y_train

Unnamed: 0_level_0,price
DATETIME,Unnamed: 1_level_1
2009-10-28 23:00:00,142.96
2009-10-29 23:00:00,149.16
2009-10-30 23:00:00,142.11
2009-11-02 23:00:00,142.16
2009-11-03 23:00:00,139.71
...,...
2019-01-18 18:15:00,476.60
2019-01-18 18:30:00,476.80
2019-01-18 18:45:00,477.25
2019-01-18 19:00:00,477.15


# Step 4: Create your target variable : what you want to predict --> Future returns %

In [None]:
# Make the target the next-row return in percent (disabled draft).
# NOTE(review): two things to fix before enabling the line below:
#   * the column is named 'price' (lower case); df['Price'] would raise KeyError;
#   * price / price.shift(-1) is current-over-next; a forward return would be
#     price.shift(-1) / price - 1.
# Also note shift(-1) means "next row", which is not one day apart on the
# intraday part of this data.
#df['target'] = ((df['Price'] / df['price'].shift(-1)) - 1)*100

In [None]:
#df['price']

0        121.97
1        121.67
2        125.77
3        132.17
4        135.37
          ...  
63132    667.00
63133    666.60
63134    663.70
63135    663.65
63136    662.80
Name: price, Length: 63137, dtype: float64

In [None]:
#df['price'].shift(-1)

0        121.67
1        125.77
2        132.17
3        135.37
4        137.57
          ...  
63132    666.60
63133    663.70
63134    663.65
63135    662.80
63136       NaN
Name: price, Length: 63137, dtype: float64

In [77]:
# Create an empty DataFrame so the 'ta' accessor can be used.
# Bound to its own name so the feature DataFrame `df` built earlier in the
# notebook is not overwritten by an empty frame.
ta_probe = pd.DataFrame()

# Help about this, 'ta', extension
#help(ta_probe.ta)

# List of all indicators
ta_probe.ta.indicators()

Pandas TA - Technical Analysis Indicators - v0.3.14b0
Total Indicators & Utilities: 205
Abbreviations:
    aberration, above, above_value, accbands, ad, adosc, adx, alma, amat, ao, aobv, apo, aroon, atr, bbands, below, below_value, bias, bop, brar, cci, cdl_pattern, cdl_z, cfo, cg, chop, cksp, cmf, cmo, coppock, cross, cross_value, cti, decay, decreasing, dema, dm, donchian, dpo, ebsw, efi, ema, entropy, eom, er, eri, fisher, fwma, ha, hilo, hl2, hlc3, hma, hwc, hwma, ichimoku, increasing, inertia, jma, kama, kc, kdj, kst, kurtosis, kvo, linreg, log_return, long_run, macd, mad, massi, mcgd, median, mfi, midpoint, midprice, mom, natr, nvi, obv, ohlc4, pdist, percent_return, pgo, ppo, psar, psl, pvi, pvo, pvol, pvr, pvt, pwma, qqe, qstick, quantile, rma, roc, rsi, rsx, rvgi, rvi, short_run, sinwma, skew, slope, sma, smi, squeeze, squeeze_pro, ssf, stc, stdev, stoch, stochrsi, supertrend, swma, t3, td_seq, tema, thermo, tos_stdevall, trima, trix, true_range, tsi, tsignals, ttm_trend, ui, 

In [78]:
# Show the built-in help for a single indicator, here Acceleration Bands (accbands)
help(ta.accbands)

Help on function accbands in module pandas_ta.volatility.accbands:

accbands(high, low, close, length=None, c=None, drift=None, mamode=None, offset=None, **kwargs)
    Acceleration Bands (ACCBANDS)
    
    Acceleration Bands created by Price Headley plots upper and lower envelope
    bands around a simple moving average.
    
    Sources:
        https://www.tradingtechnologies.com/help/x-study/technical-indicator-definitions/acceleration-bands-abands/
    
    Calculation:
        Default Inputs:
            length=10, c=4
        EMA = Exponential Moving Average
        SMA = Simple Moving Average
        HL_RATIO = c * (high - low) / (high + low)
        LOW = low * (1 - HL_RATIO)
        HIGH = high * (1 + HL_RATIO)
    
        if 'ema':
            LOWER = EMA(LOW, length)
            MID = EMA(close, length)
            UPPER = EMA(HIGH, length)
        else:
            LOWER = SMA(LOW, length)
            MID = SMA(close, length)
            UPPER = SMA(HIGH, length)
    
   