In [1]:
import pandas_datareader.data as web
import pandas as pd
import numpy as np
from talib import RSI, BBANDS,WILLR,WMA,SMA,EMA,TEMA,KAMA,CCI,CMO,MACD,PPO,ROC,APO,DX,MOM,STOCH,AROON,ADX,OBV 
import matplotlib.pyplot as plt
import requests
import lxml
import datetime as dt
import os


## retrieve stock tickers data

### Cyclic Tickers

In [2]:
# Use the tickers saved in a txt file to download each stock's price data from Yahoo

def get_data_from_yahoo(tickers_file='cyclic_tickers.txt',
                        out_dir='data/cyclic_tickers/initial',
                        start=None, end=None):
    """Download price history from Yahoo for every ticker listed in a text file.

    One CSV per ticker is written to ``<out_dir>/<ticker>_test.csv``. A ticker
    whose CSV already exists is not downloaded again, so re-running the cell
    only fetches what is missing.

    params :
        tickers_file => text file with one ticker symbol per line
        out_dir => directory that receives the CSV files (created if missing)
        start => start date passed to web.DataReader; defaults to 2020-01-01
        end => end date passed to web.DataReader; defaults to 2021-01-01
    returns : None
    """
    if start is None:
        start = dt.datetime(2020, 1, 1)
    if end is None:
        end = dt.datetime(2021, 1, 1)

    with open(tickers_file) as f:
        # Skip blank lines (e.g. a trailing empty line) so that an empty
        # symbol is never sent to the data reader.
        tickers = [line.strip() for line in f if line.strip()]

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(out_dir, exist_ok=True)

    for ticker in tickers:
        print(ticker)
        csv_path = os.path.join(out_dir, '{}_test.csv'.format(ticker))
        if os.path.exists(csv_path):
            print('Already have {}'.format(ticker))
            continue
        df = web.DataReader(ticker, 'yahoo', start, end)
        df.to_csv(csv_path)
            
# Fetch price CSVs for the tickers in cyclic_tickers.txt; tickers whose CSV already exists are skipped.
get_data_from_yahoo()

C6L.SI
JPM
BA
O39.SI
600104.SS
0883.HK
1398.HK


### Non-Cyclic Tickers

In [3]:
# Use the tickers saved in a txt file to download each stock's price data from Yahoo

def get_data_from_yahoo(tickers_file='noncyclic_tickers.txt',
                        out_dir='data/noncyclic_tickers/initial',
                        start=None, end=None):
    """Download price history from Yahoo for every ticker listed in a text file.

    One CSV per ticker is written to ``<out_dir>/<ticker>_test.csv``. A ticker
    whose CSV already exists is not downloaded again, so re-running the cell
    only fetches what is missing.

    params :
        tickers_file => text file with one ticker symbol per line
        out_dir => directory that receives the CSV files (created if missing)
        start => start date passed to web.DataReader; defaults to 2020-01-01
        end => end date passed to web.DataReader; defaults to 2021-01-01
    returns : None
    """
    if start is None:
        start = dt.datetime(2020, 1, 1)
    if end is None:
        end = dt.datetime(2021, 1, 1)

    with open(tickers_file) as f:
        # Skip blank lines (e.g. a trailing empty line) so that an empty
        # symbol is never sent to the data reader.
        tickers = [line.strip() for line in f if line.strip()]

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(out_dir, exist_ok=True)

    for ticker in tickers:
        print(ticker)
        csv_path = os.path.join(out_dir, '{}_test.csv'.format(ticker))
        if os.path.exists(csv_path):
            print('Already have {}'.format(ticker))
            continue
        df = web.DataReader(ticker, 'yahoo', start, end)
        df.to_csv(csv_path)
            
# Fetch price CSVs for the tickers in noncyclic_tickers.txt; tickers whose CSV already exists are skipped.
get_data_from_yahoo()

600519.SS
COST
PG
000858.SZ
WMT
INO
NVAX
2319.HK
000333.SZ


## Function to create labels

In [4]:
import os
import re
from operator import itemgetter

import pandas as pd
import pickle
import numpy as np
from sklearn.feature_selection import SelectKBest, f_classif, mutual_info_classif
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.utils import compute_class_weight
from tqdm.auto import tqdm

def create_labels(df, col_name, window_size=15):
    """
    Label every row as BUY, SELL or HOLD using a sliding window over one column.

    For each run of `window_size` consecutive rows, the row in the middle of
    the window is labelled SELL when it holds the window's maximum, BUY when
    it holds the window's minimum, and HOLD otherwise. On ties the first
    occurrence of the extreme value counts, and the SELL check runs before
    the BUY check.

    Label code : BUY => 1, SELL => 0, HOLD => 2
    params :
        df => Dataframe with data
        col_name => name of column which should be used to determine strategy
        window_size => number of consecutive rows in each window
    returns : numpy float array with one entry per row of df (size = len(df)).
              Rows that are never the middle of a complete window (the
              leading and trailing rows) stay NaN.
    """
    total_rows = len(df)
    labels = np.full(total_rows, np.nan)
    # Pull the column out once: a per-read df.iloc[i][col_name] builds a whole
    # row Series for every single price lookup inside the nested loop.
    prices = df[col_name].to_numpy()

    print("Calculating labels")
    pbar = tqdm(total=total_rows)

    try:
        for window_end in range(total_rows):
            window_begin = window_end - (window_size - 1)
            # Only label once a complete window ending at this row exists.
            if window_begin >= 0:
                window_middle = (window_begin + window_end) // 2

                min_price, min_index = np.inf, -1
                max_price, max_index = -np.inf, -1
                # Strict comparisons keep the first occurrence on ties and
                # never select a NaN price (comparisons with NaN are False).
                for i in range(window_begin, window_end + 1):
                    price = prices[i]
                    if price < min_price:
                        min_price = price
                        min_index = i
                    if price > max_price:
                        max_price = price
                        max_index = i

                if max_index == window_middle:
                    labels[window_middle] = 0
                elif min_index == window_middle:
                    labels[window_middle] = 1
                else:
                    labels[window_middle] = 2

            pbar.update(1)
    finally:
        # Close the progress bar even if labelling fails part-way through.
        pbar.close()

    return labels

## Generate technical indicators

### Cyclic indicators

In [5]:
def compile_data(tickers_file='cyclic_tickers.txt',
                 in_dir='data/cyclic_tickers/initial',
                 out_dir='data/cyclic_tickers/initial_indicators'):
    """Build the indicator + label CSV for every ticker listed in a text file.

    For each ticker this reads ``<in_dir>/<ticker>_test.csv``, computes the
    features from the previous day's High/Low/Close/Volume (every input is
    shifted by one row), attaches the labels from create_labels() computed on
    the unshifted Close, drops rows with missing values and writes the result
    to ``<out_dir>/<ticker>_data_test.csv``.

    params :
        tickers_file => text file with one ticker symbol per line
        in_dir => directory holding the downloaded ``<ticker>_test.csv`` files
        out_dir => directory that receives the result (created if missing)
    returns : None
    """
    with open(tickers_file) as f:
        # Ignore blank lines so an empty symbol never becomes a file name.
        tickers = [line.strip() for line in f if line.strip()]

    # to_csv does not create missing directories.
    os.makedirs(out_dir, exist_ok=True)

    for ticker in tickers:
        df = pd.read_csv(os.path.join(in_dir, '{}_test.csv'.format(ticker)))
        df = df.dropna()

        # Shift by one row so each feature on a given row only uses the
        # previous row's prices and volume.
        high = df['High'].shift(1).values
        low = df['Low'].shift(1).values
        close = df['Close'].shift(1).values
        volume = df['Volume'].shift(1).values

        # Keyword form: passing the axis positionally (``drop(labels, 1)``)
        # is rejected by newer pandas releases.
        df = df.drop(columns=['High', 'Low', 'Open', 'Volume', 'Adj Close'])
        print(df.head())

        df['previous_1d'] = df['Close'].shift(1)
        df['EMA_7'] = EMA(close, timeperiod=7)
        df['willr_7'] = WILLR(high, low, close, timeperiod=7)
        # OBV is called without a period; the 'OBV_7' column name is kept
        # unchanged because it is part of the written CSV.
        df['OBV_7'] = OBV(close, volume)

        df = df.set_index('Date')

        print(df.head())
        # info() prints its report itself and returns None, so it is not
        # wrapped in print().
        df.info()

        df['labels'] = create_labels(df, 'Close', 15)
        df = df.drop(columns=['Close'])
        df = df.dropna()

        print(df.head())
        df.to_csv(os.path.join(out_dir, '{}_data_test.csv'.format(ticker)))
    
# Build the indicator/label CSVs for the tickers in cyclic_tickers.txt.
compile_data()

         Date  Close
0  2020-01-02   9.11
1  2020-01-03   9.02
2  2020-01-06   8.93
3  2020-01-07   9.01
4  2020-01-08   8.99
            Close  previous_1d  EMA_7  willr_7      OBV_7
Date                                                     
2020-01-02   9.11          NaN    NaN      NaN        NaN
2020-01-03   9.02         9.11    NaN      NaN  1131100.0
2020-01-06   8.93         9.02    NaN      NaN   129500.0
2020-01-07   9.01         8.93    NaN      NaN -1090400.0
2020-01-08   8.99         9.01    NaN      NaN    37000.0
<class 'pandas.core.frame.DataFrame'>
Index: 250 entries, 2020-01-02 to 2020-12-30
Data columns (total 5 columns):
Close          250 non-null float64
previous_1d    249 non-null float64
EMA_7          243 non-null float64
willr_7        243 non-null float64
OBV_7          249 non-null float64
dtypes: float64(5)
memory usage: 11.7+ KB
None
Calculating labels


HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))


            previous_1d     EMA_7    willr_7      OBV_7  labels
Date                                                           
2020-01-13         8.95  8.991429 -58.620724 -1419500.0     2.0
2020-01-14         8.95  8.981071 -58.620724 -1419500.0     2.0
2020-01-15         9.01  8.988304  -5.263264  -694100.0     2.0
2020-01-16         9.03  8.998728 -16.666667   289800.0     2.0
2020-01-17         9.04  9.009046 -19.230826   986700.0     0.0
         Date       Close
0  2019-12-31  139.399994
1  2020-01-02  141.089996
2  2020-01-03  138.339996
3  2020-01-06  138.229996
4  2020-01-07  135.880005
                 Close  previous_1d  EMA_7  willr_7       OBV_7
Date                                                           
2019-12-31  139.399994          NaN    NaN      NaN         NaN
2020-01-02  141.089996   139.399994    NaN      NaN   7201600.0
2020-01-03  138.339996   141.089996    NaN      NaN  18005300.0
2020-01-06  138.229996   138.339996    NaN      NaN   7618500.0
2020-01-07 

HBox(children=(FloatProgress(value=0.0, max=254.0), HTML(value='')))


            previous_1d       EMA_7    willr_7       OBV_7  labels
Date                                                              
2020-01-10   137.440002  138.188570 -66.545521   5992500.0     2.0
2020-01-13   136.070007  137.658930 -91.454523  -4198400.0     2.0
2020-01-14   137.199997  137.544196 -55.922991   8156800.0     0.0
2020-01-15   138.800003  137.858148 -37.984416  33062800.0     2.0
2020-01-16   136.720001  137.573611 -78.294619  16769400.0     2.0
         Date       Close
0  2019-12-31  325.760010
1  2020-01-02  333.320007
2  2020-01-03  332.760010
3  2020-01-06  333.739990
4  2020-01-07  337.279999
                 Close  previous_1d  EMA_7  willr_7       OBV_7
Date                                                           
2019-12-31  325.760010          NaN    NaN      NaN         NaN
2020-01-02  333.320007   325.760010    NaN      NaN   4958800.0
2020-01-03  332.760010   333.320007    NaN      NaN   9503200.0
2020-01-06  333.739990   332.760010    NaN      NaN   

HBox(children=(FloatProgress(value=0.0, max=254.0), HTML(value='')))


            previous_1d       EMA_7    willr_7       OBV_7  labels
Date                                                              
2020-01-10   336.339996  332.938572 -37.613838  20817300.0     2.0
2020-01-13   329.920013  332.183933 -86.537280  13655600.0     2.0
2020-01-14   330.220001  331.692950 -85.652994  19304100.0     2.0
2020-01-15   332.350006  331.857214 -72.593489  26249400.0     2.0
2020-01-16   329.799988  331.342907 -90.446331  21671100.0     2.0
         Date  Close
0  2020-01-02  11.03
1  2020-01-03  11.02
2  2020-01-06  10.92
3  2020-01-07  11.00
4  2020-01-08  11.01
            Close  previous_1d  EMA_7  willr_7      OBV_7
Date                                                     
2020-01-02  11.03          NaN    NaN      NaN        NaN
2020-01-03  11.02        11.03    NaN      NaN  3831900.0
2020-01-06  10.92        11.02    NaN      NaN  -549500.0
2020-01-07  11.00        10.92    NaN      NaN -4949000.0
2020-01-08  11.01        11.00    NaN      NaN    -7800.

HBox(children=(FloatProgress(value=0.0, max=250.0), HTML(value='')))


            previous_1d      EMA_7    willr_7       OBV_7  labels
Date                                                             
2020-01-13        11.08  11.012857 -16.666755  15355600.0     2.0
2020-01-14        11.06  11.024643 -22.222163  13005500.0     2.0
2020-01-15        11.10  11.043482  -5.882204  16214400.0     2.0
2020-01-16        10.98  11.027612 -41.176553  12801600.0     2.0
2020-01-17        11.11  11.048209  -8.333510  25303300.0     0.0
         Date      Close
0  2020-01-02  24.170000
1  2020-01-03  24.100000
2  2020-01-06  23.360001
3  2020-01-07  23.530001
4  2020-01-08  23.889999
                Close  previous_1d  EMA_7  willr_7       OBV_7
Date                                                          
2020-01-02  24.170000          NaN    NaN      NaN         NaN
2020-01-03  24.100000    24.170000    NaN      NaN  37373749.0
2020-01-06  23.360001    24.100000    NaN      NaN  17364725.0
2020-01-07  23.530001    23.360001    NaN      NaN -35091670.0
2020-01-0

HBox(children=(FloatProgress(value=0.0, max=243.0), HTML(value='')))


            previous_1d      EMA_7    willr_7        OBV_7  labels
Date                                                              
2020-01-13    25.049999  24.150000  -5.434803   94492336.0     0.0
2020-01-14    25.299999  24.437500  -0.000000  118548367.0     2.0
2020-01-15    24.969999  24.570625 -16.582913   83761110.0     2.0
2020-01-16    24.559999  24.567968 -37.755108   64685365.0     2.0
2020-01-17    24.799999  24.625976 -35.460997   83356598.0     2.0
         Date  Close
0  2020-01-02  12.98
1  2020-01-03  13.24
2  2020-01-06  13.72
3  2020-01-07  13.64
4  2020-01-08  13.64
            Close  previous_1d  EMA_7  willr_7        OBV_7
Date                                                       
2020-01-02  12.98          NaN    NaN      NaN          NaN
2020-01-03  13.24        12.98    NaN      NaN   40154251.0
2020-01-06  13.72        13.24    NaN      NaN  137496548.0
2020-01-07  13.64        13.72    NaN      NaN  231955197.0
2020-01-08  13.64        13.64    NaN      N

HBox(children=(FloatProgress(value=0.0, max=248.0), HTML(value='')))


            previous_1d      EMA_7    willr_7        OBV_7  labels
Date                                                              
2020-01-13        13.54  13.491429 -40.322588  262967202.0     0.0
2020-01-14        13.74  13.553571 -27.272735  302254493.0     2.0
2020-01-15        13.54  13.550179 -73.529379  221195821.0     2.0
2020-01-16        13.22  13.467634 -95.348840  110173648.0     2.0
2020-01-17        13.44  13.460725 -69.767514  192230675.0     2.0
         Date  Close
0  2020-01-02   6.05
1  2020-01-03   6.00
2  2020-01-06   5.93
3  2020-01-07   5.91
4  2020-01-08   5.84
            Close  previous_1d  EMA_7  willr_7        OBV_7
Date                                                       
2020-01-02   6.05          NaN    NaN      NaN          NaN
2020-01-03   6.00         6.05    NaN      NaN  141558831.0
2020-01-06   5.93         6.00    NaN      NaN  -77815985.0
2020-01-07   5.91         5.93    NaN      NaN -261304462.0
2020-01-08   5.84         5.91    NaN      N

HBox(children=(FloatProgress(value=0.0, max=248.0), HTML(value='')))


            previous_1d     EMA_7    willr_7        OBV_7  labels
Date                                                             
2020-01-13         5.93  5.942857 -50.000066 -590208756.0     2.0
2020-01-14         5.97  5.949643 -37.142900 -455908484.0     2.0
2020-01-15         5.91  5.939732 -42.857240 -662485703.0     2.0
2020-01-16         5.90  5.929799 -46.428578 -764517226.0     2.0
2020-01-17         5.85  5.909849 -64.285775 -928071512.0     2.0


### Non-cyclic Indicators

In [6]:
def compile_data(tickers_file='noncyclic_tickers.txt',
                 in_dir='data/noncyclic_tickers/initial',
                 out_dir='data/noncyclic_tickers/initial_indicators'):
    """Build the indicator + label CSV for every ticker listed in a text file.

    For each ticker this reads ``<in_dir>/<ticker>_test.csv``, computes the
    features from the previous day's High/Low/Close/Volume (every input is
    shifted by one row), attaches the labels from create_labels() computed on
    the unshifted Close, drops rows with missing values and writes the result
    to ``<out_dir>/<ticker>_data_test.csv``.

    params :
        tickers_file => text file with one ticker symbol per line
        in_dir => directory holding the downloaded ``<ticker>_test.csv`` files
        out_dir => directory that receives the result (created if missing)
    returns : None
    """
    with open(tickers_file) as f:
        # Ignore blank lines so an empty symbol never becomes a file name.
        tickers = [line.strip() for line in f if line.strip()]

    # to_csv does not create missing directories.
    os.makedirs(out_dir, exist_ok=True)

    for ticker in tickers:
        df = pd.read_csv(os.path.join(in_dir, '{}_test.csv'.format(ticker)))
        df = df.dropna()

        # Shift by one row so each feature on a given row only uses the
        # previous row's prices and volume.
        high = df['High'].shift(1).values
        low = df['Low'].shift(1).values
        close = df['Close'].shift(1).values
        volume = df['Volume'].shift(1).values

        # Keyword form: passing the axis positionally (``drop(labels, 1)``)
        # is rejected by newer pandas releases.
        df = df.drop(columns=['High', 'Low', 'Open', 'Volume', 'Adj Close'])
        print(df.head())

        df['previous_1d'] = df['Close'].shift(1)
        df['EMA_7'] = EMA(close, timeperiod=7)
        df['willr_7'] = WILLR(high, low, close, timeperiod=7)
        # OBV is called without a period; the 'OBV_7' column name is kept
        # unchanged because it is part of the written CSV.
        df['OBV_7'] = OBV(close, volume)

        df = df.set_index('Date')

        print(df.head())
        # info() prints its report itself and returns None, so it is not
        # wrapped in print().
        df.info()

        df['labels'] = create_labels(df, 'Close', 15)
        df = df.drop(columns=['Close'])
        df = df.dropna()

        print(df.head())
        df.to_csv(os.path.join(out_dir, '{}_data_test.csv'.format(ticker)))
    
# Build the indicator/label CSVs for the tickers in noncyclic_tickers.txt.
compile_data()

         Date        Close
0  2020-01-02  1130.000000
1  2020-01-03  1078.560059
2  2020-01-06  1077.989990
3  2020-01-07  1094.530029
4  2020-01-08  1088.140015
                  Close  previous_1d  EMA_7  willr_7       OBV_7
Date                                                            
2020-01-02  1130.000000          NaN    NaN      NaN         NaN
2020-01-03  1078.560059  1130.000000    NaN      NaN  14809916.0
2020-01-06  1077.989990  1078.560059    NaN      NaN   1778038.0
2020-01-07  1094.530029  1077.989990    NaN      NaN  -4563440.0
2020-01-08  1088.140015  1094.530029    NaN      NaN    221919.0
<class 'pandas.core.frame.DataFrame'>
Index: 243 entries, 2020-01-02 to 2020-12-31
Data columns (total 5 columns):
Close          243 non-null float64
previous_1d    242 non-null float64
EMA_7          236 non-null float64
willr_7        236 non-null float64
OBV_7          242 non-null float64
dtypes: float64(5)
memory usage: 11.4+ KB
None
Calculating labels


HBox(children=(FloatProgress(value=0.0, max=243.0), HTML(value='')))


            previous_1d        EMA_7    willr_7      OBV_7  labels
Date                                                              
2020-01-13  1112.500000  1097.774292 -41.872498  5059268.0     2.0
2020-01-14  1124.270020  1104.398224  -0.000000  8910843.0     2.0
2020-01-15  1107.400024  1105.148674 -29.612083  5396378.0     2.0
2020-01-16  1112.130005  1106.894007 -25.360384  7999289.0     2.0
2020-01-17  1107.000000  1106.920505 -47.799660  5680124.0     2.0
         Date       Close
0  2019-12-31  293.920013
1  2020-01-02  291.489990
2  2020-01-03  291.730011
3  2020-01-06  291.809998
4  2020-01-07  291.350006
                 Close  previous_1d  EMA_7  willr_7      OBV_7
Date                                                          
2019-12-31  293.920013          NaN    NaN      NaN        NaN
2020-01-02  291.489990   293.920013    NaN      NaN  1550700.0
2020-01-03  291.730011   291.489990    NaN      NaN  -552900.0
2020-01-06  291.809998   291.730011    NaN      NaN  137310

HBox(children=(FloatProgress(value=0.0, max=254.0), HTML(value='')))


            previous_1d       EMA_7    willr_7      OBV_7  labels
Date                                                             
2020-01-10   299.420013  293.487148 -19.040320  7818800.0     2.0
2020-01-13   297.239990  294.425358 -35.382328  5794600.0     2.0
2020-01-14   299.869995  295.786518 -15.667143  7573200.0     2.0
2020-01-15   299.750000  296.777388 -16.566657  6113300.0     2.0
2020-01-16   300.820007  297.788043  -8.990416  7717300.0     2.0
         Date       Close
0  2019-12-31  124.900002
1  2020-01-02  123.410004
2  2020-01-03  122.580002
3  2020-01-06  122.750000
4  2020-01-07  121.989998
                 Close  previous_1d  EMA_7  willr_7       OBV_7
Date                                                           
2019-12-31  124.900002          NaN    NaN      NaN         NaN
2020-01-02  123.410004   124.900002    NaN      NaN   4798900.0
2020-01-03  122.580002   123.410004    NaN      NaN  -3331900.0
2020-01-06  122.750000   122.580002    NaN      NaN -11302400

HBox(children=(FloatProgress(value=0.0, max=254.0), HTML(value='')))


            previous_1d       EMA_7    willr_7       OBV_7  labels
Date                                                              
2020-01-10   123.849998  123.141429 -36.421738   -882300.0     2.0
2020-01-13   123.970001  123.348572 -26.480885   3044800.0     2.0
2020-01-14   124.879997  123.731429  -0.658035  11454000.0     2.0
2020-01-15   124.690002  123.971072 -15.315177   3624300.0     2.0
2020-01-16   125.959999  124.468304  -3.764792   8468000.0     2.0
         Date       Close
0  2020-01-02  132.080002
1  2020-01-03  130.550003
2  2020-01-06  129.199997
3  2020-01-07  129.369995
4  2020-01-08  128.889999
                 Close  previous_1d  EMA_7  willr_7       OBV_7
Date                                                           
2020-01-02  132.080002          NaN    NaN      NaN         NaN
2020-01-03  130.550003   132.080002    NaN      NaN  30667439.0
2020-01-06  129.199997   130.550003    NaN      NaN  10198191.0
2020-01-07  129.369995   129.199997    NaN      NaN -1

HBox(children=(FloatProgress(value=0.0, max=243.0), HTML(value='')))


            previous_1d       EMA_7    willr_7        OBV_7  labels
Date                                                               
2020-01-13   133.619995  130.639999  -7.166799   58927386.0     2.0
2020-01-14   139.660004  132.895000  -2.845158  108531580.0     2.0
2020-01-15   138.559998  134.311250 -16.785407   88624263.0     0.0
2020-01-16   140.800003  135.933438 -13.617854  112103167.0     2.0
2020-01-17   140.660004  137.115079 -14.566360   97567179.0     2.0
         Date       Close
0  2019-12-31  118.839996
1  2020-01-02  118.940002
2  2020-01-03  117.889999
3  2020-01-06  117.650002
4  2020-01-07  116.559998
                 Close  previous_1d  EMA_7  willr_7       OBV_7
Date                                                           
2019-12-31  118.839996          NaN    NaN      NaN         NaN
2020-01-02  118.940002   118.839996    NaN      NaN   4912000.0
2020-01-03  117.889999   118.940002    NaN      NaN  11676900.0
2020-01-06  117.650002   117.889999    NaN     

HBox(children=(FloatProgress(value=0.0, max=254.0), HTML(value='')))


            previous_1d       EMA_7    willr_7       OBV_7  labels
Date                                                              
2020-01-10   117.360001  117.628572 -60.094996  -7326800.0     2.0
2020-01-13   116.379997  117.316428 -83.372990 -13381600.0     2.0
2020-01-14   115.879997  116.957320 -85.087872 -19494200.0     2.0
2020-01-15   116.180000  116.762990 -64.526913 -12908400.0     2.0
2020-01-16   115.279999  116.392242 -74.666595 -20362600.0     2.0
         Date  Close
0  2019-12-31   3.30
1  2020-01-02   3.21
2  2020-01-03   2.98
3  2020-01-06   3.14
4  2020-01-07   3.15
            Close  previous_1d  EMA_7  willr_7      OBV_7
Date                                                     
2019-12-31   3.30          NaN    NaN      NaN        NaN
2020-01-02   3.21         3.30    NaN      NaN   835700.0
2020-01-03   2.98         3.21    NaN      NaN  -135400.0
2020-01-06   3.14         2.98    NaN      NaN -1340400.0
2020-01-07   3.15         3.14    NaN      NaN  -242200.

HBox(children=(FloatProgress(value=0.0, max=254.0), HTML(value='')))


            previous_1d     EMA_7    willr_7      OBV_7  labels
Date                                                           
2020-01-10         3.10  3.145714 -69.387785  -777100.0     2.0
2020-01-13         3.11  3.136786 -63.636403   -61300.0     2.0
2020-01-14         3.31  3.180089 -28.000021  1820400.0     2.0
2020-01-15         3.29  3.207567 -32.000017   401500.0     2.0
2020-01-16         3.35  3.243175 -24.390274  1388500.0     2.0
         Date  Close
0  2019-12-31   3.98
1  2020-01-02   4.49
2  2020-01-03   4.44
3  2020-01-06   4.58
4  2020-01-07   4.57
            Close  previous_1d  EMA_7  willr_7      OBV_7
Date                                                     
2019-12-31   3.98          NaN    NaN      NaN        NaN
2020-01-02   4.49         3.98    NaN      NaN  1324900.0
2020-01-03   4.44         4.49    NaN      NaN  7449100.0
2020-01-06   4.58         4.44    NaN      NaN  5740100.0
2020-01-07   4.57         4.58    NaN      NaN  7851800.0
<class 'pandas.core

HBox(children=(FloatProgress(value=0.0, max=254.0), HTML(value='')))


            previous_1d     EMA_7    willr_7      OBV_7  labels
Date                                                           
2020-01-10         4.01  4.335714 -82.558099  2942700.0     1.0
2020-01-13         3.93  4.234286 -91.860444  1260600.0     2.0
2020-01-14         3.95  4.163214 -71.962613  3027500.0     2.0
2020-01-15         3.99  4.119911 -68.224298  3952200.0     2.0
2020-01-16         4.38  4.184933 -27.722753  9059200.0     2.0
         Date      Close
0  2020-01-02  31.900000
1  2020-01-03  31.799999
2  2020-01-06  31.350000
3  2020-01-07  31.549999
4  2020-01-08  31.700001
                Close  previous_1d  EMA_7  willr_7      OBV_7
Date                                                         
2020-01-02  31.900000          NaN    NaN      NaN        NaN
2020-01-03  31.799999    31.900000    NaN      NaN  4161362.0
2020-01-06  31.350000    31.799999    NaN      NaN -2894111.0
2020-01-07  31.549999    31.350000    NaN      NaN -6208996.0
2020-01-08  31.700001    31.5

HBox(children=(FloatProgress(value=0.0, max=248.0), HTML(value='')))


            previous_1d      EMA_7    willr_7       OBV_7  labels
Date                                                             
2020-01-13    32.400002  31.900000 -28.571351  10062260.0     2.0
2020-01-14    32.599998  32.075000 -19.047688  18475032.0     2.0
2020-01-15    32.500000  32.181250 -23.809519  11486943.0     2.0
2020-01-16    32.400002  32.235938 -28.571351   7954600.0     0.0
2020-01-17    32.700001  32.351953 -14.285675  15390949.0     2.0
         Date      Close
0  2020-01-02  59.750000
1  2020-01-03  58.259998
2  2020-01-06  57.200001
3  2020-01-07  58.049999
4  2020-01-08  58.279999
                Close  previous_1d  EMA_7  willr_7       OBV_7
Date                                                          
2020-01-02  59.750000          NaN    NaN      NaN         NaN
2020-01-03  58.259998    59.750000    NaN      NaN  34622557.0
2020-01-06  57.200001    58.259998    NaN      NaN   7618302.0
2020-01-07  58.049999    57.200001    NaN      NaN -21224231.0
2020-01-0

HBox(children=(FloatProgress(value=0.0, max=243.0), HTML(value='')))


            previous_1d      EMA_7    willr_7        OBV_7  labels
Date                                                              
2020-01-13    59.990002  58.711429 -11.135853   84329331.0     2.0
2020-01-14    60.110001  59.061072 -11.802559  112280498.0     2.0
2020-01-15    59.880001  59.265804 -19.166632   96030463.0     2.0
2020-01-16    59.500000  59.324353 -36.111105   74540490.0     2.0
2020-01-17    59.660000  59.408265 -33.236131   94521888.0     0.0


## Normalize indicators

In [7]:
import pandas as pd
from sklearn import preprocessing

In [8]:
# normalize all data columns
# Every indicator column of each cyclic ticker is min-max scaled; the
# 'labels' column is carried through unscaled. One CSV per ticker is written
# to data/cyclic_tickers/normalized.

with open('cyclic_tickers.txt') as f:
    # Ignore blank lines so an empty symbol never becomes a file name.
    tickers = [line.strip() for line in f if line.strip()]

# to_csv does not create missing directories.
os.makedirs('data/cyclic_tickers/normalized', exist_ok=True)

for ticker in tickers:
    df = pd.read_csv('data/cyclic_tickers/initial_indicators/{}_data_test.csv'.format(ticker))
    df = df.dropna().set_index('Date')

    df_target = df[['labels']]
    df = df.drop(columns=['labels'])

    # The scaler is fit separately on each ticker's full set of rows.
    # NOTE(review): if these rows are later split into train/test sets, the
    # scaling statistics include the test rows - confirm this is intended.
    x = df.values
    min_max_scaler = preprocessing.MinMaxScaler()
    x_scaled = min_max_scaler.fit_transform(x)

    df = pd.DataFrame(x_scaled, columns=df.columns, index=df.index)
    df_normalized = pd.concat([df, df_target], axis=1)
    print(df_normalized.head())

    df_normalized.to_csv('data/cyclic_tickers/normalized/{}_data_test.csv'.format(ticker))

            previous_1d     EMA_7   willr_7     OBV_7  labels
Date                                                         
2020-01-13     0.982578  0.994988  0.413793  0.986813     2.0
2020-01-14     0.982578  0.993136  0.413793  0.986813     2.0
2020-01-15     0.993031  0.994429  0.947367  0.989625     2.0
2020-01-16     0.996516  0.996293  0.833333  0.993438     2.0
2020-01-17     0.998258  0.998138  0.807692  0.996138     0.0
            previous_1d     EMA_7   willr_7     OBV_7  labels
Date                                                         
2020-01-10     0.977246  1.000000  0.337672  0.886684     2.0
2020-01-13     0.954325  0.989647  0.083137  0.854835     2.0
2020-01-14     0.973231  0.987405  0.446219  0.893448     0.0
2020-01-15     1.000000  0.993541  0.629525  0.971285     2.0
2020-01-16     0.965200  0.987980  0.217613  0.920364     2.0
            previous_1d     EMA_7   willr_7     OBV_7  labels
Date                                                         
2020-01-

In [9]:
# normalize all data columns
# Every indicator column of each non-cyclic ticker is min-max scaled; the
# 'labels' column is carried through unscaled. One CSV per ticker is written
# to data/noncyclic_tickers/normalized.

with open('noncyclic_tickers.txt') as f:
    # Ignore blank lines so an empty symbol never becomes a file name.
    tickers = [line.strip() for line in f if line.strip()]

# to_csv does not create missing directories.
os.makedirs('data/noncyclic_tickers/normalized', exist_ok=True)

for ticker in tickers:
    df = pd.read_csv('data/noncyclic_tickers/initial_indicators/{}_data_test.csv'.format(ticker))
    df = df.dropna().set_index('Date')

    df_target = df[['labels']]
    df = df.drop(columns=['labels'])

    # The scaler is fit separately on each ticker's full set of rows.
    # NOTE(review): if these rows are later split into train/test sets, the
    # scaling statistics include the test rows - confirm this is intended.
    x = df.values
    min_max_scaler = preprocessing.MinMaxScaler()
    x_scaled = min_max_scaler.fit_transform(x)

    df = pd.DataFrame(x_scaled, columns=df.columns, index=df.index)
    df_normalized = pd.concat([df, df_target], axis=1)
    print(df_normalized.head())

    df_normalized.to_csv('data/noncyclic_tickers/normalized/{}_data_test.csv'.format(ticker))

            previous_1d     EMA_7   willr_7     OBV_7  labels
Date                                                         
2020-01-13     0.133984  0.069355  0.581275  0.294543     2.0
2020-01-14     0.147520  0.077688  1.000000  0.329875     2.0
2020-01-15     0.128118  0.078632  0.703879  0.297636     2.0
2020-01-16     0.133558  0.080828  0.746396  0.321513     2.0
2020-01-17     0.127658  0.080861  0.522003  0.300239     2.0
            previous_1d     EMA_7   willr_7     OBV_7  labels
Date                                                         
2020-01-10     0.174857  0.055570  0.810196  0.710431     2.0
2020-01-13     0.155379  0.065091  0.645837  0.682504     2.0
2020-01-14     0.178878  0.078904  0.844121  0.707042     2.0
2020-01-15     0.177806  0.088959  0.835075  0.686901     2.0
2020-01-16     0.187366  0.099215  0.911272  0.709031     2.0
            previous_1d     EMA_7   willr_7     OBV_7  labels
Date                                                         
2020-01-

## Drop one indicator data

In [16]:
# drop last 15 days closing price
# For each ticker: remove the 15 lagged closing-price columns
# (previous_1d ... previous_15d), append 15 zero-filled padding columns in
# their place and keep 'labels' as the last column.
# NOTE(review): this cell reads tickers.txt and data/initial_indicators/,
# which the cells shown above it do not create - confirm where they come from.

with open('tickers.txt') as f:
    # Ignore blank lines so an empty symbol never becomes a file name.
    tickers = [line.strip() for line in f if line.strip()]

# to_csv does not create missing directories.
os.makedirs('data/indicator_drop/close_price', exist_ok=True)

lagged_close_columns = ['previous_{}d'.format(day) for day in range(1, 16)]
padding_columns = ['padding_left{}'.format(pos) for pos in range(1, 16)]

for ticker in tickers:
    df = pd.read_csv('data/initial_indicators/{}_data_test.csv'.format(ticker))
    df = df.dropna().set_index('Date')

    df_target = df[['labels']]
    df = df.drop(columns=['labels'] + lagged_close_columns)

    # Zero padding that shares the Date index of the remaining indicators.
    df_padding_left = pd.DataFrame(0.0, index=df.index, columns=padding_columns)

    df_cnn = pd.concat([df, df_padding_left, df_target], axis=1)
    print(df_cnn.head())

    df_cnn.to_csv('data/indicator_drop/close_price/{}_data_test.csv'.format(ticker))

                WMA_6      WMA_7      WMA_8      WMA_9     WMA_10     WMA_11  \
Date                                                                           
2020-03-25  17.738095  17.776071  17.841944  17.935333  18.052363  18.184242   
2020-03-26  18.208095  18.158571  18.141389  18.157555  18.205272  18.280303   
2020-03-27  18.507619  18.421071  18.358888  18.325111  18.321636  18.347727   
2020-03-30  18.779523  18.665714  18.580833  18.515111  18.473272  18.458030   
2020-03-31  18.728571  18.659642  18.584444  18.524666  18.475999  18.444393   

               WMA_12     WMA_13     WMA_14     WMA_15  ...  padding_left7  \
Date                                                    ...                  
2020-03-25  18.315256  18.461758  18.619524  18.783250  ...            0.0   
2020-03-26  18.374359  18.473077  18.589524  18.719583  ...            0.0   
2020-03-27  18.400256  18.472307  18.551333  18.648333  ...            0.0   
2020-03-30  18.469615  18.505934  18.561333  18.6

                WMA_6      WMA_7      WMA_8      WMA_9     WMA_10     WMA_11  \
Date                                                                           
2020-03-25  27.452857  27.490000  27.606111  27.669333  27.795455  27.991818   
2020-03-26  27.791429  27.832143  27.818889  27.878889  27.905818  27.991212   
2020-03-27  28.712857  28.568571  28.513889  28.435111  28.428182  28.404848   
2020-03-30  29.324762  29.104643  28.948889  28.867111  28.770546  28.736818   
2020-03-31  29.739048  29.473571  29.285833  29.143111  29.058546  28.962121   

               WMA_12     WMA_13     WMA_14     WMA_15  ...  padding_left7  \
Date                                                    ...                  
2020-03-25  28.187821  28.427802  28.705810  29.022417  ...            0.0   
2020-03-26  28.142308  28.299561  28.500095  28.738833  ...            0.0   
2020-03-27  28.438718  28.536264  28.646286  28.800083  ...            0.0   
2020-03-30  28.693333  28.701758  28.768762  28.8

                 WMA_6       WMA_7       WMA_8       WMA_9      WMA_10  \
Date                                                                     
2020-03-25  280.233807  281.528569  282.941664  283.508442  284.074725   
2020-03-26  279.934287  281.420358  282.295556  283.349333  283.776000   
2020-03-27  286.306193  286.185716  286.646946  286.824445  287.274910   
2020-03-30  290.040003  288.917145  288.533335  288.667556  288.629092   
2020-03-31  298.895242  296.562503  294.964447  294.052669  293.660729   

                WMA_11      WMA_12      WMA_13      WMA_14      WMA_15  ...  \
Date                                                                    ...   
2020-03-25  284.787422  285.448972  286.285932  287.206856  288.248916  ...   
2020-03-26  284.225606  284.817051  285.381978  286.111809  286.928500  ...   
2020-03-27  287.303334  287.412436  287.691758  287.989714  288.465334  ...   
2020-03-30  288.854091  288.756667  288.746374  288.899524  289.084750  ...   
2020-03

               WMA_6     WMA_7     WMA_8     WMA_9    WMA_10    WMA_11  \
Date                                                                     
2020-03-25  6.962381  6.947857  6.946389  7.002222  7.063636  7.084848   
2020-03-26  6.861428  6.891786  6.888333  6.893111  6.943636  6.999697   
2020-03-27  6.926190  6.933571  6.949167  6.940667  6.939818  6.978030   
2020-03-30  7.339048  7.274643  7.241667  7.223333  7.191454  7.169848   
2020-03-31  7.579048  7.509286  7.440278  7.397333  7.368182  7.329545   

              WMA_12    WMA_13    WMA_14    WMA_15  ...  padding_left7  \
Date                                                ...                  
2020-03-25  7.135000  7.247802  7.355905  7.443500  ...            0.0   
2020-03-26  7.022564  7.070000  7.172095  7.271417  ...            0.0   
2020-03-27  7.022820  7.040769  7.080667  7.169333  ...            0.0   
2020-03-30  7.184487  7.208132  7.211333  7.235583  ...            0.0   
2020-03-31  7.300641  7.303846  7.316

In [17]:
# Write the 'cci' indicator-drop variant of every ticker's feature file:
# the 15 columns at positions 120-134 are removed (presumably the CCI block --
# TODO confirm against the column layout of data/initial_indicators) and 15
# all-zero 'padding_left*' columns are inserted just before the target, so the
# frame keeps its original width with the target column still last.

# DataFrame.to_csv does not create missing folders, so make sure it exists.
os.makedirs('data/indicator_drop/cci', exist_ok=True)

with open('tickers.txt') as f:
    # Ignore blank lines so they are not treated as (empty) ticker symbols.
    tickers = [line.strip() for line in f if line.strip()]

for ticker in tickers:
    df = pd.read_csv('data/initial_indicators/{}_data_test.csv'.format(ticker))
    # Rows containing any missing value are discarded before slicing.
    df = df.dropna().set_index('Date')

    df_first = df.iloc[:, 0:120]     # columns kept from before the removed block
    df_second = df.iloc[:, 135:-1]   # columns kept from after it, target excluded
    df_target = df.iloc[:, -1:]      # last column, kept as a one-column frame

    # Zero-filled stand-ins for the removed columns, aligned on the same dates.
    df_padding_left = pd.DataFrame(
        0.0,
        index=df.index,
        columns=['padding_left{}'.format(i + 1) for i in range(15)],
    )

    df_normalized = pd.concat([df_first, df_second, df_padding_left, df_target], axis=1)
    print(df_normalized.head())
    df_normalized.to_csv('data/indicator_drop/cci/{}_data_test.csv'.format(ticker))

            previous_1d  previous_2d  previous_3d  previous_4d  previous_5d  \
Date                                                                          
2020-03-25    18.209999    16.879999    18.160000    17.400000    18.000000   
2020-03-26    19.420000    18.209999    16.879999    18.160000    17.400000   
2020-03-27    19.059999    19.420000    18.209999    16.879999    18.160000   
2020-03-30    19.139999    19.059999    19.420000    18.209999    16.879999   
2020-03-31    18.299999    19.139999    19.059999    19.420000    18.209999   

            previous_6d  previous_7d  previous_8d  previous_9d  previous_10d  \
Date                                                                           
2020-03-25    18.000000        18.58        19.35    20.200001     21.010000   
2020-03-26    18.000000        18.00        18.58    19.350000     20.200001   
2020-03-27    17.400000        18.00        18.00    18.580000     19.350000   
2020-03-30    18.160000        17.40        18

            previous_1d  previous_2d  previous_3d  previous_4d  previous_5d  \
Date                                                                          
2020-03-25    40.660000    35.389999    38.060001    39.639999    36.430000   
2020-03-26    41.860001    40.660000    35.389999    38.060001    39.639999   
2020-03-27    46.020000    41.860001    40.660000    35.389999    38.060001   
2020-03-30    43.799999    46.020000    41.860001    40.660000    35.389999   
2020-03-31    44.080002    43.799999    46.020000    41.860001    40.660000   

            previous_6d  previous_7d  previous_8d  previous_9d  previous_10d  \
Date                                                                           
2020-03-25    40.250000    41.189999    51.040001    43.259998     50.790001   
2020-03-26    36.430000    40.250000    41.189999    51.040001     43.259998   
2020-03-27    39.639999    36.430000    40.250000    41.189999     51.040001   
2020-03-30    38.060001    39.639999    36.430

            previous_1d  previous_2d  previous_3d  previous_4d  previous_5d  \
Date                                                                          
2020-03-25   276.190002   269.190002   278.769989   288.019989   298.679993   
2020-03-26   284.980011   276.190002   269.190002   278.769989   288.019989   
2020-03-27   304.940002   284.980011   276.190002   269.190002   278.769989   
2020-03-30   296.750000   304.940002   284.980011   276.190002   269.190002   
2020-03-31   316.130005   296.750000   304.940002   284.980011   276.190002   

            previous_6d  previous_7d  previous_8d  previous_9d  previous_10d  \
Date                                                                           
2020-03-25   305.320007   281.720001   305.209991   268.880005    294.250000   
2020-03-26   298.679993   305.320007   281.720001   305.209991    268.880005   
2020-03-27   288.019989   298.679993   305.320007   281.720001    305.209991   
2020-03-30   278.769989   288.019989   298.679

In [18]:
# Write the 'cmo' indicator-drop variant of every ticker's feature file:
# the 15 columns at positions 135-149 are removed (presumably the CMO block --
# TODO confirm against the column layout of data/initial_indicators) and 15
# all-zero 'padding_left*' columns are inserted just before the target, so the
# frame keeps its original width with the target column still last.

# DataFrame.to_csv does not create missing folders, so make sure it exists.
os.makedirs('data/indicator_drop/cmo', exist_ok=True)

with open('tickers.txt') as f:
    # Ignore blank lines so they are not treated as (empty) ticker symbols.
    tickers = [line.strip() for line in f if line.strip()]

for ticker in tickers:
    df = pd.read_csv('data/initial_indicators/{}_data_test.csv'.format(ticker))
    # Rows containing any missing value are discarded before slicing.
    df = df.dropna().set_index('Date')

    df_first = df.iloc[:, 0:135]     # columns kept from before the removed block
    df_second = df.iloc[:, 150:-1]   # columns kept from after it, target excluded
    df_target = df.iloc[:, -1:]      # last column, kept as a one-column frame

    # Zero-filled stand-ins for the removed columns, aligned on the same dates.
    df_padding_left = pd.DataFrame(
        0.0,
        index=df.index,
        columns=['padding_left{}'.format(i + 1) for i in range(15)],
    )

    df_normalized = pd.concat([df_first, df_second, df_padding_left, df_target], axis=1)
    print(df_normalized.head())
    df_normalized.to_csv('data/indicator_drop/cmo/{}_data_test.csv'.format(ticker))

            previous_1d  previous_2d  previous_3d  previous_4d  previous_5d  \
Date                                                                          
2020-03-25    18.209999    16.879999    18.160000    17.400000    18.000000   
2020-03-26    19.420000    18.209999    16.879999    18.160000    17.400000   
2020-03-27    19.059999    19.420000    18.209999    16.879999    18.160000   
2020-03-30    19.139999    19.059999    19.420000    18.209999    16.879999   
2020-03-31    18.299999    19.139999    19.059999    19.420000    18.209999   

            previous_6d  previous_7d  previous_8d  previous_9d  previous_10d  \
Date                                                                           
2020-03-25    18.000000        18.58        19.35    20.200001     21.010000   
2020-03-26    18.000000        18.00        18.58    19.350000     20.200001   
2020-03-27    17.400000        18.00        18.00    18.580000     19.350000   
2020-03-30    18.160000        17.40        18

            previous_1d  previous_2d  previous_3d  previous_4d  previous_5d  \
Date                                                                          
2020-03-25    40.660000    35.389999    38.060001    39.639999    36.430000   
2020-03-26    41.860001    40.660000    35.389999    38.060001    39.639999   
2020-03-27    46.020000    41.860001    40.660000    35.389999    38.060001   
2020-03-30    43.799999    46.020000    41.860001    40.660000    35.389999   
2020-03-31    44.080002    43.799999    46.020000    41.860001    40.660000   

            previous_6d  previous_7d  previous_8d  previous_9d  previous_10d  \
Date                                                                           
2020-03-25    40.250000    41.189999    51.040001    43.259998     50.790001   
2020-03-26    36.430000    40.250000    41.189999    51.040001     43.259998   
2020-03-27    39.639999    36.430000    40.250000    41.189999     51.040001   
2020-03-30    38.060001    39.639999    36.430

            previous_1d  previous_2d  previous_3d  previous_4d  previous_5d  \
Date                                                                          
2020-03-25    85.930000    81.129997    80.349998    83.500000    82.500000   
2020-03-26    84.470001    85.930000    81.129997    80.349998    83.500000   
2020-03-27    86.139999    84.470001    85.930000    81.129997    80.349998   
2020-03-30    84.010002    86.139999    84.470001    85.930000    81.129997   
2020-03-31    87.260002    84.010002    86.139999    84.470001    85.930000   

            previous_6d  previous_7d  previous_8d  previous_9d  previous_10d  \
Date                                                                           
2020-03-25    82.360001    75.559998    80.440002    76.709999     83.769997   
2020-03-26    82.500000    82.360001    75.559998    80.440002     76.709999   
2020-03-27    83.500000    82.500000    82.360001    75.559998     80.440002   
2020-03-30    80.349998    83.500000    82.500

In [19]:
# Write the 'dmi' indicator-drop variant of every ticker's feature file:
# the 15 columns at positions 180-194 are removed (presumably the DMI/DX block,
# judging by the output folder name -- TODO confirm against the column layout of
# data/initial_indicators) and 15 all-zero 'padding_left*' columns are inserted
# just before the target, so the frame keeps its original width.

# DataFrame.to_csv does not create missing folders, so make sure it exists.
os.makedirs('data/indicator_drop/dmi', exist_ok=True)

with open('tickers.txt') as f:
    # Ignore blank lines so they are not treated as (empty) ticker symbols.
    tickers = [line.strip() for line in f if line.strip()]

for ticker in tickers:
    df = pd.read_csv('data/initial_indicators/{}_data_test.csv'.format(ticker))
    # Rows containing any missing value are discarded before slicing.
    df = df.dropna().set_index('Date')

    df_first = df.iloc[:, 0:180]     # columns kept from before the removed block
    df_second = df.iloc[:, 195:-1]   # columns kept from after it, target excluded
    df_target = df.iloc[:, -1:]      # last column, kept as a one-column frame

    # Zero-filled stand-ins for the removed columns, aligned on the same dates.
    df_padding_left = pd.DataFrame(
        0.0,
        index=df.index,
        columns=['padding_left{}'.format(i + 1) for i in range(15)],
    )

    df_normalized = pd.concat([df_first, df_second, df_padding_left, df_target], axis=1)
    print(df_normalized.head())
    df_normalized.to_csv('data/indicator_drop/dmi/{}_data_test.csv'.format(ticker))

            previous_1d  previous_2d  previous_3d  previous_4d  previous_5d  \
Date                                                                          
2020-03-25    18.209999    16.879999    18.160000    17.400000    18.000000   
2020-03-26    19.420000    18.209999    16.879999    18.160000    17.400000   
2020-03-27    19.059999    19.420000    18.209999    16.879999    18.160000   
2020-03-30    19.139999    19.059999    19.420000    18.209999    16.879999   
2020-03-31    18.299999    19.139999    19.059999    19.420000    18.209999   

            previous_6d  previous_7d  previous_8d  previous_9d  previous_10d  \
Date                                                                           
2020-03-25    18.000000        18.58        19.35    20.200001     21.010000   
2020-03-26    18.000000        18.00        18.58    19.350000     20.200001   
2020-03-27    17.400000        18.00        18.00    18.580000     19.350000   
2020-03-30    18.160000        17.40        18

            previous_1d  previous_2d  previous_3d  previous_4d  previous_5d  \
Date                                                                          
2020-03-25    40.660000    35.389999    38.060001    39.639999    36.430000   
2020-03-26    41.860001    40.660000    35.389999    38.060001    39.639999   
2020-03-27    46.020000    41.860001    40.660000    35.389999    38.060001   
2020-03-30    43.799999    46.020000    41.860001    40.660000    35.389999   
2020-03-31    44.080002    43.799999    46.020000    41.860001    40.660000   

            previous_6d  previous_7d  previous_8d  previous_9d  previous_10d  \
Date                                                                           
2020-03-25    40.250000    41.189999    51.040001    43.259998     50.790001   
2020-03-26    36.430000    40.250000    41.189999    51.040001     43.259998   
2020-03-27    39.639999    36.430000    40.250000    41.189999     51.040001   
2020-03-30    38.060001    39.639999    36.430

            previous_1d  previous_2d  previous_3d  previous_4d  previous_5d  \
Date                                                                          
2020-03-25    85.930000    81.129997    80.349998    83.500000    82.500000   
2020-03-26    84.470001    85.930000    81.129997    80.349998    83.500000   
2020-03-27    86.139999    84.470001    85.930000    81.129997    80.349998   
2020-03-30    84.010002    86.139999    84.470001    85.930000    81.129997   
2020-03-31    87.260002    84.010002    86.139999    84.470001    85.930000   

            previous_6d  previous_7d  previous_8d  previous_9d  previous_10d  \
Date                                                                           
2020-03-25    82.360001    75.559998    80.440002    76.709999     83.769997   
2020-03-26    82.500000    82.360001    75.559998    80.440002     76.709999   
2020-03-27    83.500000    82.500000    82.360001    75.559998     80.440002   
2020-03-30    80.349998    83.500000    82.500

In [20]:
# Write the 'ema' indicator-drop variant of every ticker's feature file:
# the 15 columns at positions 30-44 are removed (presumably the EMA block --
# TODO confirm against the column layout of data/initial_indicators) and 15
# all-zero 'padding_left*' columns are inserted just before the target, so the
# frame keeps its original width with the target column still last.

# DataFrame.to_csv does not create missing folders, so make sure it exists.
os.makedirs('data/indicator_drop/ema', exist_ok=True)

with open('tickers.txt') as f:
    # Ignore blank lines so they are not treated as (empty) ticker symbols.
    tickers = [line.strip() for line in f if line.strip()]

for ticker in tickers:
    df = pd.read_csv('data/initial_indicators/{}_data_test.csv'.format(ticker))
    # Rows containing any missing value are discarded before slicing.
    df = df.dropna().set_index('Date')

    df_first = df.iloc[:, 0:30]      # columns kept from before the removed block
    df_second = df.iloc[:, 45:-1]    # columns kept from after it, target excluded
    df_target = df.iloc[:, -1:]      # last column, kept as a one-column frame

    # Zero-filled stand-ins for the removed columns, aligned on the same dates.
    df_padding_left = pd.DataFrame(
        0.0,
        index=df.index,
        columns=['padding_left{}'.format(i + 1) for i in range(15)],
    )

    df_normalized = pd.concat([df_first, df_second, df_padding_left, df_target], axis=1)
    print(df_normalized.head())
    df_normalized.to_csv('data/indicator_drop/ema/{}_data_test.csv'.format(ticker))

            previous_1d  previous_2d  previous_3d  previous_4d  previous_5d  \
Date                                                                          
2020-03-25    18.209999    16.879999    18.160000    17.400000    18.000000   
2020-03-26    19.420000    18.209999    16.879999    18.160000    17.400000   
2020-03-27    19.059999    19.420000    18.209999    16.879999    18.160000   
2020-03-30    19.139999    19.059999    19.420000    18.209999    16.879999   
2020-03-31    18.299999    19.139999    19.059999    19.420000    18.209999   

            previous_6d  previous_7d  previous_8d  previous_9d  previous_10d  \
Date                                                                           
2020-03-25    18.000000        18.58        19.35    20.200001     21.010000   
2020-03-26    18.000000        18.00        18.58    19.350000     20.200001   
2020-03-27    17.400000        18.00        18.00    18.580000     19.350000   
2020-03-30    18.160000        17.40        18

            previous_1d  previous_2d  previous_3d  previous_4d  previous_5d  \
Date                                                                          
2020-03-25    28.920000    25.250000    26.500000    28.290001    28.120001   
2020-03-26    28.969999    28.920000    25.250000    26.500000    28.290001   
2020-03-27    30.900000    28.969999    28.920000    25.250000    26.500000   
2020-03-30    30.280001    30.900000    28.969999    28.920000    25.250000   
2020-03-31    29.920000    30.280001    30.900000    28.969999    28.920000   

            previous_6d  previous_7d  previous_8d  previous_9d  previous_10d  \
Date                                                                           
2020-03-25    29.629999    26.500000    30.889999    27.200001     32.330002   
2020-03-26    28.120001    29.629999    26.500000    30.889999     27.200001   
2020-03-27    28.290001    28.120001    29.629999    26.500000     30.889999   
2020-03-30    26.500000    28.290001    28.120

            previous_1d  previous_2d  previous_3d  previous_4d  previous_5d  \
Date                                                                          
2020-03-25   276.190002   269.190002   278.769989   288.019989   298.679993   
2020-03-26   284.980011   276.190002   269.190002   278.769989   288.019989   
2020-03-27   304.940002   284.980011   276.190002   269.190002   278.769989   
2020-03-30   296.750000   304.940002   284.980011   276.190002   269.190002   
2020-03-31   316.130005   296.750000   304.940002   284.980011   276.190002   

            previous_6d  previous_7d  previous_8d  previous_9d  previous_10d  \
Date                                                                           
2020-03-25   305.320007   281.720001   305.209991   268.880005    294.250000   
2020-03-26   298.679993   305.320007   281.720001   305.209991    268.880005   
2020-03-27   288.019989   298.679993   305.320007   281.720001    305.209991   
2020-03-30   278.769989   288.019989   298.679

            previous_1d  previous_2d  previous_3d  previous_4d  previous_5d  \
Date                                                                          
2020-03-25         7.09         6.62         7.22         6.49         7.44   
2020-03-26         6.68         7.09         6.62         7.22         6.49   
2020-03-27         7.15         6.68         7.09         6.62         7.22   
2020-03-30         8.32         7.15         6.68         7.09         6.62   
2020-03-31         8.02         8.32         7.15         6.68         7.09   

            previous_6d  previous_7d  previous_8d  previous_9d  previous_10d  \
Date                                                                           
2020-03-25         7.34         6.13         7.20         9.50          8.37   
2020-03-26         7.44         7.34         6.13         7.20          9.50   
2020-03-27         6.49         7.44         7.34         6.13          7.20   
2020-03-30         7.22         6.49         7

In [21]:
# Write the 'kama' indicator-drop variant of every ticker's feature file:
# the 15 columns at positions 75-89 are removed (presumably the KAMA block --
# TODO confirm against the column layout of data/initial_indicators) and 15
# all-zero 'padding_left*' columns are inserted just before the target, so the
# frame keeps its original width with the target column still last.

# DataFrame.to_csv does not create missing folders, so make sure it exists.
os.makedirs('data/indicator_drop/kama', exist_ok=True)

with open('tickers.txt') as f:
    # Ignore blank lines so they are not treated as (empty) ticker symbols.
    tickers = [line.strip() for line in f if line.strip()]

for ticker in tickers:
    df = pd.read_csv('data/initial_indicators/{}_data_test.csv'.format(ticker))
    # Rows containing any missing value are discarded before slicing.
    df = df.dropna().set_index('Date')

    df_first = df.iloc[:, 0:75]      # columns kept from before the removed block
    df_second = df.iloc[:, 90:-1]    # columns kept from after it, target excluded
    df_target = df.iloc[:, -1:]      # last column, kept as a one-column frame

    # Zero-filled stand-ins for the removed columns, aligned on the same dates.
    df_padding_left = pd.DataFrame(
        0.0,
        index=df.index,
        columns=['padding_left{}'.format(i + 1) for i in range(15)],
    )

    df_normalized = pd.concat([df_first, df_second, df_padding_left, df_target], axis=1)
    print(df_normalized.head())
    df_normalized.to_csv('data/indicator_drop/kama/{}_data_test.csv'.format(ticker))

            previous_1d  previous_2d  previous_3d  previous_4d  previous_5d  \
Date                                                                          
2020-03-25    18.209999    16.879999    18.160000    17.400000    18.000000   
2020-03-26    19.420000    18.209999    16.879999    18.160000    17.400000   
2020-03-27    19.059999    19.420000    18.209999    16.879999    18.160000   
2020-03-30    19.139999    19.059999    19.420000    18.209999    16.879999   
2020-03-31    18.299999    19.139999    19.059999    19.420000    18.209999   

            previous_6d  previous_7d  previous_8d  previous_9d  previous_10d  \
Date                                                                           
2020-03-25    18.000000        18.58        19.35    20.200001     21.010000   
2020-03-26    18.000000        18.00        18.58    19.350000     20.200001   
2020-03-27    17.400000        18.00        18.00    18.580000     19.350000   
2020-03-30    18.160000        17.40        18

            previous_1d  previous_2d  previous_3d  previous_4d  previous_5d  \
Date                                                                          
2020-03-25    40.660000    35.389999    38.060001    39.639999    36.430000   
2020-03-26    41.860001    40.660000    35.389999    38.060001    39.639999   
2020-03-27    46.020000    41.860001    40.660000    35.389999    38.060001   
2020-03-30    43.799999    46.020000    41.860001    40.660000    35.389999   
2020-03-31    44.080002    43.799999    46.020000    41.860001    40.660000   

            previous_6d  previous_7d  previous_8d  previous_9d  previous_10d  \
Date                                                                           
2020-03-25    40.250000    41.189999    51.040001    43.259998     50.790001   
2020-03-26    36.430000    40.250000    41.189999    51.040001     43.259998   
2020-03-27    39.639999    36.430000    40.250000    41.189999     51.040001   
2020-03-30    38.060001    39.639999    36.430

            previous_1d  previous_2d  previous_3d  previous_4d  previous_5d  \
Date                                                                          
2020-03-25    85.930000    81.129997    80.349998    83.500000    82.500000   
2020-03-26    84.470001    85.930000    81.129997    80.349998    83.500000   
2020-03-27    86.139999    84.470001    85.930000    81.129997    80.349998   
2020-03-30    84.010002    86.139999    84.470001    85.930000    81.129997   
2020-03-31    87.260002    84.010002    86.139999    84.470001    85.930000   

            previous_6d  previous_7d  previous_8d  previous_9d  previous_10d  \
Date                                                                           
2020-03-25    82.360001    75.559998    80.440002    76.709999     83.769997   
2020-03-26    82.500000    82.360001    75.559998    80.440002     76.709999   
2020-03-27    83.500000    82.500000    82.360001    75.559998     80.440002   
2020-03-30    80.349998    83.500000    82.500

In [23]:
# Write the 'Stochastic' indicator-drop variant of every ticker's feature file:
# every column from position 195 up to (not including) the target is removed
# and 30 all-zero 'padding_left*' columns are appended before the target.
# NOTE(review): the frame keeps its original width only if exactly 30 columns
# sit between position 195 and the target (presumably the Stochastic block) --
# confirm against the column layout of data/initial_indicators.

# DataFrame.to_csv does not create missing folders, so make sure it exists.
os.makedirs('data/indicator_drop/Stochastic', exist_ok=True)

with open('tickers.txt') as f:
    # Ignore blank lines so they are not treated as (empty) ticker symbols.
    tickers = [line.strip() for line in f if line.strip()]

for ticker in tickers:
    df = pd.read_csv('data/initial_indicators/{}_data_test.csv'.format(ticker))
    # Rows containing any missing value are discarded before slicing.
    df = df.dropna().set_index('Date')

    df_first = df.iloc[:, 0:195]     # columns kept; everything after is cut
    df_target = df.iloc[:, -1:]      # last column, kept as a one-column frame

    # Zero-filled stand-ins for the removed columns, aligned on the same dates.
    df_padding_left = pd.DataFrame(
        0.0,
        index=df.index,
        columns=['padding_left{}'.format(i + 1) for i in range(30)],
    )

    df_normalized = pd.concat([df_first, df_padding_left, df_target], axis=1)
    print(df_normalized.head())
    df_normalized.to_csv('data/indicator_drop/Stochastic/{}_data_test.csv'.format(ticker))

            previous_1d  previous_2d  previous_3d  previous_4d  previous_5d  \
Date                                                                          
2020-03-25    18.209999    16.879999    18.160000    17.400000    18.000000   
2020-03-26    19.420000    18.209999    16.879999    18.160000    17.400000   
2020-03-27    19.059999    19.420000    18.209999    16.879999    18.160000   
2020-03-30    19.139999    19.059999    19.420000    18.209999    16.879999   
2020-03-31    18.299999    19.139999    19.059999    19.420000    18.209999   

            previous_6d  previous_7d  previous_8d  previous_9d  previous_10d  \
Date                                                                           
2020-03-25    18.000000        18.58        19.35    20.200001     21.010000   
2020-03-26    18.000000        18.00        18.58    19.350000     20.200001   
2020-03-27    17.400000        18.00        18.00    18.580000     19.350000   
2020-03-30    18.160000        17.40        18

            previous_1d  previous_2d  previous_3d  previous_4d  previous_5d  \
Date                                                                          
2020-03-25    21.030001    18.080000    19.670000    21.200001    20.790001   
2020-03-26    21.100000    21.030001    18.080000    19.670000    21.200001   
2020-03-27    22.719999    21.100000    21.030001    18.080000    19.670000   
2020-03-30    21.600000    22.719999    21.100000    21.030001    18.080000   
2020-03-31    22.040001    21.600000    22.719999    21.100000    21.030001   

            previous_6d  previous_7d  previous_8d  previous_9d  previous_10d  \
Date                                                                           
2020-03-25    21.980000    20.440001    24.160000    20.510000     22.670000   
2020-03-26    20.790001    21.980000    20.440001    24.160000     20.510000   
2020-03-27    21.200001    20.790001    21.980000    20.440001     24.160000   
2020-03-30    19.670000    21.200001    20.790

            previous_1d  previous_2d  previous_3d  previous_4d  previous_5d  \
Date                                                                          
2020-03-25        0.199        0.199        0.199        0.199        0.199   
2020-03-26        0.199        0.199        0.199        0.199        0.199   
2020-03-27        0.199        0.199        0.199        0.199        0.199   
2020-03-30        0.199        0.199        0.199        0.199        0.199   
2020-03-31        0.199        0.199        0.199        0.199        0.199   

            previous_6d  previous_7d  previous_8d  previous_9d  previous_10d  \
Date                                                                           
2020-03-25        0.199        0.199        0.199        0.199         0.199   
2020-03-26        0.199        0.199        0.199        0.199         0.199   
2020-03-27        0.199        0.199        0.199        0.199         0.199   
2020-03-30        0.199        0.199        0.

            previous_1d  previous_2d  previous_3d  previous_4d  previous_5d  \
Date                                                                          
2020-03-25    85.930000    81.129997    80.349998    83.500000    82.500000   
2020-03-26    84.470001    85.930000    81.129997    80.349998    83.500000   
2020-03-27    86.139999    84.470001    85.930000    81.129997    80.349998   
2020-03-30    84.010002    86.139999    84.470001    85.930000    81.129997   
2020-03-31    87.260002    84.010002    86.139999    84.470001    85.930000   

            previous_6d  previous_7d  previous_8d  previous_9d  previous_10d  \
Date                                                                           
2020-03-25    82.360001    75.559998    80.440002    76.709999     83.769997   
2020-03-26    82.500000    82.360001    75.559998    80.440002     76.709999   
2020-03-27    83.500000    82.500000    82.360001    75.559998     80.440002   
2020-03-30    80.349998    83.500000    82.500

In [24]:
# Write the 'mom' indicator-drop variant of every ticker's feature file:
# the 15 columns at positions 165-179 are removed (presumably the MOM block --
# TODO confirm against the column layout of data/initial_indicators) and 15
# all-zero 'padding_left*' columns are inserted just before the target, so the
# frame keeps its original width with the target column still last.

# DataFrame.to_csv does not create missing folders, so make sure it exists.
os.makedirs('data/indicator_drop/mom', exist_ok=True)

with open('tickers.txt') as f:
    # Ignore blank lines so they are not treated as (empty) ticker symbols.
    tickers = [line.strip() for line in f if line.strip()]

for ticker in tickers:
    df = pd.read_csv('data/initial_indicators/{}_data_test.csv'.format(ticker))
    # Rows containing any missing value are discarded before slicing.
    df = df.dropna().set_index('Date')

    df_first = df.iloc[:, 0:165]     # columns kept from before the removed block
    df_second = df.iloc[:, 180:-1]   # columns kept from after it, target excluded
    df_target = df.iloc[:, -1:]      # last column, kept as a one-column frame

    # Zero-filled stand-ins for the removed columns, aligned on the same dates.
    df_padding_left = pd.DataFrame(
        0.0,
        index=df.index,
        columns=['padding_left{}'.format(i + 1) for i in range(15)],
    )

    df_normalized = pd.concat([df_first, df_second, df_padding_left, df_target], axis=1)
    print(df_normalized.head())
    df_normalized.to_csv('data/indicator_drop/mom/{}_data_test.csv'.format(ticker))

            previous_1d  previous_2d  previous_3d  previous_4d  previous_5d  \
Date                                                                          
2020-03-25    18.209999    16.879999    18.160000    17.400000    18.000000   
2020-03-26    19.420000    18.209999    16.879999    18.160000    17.400000   
2020-03-27    19.059999    19.420000    18.209999    16.879999    18.160000   
2020-03-30    19.139999    19.059999    19.420000    18.209999    16.879999   
2020-03-31    18.299999    19.139999    19.059999    19.420000    18.209999   

            previous_6d  previous_7d  previous_8d  previous_9d  previous_10d  \
Date                                                                           
2020-03-25    18.000000        18.58        19.35    20.200001     21.010000   
2020-03-26    18.000000        18.00        18.58    19.350000     20.200001   
2020-03-27    17.400000        18.00        18.00    18.580000     19.350000   
2020-03-30    18.160000        17.40        18

            previous_1d  previous_2d  previous_3d  previous_4d  previous_5d  \
Date                                                                          
2020-03-25    40.660000    35.389999    38.060001    39.639999    36.430000   
2020-03-26    41.860001    40.660000    35.389999    38.060001    39.639999   
2020-03-27    46.020000    41.860001    40.660000    35.389999    38.060001   
2020-03-30    43.799999    46.020000    41.860001    40.660000    35.389999   
2020-03-31    44.080002    43.799999    46.020000    41.860001    40.660000   

            previous_6d  previous_7d  previous_8d  previous_9d  previous_10d  \
Date                                                                           
2020-03-25    40.250000    41.189999    51.040001    43.259998     50.790001   
2020-03-26    36.430000    40.250000    41.189999    51.040001     43.259998   
2020-03-27    39.639999    36.430000    40.250000    41.189999     51.040001   
2020-03-30    38.060001    39.639999    36.430

            previous_1d  previous_2d  previous_3d  previous_4d  previous_5d  \
Date                                                                          
2020-03-25   276.190002   269.190002   278.769989   288.019989   298.679993   
2020-03-26   284.980011   276.190002   269.190002   278.769989   288.019989   
2020-03-27   304.940002   284.980011   276.190002   269.190002   278.769989   
2020-03-30   296.750000   304.940002   284.980011   276.190002   269.190002   
2020-03-31   316.130005   296.750000   304.940002   284.980011   276.190002   

            previous_6d  previous_7d  previous_8d  previous_9d  previous_10d  \
Date                                                                           
2020-03-25   305.320007   281.720001   305.209991   268.880005    294.250000   
2020-03-26   298.679993   305.320007   281.720001   305.209991    268.880005   
2020-03-27   288.019989   298.679993   305.320007   281.720001    305.209991   
2020-03-30   278.769989   288.019989   298.679

            previous_1d  previous_2d  previous_3d  previous_4d  previous_5d  \
Date                                                                          
2020-03-25         7.09         6.62         7.22         6.49         7.44   
2020-03-26         6.68         7.09         6.62         7.22         6.49   
2020-03-27         7.15         6.68         7.09         6.62         7.22   
2020-03-30         8.32         7.15         6.68         7.09         6.62   
2020-03-31         8.02         8.32         7.15         6.68         7.09   

            previous_6d  previous_7d  previous_8d  previous_9d  previous_10d  \
Date                                                                           
2020-03-25         7.34         6.13         7.20         9.50          8.37   
2020-03-26         7.44         7.34         6.13         7.20          9.50   
2020-03-27         6.49         7.44         7.34         6.13          7.20   
2020-03-30         7.22         6.49         7

In [25]:
# drop last 15 days ROC
#
# For every ticker listed in tickers.txt: load its indicator CSV, discard rows
# containing missing values, remove the 15-column block at positions 150-164
# (presumably the ROC features -- the column layout is fixed where the CSV is
# generated; verify there), append 15 all-zero "padding_left" columns in its
# place at the end of the features, keep the last column (the target) last,
# and write the result to data/indicator_drop/roc/.

drop_start = 150   # positional index (after 'Date' becomes the index) of the first dropped column
n_dropped = 15     # width of the dropped block == number of zero padding columns added

out_dir = 'data/indicator_drop/roc'
# DataFrame.to_csv does not create missing directories, so make sure the target exists.
os.makedirs(out_dir, exist_ok=True)

with open('tickers.txt') as f:
    # Ignore blank lines (e.g. a trailing newline) so no empty ticker is processed.
    tickers = [line.strip() for line in f if line.strip()]

for ticker in tickers:
    df = pd.read_csv('data/initial_indicators/{}_data_test.csv'.format(ticker))
    df = df.dropna().set_index('Date')

    df_first = df.iloc[:, :drop_start]
    df_second = df.iloc[:, drop_start + n_dropped:-1]
    df_target = df.iloc[:, -1:]

    # Zero-filled float columns sharing the Date index, built in a single step.
    df_padding_left = pd.DataFrame(
        0.0,
        index=df.index,
        columns=['padding_left{}'.format(i + 1) for i in range(n_dropped)],
    )

    df_normalized = pd.concat([df_first, df_second, df_padding_left, df_target], axis=1)
    print(df_normalized.head())

    df_normalized.to_csv('{}/{}_data_test.csv'.format(out_dir, ticker))

            previous_1d  previous_2d  previous_3d  previous_4d  previous_5d  \
Date                                                                          
2020-03-25    18.209999    16.879999    18.160000    17.400000    18.000000   
2020-03-26    19.420000    18.209999    16.879999    18.160000    17.400000   
2020-03-27    19.059999    19.420000    18.209999    16.879999    18.160000   
2020-03-30    19.139999    19.059999    19.420000    18.209999    16.879999   
2020-03-31    18.299999    19.139999    19.059999    19.420000    18.209999   

            previous_6d  previous_7d  previous_8d  previous_9d  previous_10d  \
Date                                                                           
2020-03-25    18.000000        18.58        19.35    20.200001     21.010000   
2020-03-26    18.000000        18.00        18.58    19.350000     20.200001   
2020-03-27    17.400000        18.00        18.00    18.580000     19.350000   
2020-03-30    18.160000        17.40        18

            previous_1d  previous_2d  previous_3d  previous_4d  previous_5d  \
Date                                                                          
2020-03-25    40.660000    35.389999    38.060001    39.639999    36.430000   
2020-03-26    41.860001    40.660000    35.389999    38.060001    39.639999   
2020-03-27    46.020000    41.860001    40.660000    35.389999    38.060001   
2020-03-30    43.799999    46.020000    41.860001    40.660000    35.389999   
2020-03-31    44.080002    43.799999    46.020000    41.860001    40.660000   

            previous_6d  previous_7d  previous_8d  previous_9d  previous_10d  \
Date                                                                           
2020-03-25    40.250000    41.189999    51.040001    43.259998     50.790001   
2020-03-26    36.430000    40.250000    41.189999    51.040001     43.259998   
2020-03-27    39.639999    36.430000    40.250000    41.189999     51.040001   
2020-03-30    38.060001    39.639999    36.430

            previous_1d  previous_2d  previous_3d  previous_4d  previous_5d  \
Date                                                                          
2020-03-25    85.930000    81.129997    80.349998    83.500000    82.500000   
2020-03-26    84.470001    85.930000    81.129997    80.349998    83.500000   
2020-03-27    86.139999    84.470001    85.930000    81.129997    80.349998   
2020-03-30    84.010002    86.139999    84.470001    85.930000    81.129997   
2020-03-31    87.260002    84.010002    86.139999    84.470001    85.930000   

            previous_6d  previous_7d  previous_8d  previous_9d  previous_10d  \
Date                                                                           
2020-03-25    82.360001    75.559998    80.440002    76.709999     83.769997   
2020-03-26    82.500000    82.360001    75.559998    80.440002     76.709999   
2020-03-27    83.500000    82.500000    82.360001    75.559998     80.440002   
2020-03-30    80.349998    83.500000    82.500

In [26]:
# drop last 15 days RSI
#
# For every ticker listed in tickers.txt: load its indicator CSV, discard rows
# containing missing values, remove the 15-column block at positions 90-104
# (presumably the RSI features -- the column layout is fixed where the CSV is
# generated; verify there), append 15 all-zero "padding_left" columns in its
# place at the end of the features, keep the last column (the target) last,
# and write the result to data/indicator_drop/rsi/.

drop_start = 90    # positional index (after 'Date' becomes the index) of the first dropped column
n_dropped = 15     # width of the dropped block == number of zero padding columns added

out_dir = 'data/indicator_drop/rsi'
# DataFrame.to_csv does not create missing directories, so make sure the target exists.
os.makedirs(out_dir, exist_ok=True)

with open('tickers.txt') as f:
    # Ignore blank lines (e.g. a trailing newline) so no empty ticker is processed.
    tickers = [line.strip() for line in f if line.strip()]

for ticker in tickers:
    df = pd.read_csv('data/initial_indicators/{}_data_test.csv'.format(ticker))
    df = df.dropna().set_index('Date')

    df_first = df.iloc[:, :drop_start]
    df_second = df.iloc[:, drop_start + n_dropped:-1]
    df_target = df.iloc[:, -1:]

    # Zero-filled float columns sharing the Date index, built in a single step.
    df_padding_left = pd.DataFrame(
        0.0,
        index=df.index,
        columns=['padding_left{}'.format(i + 1) for i in range(n_dropped)],
    )

    df_normalized = pd.concat([df_first, df_second, df_padding_left, df_target], axis=1)
    print(df_normalized.head())

    df_normalized.to_csv('{}/{}_data_test.csv'.format(out_dir, ticker))

            previous_1d  previous_2d  previous_3d  previous_4d  previous_5d  \
Date                                                                          
2020-03-25    18.209999    16.879999    18.160000    17.400000    18.000000   
2020-03-26    19.420000    18.209999    16.879999    18.160000    17.400000   
2020-03-27    19.059999    19.420000    18.209999    16.879999    18.160000   
2020-03-30    19.139999    19.059999    19.420000    18.209999    16.879999   
2020-03-31    18.299999    19.139999    19.059999    19.420000    18.209999   

            previous_6d  previous_7d  previous_8d  previous_9d  previous_10d  \
Date                                                                           
2020-03-25    18.000000        18.58        19.35    20.200001     21.010000   
2020-03-26    18.000000        18.00        18.58    19.350000     20.200001   
2020-03-27    17.400000        18.00        18.00    18.580000     19.350000   
2020-03-30    18.160000        17.40        18

            previous_1d  previous_2d  previous_3d  previous_4d  previous_5d  \
Date                                                                          
2020-03-25    40.660000    35.389999    38.060001    39.639999    36.430000   
2020-03-26    41.860001    40.660000    35.389999    38.060001    39.639999   
2020-03-27    46.020000    41.860001    40.660000    35.389999    38.060001   
2020-03-30    43.799999    46.020000    41.860001    40.660000    35.389999   
2020-03-31    44.080002    43.799999    46.020000    41.860001    40.660000   

            previous_6d  previous_7d  previous_8d  previous_9d  previous_10d  \
Date                                                                           
2020-03-25    40.250000    41.189999    51.040001    43.259998     50.790001   
2020-03-26    36.430000    40.250000    41.189999    51.040001     43.259998   
2020-03-27    39.639999    36.430000    40.250000    41.189999     51.040001   
2020-03-30    38.060001    39.639999    36.430

            previous_1d  previous_2d  previous_3d  previous_4d  previous_5d  \
Date                                                                          
2020-03-25    85.930000    81.129997    80.349998    83.500000    82.500000   
2020-03-26    84.470001    85.930000    81.129997    80.349998    83.500000   
2020-03-27    86.139999    84.470001    85.930000    81.129997    80.349998   
2020-03-30    84.010002    86.139999    84.470001    85.930000    81.129997   
2020-03-31    87.260002    84.010002    86.139999    84.470001    85.930000   

            previous_6d  previous_7d  previous_8d  previous_9d  previous_10d  \
Date                                                                           
2020-03-25    82.360001    75.559998    80.440002    76.709999     83.769997   
2020-03-26    82.500000    82.360001    75.559998    80.440002     76.709999   
2020-03-27    83.500000    82.500000    82.360001    75.559998     80.440002   
2020-03-30    80.349998    83.500000    82.500

In [27]:
# drop last 15 days SMA
#
# For every ticker listed in tickers.txt: load its indicator CSV, discard rows
# containing missing values, remove the 15-column block at positions 45-59
# (presumably the SMA features -- the column layout is fixed where the CSV is
# generated; verify there), append 15 all-zero "padding_left" columns in its
# place at the end of the features, keep the last column (the target) last,
# and write the result to data/indicator_drop/sma/.

drop_start = 45    # positional index (after 'Date' becomes the index) of the first dropped column
n_dropped = 15     # width of the dropped block == number of zero padding columns added

out_dir = 'data/indicator_drop/sma'
# DataFrame.to_csv does not create missing directories, so make sure the target exists.
os.makedirs(out_dir, exist_ok=True)

with open('tickers.txt') as f:
    # Ignore blank lines (e.g. a trailing newline) so no empty ticker is processed.
    tickers = [line.strip() for line in f if line.strip()]

for ticker in tickers:
    df = pd.read_csv('data/initial_indicators/{}_data_test.csv'.format(ticker))
    df = df.dropna().set_index('Date')

    df_first = df.iloc[:, :drop_start]
    df_second = df.iloc[:, drop_start + n_dropped:-1]
    df_target = df.iloc[:, -1:]

    # Zero-filled float columns sharing the Date index, built in a single step.
    df_padding_left = pd.DataFrame(
        0.0,
        index=df.index,
        columns=['padding_left{}'.format(i + 1) for i in range(n_dropped)],
    )

    df_normalized = pd.concat([df_first, df_second, df_padding_left, df_target], axis=1)
    print(df_normalized.head())

    df_normalized.to_csv('{}/{}_data_test.csv'.format(out_dir, ticker))

            previous_1d  previous_2d  previous_3d  previous_4d  previous_5d  \
Date                                                                          
2020-03-25    18.209999    16.879999    18.160000    17.400000    18.000000   
2020-03-26    19.420000    18.209999    16.879999    18.160000    17.400000   
2020-03-27    19.059999    19.420000    18.209999    16.879999    18.160000   
2020-03-30    19.139999    19.059999    19.420000    18.209999    16.879999   
2020-03-31    18.299999    19.139999    19.059999    19.420000    18.209999   

            previous_6d  previous_7d  previous_8d  previous_9d  previous_10d  \
Date                                                                           
2020-03-25    18.000000        18.58        19.35    20.200001     21.010000   
2020-03-26    18.000000        18.00        18.58    19.350000     20.200001   
2020-03-27    17.400000        18.00        18.00    18.580000     19.350000   
2020-03-30    18.160000        17.40        18

            previous_1d  previous_2d  previous_3d  previous_4d  previous_5d  \
Date                                                                          
2020-03-25    40.660000    35.389999    38.060001    39.639999    36.430000   
2020-03-26    41.860001    40.660000    35.389999    38.060001    39.639999   
2020-03-27    46.020000    41.860001    40.660000    35.389999    38.060001   
2020-03-30    43.799999    46.020000    41.860001    40.660000    35.389999   
2020-03-31    44.080002    43.799999    46.020000    41.860001    40.660000   

            previous_6d  previous_7d  previous_8d  previous_9d  previous_10d  \
Date                                                                           
2020-03-25    40.250000    41.189999    51.040001    43.259998     50.790001   
2020-03-26    36.430000    40.250000    41.189999    51.040001     43.259998   
2020-03-27    39.639999    36.430000    40.250000    41.189999     51.040001   
2020-03-30    38.060001    39.639999    36.430

            previous_1d  previous_2d  previous_3d  previous_4d  previous_5d  \
Date                                                                          
2020-03-25        0.189        0.185        0.199        0.199        0.205   
2020-03-26        0.190        0.189        0.185        0.199        0.199   
2020-03-27        0.197        0.190        0.189        0.185        0.199   
2020-03-30        0.200        0.197        0.190        0.189        0.185   
2020-03-31        0.196        0.200        0.197        0.190        0.189   

            previous_6d  previous_7d  previous_8d  previous_9d  previous_10d  \
Date                                                                           
2020-03-25        0.205        0.205        0.245        0.270         0.265   
2020-03-26        0.205        0.205        0.205        0.245         0.270   
2020-03-27        0.199        0.205        0.205        0.205         0.245   
2020-03-30        0.199        0.199        0.

            previous_1d  previous_2d  previous_3d  previous_4d  previous_5d  \
Date                                                                          
2020-03-25        12.77        10.76        10.35        11.58        10.65   
2020-03-26        11.75        12.77        10.76        10.35        11.58   
2020-03-27        12.74        11.75        12.77        10.76        10.35   
2020-03-30        13.05        12.74        11.75        12.77        10.76   
2020-03-31        14.30        13.05        12.74        11.75        12.77   

            previous_6d  previous_7d  previous_8d  previous_9d  previous_10d  \
Date                                                                           
2020-03-25         9.50         6.91         8.41         9.29         10.51   
2020-03-26        10.65         9.50         6.91         8.41          9.29   
2020-03-27        11.58        10.65         9.50         6.91          8.41   
2020-03-30        10.35        11.58        10

In [28]:
# drop last 15 days TEMA
#
# For every ticker listed in tickers.txt: load its indicator CSV, discard rows
# containing missing values, remove the 15-column block at positions 60-74
# (presumably the TEMA features -- the column layout is fixed where the CSV is
# generated; verify there), append 15 all-zero "padding_left" columns in its
# place at the end of the features, keep the last column (the target) last,
# and write the result to data/indicator_drop/tema/.

drop_start = 60    # positional index (after 'Date' becomes the index) of the first dropped column
n_dropped = 15     # width of the dropped block == number of zero padding columns added

out_dir = 'data/indicator_drop/tema'
# DataFrame.to_csv does not create missing directories, so make sure the target exists.
os.makedirs(out_dir, exist_ok=True)

with open('tickers.txt') as f:
    # Ignore blank lines (e.g. a trailing newline) so no empty ticker is processed.
    tickers = [line.strip() for line in f if line.strip()]

for ticker in tickers:
    df = pd.read_csv('data/initial_indicators/{}_data_test.csv'.format(ticker))
    df = df.dropna().set_index('Date')

    df_first = df.iloc[:, :drop_start]
    df_second = df.iloc[:, drop_start + n_dropped:-1]
    df_target = df.iloc[:, -1:]

    # Zero-filled float columns sharing the Date index, built in a single step.
    df_padding_left = pd.DataFrame(
        0.0,
        index=df.index,
        columns=['padding_left{}'.format(i + 1) for i in range(n_dropped)],
    )

    df_normalized = pd.concat([df_first, df_second, df_padding_left, df_target], axis=1)
    print(df_normalized.head())

    df_normalized.to_csv('{}/{}_data_test.csv'.format(out_dir, ticker))

            previous_1d  previous_2d  previous_3d  previous_4d  previous_5d  \
Date                                                                          
2020-03-25    18.209999    16.879999    18.160000    17.400000    18.000000   
2020-03-26    19.420000    18.209999    16.879999    18.160000    17.400000   
2020-03-27    19.059999    19.420000    18.209999    16.879999    18.160000   
2020-03-30    19.139999    19.059999    19.420000    18.209999    16.879999   
2020-03-31    18.299999    19.139999    19.059999    19.420000    18.209999   

            previous_6d  previous_7d  previous_8d  previous_9d  previous_10d  \
Date                                                                           
2020-03-25    18.000000        18.58        19.35    20.200001     21.010000   
2020-03-26    18.000000        18.00        18.58    19.350000     20.200001   
2020-03-27    17.400000        18.00        18.00    18.580000     19.350000   
2020-03-30    18.160000        17.40        18

            previous_1d  previous_2d  previous_3d  previous_4d  previous_5d  \
Date                                                                          
2020-03-25    40.660000    35.389999    38.060001    39.639999    36.430000   
2020-03-26    41.860001    40.660000    35.389999    38.060001    39.639999   
2020-03-27    46.020000    41.860001    40.660000    35.389999    38.060001   
2020-03-30    43.799999    46.020000    41.860001    40.660000    35.389999   
2020-03-31    44.080002    43.799999    46.020000    41.860001    40.660000   

            previous_6d  previous_7d  previous_8d  previous_9d  previous_10d  \
Date                                                                           
2020-03-25    40.250000    41.189999    51.040001    43.259998     50.790001   
2020-03-26    36.430000    40.250000    41.189999    51.040001     43.259998   
2020-03-27    39.639999    36.430000    40.250000    41.189999     51.040001   
2020-03-30    38.060001    39.639999    36.430

            previous_1d  previous_2d  previous_3d  previous_4d  previous_5d  \
Date                                                                          
2020-03-25    85.930000    81.129997    80.349998    83.500000    82.500000   
2020-03-26    84.470001    85.930000    81.129997    80.349998    83.500000   
2020-03-27    86.139999    84.470001    85.930000    81.129997    80.349998   
2020-03-30    84.010002    86.139999    84.470001    85.930000    81.129997   
2020-03-31    87.260002    84.010002    86.139999    84.470001    85.930000   

            previous_6d  previous_7d  previous_8d  previous_9d  previous_10d  \
Date                                                                           
2020-03-25    82.360001    75.559998    80.440002    76.709999     83.769997   
2020-03-26    82.500000    82.360001    75.559998    80.440002     76.709999   
2020-03-27    83.500000    82.500000    82.360001    75.559998     80.440002   
2020-03-30    80.349998    83.500000    82.500

In [29]:
# drop last 15 days WILLR
#
# For every ticker listed in tickers.txt: load its indicator CSV, discard rows
# containing missing values, remove the 15-column block at positions 105-119
# (presumably the WILLR features -- the column layout is fixed where the CSV is
# generated; verify there), append 15 all-zero "padding_left" columns in its
# place at the end of the features, keep the last column (the target) last,
# and write the result to data/indicator_drop/willr/.

drop_start = 105   # positional index (after 'Date' becomes the index) of the first dropped column
n_dropped = 15     # width of the dropped block == number of zero padding columns added

out_dir = 'data/indicator_drop/willr'
# DataFrame.to_csv does not create missing directories, so make sure the target exists.
os.makedirs(out_dir, exist_ok=True)

with open('tickers.txt') as f:
    # Ignore blank lines (e.g. a trailing newline) so no empty ticker is processed.
    tickers = [line.strip() for line in f if line.strip()]

for ticker in tickers:
    df = pd.read_csv('data/initial_indicators/{}_data_test.csv'.format(ticker))
    df = df.dropna().set_index('Date')

    df_first = df.iloc[:, :drop_start]
    df_second = df.iloc[:, drop_start + n_dropped:-1]
    df_target = df.iloc[:, -1:]

    # Zero-filled float columns sharing the Date index, built in a single step.
    df_padding_left = pd.DataFrame(
        0.0,
        index=df.index,
        columns=['padding_left{}'.format(i + 1) for i in range(n_dropped)],
    )

    df_normalized = pd.concat([df_first, df_second, df_padding_left, df_target], axis=1)
    print(df_normalized.head())

    df_normalized.to_csv('{}/{}_data_test.csv'.format(out_dir, ticker))

            previous_1d  previous_2d  previous_3d  previous_4d  previous_5d  \
Date                                                                          
2020-03-25    18.209999    16.879999    18.160000    17.400000    18.000000   
2020-03-26    19.420000    18.209999    16.879999    18.160000    17.400000   
2020-03-27    19.059999    19.420000    18.209999    16.879999    18.160000   
2020-03-30    19.139999    19.059999    19.420000    18.209999    16.879999   
2020-03-31    18.299999    19.139999    19.059999    19.420000    18.209999   

            previous_6d  previous_7d  previous_8d  previous_9d  previous_10d  \
Date                                                                           
2020-03-25    18.000000        18.58        19.35    20.200001     21.010000   
2020-03-26    18.000000        18.00        18.58    19.350000     20.200001   
2020-03-27    17.400000        18.00        18.00    18.580000     19.350000   
2020-03-30    18.160000        17.40        18

            previous_1d  previous_2d  previous_3d  previous_4d  previous_5d  \
Date                                                                          
2020-03-25    40.660000    35.389999    38.060001    39.639999    36.430000   
2020-03-26    41.860001    40.660000    35.389999    38.060001    39.639999   
2020-03-27    46.020000    41.860001    40.660000    35.389999    38.060001   
2020-03-30    43.799999    46.020000    41.860001    40.660000    35.389999   
2020-03-31    44.080002    43.799999    46.020000    41.860001    40.660000   

            previous_6d  previous_7d  previous_8d  previous_9d  previous_10d  \
Date                                                                           
2020-03-25    40.250000    41.189999    51.040001    43.259998     50.790001   
2020-03-26    36.430000    40.250000    41.189999    51.040001     43.259998   
2020-03-27    39.639999    36.430000    40.250000    41.189999     51.040001   
2020-03-30    38.060001    39.639999    36.430

            previous_1d  previous_2d  previous_3d  previous_4d  previous_5d  \
Date                                                                          
2020-03-25        0.189        0.185        0.199        0.199        0.205   
2020-03-26        0.190        0.189        0.185        0.199        0.199   
2020-03-27        0.197        0.190        0.189        0.185        0.199   
2020-03-30        0.200        0.197        0.190        0.189        0.185   
2020-03-31        0.196        0.200        0.197        0.190        0.189   

            previous_6d  previous_7d  previous_8d  previous_9d  previous_10d  \
Date                                                                           
2020-03-25        0.205        0.205        0.245        0.270         0.265   
2020-03-26        0.205        0.205        0.205        0.245         0.270   
2020-03-27        0.199        0.205        0.205        0.205         0.245   
2020-03-30        0.199        0.199        0.

            previous_1d  previous_2d  previous_3d  previous_4d  previous_5d  \
Date                                                                          
2020-03-25         7.09         6.62         7.22         6.49         7.44   
2020-03-26         6.68         7.09         6.62         7.22         6.49   
2020-03-27         7.15         6.68         7.09         6.62         7.22   
2020-03-30         8.32         7.15         6.68         7.09         6.62   
2020-03-31         8.02         8.32         7.15         6.68         7.09   

            previous_6d  previous_7d  previous_8d  previous_9d  previous_10d  \
Date                                                                           
2020-03-25         7.34         6.13         7.20         9.50          8.37   
2020-03-26         7.44         7.34         6.13         7.20          9.50   
2020-03-27         6.49         7.44         7.34         6.13          7.20   
2020-03-30         7.22         6.49         7

In [30]:
# drop last 15 days WMA
#
# For every ticker listed in tickers.txt: load its indicator CSV, discard rows
# containing missing values, remove the 15-column block at positions 15-29
# (presumably the WMA features -- the column layout is fixed where the CSV is
# generated; verify there), append 15 all-zero "padding_left" columns in its
# place at the end of the features, keep the last column (the target) last,
# and write the result to data/indicator_drop/wma/.

drop_start = 15    # positional index (after 'Date' becomes the index) of the first dropped column
n_dropped = 15     # width of the dropped block == number of zero padding columns added

out_dir = 'data/indicator_drop/wma'
# DataFrame.to_csv does not create missing directories, so make sure the target exists.
os.makedirs(out_dir, exist_ok=True)

with open('tickers.txt') as f:
    # Ignore blank lines (e.g. a trailing newline) so no empty ticker is processed.
    tickers = [line.strip() for line in f if line.strip()]

for ticker in tickers:
    df = pd.read_csv('data/initial_indicators/{}_data_test.csv'.format(ticker))
    df = df.dropna().set_index('Date')

    df_first = df.iloc[:, :drop_start]
    df_second = df.iloc[:, drop_start + n_dropped:-1]
    df_target = df.iloc[:, -1:]

    # Zero-filled float columns sharing the Date index, built in a single step.
    df_padding_left = pd.DataFrame(
        0.0,
        index=df.index,
        columns=['padding_left{}'.format(i + 1) for i in range(n_dropped)],
    )

    df_normalized = pd.concat([df_first, df_second, df_padding_left, df_target], axis=1)
    print(df_normalized.head())

    df_normalized.to_csv('{}/{}_data_test.csv'.format(out_dir, ticker))

            previous_1d  previous_2d  previous_3d  previous_4d  previous_5d  \
Date                                                                          
2020-03-25    18.209999    16.879999    18.160000    17.400000    18.000000   
2020-03-26    19.420000    18.209999    16.879999    18.160000    17.400000   
2020-03-27    19.059999    19.420000    18.209999    16.879999    18.160000   
2020-03-30    19.139999    19.059999    19.420000    18.209999    16.879999   
2020-03-31    18.299999    19.139999    19.059999    19.420000    18.209999   

            previous_6d  previous_7d  previous_8d  previous_9d  previous_10d  \
Date                                                                           
2020-03-25    18.000000        18.58        19.35    20.200001     21.010000   
2020-03-26    18.000000        18.00        18.58    19.350000     20.200001   
2020-03-27    17.400000        18.00        18.00    18.580000     19.350000   
2020-03-30    18.160000        17.40        18

            previous_1d  previous_2d  previous_3d  previous_4d  previous_5d  \
Date                                                                          
2020-03-25    40.660000    35.389999    38.060001    39.639999    36.430000   
2020-03-26    41.860001    40.660000    35.389999    38.060001    39.639999   
2020-03-27    46.020000    41.860001    40.660000    35.389999    38.060001   
2020-03-30    43.799999    46.020000    41.860001    40.660000    35.389999   
2020-03-31    44.080002    43.799999    46.020000    41.860001    40.660000   

            previous_6d  previous_7d  previous_8d  previous_9d  previous_10d  \
Date                                                                           
2020-03-25    40.250000    41.189999    51.040001    43.259998     50.790001   
2020-03-26    36.430000    40.250000    41.189999    51.040001     43.259998   
2020-03-27    39.639999    36.430000    40.250000    41.189999     51.040001   
2020-03-30    38.060001    39.639999    36.430

            previous_1d  previous_2d  previous_3d  previous_4d  previous_5d  \
Date                                                                          
2020-03-25   276.190002   269.190002   278.769989   288.019989   298.679993   
2020-03-26   284.980011   276.190002   269.190002   278.769989   288.019989   
2020-03-27   304.940002   284.980011   276.190002   269.190002   278.769989   
2020-03-30   296.750000   304.940002   284.980011   276.190002   269.190002   
2020-03-31   316.130005   296.750000   304.940002   284.980011   276.190002   

            previous_6d  previous_7d  previous_8d  previous_9d  previous_10d  \
Date                                                                           
2020-03-25   305.320007   281.720001   305.209991   268.880005    294.250000   
2020-03-26   298.679993   305.320007   281.720001   305.209991    268.880005   
2020-03-27   288.019989   298.679993   305.320007   281.720001    305.209991   
2020-03-30   278.769989   288.019989   298.679

            previous_1d  previous_2d  previous_3d  previous_4d  previous_5d  \
Date                                                                          
2020-03-25         7.09         6.62         7.22         6.49         7.44   
2020-03-26         6.68         7.09         6.62         7.22         6.49   
2020-03-27         7.15         6.68         7.09         6.62         7.22   
2020-03-30         8.32         7.15         6.68         7.09         6.62   
2020-03-31         8.02         8.32         7.15         6.68         7.09   

            previous_6d  previous_7d  previous_8d  previous_9d  previous_10d  \
Date                                                                           
2020-03-25         7.34         6.13         7.20         9.50          8.37   
2020-03-26         7.44         7.34         6.13         7.20          9.50   
2020-03-27         6.49         7.44         7.34         6.13          7.20   
2020-03-30         7.22         6.49         7

## joining sentiment data to indicator data

In [22]:
import pandas_datareader.data as web
import pandas as pd
import numpy as np
import requests
import lxml
import datetime as dt
import os

In [29]:
# Load the pre-computed indicator table for ticker C, keyed by its 'Date' column.
df = pd.read_csv('data/initial_indicators/C_data.csv').set_index('Date')
df.head()

Unnamed: 0_level_0,Close,previous_1d,previous_2d,previous_3d,previous_4d,previous_5d,previous_6d,previous_7d,previous_8d,previous_9d,...,slowd_16,slowk_17,slowd_17,slowk_18,slowd_18,slowk_19,slowd_19,slowk_20,slowd_20,labels
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2000-03-23,462.65625,440.625,435.46875,421.40625,427.5,420.9375,391.875,373.125,374.53125,367.5,...,60.995122,62.01525,58.105881,59.690402,54.95012,56.463216,51.705643,53.240875,48.840942,2.0
2000-03-24,454.21875,462.65625,440.625,435.46875,421.40625,427.5,420.9375,391.875,373.125,374.53125,...,64.148001,64.734516,61.766893,62.323149,59.033825,60.242711,56.10764,57.293077,53.120286,2.0
2000-03-27,450.46875,454.21875,462.65625,440.625,435.46875,421.40625,427.5,420.9375,391.875,373.125,...,66.452626,67.264608,64.671458,64.826349,62.279967,62.556955,59.754294,60.777305,57.103752,2.0
2000-03-28,457.5,450.46875,454.21875,462.65625,440.625,435.46875,421.40625,427.5,420.9375,391.875,...,68.239428,68.496556,66.831893,67.624594,64.924697,65.265789,62.688485,63.29635,60.455577,2.0
2000-03-29,455.625,457.5,450.46875,454.21875,462.65625,440.625,435.46875,421.40625,427.5,420.9375,...,70.218893,70.43931,68.733492,69.262212,67.237718,67.941864,65.254869,65.784272,63.285975,2.0


In [30]:
# Load the headline sentiment scores for ticker C and turn the publication
# date strings into real timestamps (the column keeps its position).
df_sentiments = pd.read_csv('sentiments/C_sentiments.csv').assign(
    **{'Date Published': lambda frame: pd.to_datetime(frame['Date Published'])}
)
df_sentiments.head()

Unnamed: 0.1,Unnamed: 0,Company,Date Published,headline,neg,neu,pos,compound
0,0,C,2007-12-04,"Citigroup to slash 17,000 jobs, cut $4.6 bln c...",0.375,0.625,0.0,-0.4939
1,1,C,2007-02-07,Citigroup buys Automated Trading Desk,0.0,1.0,0.0,0.0
2,2,C,2007-12-10,Citigroup shakes up management,0.0,1.0,0.0,0.0
3,3,C,2007-05-11,Citigroup may face $11 billion writeoff,0.0,1.0,0.0,0.0
4,4,C,2007-05-11,TIMELINE: Citigroup CEO resigns amid turmoil,0.545,0.455,0.0,-0.5859


In [31]:
# Index the scores by publication date and keep only the headline and its
# compound score.
# `columns=` replaces the positional axis argument (`drop(labels, 1)`), which
# pandas deprecated in 1.3 and rejects from 2.0 onwards.
df_sentiments = (
    df_sentiments
    .set_index('Date Published')
    .drop(columns=['Unnamed: 0', 'Company', 'pos', 'neg', 'neu'])
)
df_sentiments.head()

Unnamed: 0_level_0,headline,compound
Date Published,Unnamed: 1_level_1,Unnamed: 2_level_1
2007-12-04,"Citigroup to slash 17,000 jobs, cut $4.6 bln c...",-0.4939
2007-02-07,Citigroup buys Automated Trading Desk,0.0
2007-12-10,Citigroup shakes up management,0.0
2007-05-11,Citigroup may face $11 billion writeoff,0.0
2007-05-11,TIMELINE: Citigroup CEO resigns amid turmoil,-0.5859


In [32]:
# Left join on the index: every indicator row is kept, and rows whose date has
# no headline get NaN in the sentiment columns.
# NOTE(review): a date with several headlines yields several copies of the
# same indicator row - confirm this duplication is intended.
df = df.merge(df_sentiments, how='left', left_index=True, right_index=True)
df.head()

Unnamed: 0,Close,previous_1d,previous_2d,previous_3d,previous_4d,previous_5d,previous_6d,previous_7d,previous_8d,previous_9d,...,slowd_17,slowk_18,slowd_18,slowk_19,slowd_19,slowk_20,slowd_20,labels,headline,compound
2000-03-23,462.65625,440.625,435.46875,421.40625,427.5,420.9375,391.875,373.125,374.53125,367.5,...,58.105881,59.690402,54.95012,56.463216,51.705643,53.240875,48.840942,2.0,,
2000-03-24,454.21875,462.65625,440.625,435.46875,421.40625,427.5,420.9375,391.875,373.125,374.53125,...,61.766893,62.323149,59.033825,60.242711,56.10764,57.293077,53.120286,2.0,,
2000-03-27,450.46875,454.21875,462.65625,440.625,435.46875,421.40625,427.5,420.9375,391.875,373.125,...,64.671458,64.826349,62.279967,62.556955,59.754294,60.777305,57.103752,2.0,,
2000-03-28,457.5,450.46875,454.21875,462.65625,440.625,435.46875,421.40625,427.5,420.9375,391.875,...,66.831893,67.624594,64.924697,65.265789,62.688485,63.29635,60.455577,2.0,,
2000-03-29,455.625,457.5,450.46875,454.21875,462.65625,440.625,435.46875,421.40625,427.5,420.9375,...,68.733492,69.262212,67.237718,67.941864,65.254869,65.784272,63.285975,2.0,,


In [33]:
# Drop the raw headline text and replace every remaining NaN with 0
# (fillna applies to all columns, not only to 'compound').
# `columns=` replaces the positional axis argument, which pandas 2.0 rejects.
df = df.drop(columns=['headline']).fillna(0)
df.head()

Unnamed: 0,Close,previous_1d,previous_2d,previous_3d,previous_4d,previous_5d,previous_6d,previous_7d,previous_8d,previous_9d,...,slowk_17,slowd_17,slowk_18,slowd_18,slowk_19,slowd_19,slowk_20,slowd_20,labels,compound
2000-03-23,462.65625,440.625,435.46875,421.40625,427.5,420.9375,391.875,373.125,374.53125,367.5,...,62.01525,58.105881,59.690402,54.95012,56.463216,51.705643,53.240875,48.840942,2.0,0.0
2000-03-24,454.21875,462.65625,440.625,435.46875,421.40625,427.5,420.9375,391.875,373.125,374.53125,...,64.734516,61.766893,62.323149,59.033825,60.242711,56.10764,57.293077,53.120286,2.0,0.0
2000-03-27,450.46875,454.21875,462.65625,440.625,435.46875,421.40625,427.5,420.9375,391.875,373.125,...,67.264608,64.671458,64.826349,62.279967,62.556955,59.754294,60.777305,57.103752,2.0,0.0
2000-03-28,457.5,450.46875,454.21875,462.65625,440.625,435.46875,421.40625,427.5,420.9375,391.875,...,68.496556,66.831893,67.624594,64.924697,65.265789,62.688485,63.29635,60.455577,2.0,0.0
2000-03-29,455.625,457.5,450.46875,454.21875,462.65625,440.625,435.46875,421.40625,427.5,420.9375,...,70.43931,68.733492,69.262212,67.237718,67.941864,65.254869,65.784272,63.285975,2.0,0.0


In [51]:
# Load the Singapore index news sentiment scores, index them by publication
# date and keep only the headline and its compound score.
df_sentiments = pd.read_csv('sentiments/Singapore Index_news_sentiments.csv')
df_sentiments['Date Published'] = pd.to_datetime(df_sentiments['Date Published'])
# `columns=` replaces the positional axis argument, which pandas 2.0 rejects.
df_sentiments = (
    df_sentiments
    .set_index('Date Published')
    .drop(columns=['Unnamed: 0', 'Unnamed: 0.1', 'Company', 'pos', 'neg', 'neu'])
)
df_sentiments.head()

Unnamed: 0_level_0,headline,compound
Date Published,Unnamed: 1_level_1,Unnamed: 2_level_1
2007-01-31,SPH tweaks Singapore's benchmark stocks index,0.0
2007-06-09,UPDATE 3-Index glitch disrupts Singapore stock...,0.0
2008-10-01,New Singapore stock index hitch frustrates tra...,-0.4404
2008-10-01,New Singapore stock index hitch frustrates tra...,-0.4404
2008-09-16,Singapore STI falls 3 pct after Lehman collapse,-0.9531


In [52]:
# Left-join the index sentiment onto the indicator rows. When `df` already
# carries a 'compound' column, pandas suffixes the pair as compound_x (left)
# and compound_y (right).
df = pd.merge(left=df, left_index=True, right=df_sentiments, right_index=True, how='left')
# `columns=` replaces the positional axis argument, which pandas 2.0 rejects.
df = df.drop(columns=['headline'])
df.head()

Unnamed: 0,Close,previous_1d,previous_2d,previous_3d,previous_4d,previous_5d,previous_6d,previous_7d,previous_8d,previous_9d,...,slowd_17,slowk_18,slowd_18,slowk_19,slowd_19,slowk_20,slowd_20,labels,compound_x,compound_y
2000-03-22,22.200001,21.9,21.0,21.200001,20.0,20.0,20.6,20.5,21.0,20.700001,...,51.257006,51.997024,49.405892,50.236223,47.405372,48.038522,45.200275,2.0,,
2000-03-23,22.200001,22.200001,21.9,21.0,21.200001,20.0,20.0,20.6,20.5,21.0,...,53.782919,54.483615,52.108135,52.407232,50.000949,50.771512,48.199902,2.0,,
2000-03-24,21.5,22.200001,22.200001,21.9,21.0,21.200001,20.0,20.0,20.6,20.5,...,55.487752,55.184765,53.888468,53.492532,52.045329,51.627074,50.145703,2.0,,
2000-03-27,22.799999,21.5,22.200001,22.200001,21.9,21.0,21.200001,20.0,20.0,20.6,...,57.46156,57.783331,55.817237,56.164858,54.021541,54.67333,52.357305,2.0,,
2000-03-28,22.9,22.799999,21.5,22.200001,22.200001,21.9,21.0,21.200001,20.0,20.0,...,60.13305,61.211228,58.059774,58.796437,56.151276,57.373331,54.557912,2.0,,


In [53]:
# Combined score: the first-merged compound score plus the second-merged one
# at half weight. NaN in either input propagates to 'sentiment'.
df['sentiment'] = df['compound_x'].add(df['compound_y'].mul(0.5))

In [54]:
# Keep only the combined 'sentiment' column and discard every row that still
# contains a NaN in any column.
# `columns=` replaces the positional axis argument, which pandas 2.0 rejects.
df = df.drop(columns=['compound_x', 'compound_y']).dropna()
df.head()

Unnamed: 0,Close,previous_1d,previous_2d,previous_3d,previous_4d,previous_5d,previous_6d,previous_7d,previous_8d,previous_9d,...,slowk_17,slowd_17,slowk_18,slowd_18,slowk_19,slowd_19,slowk_20,slowd_20,labels,sentiment
2012-03-16,14.31,14.37,14.46,14.33,14.25,14.25,14.3,13.88,14.0,14.19,...,79.255594,80.613418,80.849047,82.245279,82.484192,83.701458,83.966121,84.909956,2.0,0.0
2012-05-24,13.22,13.13,13.25,13.19,13.27,13.54,13.58,13.77,13.68,13.87,...,43.643216,48.417608,46.74027,50.221002,49.035348,51.063948,49.483105,51.238894,2.0,-0.6908
2012-05-24,13.22,13.13,13.25,13.19,13.27,13.54,13.58,13.77,13.68,13.87,...,43.643216,48.417608,46.74027,50.221002,49.035348,51.063948,49.483105,51.238894,2.0,-0.8388
2012-08-23,14.65,14.58,14.72,14.73,14.66,14.62,14.98,14.88,14.7,14.75,...,73.20788,76.334369,75.447192,77.978173,77.358684,79.477703,78.395996,80.351599,2.0,0.0
2012-12-10,14.8,14.79,14.61,14.57,14.46,14.38,14.45,14.25,14.2,14.17,...,66.156785,61.69513,62.588248,60.248974,61.283173,59.910476,61.528596,59.194054,2.0,0.5509


In [34]:
# Persist the current `df` together with its index.
# NOTE(review): this cell's execution count (34) is lower than that of the
# cells above it, so confirm which state of `df` was actually saved.
df.to_csv('data/sentiments/C_sentiments.csv', index=True)

## Company sentiments

In [34]:
# Load the tweet sentiment scores for D05.SI, index them by date and keep only
# the compound score under the name 'company_sentiment_score'.
# The path has no '{}' placeholder, so the former `.format(ticker)` call did
# nothing except raise NameError on a fresh kernel where `ticker` is undefined.
df_sentiments = pd.read_csv('tweets_sentiments/D05.SI_sentiments_scores.csv')
df_sentiments['date'] = pd.to_datetime(df_sentiments['date'])
# `columns=` replaces the positional axis argument, which pandas 2.0 rejects.
df_sentiments = (
    df_sentiments
    .set_index('date')
    .drop(columns=['pos', 'neg', 'neu'])
    .rename(columns={'compound': 'company_sentiment_score'})
)
df_sentiments.head()

Unnamed: 0_level_0,company_sentiment_score
date,Unnamed: 1_level_1
2015-01-09,0.9822
2015-01-14,0.0
2015-01-29,0.9186
2015-02-02,0.9186
2015-02-03,0.0


In [39]:
import pandas_datareader.data as web
import pandas as pd
import numpy as np
import requests
import lxml
import datetime as dt
import os

# For every ticker in sg_tickers_sentiment.txt, attach the company's tweet
# sentiment score to its indicator rows and save the result.
with open('sg_tickers_sentiment.txt') as f:
    # Skip blank lines so an empty ticker never reaches the file paths below.
    tickers = [line.strip() for line in f if line.strip()]

for ticker in tickers:
    print(ticker)
    # Parse 'Date' as datetimes so both sides of the index join share a dtype
    # instead of relying on pandas coercing strings to timestamps implicitly.
    df = pd.read_csv('data/initial_indicators/{}_data_test.csv'.format(ticker),
                     parse_dates=['Date'])
    df = df.dropna().set_index('Date')

    df_sentiments = pd.read_csv('tweets_sentiments/{}_sentiments_scores_test.csv'.format(ticker))
    df_sentiments['date'] = pd.to_datetime(df_sentiments['date'])
    # `columns=` replaces the positional axis argument, which pandas 2.0 rejects.
    df_sentiments = (
        df_sentiments
        .set_index('date')
        .drop(columns=['pos', 'neg', 'neu'])
        .rename(columns={'compound': 'company_sentiment_score'})
    )

    # Left join keeps every indicator row; dates without a score come back as NaN.
    df = pd.merge(left=df, left_index=True, right=df_sentiments, right_index=True, how='left')

    # Replace every NaN (in any column) with 0, i.e. a neutral score for
    # dates that had no sentiment entry.
    df.fillna(0, inplace=True)
    print(df.head())

    df.to_csv('data/sentiments/{}_sentiments_test.csv'.format(ticker), index_label='Date')

D05.SI
            previous_1d  previous_2d  previous_3d  previous_4d  previous_5d  \
Date                                                                          
2020-03-25    18.209999    16.879999    18.160000    17.400000    18.000000   
2020-03-26    19.420000    18.209999    16.879999    18.160000    17.400000   
2020-03-27    19.059999    19.420000    18.209999    16.879999    18.160000   
2020-03-30    19.139999    19.059999    19.420000    18.209999    16.879999   
2020-03-31    18.299999    19.139999    19.059999    19.420000    18.209999   

            previous_6d  previous_7d  previous_8d  previous_9d  previous_10d  \
Date                                                                           
2020-03-25    18.000000        18.58        19.35    20.200001     21.010000   
2020-03-26    18.000000        18.00        18.58    19.350000     20.200001   
2020-03-27    17.400000        18.00        18.00    18.580000     19.350000   
2020-03-30    18.160000        17.40   

In [41]:
import pandas_datareader.data as web
import pandas as pd
import numpy as np
import requests
import lxml
import datetime as dt
import os

# For every ticker, rearrange the sentiment-augmented feature table into
# zero-padded column groups and save it as '<ticker>_sentiments_test_padded.csv'.
with open('sg_tickers_sentiment.txt') as f:
    # Skip blank lines so an empty ticker never reaches the file paths below.
    tickers = [line.strip() for line in f if line.strip()]

for ticker in tickers:
    print(ticker)
    df = pd.read_csv('data/sentiments/{}_sentiments_test.csv'.format(ticker))
    df = df.dropna()

    # Captured before 'Date' becomes the index, so these Series share the
    # row index used to build the padding frames below.
    df_dates = df['Date']
    df_sentiments = df['company_sentiment_score']
    df_labels = df['labels']
    df.set_index('Date', inplace=True)

    # `columns=` replaces the positional axis argument, which pandas 2.0 rejects.
    df.drop(columns=['company_sentiment_score', 'labels'], inplace=True)

    df_padding_left = pd.DataFrame()
    df_padding_right = pd.DataFrame()
    df_padding_left['Date'] = df_dates
    df_padding_right['Date'] = df_dates
    # Seven zero columns per side; the sentiment score follows the left
    # padding and the labels follow the right padding.
    for i in range(1, 8):
        df_padding_left['padding_left{}'.format(i)] = np.zeros(len(df_padding_left))
        df_padding_right['padding_right{}'.format(i)] = np.zeros(len(df_padding_right))

    df_padding_left['company_sentiment_score'] = df_sentiments
    df_padding_right['labels'] = df_labels
    df_padding_left.set_index('Date', inplace=True)
    df_padding_right.set_index('Date', inplace=True)

    df_cnn = pd.concat([df, df_padding_left, df_padding_right], axis=1)

    # Cut the first 240 columns into 16 groups of 15 and append one zero column
    # to each group (16 x 16 columns in total).
    # NOTE(review): presumably the feature table has 225 columns so the groups
    # form a square CNN input - confirm against the indicator files.
    # Each slice is copied so the new column is not written to a view of df_cnn.
    padded_groups = []
    for i in range(16):
        group = df_cnn.iloc[:, i * 15:(i + 1) * 15].copy()
        group['padding_{}'.format(i)] = np.zeros(len(group))
        padded_groups.append(group)
    df_temp = pd.concat(padded_groups, axis=1)

    df_temp['labels'] = df_cnn['labels']

    print(df_temp.head())

    df_temp.to_csv('data/sentiments/{}_sentiments_test_padded.csv'.format(ticker), index_label='Date')

D05.SI
            previous_1d  previous_2d  previous_3d  previous_4d  previous_5d  \
Date                                                                          
2020-03-25    18.209999    16.879999    18.160000    17.400000    18.000000   
2020-03-26    19.420000    18.209999    16.879999    18.160000    17.400000   
2020-03-27    19.059999    19.420000    18.209999    16.879999    18.160000   
2020-03-30    19.139999    19.059999    19.420000    18.209999    16.879999   
2020-03-31    18.299999    19.139999    19.059999    19.420000    18.209999   

            previous_6d  previous_7d  previous_8d  previous_9d  previous_10d  \
Date                                                                           
2020-03-25    18.000000        18.58        19.35    20.200001     21.010000   
2020-03-26    18.000000        18.00        18.58    19.350000     20.200001   
2020-03-27    17.400000        18.00        18.00    18.580000     19.350000   
2020-03-30    18.160000        17.40   

## Singapore exchange sentiments

In [42]:
import pandas_datareader.data as web
import pandas as pd
import numpy as np
import requests
import lxml
import datetime as dt
import os

# For every ticker in sg_tickers_sentiment.txt, attach the Singapore market
# sentiment score to its indicator rows and save the result.
with open('sg_tickers_sentiment.txt') as f:
    # Skip blank lines so an empty ticker never reaches the file paths below.
    tickers = [line.strip() for line in f if line.strip()]

# The market sentiment file is the same for every ticker, so load it once
# instead of re-reading it on each loop iteration.
df_sentiments = pd.read_csv('tweets_sentiments/SG_sentiments_test_sentiments_scores_test.csv')
df_sentiments['date'] = pd.to_datetime(df_sentiments['date'])
# `columns=` replaces the positional axis argument, which pandas 2.0 rejects.
# The score keeps the name 'company_sentiment_score' even though it is a
# market-level score.
df_sentiments = (
    df_sentiments
    .set_index('date')
    .drop(columns=['pos', 'neg', 'neu'])
    .rename(columns={'compound': 'company_sentiment_score'})
)

for ticker in tickers:
    print(ticker)
    # Parse 'Date' as datetimes so both sides of the index join share a dtype
    # instead of relying on pandas coercing strings to timestamps implicitly.
    df = pd.read_csv('data/initial_indicators/{}_data_test.csv'.format(ticker),
                     parse_dates=['Date'])
    df = df.dropna().set_index('Date')

    # Left join keeps every indicator row; dates without a score come back as NaN.
    df = pd.merge(left=df, left_index=True, right=df_sentiments, right_index=True, how='left')

    # Replace every NaN (in any column) with 0.
    df.fillna(0, inplace=True)
    print(df.head())

    df.to_csv('data/market_sentiments/{}_sentiments_test.csv'.format(ticker), index_label='Date')

D05.SI
            previous_1d  previous_2d  previous_3d  previous_4d  previous_5d  \
Date                                                                          
2020-03-25    18.209999    16.879999    18.160000    17.400000    18.000000   
2020-03-26    19.420000    18.209999    16.879999    18.160000    17.400000   
2020-03-27    19.059999    19.420000    18.209999    16.879999    18.160000   
2020-03-30    19.139999    19.059999    19.420000    18.209999    16.879999   
2020-03-31    18.299999    19.139999    19.059999    19.420000    18.209999   

            previous_6d  previous_7d  previous_8d  previous_9d  previous_10d  \
Date                                                                           
2020-03-25    18.000000        18.58        19.35    20.200001     21.010000   
2020-03-26    18.000000        18.00        18.58    19.350000     20.200001   
2020-03-27    17.400000        18.00        18.00    18.580000     19.350000   
2020-03-30    18.160000        17.40   

In [43]:
import pandas_datareader.data as web
import pandas as pd
import numpy as np
import requests
import lxml
import datetime as dt
import os

# For every ticker, rearrange the market-sentiment feature table into
# zero-padded column groups and save it as '<ticker>_sentiments_test_padded.csv'.
with open('sg_tickers_sentiment.txt') as f:
    # Skip blank lines so an empty ticker never reaches the file paths below.
    tickers = [line.strip() for line in f if line.strip()]

for ticker in tickers:
    print(ticker)
    df = pd.read_csv('data/market_sentiments/{}_sentiments_test.csv'.format(ticker))
    df = df.dropna()

    # Captured before 'Date' becomes the index, so these Series share the
    # row index used to build the padding frames below.
    df_dates = df['Date']
    df_sentiments = df['company_sentiment_score']
    df_labels = df['labels']
    df.set_index('Date', inplace=True)

    # `columns=` replaces the positional axis argument, which pandas 2.0 rejects.
    df.drop(columns=['company_sentiment_score', 'labels'], inplace=True)

    df_padding_left = pd.DataFrame()
    df_padding_right = pd.DataFrame()
    df_padding_left['Date'] = df_dates
    df_padding_right['Date'] = df_dates
    # Seven zero columns per side; the sentiment score follows the left
    # padding and the labels follow the right padding.
    for i in range(1, 8):
        df_padding_left['padding_left{}'.format(i)] = np.zeros(len(df_padding_left))
        df_padding_right['padding_right{}'.format(i)] = np.zeros(len(df_padding_right))

    df_padding_left['company_sentiment_score'] = df_sentiments
    df_padding_right['labels'] = df_labels
    df_padding_left.set_index('Date', inplace=True)
    df_padding_right.set_index('Date', inplace=True)

    df_cnn = pd.concat([df, df_padding_left, df_padding_right], axis=1)

    # Cut the first 240 columns into 16 groups of 15 and append one zero column
    # to each group (16 x 16 columns in total).
    # NOTE(review): presumably the feature table has 225 columns so the groups
    # form a square CNN input - confirm against the indicator files.
    # Each slice is copied so the new column is not written to a view of df_cnn.
    padded_groups = []
    for i in range(16):
        group = df_cnn.iloc[:, i * 15:(i + 1) * 15].copy()
        group['padding_{}'.format(i)] = np.zeros(len(group))
        padded_groups.append(group)
    df_temp = pd.concat(padded_groups, axis=1)

    df_temp['labels'] = df_cnn['labels']

    print(df_temp.head())

    df_temp.to_csv('data/market_sentiments/{}_sentiments_test_padded.csv'.format(ticker), index_label='Date')

D05.SI
            previous_1d  previous_2d  previous_3d  previous_4d  previous_5d  \
Date                                                                          
2020-03-25    18.209999    16.879999    18.160000    17.400000    18.000000   
2020-03-26    19.420000    18.209999    16.879999    18.160000    17.400000   
2020-03-27    19.059999    19.420000    18.209999    16.879999    18.160000   
2020-03-30    19.139999    19.059999    19.420000    18.209999    16.879999   
2020-03-31    18.299999    19.139999    19.059999    19.420000    18.209999   

            previous_6d  previous_7d  previous_8d  previous_9d  previous_10d  \
Date                                                                           
2020-03-25    18.000000        18.58        19.35    20.200001     21.010000   
2020-03-26    18.000000        18.00        18.58    19.350000     20.200001   
2020-03-27    17.400000        18.00        18.00    18.580000     19.350000   
2020-03-30    18.160000        17.40   

5OT.SI
            previous_1d  previous_2d  previous_3d  previous_4d  previous_5d  \
Date                                                                          
2020-03-25        0.189        0.185        0.199        0.199        0.205   
2020-03-26        0.190        0.189        0.185        0.199        0.199   
2020-03-27        0.197        0.190        0.189        0.185        0.199   
2020-03-30        0.200        0.197        0.190        0.189        0.185   
2020-03-31        0.196        0.200        0.197        0.190        0.189   

            previous_6d  previous_7d  previous_8d  previous_9d  previous_10d  \
Date                                                                           
2020-03-25        0.205        0.205        0.245        0.270         0.265   
2020-03-26        0.205        0.205        0.205        0.245         0.270   
2020-03-27        0.199        0.205        0.205        0.205         0.245   
2020-03-30        0.199        0.199   

## US exchange sentiments

In [40]:
import pandas_datareader.data as web
import pandas as pd
import numpy as np
import requests
import lxml
import datetime as dt
import os

# For every ticker in us_tickers_sentiment.txt, attach the US market news
# sentiment score to its indicator rows and save the result.
with open('us_tickers_sentiment.txt') as f:
    # Skip blank lines so an empty ticker never reaches the file paths below.
    tickers = [line.strip() for line in f if line.strip()]

# The market news file is the same for every ticker, so load it once instead
# of re-reading it on each loop iteration.
df_sentiments = pd.read_csv('sentiments/US stocks_news_sentiments.csv')
df_sentiments['Date Published'] = pd.to_datetime(df_sentiments['Date Published'])
# `columns=` replaces the positional axis argument, which pandas 2.0 rejects.
# The score keeps the name 'company_sentiment_score' even though it is a
# market-level score.
df_sentiments = (
    df_sentiments
    .set_index('Date Published')
    .drop(columns=['Unnamed: 0', 'Unnamed: 0.1', 'Company', 'pos', 'neg', 'neu'])
    .rename(columns={'compound': 'company_sentiment_score'})
)

for ticker in tickers:
    print(ticker)
    # Parse 'Date' as datetimes so both sides of the index join share a dtype
    # instead of relying on pandas coercing strings to timestamps implicitly.
    df = pd.read_csv('data/initial_indicators/{}_data.csv'.format(ticker),
                     parse_dates=['Date'])
    df = df.dropna().set_index('Date')

    df = pd.merge(left=df, left_index=True, right=df_sentiments, right_index=True, how='left')

    df = df.drop(columns=['headline'])
    # NOTE(review): unlike the Singapore cells, which fill missing scores with
    # 0, this drops every trading day that has no news item - confirm that the
    # difference is intended.
    df.dropna(inplace=True)
    print(df.head())

    df.to_csv('data/market_sentiments/{}_sentiments.csv'.format(ticker), index_label='Date')

JPM
                 Close  previous_1d  previous_2d  previous_3d  previous_4d  \
2017-06-30   91.400002    91.150002    89.820000    88.050003    87.239998   
2017-08-18   90.739998    90.650002    92.089996    92.730003    92.489998   
2019-01-11   99.910004   100.389999   100.400002   100.570000   100.760002   
2019-05-16  111.309998   109.900002   110.320000   109.449997   112.510002   
2019-06-26  108.480003   107.760002   108.660004   109.440002   110.190002   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
2017-06-30    86.860001    87.019997    87.120003    87.519997    88.070000   
2017-08-18    91.419998    92.190002    93.529999    93.680000    94.019997   
2019-01-11   100.690002    97.110001    99.309998    97.620003    96.830002   
2019-05-16   112.519997   112.610001   113.209999   115.089996   116.120003   
2019-06-26   109.910004   110.709999   109.220001   109.820000   109.540001   

            ...   slowk_17   slowd_17   slowk_18   s

                 Close  previous_1d  previous_2d  previous_3d  previous_4d  \
2017-06-30  121.669998   122.430000   125.809998   122.110001   125.089996   
2017-08-18  133.399994   134.630005   136.550003   135.210007   133.960007   
2019-01-11  111.129997   110.930000   109.959999   108.610001   107.940002   
2019-05-16  131.399994   128.410004   127.930000   125.919998   131.690002   
2019-06-26  125.339996   125.150002   126.980003   132.389999   128.830002   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
2017-06-30   126.070000   126.230003   123.580002   119.050003   116.160004   
2017-08-18   132.929993   132.300003   136.850006   136.039993   138.330002   
2019-01-11   106.000000   100.209999    98.050003    97.360001    97.209999   
2019-05-16   131.770004   133.649994   134.520004   136.960007   136.690002   
2019-06-26   123.769997   122.370003   120.139999   117.930000   117.239998   

            ...   slowk_17   slowd_17   slowk_18   slowd

In [41]:
import pandas_datareader.data as web
import pandas as pd
import numpy as np
import requests
import lxml
import datetime as dt
import os

# For every ticker, rearrange the market-sentiment feature table into
# zero-padded column groups and save it as '<ticker>_sentiments_padded.csv'.
with open('us_tickers_sentiment.txt') as f:
    # Skip blank lines so an empty ticker never reaches the file paths below.
    tickers = [line.strip() for line in f if line.strip()]

for ticker in tickers:
    print(ticker)
    df = pd.read_csv('data/market_sentiments/{}_sentiments.csv'.format(ticker))
    df = df.dropna()

    # Captured before 'Date' becomes the index, so these Series share the
    # row index used to build the padding frames below.
    df_dates = df['Date']
    df_sentiments = df['company_sentiment_score']
    df_labels = df['labels']
    df.set_index('Date', inplace=True)

    # `columns=` replaces the positional axis argument, which pandas 2.0 rejects.
    df.drop(columns=['company_sentiment_score', 'labels'], inplace=True)

    df_padding_left = pd.DataFrame()
    df_padding_right = pd.DataFrame()
    df_padding_left['Date'] = df_dates
    df_padding_right['Date'] = df_dates
    # Seven zero columns per side; the sentiment score follows the left
    # padding and the labels follow the right padding.
    for i in range(1, 8):
        df_padding_left['padding_left{}'.format(i)] = np.zeros(len(df_padding_left))
        df_padding_right['padding_right{}'.format(i)] = np.zeros(len(df_padding_right))

    df_padding_left['company_sentiment_score'] = df_sentiments
    df_padding_right['labels'] = df_labels
    df_padding_left.set_index('Date', inplace=True)
    df_padding_right.set_index('Date', inplace=True)

    df_cnn = pd.concat([df, df_padding_left, df_padding_right], axis=1)

    # Cut the first 240 columns into 16 groups of 15 and append one zero column
    # to each group (16 x 16 columns in total).
    # NOTE(review): presumably the feature table has 225 columns so the groups
    # form a square CNN input - confirm against the indicator files.
    # Each slice is copied so the new column is not written to a view of df_cnn.
    padded_groups = []
    for i in range(16):
        group = df_cnn.iloc[:, i * 15:(i + 1) * 15].copy()
        group['padding_{}'.format(i)] = np.zeros(len(group))
        padded_groups.append(group)
    df_temp = pd.concat(padded_groups, axis=1)

    df_temp['labels'] = df_cnn['labels']

    print(df_temp.head())

    df_temp.to_csv('data/market_sentiments/{}_sentiments_padded.csv'.format(ticker), index_label='Date')

JPM
                 Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                         
2017-06-30   91.400002    91.150002    89.820000    88.050003    87.239998   
2017-08-18   90.739998    90.650002    92.089996    92.730003    92.489998   
2019-01-11   99.910004   100.389999   100.400002   100.570000   100.760002   
2019-05-16  111.309998   109.900002   110.320000   109.449997   112.510002   
2019-06-26  108.480003   107.760002   108.660004   109.440002   110.190002   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2017-06-30    86.860001    87.019997    87.120003    87.519997    88.070000   
2017-08-18    91.419998    92.190002    93.529999    93.680000    94.019997   
2019-01-11   100.690002    97.110001    99.309998    97.620003    96.830002   
2019-05-16   112.519997   112.610001   113.209999   11

                 Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                         
2017-06-30  271.359985   271.880005   274.640015   271.250000   279.809998   
2017-08-18  281.769989   284.420013   294.079987   287.920013   286.049988   
2019-01-11  333.209991   331.190002   326.630005   324.440002   322.399994   
2019-05-16  230.869995   230.589996   229.179993   225.660004   226.229996   
2019-06-26  238.070007   240.289993   235.910004   239.089996   234.970001   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2017-06-30   280.320007   280.570007   278.600006   266.000000   260.540009   
2017-08-18   283.679993   281.149994   288.739990   289.260010   291.010010   
2019-01-11   318.329987   307.000000   304.690002   300.920013   293.519989   
2019-05-16   228.470001   230.979996   229.020004   236.07

## Using only selected technical indicators

In [1]:
import os
import re
from operator import itemgetter

import pandas as pd
import pickle
import numpy as np
from sklearn.feature_selection import SelectKBest, f_classif, mutual_info_classif
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.utils import compute_class_weight
from tqdm.auto import tqdm

def create_labels(df, col_name, window_size=15):
    """
    Label every row as BUY, SELL or HOLD from a sliding window over `col_name`.

    Each run of `window_size` consecutive rows forms a window. The row in the
    middle of the window is labelled SELL when it holds the window's highest
    value, BUY when it holds the lowest, and HOLD otherwise. When several rows
    tie for the extreme, only the earliest one counts.
    Label code : BUY => 1, SELL => 0, HOLD => 2

    params :
        df => Dataframe with data
        col_name => name of column which should be used to determine strategy
        window_size => number of consecutive rows in each window
    returns : numpy float array with one entry per row of df; rows that are
              never the middle of a complete window (at the start and end of
              the data) are left as NaN
    """
    total_rows = len(df)
    labels = np.full(total_rows, np.nan)
    # Read the column once: looking values up with df.iloc[i][col_name] inside
    # the window scan builds a whole row Series for every single access.
    prices = df[col_name].values

    print("Calculating labels")
    pbar = tqdm(total=total_rows)

    for window_end in range(total_rows):
        # A window is complete only once `window_size` rows are available.
        if window_end >= window_size - 1:
            window_begin = window_end - (window_size - 1)
            window_middle = (window_begin + window_end) // 2

            min_ = np.inf
            min_index = -1
            max_ = -np.inf
            max_index = -1
            # Strict comparisons keep the earliest extreme and skip NaN prices.
            for i in range(window_begin, window_end + 1):
                price = prices[i]
                if price < min_:
                    min_ = price
                    min_index = i
                if price > max_:
                    max_ = price
                    max_index = i

            if max_index == window_middle:
                labels[window_middle] = 0
            elif min_index == window_middle:
                labels[window_middle] = 1
            else:
                labels[window_middle] = 2

        pbar.update(1)

    pbar.close()
    return labels

In [22]:
import pandas_datareader.data as web
import pandas as pd
import numpy as np
from talib import RSI,BBANDS,WILLR,WMA,SMA,EMA,TEMA,KAMA,CCI,CMO,MACD,PPO,ROC,APO,DX,MOM,STOCH,AROON,ADX,SAR,OBV
import matplotlib.pyplot as plt
import requests
import lxml
import datetime as dt
import os

In [38]:
# technical indicators using only past 15 days closing price, past 15 days EMA, past 15 days CCI, past 15 days RSI, and past 15 days Bollinger Bands
# past 15 days OBV, SAR
def compile_data():
    """Build per-ticker feature tables and save them as LSTM and CNN CSV files.

    For every ticker listed in ``tickers.txt`` this reads
    ``data/initial/<ticker>.csv``, drops rows with missing values and derives,
    in this column order:

    * ``Close`` and ``previous_1d`` .. ``previous_14d`` (lagged closes),
    * ``EMA_n``, ``rsi_n``, ``CCI_n`` for n in 6..20,
    * ``BBAND_upper_n`` / ``BBAND_middle_n`` / ``BBAND_lower_n`` for n in 6..20,
    * ``OBV_k`` and ``SAR_k`` computed on the input series shifted by
      k = 0..14 rows,
    * ``labels`` from ``create_labels(df, 'Close', 15)`` (defined elsewhere
      in this notebook).

    The CNN variant additionally carries 45 all-zero ``padding_left*`` columns
    before and 45 all-zero ``padding_right*`` columns after the features.
    Rows containing any NaN (indicator warm-up, unlabeled window edges) are
    dropped before saving to ``data/selected_indicators_1/LSTM/<ticker>_data.csv``
    and ``data/selected_indicators_1/CNN/<ticker>_data.csv``.

    Returns:
        None. Results are written to disk; progress is printed.
    """
    n_lags = 15               # lag 0 (current row) plus 14 previous rows
    periods = range(6, 21)    # indicator time periods 6..20
    n_padding = 45            # zero columns on each side of the CNN features
    label_window = 15         # window size handed to create_labels
    # NOTE(review): acceleration=0 / maximum=0 are kept from the original so the
    # generated features do not change. TA-Lib's usual defaults are believed to
    # be 0.02 / 0.2 -- confirm whether 0 / 0 is really intended here.
    sar_acceleration = 0
    sar_maximum = 0

    with open('tickers.txt') as f:
        # Skip blank lines so a trailing newline does not become an empty ticker.
        tickers = [line.strip() for line in f if line.strip()]

    # to_csv does not create missing directories.
    for out_dir in ('data/selected_indicators_1/LSTM', 'data/selected_indicators_1/CNN'):
        os.makedirs(out_dir, exist_ok=True)

    for ticker in tickers:
        print(ticker)
        raw = pd.read_csv('data/initial/{}.csv'.format(ticker)).dropna()
        # TA-Lib only accepts float64 input; cast for every ticker instead of
        # special-casing a single symbol.
        raw = raw.astype({col: float for col in ('High', 'Low', 'Close', 'Volume')})

        high, low = raw['High'], raw['Low']
        close, volume = raw['Close'], raw['Volume']
        close_0 = close.values
        high_0 = high.values
        low_0 = low.values

        df = raw.drop(columns=['High', 'Low', 'Open', 'Volume', 'Adj Close'])
        print(df.head())

        # Collect every derived column first and attach them in one concat;
        # inserting ~120 columns one at a time fragments the frame.
        features = {}
        for lag in range(1, n_lags):
            features['previous_{}d'.format(lag)] = close.shift(lag).values

        for period in periods:
            features['EMA_{}'.format(period)] = EMA(close_0, timeperiod=period)

        for period in periods:
            features['rsi_{}'.format(period)] = RSI(close_0, timeperiod=period)

        for period in periods:
            features['CCI_{}'.format(period)] = CCI(high_0, low_0, close_0, timeperiod=period)

        for period in periods:
            upperband, middleband, lowerband = BBANDS(
                close_0, timeperiod=period, nbdevup=2, nbdevdn=2, matype=0)
            features['BBAND_upper_{}'.format(period)] = upperband
            features['BBAND_middle_{}'.format(period)] = middleband
            features['BBAND_lower_{}'.format(period)] = lowerband

        for lag in range(n_lags):
            features['OBV_{}'.format(lag)] = OBV(
                close.shift(lag).values, volume.shift(lag).values)

        for lag in range(n_lags):
            features['SAR_{}'.format(lag)] = SAR(
                high.shift(lag).values, low.shift(lag).values,
                acceleration=sar_acceleration, maximum=sar_maximum)

        df = pd.concat([df, pd.DataFrame(features, index=df.index)], axis=1)
        df = df.set_index('Date')

        # Zero padding placed on both sides of the features for the CNN input.
        padding_left = pd.DataFrame(
            np.zeros((len(df), n_padding)),
            index=df.index,
            columns=['padding_left{}'.format(i + 1) for i in range(n_padding)])
        padding_right = pd.DataFrame(
            np.zeros((len(df), n_padding)),
            index=df.index,
            columns=['padding_right{}'.format(i + 1) for i in range(n_padding)])
        df_cnn = pd.concat([padding_left, df, padding_right], axis=1)

        print(df.head())
        df.info()  # info() prints by itself; wrapping it in print() adds a stray "None"

        # Labels are computed on the unpadded frame and attached positionally
        # to both variants, then rows with any NaN are dropped.
        labels = create_labels(df, 'Close', label_window)
        df = df.assign(labels=labels).dropna()
        df_cnn = df_cnn.assign(labels=labels).dropna()

        print(df.head())
        print(df_cnn.head())
        df.to_csv('data/selected_indicators_1/LSTM/{}_data.csv'.format(ticker))
        df_cnn.to_csv('data/selected_indicators_1/CNN/{}_data.csv'.format(ticker))
    
# Run the pipeline: writes one LSTM and one CNN feature CSV per ticker listed in tickers.txt.
compile_data()

D05.SI
         Date      Close
0  2000-01-03  27.299999
1  2000-01-04  26.299999
2  2000-01-05  24.600000
3  2000-01-06  24.100000
4  2000-01-07  25.000000
                Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                        
2000-01-03  27.299999          NaN          NaN          NaN          NaN   
2000-01-04  26.299999    27.299999          NaN          NaN          NaN   
2000-01-05  24.600000    26.299999    27.299999          NaN          NaN   
2000-01-06  24.100000    24.600000    26.299999    27.299999          NaN   
2000-01-07  25.000000    24.100000    24.600000    26.299999    27.299999   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2000-01-03          NaN          NaN          NaN          NaN          NaN   
2000-01-04          NaN          NaN          NaN          NaN    

HBox(children=(FloatProgress(value=0.0, max=5066.0), HTML(value='')))


                Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                        
2000-01-31  22.799999    22.799999    23.700001    22.400000    21.700001   
2000-02-01  22.900000    22.799999    22.799999    23.700001    22.400000   
2000-02-02  23.700001    22.900000    22.799999    22.799999    23.700001   
2000-02-03  23.799999    23.700001    22.900000    22.799999    22.799999   
2000-02-04  23.500000    23.799999    23.700001    22.900000    22.799999   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2000-01-31    21.400000    21.900000    22.700001    22.000000    22.600000   
2000-02-01    21.700001    21.400000    21.900000    22.700001    22.000000   
2000-02-02    22.400000    21.700001    21.400000    21.900000    22.700001   
2000-02-03    23.700001    22.400000    21.700001    21.400000  

HBox(children=(FloatProgress(value=0.0, max=5066.0), HTML(value='')))


              Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                      
2000-01-31  7.03642      7.27906      7.23053      6.89084      6.74526   
2000-02-01  6.79379      7.03642      7.27906      7.23053      6.89084   
2000-02-02  6.89084      6.79379      7.03642      7.27906      7.23053   
2000-02-03  6.69673      6.89084      6.79379      7.03642      7.27906   
2000-02-04  6.79379      6.69673      6.89084      6.79379      7.03642   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2000-01-31      6.79379      6.89084      6.79379      6.69673      6.98790   
2000-02-01      6.74526      6.79379      6.89084      6.79379      6.69673   
2000-02-02      6.89084      6.74526      6.79379      6.89084      6.79379   
2000-02-03      7.23053      6.89084      6.74526      6.79379      6.89084   

HBox(children=(FloatProgress(value=0.0, max=5068.0), HTML(value='')))


            Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                    
2000-01-31   12.5         13.0         13.5         13.1         12.8   
2000-02-01   12.5         12.5         13.0         13.5         13.1   
2000-02-02   12.9         12.5         12.5         13.0         13.5   
2000-02-03   12.7         12.9         12.5         12.5         13.0   
2000-02-04   12.6         12.7         12.9         12.5         12.5   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2000-01-31         13.4         14.0         14.2         13.6         14.2   
2000-02-01         12.8         13.4         14.0         14.2         13.6   
2000-02-02         13.1         12.8         13.4         14.0         14.2   
2000-02-03         13.5         13.1         12.8         13.4         14.0   
2000-02-04   

HBox(children=(FloatProgress(value=0.0, max=5032.0), HTML(value='')))


                Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                        
2000-01-31  53.791668    51.125000    53.791668    52.291668    49.750000   
2000-02-01  55.833332    53.791668    51.125000    53.791668    52.291668   
2000-02-02  54.916668    55.833332    53.791668    51.125000    53.791668   
2000-02-03  55.458332    54.916668    55.833332    53.791668    51.125000   
2000-02-04  54.500000    55.458332    54.916668    55.833332    53.791668   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2000-01-31    48.666668    48.625000    49.375000    49.416668    47.333332   
2000-02-01    49.750000    48.666668    48.625000    49.375000    49.416668   
2000-02-02    52.291668    49.750000    48.666668    48.625000    49.375000   
2000-02-03    53.791668    52.291668    49.750000    48.666668  

HBox(children=(FloatProgress(value=0.0, max=5032.0), HTML(value='')))


               Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                       
2000-01-31  24.21875     22.87500     24.06250     23.56250     22.56250   
2000-02-01  24.50000     24.21875     22.87500     24.06250     23.56250   
2000-02-02  23.87500     24.50000     24.21875     22.87500     24.06250   
2000-02-03  23.21875     23.87500     24.50000     24.21875     22.87500   
2000-02-04  22.93750     23.21875     23.87500     24.50000     24.21875   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2000-01-31     22.28125     22.84375    23.539062    24.000000    24.000000   
2000-02-01     22.56250     22.28125    22.843750    23.539062    24.000000   
2000-02-02     23.56250     22.56250    22.281250    22.843750    23.539062   
2000-02-03     24.06250     23.56250    22.562500    22.281250    22.84

HBox(children=(FloatProgress(value=0.0, max=5032.0), HTML(value='')))


               Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                       
2000-01-31  20.00000     19.37500     19.50000     19.00000      18.8125   
2000-02-01  20.21875     20.00000     19.37500     19.50000      19.0000   
2000-02-02  19.50000     20.21875     20.00000     19.37500      19.5000   
2000-02-03  19.43750     19.50000     20.21875     20.00000      19.3750   
2000-02-04  19.28125     19.43750     19.50000     20.21875      20.0000   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2000-01-31      17.9375      18.3125      18.1875      18.3750      19.4375   
2000-02-01      18.8125      17.9375      18.3125      18.1875      18.3750   
2000-02-02      19.0000      18.8125      17.9375      18.3125      18.1875   
2000-02-03      19.5000      19.0000      18.8125      17.9375      18.

HBox(children=(FloatProgress(value=0.0, max=5032.0), HTML(value='')))


                Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                        
2000-01-31  427.50000    420.93750     442.5000     435.9375     419.0625   
2000-02-01  436.87500    427.50000     420.9375     442.5000     435.9375   
2000-02-02  433.12500    436.87500     427.5000     420.9375     442.5000   
2000-02-03  422.34375    433.12500     436.8750     427.5000     420.9375   
2000-02-04  410.62500    422.34375     433.1250     436.8750     427.5000   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2000-01-31    413.90625    423.28125   439.570312   448.125000   438.750000   
2000-02-01    419.06250    413.90625   423.281250   439.570312   448.125000   
2000-02-02    435.93750    419.06250   413.906250   423.281250   439.570312   
2000-02-03    442.50000    435.93750   419.062500   413.906250  

HBox(children=(FloatProgress(value=0.0, max=5070.0), HTML(value='')))


              Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                      
2000-01-31  0.23500      0.24000      0.23833      0.23333      0.23000   
2000-02-01  0.23833      0.23500      0.24000      0.23833      0.23333   
2000-02-02  0.23500      0.23833      0.23500      0.24000      0.23833   
2000-02-03  0.24000      0.23500      0.23833      0.23500      0.24000   
2000-02-04  0.24333      0.24000      0.23500      0.23833      0.23500   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2000-01-31      0.23167      0.23833      0.23833      0.24000      0.24000   
2000-02-01      0.23000      0.23167      0.23833      0.23833      0.24000   
2000-02-02      0.23333      0.23000      0.23167      0.23833      0.23833   
2000-02-03      0.23833      0.23333      0.23000      0.23167      0.23833   

HBox(children=(FloatProgress(value=0.0, max=149.0), HTML(value='')))


            Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                    
2019-07-01   0.19        0.195        0.195        0.190        0.198   
2019-07-02   0.19        0.190        0.195        0.195        0.190   
2019-07-03   0.19        0.190        0.190        0.195        0.195   
2019-07-04   0.19        0.190        0.190        0.190        0.195   
2019-07-05   0.19        0.190        0.190        0.190        0.190   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2019-07-01        0.200        0.200        0.200        0.200          0.2   
2019-07-02        0.198        0.200        0.200        0.200          0.2   
2019-07-03        0.190        0.198        0.200        0.200          0.2   
2019-07-04        0.195        0.190        0.198        0.200          0.2   
2019-07-05   

HBox(children=(FloatProgress(value=0.0, max=2885.0), HTML(value='')))


              Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                      
2008-08-01  0.13365      0.13365      0.12933      0.12933      0.12933   
2008-08-04  0.12502      0.13365      0.13365      0.12933      0.12933   
2008-08-05  0.12933      0.12502      0.13365      0.13365      0.12933   
2008-08-06  0.12502      0.12933      0.12502      0.13365      0.13365   
2008-08-07  0.12071      0.12502      0.12933      0.12502      0.13365   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2008-08-01      0.13365      0.13365      0.14227      0.12933      0.13365   
2008-08-04      0.12933      0.13365      0.13365      0.14227      0.12933   
2008-08-05      0.12933      0.12933      0.13365      0.13365      0.14227   
2008-08-06      0.12933      0.12933      0.12933      0.13365      0.13365   

HBox(children=(FloatProgress(value=0.0, max=2520.0), HTML(value='')))


              Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                      
2009-08-21  0.22194      0.22667      0.22194      0.21722      0.22194   
2009-08-24  0.22194      0.22194      0.22667      0.22194      0.21722   
2009-08-25  0.23611      0.22194      0.22194      0.22667      0.22194   
2009-08-26  0.23139      0.23611      0.22194      0.22194      0.22667   
2009-08-27  0.23611      0.23139      0.23611      0.22194      0.22194   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2009-08-21      0.24083      0.24556      0.24556      0.25500      0.25028   
2009-08-24      0.22194      0.24083      0.24556      0.24556      0.25500   
2009-08-25      0.21722      0.22194      0.24083      0.24556      0.24556   
2009-08-26      0.22194      0.21722      0.22194      0.24083      0.24556   

HBox(children=(FloatProgress(value=0.0, max=5032.0), HTML(value='')))


                Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                        
2000-01-31  42.062500    42.750000    43.416668    47.791668    45.291668   
2000-02-01  44.312500    42.062500    42.750000    43.416668    47.791668   
2000-02-02  35.333332    44.312500    42.062500    42.750000    43.416668   
2000-02-03  34.375000    35.333332    44.312500    42.062500    42.750000   
2000-02-04  36.500000    34.375000    35.333332    44.312500    42.062500   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2000-01-31    45.770832    49.250000    40.895832    40.333332    35.687500   
2000-02-01    45.291668    45.770832    49.250000    40.895832    40.333332   
2000-02-02    47.791668    45.291668    45.770832    49.250000    40.895832   
2000-02-03    43.416668    47.791668    45.291668    45.770832  

HBox(children=(FloatProgress(value=0.0, max=5032.0), HTML(value='')))


               Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                       
2000-01-31  10.93750     11.81250    11.890625    12.156250    11.906250   
2000-02-01  11.09375     10.93750    11.812500    11.890625    12.156250   
2000-02-02  12.62500     11.09375    10.937500    11.812500    11.890625   
2000-02-03  14.00000     12.62500    11.093750    10.937500    11.812500   
2000-02-04  13.65625     14.00000    12.625000    11.093750    10.937500   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2000-01-31    11.765625    11.031250     9.968750     8.531250     8.265625   
2000-02-01    11.906250    11.765625    11.031250     9.968750     8.531250   
2000-02-02    12.156250    11.906250    11.765625    11.031250     9.968750   
2000-02-03    11.890625    12.156250    11.906250    11.765625    11.03

HBox(children=(FloatProgress(value=0.0, max=5032.0), HTML(value='')))


                Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                        
2000-01-31  20.500000    20.859375    21.031250    22.125000    22.125000   
2000-02-01  19.796875    20.500000    20.859375    21.031250    22.125000   
2000-02-02  21.000000    19.796875    20.500000    20.859375    21.031250   
2000-02-03  22.218750    21.000000    19.796875    20.500000    20.859375   
2000-02-04  23.750000    22.218750    21.000000    19.796875    20.500000   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2000-01-31    24.093750     24.59375    22.140625    21.093750    21.218750   
2000-02-01    22.125000     24.09375    24.593750    22.140625    21.093750   
2000-02-02    22.125000     22.12500    24.093750    24.593750    22.140625   
2000-02-03    21.031250     22.12500    22.125000    24.093750  

HBox(children=(FloatProgress(value=0.0, max=5032.0), HTML(value='')))


             Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                     
2000-01-31  130.00        135.0       138.75       140.00       132.50   
2000-02-01  132.50        130.0       135.00       138.75       140.00   
2000-02-02  140.00        132.5       130.00       135.00       138.75   
2000-02-03  137.50        140.0       132.50       130.00       135.00   
2000-02-04  143.75        137.5       140.00       132.50       130.00   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2000-01-31       130.00       145.00       123.75       107.50       105.00   
2000-02-01       132.50       130.00       145.00       123.75       107.50   
2000-02-02       140.00       132.50       130.00       145.00       123.75   
2000-02-03       138.75       140.00       132.50       130.00       145.00   
2000-0

HBox(children=(FloatProgress(value=0.0, max=5032.0), HTML(value='')))


            Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                    
2000-01-31   55.0         53.0         56.0         57.0         59.0   
2000-02-01   56.0         55.0         53.0         56.0         57.0   
2000-02-02   60.0         56.0         55.0         53.0         56.0   
2000-02-03   69.0         60.0         56.0         55.0         53.0   
2000-02-04   66.0         69.0         60.0         56.0         55.0   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2000-01-31         58.0         56.0         51.0         51.0         60.0   
2000-02-01         59.0         58.0         56.0         51.0         51.0   
2000-02-02         57.0         59.0         58.0         56.0         51.0   
2000-02-03         56.0         57.0         59.0         58.0         56.0   
2000-02-04   

In [40]:
# technical indicators using only past 15 days closing price, past 15 days EMA, past 15 days RSI, and past 15 days Bollinger Bands
# past 15 days OBV, SAR
def compile_data():
    """Build the per-ticker feature tables and write them out as CSV files.

    For every ticker listed in ``tickers.txt`` this reads
    ``data/initial/<ticker>.csv``, drops rows containing NaN and derives:

    * ``previous_1d`` .. ``previous_14d``: the closing price shifted by 1..14 rows;
    * ``EMA_n``, ``rsi_n`` and ``BBAND_upper_n`` / ``BBAND_middle_n`` /
      ``BBAND_lower_n`` for time periods n = 6..20;
    * ``OBV_k`` and ``SAR_k`` for k = 0..14, each computed on inputs shifted
      by k rows.

    Two files are written per ticker:

    * ``data/selected_indicators_2/LSTM/<ticker>_data.csv``: the features
      plus a ``labels`` column;
    * ``data/selected_indicators_2/CNN/<ticker>_data.csv``: the same features
      surrounded by zero-filled padding columns, plus ``labels``.

    Rows containing NaN (the indicator warm-up rows, and any NaN labels) are
    dropped before saving. Labels come from ``create_labels``, which is defined
    elsewhere in the notebook.

    Returns:
        None. Results are written to disk; progress is printed.
    """
    # Imported locally: SAR is not in the talib import list of the notebook's first cell.
    from talib import SAR

    n_lags = 15                # current row + 14 lagged rows
    periods = range(6, 21)     # indicator time periods 6..20
    # 52 + 120 feature columns + 53 = 225 = 15 * 15 columns per row;
    # presumably so each row can be reshaped into a 15x15 CNN input (TODO confirm).
    pad_left, pad_right = 52, 53
    lstm_dir = 'data/selected_indicators_2/LSTM'
    cnn_dir = 'data/selected_indicators_2/CNN'

    with open('tickers.txt') as f:
        tickers = [i.strip() for i in f.readlines()]

    # Make sure the output folders exist, as the download cells do for theirs.
    for out_dir in (lstm_dir, cnn_dir):
        os.makedirs(out_dir, exist_ok=True)

    for ticker in tickers:
        print(ticker)
        df = pd.read_csv('data/initial/{}.csv'.format(ticker))
        # TA-Lib only accepts float64 arrays, so cast the price/volume columns
        # for every ticker (a no-op where they are already float).
        df = df.dropna().astype(
            {'High': float, 'Low': float, 'Close': float, 'Volume': float})

        if ticker == 'MIJ.SI':
            # Diagnostic kept from the original cell; info() prints by itself.
            df.info()

        # Keep handles on the raw series before their columns are dropped.
        high = df['High']
        low = df['Low']
        close = df['Close']
        volume = df['Volume']
        close_0 = close.values

        # Keyword form: the positional ``axis`` argument was removed in pandas 2.0.
        df = df.drop(columns=['High', 'Low', 'Open', 'Volume', 'Adj Close'])
        print(df.head())

        for lag in range(1, n_lags):
            df['previous_{}d'.format(lag)] = close.shift(lag)

        for period in periods:
            df['EMA_{}'.format(period)] = EMA(close_0, timeperiod=period)

        for period in periods:
            df['rsi_{}'.format(period)] = RSI(close_0, timeperiod=period)

        for period in periods:
            upperband, middleband, lowerband = BBANDS(
                close_0, timeperiod=period, nbdevup=2, nbdevdn=2, matype=0)
            df['BBAND_upper_{}'.format(period)] = upperband
            df['BBAND_middle_{}'.format(period)] = middleband
            df['BBAND_lower_{}'.format(period)] = lowerband

        for lag in range(n_lags):
            df['OBV_{}'.format(lag)] = OBV(close.shift(lag).values,
                                           volume.shift(lag).values)

        # NOTE(review): acceleration=0 / maximum=0 are passed explicitly, as in
        # the original cell. TA-Lib's usual SAR settings are 0.02 / 0.2 --
        # confirm that zeros are really intended here.
        for lag in range(n_lags):
            df['SAR_{}'.format(lag)] = SAR(high.shift(lag).values,
                                           low.shift(lag).values,
                                           acceleration=0, maximum=0)

        df = df.set_index('Date')

        # Zero-filled columns placed to the left and right of the features.
        padding_left = pd.DataFrame(
            0.0, index=df.index,
            columns=['padding_left{}'.format(i) for i in range(1, pad_left + 1)])
        padding_right = pd.DataFrame(
            0.0, index=df.index,
            columns=['padding_right{}'.format(i) for i in range(1, pad_right + 1)])
        df_cnn = pd.concat([padding_left, df, padding_right], axis=1)

        print(df.head())
        df.info()  # info() prints itself; wrapping it in print() added a stray "None"

        labels = create_labels(df, 'Close', 15)
        df['labels'] = labels
        df_cnn['labels'] = labels
        # Drop the warm-up rows where lags/indicators (or labels) are NaN.
        df = df.dropna()
        df_cnn = df_cnn.dropna()

        print(df.head())
        print(df_cnn.head())
        df.to_csv('{}/{}_data.csv'.format(lstm_dir, ticker))
        df_cnn.to_csv('{}/{}_data.csv'.format(cnn_dir, ticker))
    
compile_data()

D05.SI
         Date      Close
0  2000-01-03  27.299999
1  2000-01-04  26.299999
2  2000-01-05  24.600000
3  2000-01-06  24.100000
4  2000-01-07  25.000000
                Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                        
2000-01-03  27.299999          NaN          NaN          NaN          NaN   
2000-01-04  26.299999    27.299999          NaN          NaN          NaN   
2000-01-05  24.600000    26.299999    27.299999          NaN          NaN   
2000-01-06  24.100000    24.600000    26.299999    27.299999          NaN   
2000-01-07  25.000000    24.100000    24.600000    26.299999    27.299999   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2000-01-03          NaN          NaN          NaN          NaN          NaN   
2000-01-04          NaN          NaN          NaN          NaN    

HBox(children=(FloatProgress(value=0.0, max=5066.0), HTML(value='')))


                Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                        
2000-01-31  22.799999    22.799999    23.700001    22.400000    21.700001   
2000-02-01  22.900000    22.799999    22.799999    23.700001    22.400000   
2000-02-02  23.700001    22.900000    22.799999    22.799999    23.700001   
2000-02-03  23.799999    23.700001    22.900000    22.799999    22.799999   
2000-02-04  23.500000    23.799999    23.700001    22.900000    22.799999   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2000-01-31    21.400000    21.900000    22.700001    22.000000    22.600000   
2000-02-01    21.700001    21.400000    21.900000    22.700001    22.000000   
2000-02-02    22.400000    21.700001    21.400000    21.900000    22.700001   
2000-02-03    23.700001    22.400000    21.700001    21.400000  

HBox(children=(FloatProgress(value=0.0, max=5066.0), HTML(value='')))


              Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                      
2000-01-31  7.03642      7.27906      7.23053      6.89084      6.74526   
2000-02-01  6.79379      7.03642      7.27906      7.23053      6.89084   
2000-02-02  6.89084      6.79379      7.03642      7.27906      7.23053   
2000-02-03  6.69673      6.89084      6.79379      7.03642      7.27906   
2000-02-04  6.79379      6.69673      6.89084      6.79379      7.03642   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2000-01-31      6.79379      6.89084      6.79379      6.69673      6.98790   
2000-02-01      6.74526      6.79379      6.89084      6.79379      6.69673   
2000-02-02      6.89084      6.74526      6.79379      6.89084      6.79379   
2000-02-03      7.23053      6.89084      6.74526      6.79379      6.89084   

HBox(children=(FloatProgress(value=0.0, max=5068.0), HTML(value='')))


            Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                    
2000-01-31   12.5         13.0         13.5         13.1         12.8   
2000-02-01   12.5         12.5         13.0         13.5         13.1   
2000-02-02   12.9         12.5         12.5         13.0         13.5   
2000-02-03   12.7         12.9         12.5         12.5         13.0   
2000-02-04   12.6         12.7         12.9         12.5         12.5   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2000-01-31         13.4         14.0         14.2         13.6         14.2   
2000-02-01         12.8         13.4         14.0         14.2         13.6   
2000-02-02         13.1         12.8         13.4         14.0         14.2   
2000-02-03         13.5         13.1         12.8         13.4         14.0   
2000-02-04   

HBox(children=(FloatProgress(value=0.0, max=5032.0), HTML(value='')))


                Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                        
2000-01-31  53.791668    51.125000    53.791668    52.291668    49.750000   
2000-02-01  55.833332    53.791668    51.125000    53.791668    52.291668   
2000-02-02  54.916668    55.833332    53.791668    51.125000    53.791668   
2000-02-03  55.458332    54.916668    55.833332    53.791668    51.125000   
2000-02-04  54.500000    55.458332    54.916668    55.833332    53.791668   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2000-01-31    48.666668    48.625000    49.375000    49.416668    47.333332   
2000-02-01    49.750000    48.666668    48.625000    49.375000    49.416668   
2000-02-02    52.291668    49.750000    48.666668    48.625000    49.375000   
2000-02-03    53.791668    52.291668    49.750000    48.666668  

HBox(children=(FloatProgress(value=0.0, max=5032.0), HTML(value='')))


               Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                       
2000-01-31  24.21875     22.87500     24.06250     23.56250     22.56250   
2000-02-01  24.50000     24.21875     22.87500     24.06250     23.56250   
2000-02-02  23.87500     24.50000     24.21875     22.87500     24.06250   
2000-02-03  23.21875     23.87500     24.50000     24.21875     22.87500   
2000-02-04  22.93750     23.21875     23.87500     24.50000     24.21875   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2000-01-31     22.28125     22.84375    23.539062    24.000000    24.000000   
2000-02-01     22.56250     22.28125    22.843750    23.539062    24.000000   
2000-02-02     23.56250     22.56250    22.281250    22.843750    23.539062   
2000-02-03     24.06250     23.56250    22.562500    22.281250    22.84

HBox(children=(FloatProgress(value=0.0, max=5032.0), HTML(value='')))


               Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                       
2000-01-31  20.00000     19.37500     19.50000     19.00000      18.8125   
2000-02-01  20.21875     20.00000     19.37500     19.50000      19.0000   
2000-02-02  19.50000     20.21875     20.00000     19.37500      19.5000   
2000-02-03  19.43750     19.50000     20.21875     20.00000      19.3750   
2000-02-04  19.28125     19.43750     19.50000     20.21875      20.0000   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2000-01-31      17.9375      18.3125      18.1875      18.3750      19.4375   
2000-02-01      18.8125      17.9375      18.3125      18.1875      18.3750   
2000-02-02      19.0000      18.8125      17.9375      18.3125      18.1875   
2000-02-03      19.5000      19.0000      18.8125      17.9375      18.

HBox(children=(FloatProgress(value=0.0, max=5032.0), HTML(value='')))


                Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                        
2000-01-31  427.50000    420.93750     442.5000     435.9375     419.0625   
2000-02-01  436.87500    427.50000     420.9375     442.5000     435.9375   
2000-02-02  433.12500    436.87500     427.5000     420.9375     442.5000   
2000-02-03  422.34375    433.12500     436.8750     427.5000     420.9375   
2000-02-04  410.62500    422.34375     433.1250     436.8750     427.5000   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2000-01-31    413.90625    423.28125   439.570312   448.125000   438.750000   
2000-02-01    419.06250    413.90625   423.281250   439.570312   448.125000   
2000-02-02    435.93750    419.06250   413.906250   423.281250   439.570312   
2000-02-03    442.50000    435.93750   419.062500   413.906250  

HBox(children=(FloatProgress(value=0.0, max=5070.0), HTML(value='')))


              Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                      
2000-01-31  0.23500      0.24000      0.23833      0.23333      0.23000   
2000-02-01  0.23833      0.23500      0.24000      0.23833      0.23333   
2000-02-02  0.23500      0.23833      0.23500      0.24000      0.23833   
2000-02-03  0.24000      0.23500      0.23833      0.23500      0.24000   
2000-02-04  0.24333      0.24000      0.23500      0.23833      0.23500   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2000-01-31      0.23167      0.23833      0.23833      0.24000      0.24000   
2000-02-01      0.23000      0.23167      0.23833      0.23833      0.24000   
2000-02-02      0.23333      0.23000      0.23167      0.23833      0.23833   
2000-02-03      0.23833      0.23333      0.23000      0.23167      0.23833   

HBox(children=(FloatProgress(value=0.0, max=149.0), HTML(value='')))


            Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                    
2019-07-01   0.19        0.195        0.195        0.190        0.198   
2019-07-02   0.19        0.190        0.195        0.195        0.190   
2019-07-03   0.19        0.190        0.190        0.195        0.195   
2019-07-04   0.19        0.190        0.190        0.190        0.195   
2019-07-05   0.19        0.190        0.190        0.190        0.190   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2019-07-01        0.200        0.200        0.200        0.200          0.2   
2019-07-02        0.198        0.200        0.200        0.200          0.2   
2019-07-03        0.190        0.198        0.200        0.200          0.2   
2019-07-04        0.195        0.190        0.198        0.200          0.2   
2019-07-05   

HBox(children=(FloatProgress(value=0.0, max=2885.0), HTML(value='')))


              Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                      
2008-08-01  0.13365      0.13365      0.12933      0.12933      0.12933   
2008-08-04  0.12502      0.13365      0.13365      0.12933      0.12933   
2008-08-05  0.12933      0.12502      0.13365      0.13365      0.12933   
2008-08-06  0.12502      0.12933      0.12502      0.13365      0.13365   
2008-08-07  0.12071      0.12502      0.12933      0.12502      0.13365   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2008-08-01      0.13365      0.13365      0.14227      0.12933      0.13365   
2008-08-04      0.12933      0.13365      0.13365      0.14227      0.12933   
2008-08-05      0.12933      0.12933      0.13365      0.13365      0.14227   
2008-08-06      0.12933      0.12933      0.12933      0.13365      0.13365   

HBox(children=(FloatProgress(value=0.0, max=2520.0), HTML(value='')))


              Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                      
2009-08-21  0.22194      0.22667      0.22194      0.21722      0.22194   
2009-08-24  0.22194      0.22194      0.22667      0.22194      0.21722   
2009-08-25  0.23611      0.22194      0.22194      0.22667      0.22194   
2009-08-26  0.23139      0.23611      0.22194      0.22194      0.22667   
2009-08-27  0.23611      0.23139      0.23611      0.22194      0.22194   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2009-08-21      0.24083      0.24556      0.24556      0.25500      0.25028   
2009-08-24      0.22194      0.24083      0.24556      0.24556      0.25500   
2009-08-25      0.21722      0.22194      0.24083      0.24556      0.24556   
2009-08-26      0.22194      0.21722      0.22194      0.24083      0.24556   

HBox(children=(FloatProgress(value=0.0, max=5032.0), HTML(value='')))


                Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                        
2000-01-31  42.062500    42.750000    43.416668    47.791668    45.291668   
2000-02-01  44.312500    42.062500    42.750000    43.416668    47.791668   
2000-02-02  35.333332    44.312500    42.062500    42.750000    43.416668   
2000-02-03  34.375000    35.333332    44.312500    42.062500    42.750000   
2000-02-04  36.500000    34.375000    35.333332    44.312500    42.062500   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2000-01-31    45.770832    49.250000    40.895832    40.333332    35.687500   
2000-02-01    45.291668    45.770832    49.250000    40.895832    40.333332   
2000-02-02    47.791668    45.291668    45.770832    49.250000    40.895832   
2000-02-03    43.416668    47.791668    45.291668    45.770832  

HBox(children=(FloatProgress(value=0.0, max=5032.0), HTML(value='')))


               Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                       
2000-01-31  10.93750     11.81250    11.890625    12.156250    11.906250   
2000-02-01  11.09375     10.93750    11.812500    11.890625    12.156250   
2000-02-02  12.62500     11.09375    10.937500    11.812500    11.890625   
2000-02-03  14.00000     12.62500    11.093750    10.937500    11.812500   
2000-02-04  13.65625     14.00000    12.625000    11.093750    10.937500   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2000-01-31    11.765625    11.031250     9.968750     8.531250     8.265625   
2000-02-01    11.906250    11.765625    11.031250     9.968750     8.531250   
2000-02-02    12.156250    11.906250    11.765625    11.031250     9.968750   
2000-02-03    11.890625    12.156250    11.906250    11.765625    11.03

HBox(children=(FloatProgress(value=0.0, max=5032.0), HTML(value='')))


                Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                        
2000-01-31  20.500000    20.859375    21.031250    22.125000    22.125000   
2000-02-01  19.796875    20.500000    20.859375    21.031250    22.125000   
2000-02-02  21.000000    19.796875    20.500000    20.859375    21.031250   
2000-02-03  22.218750    21.000000    19.796875    20.500000    20.859375   
2000-02-04  23.750000    22.218750    21.000000    19.796875    20.500000   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2000-01-31    24.093750     24.59375    22.140625    21.093750    21.218750   
2000-02-01    22.125000     24.09375    24.593750    22.140625    21.093750   
2000-02-02    22.125000     22.12500    24.093750    24.593750    22.140625   
2000-02-03    21.031250     22.12500    22.125000    24.093750  

HBox(children=(FloatProgress(value=0.0, max=5032.0), HTML(value='')))


             Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                     
2000-01-31  130.00        135.0       138.75       140.00       132.50   
2000-02-01  132.50        130.0       135.00       138.75       140.00   
2000-02-02  140.00        132.5       130.00       135.00       138.75   
2000-02-03  137.50        140.0       132.50       130.00       135.00   
2000-02-04  143.75        137.5       140.00       132.50       130.00   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2000-01-31       130.00       145.00       123.75       107.50       105.00   
2000-02-01       132.50       130.00       145.00       123.75       107.50   
2000-02-02       140.00       132.50       130.00       145.00       123.75   
2000-02-03       138.75       140.00       132.50       130.00       145.00   
2000-0

HBox(children=(FloatProgress(value=0.0, max=5032.0), HTML(value='')))


            Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                    
2000-01-31   55.0         53.0         56.0         57.0         59.0   
2000-02-01   56.0         55.0         53.0         56.0         57.0   
2000-02-02   60.0         56.0         55.0         53.0         56.0   
2000-02-03   69.0         60.0         56.0         55.0         53.0   
2000-02-04   66.0         69.0         60.0         56.0         55.0   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2000-01-31         58.0         56.0         51.0         51.0         60.0   
2000-02-01         59.0         58.0         56.0         51.0         51.0   
2000-02-02         57.0         59.0         58.0         56.0         51.0   
2000-02-03         56.0         57.0         59.0         58.0         56.0   
2000-02-04   

In [41]:
# technical indicators using only past 15 days closing price, past 15 days RSI
# past 15 days OBV, SAR
def compile_data():
    """Build per-ticker feature tables and save them as LSTM and CNN CSV files.

    For every non-blank line (ticker) in ``tickers.txt`` this reads
    ``data/initial/<ticker>.csv``, drops rows containing NaN and derives,
    indexed by ``Date``:

    * ``Close`` and ``previous_1d`` .. ``previous_14d`` (close lagged 1-14 rows),
    * ``rsi_6`` .. ``rsi_20`` (RSI of the close at time periods 6-20),
    * ``OBV_0`` .. ``OBV_14`` (OBV of close/volume lagged 0-14 rows),
    * ``SAR_0`` .. ``SAR_14`` (SAR of high/low lagged 0-14 rows),
    * ``labels`` (whatever ``create_labels(df, 'Close', 15)`` returns).

    The LSTM table holds exactly those columns.  The CNN table additionally
    has 82 all-zero columns on the left and 83 on the right of the 60 feature
    columns (82 + 60 + 83 = 225 — presumably so a row can be reshaped to
    15 x 15 for the CNN; TODO confirm against the model code).

    Rows that still contain NaN after labelling (e.g. the warm-up rows of the
    lagged features) are dropped before writing
    ``data/selected_indicators_3/LSTM/<ticker>_data.csv`` and
    ``data/selected_indicators_3/CNN/<ticker>_data.csv``.

    Returns:
        None. The function works purely through printed progress output and
        the CSV files it writes.
    """
    # SAR is not in the notebook's top-level ``from talib import ...`` list,
    # so import it here to keep this cell runnable on a fresh kernel.
    from talib import SAR

    lag_count = 15              # lags 0..14
    rsi_periods = range(6, 21)  # RSI time periods 6..20
    left_pad, right_pad = 82, 83
    lstm_dir = 'data/selected_indicators_3/LSTM'
    cnn_dir = 'data/selected_indicators_3/CNN'

    with open('tickers.txt') as f:
        # Skip blank lines so a trailing newline does not become an empty ticker.
        tickers = [line.strip() for line in f if line.strip()]

    # to_csv does not create missing directories.
    for out_dir in (lstm_dir, cnn_dir):
        os.makedirs(out_dir, exist_ok=True)

    for ticker in tickers:
        print(ticker)
        df = pd.read_csv('data/initial/{}.csv'.format(ticker)).dropna()

        # TA-Lib only accepts float64 arrays; cast for every ticker instead of
        # special-casing the single one ('MIJ.SI') that used to need it.
        df = df.astype({col: float for col in ('High', 'Low', 'Close', 'Volume')})

        # Keep the raw series: the columns themselves are dropped from df below.
        high, low = df['High'], df['Low']
        close, volume = df['Close'], df['Volume']

        # Keyword form: the positional ``axis`` argument was removed in pandas 2.0.
        df = df.drop(columns=['High', 'Low', 'Open', 'Volume', 'Adj Close'])
        print(df.head())

        for lag in range(1, lag_count):
            df['previous_{}d'.format(lag)] = close.shift(lag)

        close_values = close.values
        for period in rsi_periods:
            df['rsi_{}'.format(period)] = RSI(close_values, timeperiod=period)

        for lag in range(lag_count):
            df['OBV_{}'.format(lag)] = OBV(close.shift(lag).values,
                                           volume.shift(lag).values)

        # NOTE(review): acceleration=0 / maximum=0 is kept from the original
        # code; the conventional Parabolic SAR settings are 0.02 / 0.2 —
        # confirm these zeros are intended before relying on the SAR features.
        for lag in range(lag_count):
            df['SAR_{}'.format(lag)] = SAR(high.shift(lag).values,
                                           low.shift(lag).values,
                                           acceleration=0, maximum=0)

        df = df.set_index('Date')

        # Zero padding columns around the features, sharing df's Date index.
        left_cols = ['padding_left{}'.format(i) for i in range(1, left_pad + 1)]
        right_cols = ['padding_right{}'.format(i) for i in range(1, right_pad + 1)]
        df_padding_left = pd.DataFrame(0.0, index=df.index, columns=left_cols)
        df_padding_right = pd.DataFrame(0.0, index=df.index, columns=right_cols)

        df_cnn = pd.concat([df_padding_left, df, df_padding_right], axis=1)

        print(df.head())
        df.info()  # info() prints by itself; wrapping it in print() adds a stray "None"

        # create_labels is defined elsewhere in this notebook.
        labels = create_labels(df, 'Close', 15)
        df['labels'] = labels
        df_cnn['labels'] = labels
        df = df.dropna()
        df_cnn = df_cnn.dropna()

        print(df.head())
        print(df_cnn.head())
        df.to_csv('data/selected_indicators_3/LSTM/{}_data.csv'.format(ticker))
        df_cnn.to_csv('data/selected_indicators_3/CNN/{}_data.csv'.format(ticker))
    
# Run the pipeline: writes one LSTM and one CNN feature CSV per ticker under data/selected_indicators_3/.
compile_data()

D05.SI
         Date      Close
0  2000-01-03  27.299999
1  2000-01-04  26.299999
2  2000-01-05  24.600000
3  2000-01-06  24.100000
4  2000-01-07  25.000000
                Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                        
2000-01-03  27.299999          NaN          NaN          NaN          NaN   
2000-01-04  26.299999    27.299999          NaN          NaN          NaN   
2000-01-05  24.600000    26.299999    27.299999          NaN          NaN   
2000-01-06  24.100000    24.600000    26.299999    27.299999          NaN   
2000-01-07  25.000000    24.100000    24.600000    26.299999    27.299999   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2000-01-03          NaN          NaN          NaN          NaN          NaN   
2000-01-04          NaN          NaN          NaN          NaN    

HBox(children=(FloatProgress(value=0.0, max=5066.0), HTML(value='')))


                Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                        
2000-01-31  22.799999    22.799999    23.700001    22.400000    21.700001   
2000-02-01  22.900000    22.799999    22.799999    23.700001    22.400000   
2000-02-02  23.700001    22.900000    22.799999    22.799999    23.700001   
2000-02-03  23.799999    23.700001    22.900000    22.799999    22.799999   
2000-02-04  23.500000    23.799999    23.700001    22.900000    22.799999   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2000-01-31    21.400000    21.900000    22.700001    22.000000    22.600000   
2000-02-01    21.700001    21.400000    21.900000    22.700001    22.000000   
2000-02-02    22.400000    21.700001    21.400000    21.900000    22.700001   
2000-02-03    23.700001    22.400000    21.700001    21.400000  

HBox(children=(FloatProgress(value=0.0, max=5066.0), HTML(value='')))


              Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                      
2000-01-31  7.03642      7.27906      7.23053      6.89084      6.74526   
2000-02-01  6.79379      7.03642      7.27906      7.23053      6.89084   
2000-02-02  6.89084      6.79379      7.03642      7.27906      7.23053   
2000-02-03  6.69673      6.89084      6.79379      7.03642      7.27906   
2000-02-04  6.79379      6.69673      6.89084      6.79379      7.03642   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2000-01-31      6.79379      6.89084      6.79379      6.69673      6.98790   
2000-02-01      6.74526      6.79379      6.89084      6.79379      6.69673   
2000-02-02      6.89084      6.74526      6.79379      6.89084      6.79379   
2000-02-03      7.23053      6.89084      6.74526      6.79379      6.89084   

HBox(children=(FloatProgress(value=0.0, max=5068.0), HTML(value='')))


            Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                    
2000-01-31   12.5         13.0         13.5         13.1         12.8   
2000-02-01   12.5         12.5         13.0         13.5         13.1   
2000-02-02   12.9         12.5         12.5         13.0         13.5   
2000-02-03   12.7         12.9         12.5         12.5         13.0   
2000-02-04   12.6         12.7         12.9         12.5         12.5   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2000-01-31         13.4         14.0         14.2         13.6         14.2   
2000-02-01         12.8         13.4         14.0         14.2         13.6   
2000-02-02         13.1         12.8         13.4         14.0         14.2   
2000-02-03         13.5         13.1         12.8         13.4         14.0   
2000-02-04   

HBox(children=(FloatProgress(value=0.0, max=5032.0), HTML(value='')))


                Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                        
2000-01-31  53.791668    51.125000    53.791668    52.291668    49.750000   
2000-02-01  55.833332    53.791668    51.125000    53.791668    52.291668   
2000-02-02  54.916668    55.833332    53.791668    51.125000    53.791668   
2000-02-03  55.458332    54.916668    55.833332    53.791668    51.125000   
2000-02-04  54.500000    55.458332    54.916668    55.833332    53.791668   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2000-01-31    48.666668    48.625000    49.375000    49.416668    47.333332   
2000-02-01    49.750000    48.666668    48.625000    49.375000    49.416668   
2000-02-02    52.291668    49.750000    48.666668    48.625000    49.375000   
2000-02-03    53.791668    52.291668    49.750000    48.666668  

HBox(children=(FloatProgress(value=0.0, max=5032.0), HTML(value='')))


               Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                       
2000-01-31  24.21875     22.87500     24.06250     23.56250     22.56250   
2000-02-01  24.50000     24.21875     22.87500     24.06250     23.56250   
2000-02-02  23.87500     24.50000     24.21875     22.87500     24.06250   
2000-02-03  23.21875     23.87500     24.50000     24.21875     22.87500   
2000-02-04  22.93750     23.21875     23.87500     24.50000     24.21875   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2000-01-31     22.28125     22.84375    23.539062    24.000000    24.000000   
2000-02-01     22.56250     22.28125    22.843750    23.539062    24.000000   
2000-02-02     23.56250     22.56250    22.281250    22.843750    23.539062   
2000-02-03     24.06250     23.56250    22.562500    22.281250    22.84

HBox(children=(FloatProgress(value=0.0, max=5032.0), HTML(value='')))


               Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                       
2000-01-31  20.00000     19.37500     19.50000     19.00000      18.8125   
2000-02-01  20.21875     20.00000     19.37500     19.50000      19.0000   
2000-02-02  19.50000     20.21875     20.00000     19.37500      19.5000   
2000-02-03  19.43750     19.50000     20.21875     20.00000      19.3750   
2000-02-04  19.28125     19.43750     19.50000     20.21875      20.0000   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2000-01-31      17.9375      18.3125      18.1875      18.3750      19.4375   
2000-02-01      18.8125      17.9375      18.3125      18.1875      18.3750   
2000-02-02      19.0000      18.8125      17.9375      18.3125      18.1875   
2000-02-03      19.5000      19.0000      18.8125      17.9375      18.

HBox(children=(FloatProgress(value=0.0, max=5032.0), HTML(value='')))


                Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                        
2000-01-31  427.50000    420.93750     442.5000     435.9375     419.0625   
2000-02-01  436.87500    427.50000     420.9375     442.5000     435.9375   
2000-02-02  433.12500    436.87500     427.5000     420.9375     442.5000   
2000-02-03  422.34375    433.12500     436.8750     427.5000     420.9375   
2000-02-04  410.62500    422.34375     433.1250     436.8750     427.5000   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2000-01-31    413.90625    423.28125   439.570312   448.125000   438.750000   
2000-02-01    419.06250    413.90625   423.281250   439.570312   448.125000   
2000-02-02    435.93750    419.06250   413.906250   423.281250   439.570312   
2000-02-03    442.50000    435.93750   419.062500   413.906250  

HBox(children=(FloatProgress(value=0.0, max=5070.0), HTML(value='')))


              Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                      
2000-01-31  0.23500      0.24000      0.23833      0.23333      0.23000   
2000-02-01  0.23833      0.23500      0.24000      0.23833      0.23333   
2000-02-02  0.23500      0.23833      0.23500      0.24000      0.23833   
2000-02-03  0.24000      0.23500      0.23833      0.23500      0.24000   
2000-02-04  0.24333      0.24000      0.23500      0.23833      0.23500   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2000-01-31      0.23167      0.23833      0.23833      0.24000      0.24000   
2000-02-01      0.23000      0.23167      0.23833      0.23833      0.24000   
2000-02-02      0.23333      0.23000      0.23167      0.23833      0.23833   
2000-02-03      0.23833      0.23333      0.23000      0.23167      0.23833   

HBox(children=(FloatProgress(value=0.0, max=149.0), HTML(value='')))


            Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                    
2019-07-01   0.19        0.195        0.195        0.190        0.198   
2019-07-02   0.19        0.190        0.195        0.195        0.190   
2019-07-03   0.19        0.190        0.190        0.195        0.195   
2019-07-04   0.19        0.190        0.190        0.190        0.195   
2019-07-05   0.19        0.190        0.190        0.190        0.190   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2019-07-01        0.200        0.200        0.200        0.200          0.2   
2019-07-02        0.198        0.200        0.200        0.200          0.2   
2019-07-03        0.190        0.198        0.200        0.200          0.2   
2019-07-04        0.195        0.190        0.198        0.200          0.2   
2019-07-05   

HBox(children=(FloatProgress(value=0.0, max=2885.0), HTML(value='')))


              Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                      
2008-08-01  0.13365      0.13365      0.12933      0.12933      0.12933   
2008-08-04  0.12502      0.13365      0.13365      0.12933      0.12933   
2008-08-05  0.12933      0.12502      0.13365      0.13365      0.12933   
2008-08-06  0.12502      0.12933      0.12502      0.13365      0.13365   
2008-08-07  0.12071      0.12502      0.12933      0.12502      0.13365   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2008-08-01      0.13365      0.13365      0.14227      0.12933      0.13365   
2008-08-04      0.12933      0.13365      0.13365      0.14227      0.12933   
2008-08-05      0.12933      0.12933      0.13365      0.13365      0.14227   
2008-08-06      0.12933      0.12933      0.12933      0.13365      0.13365   

HBox(children=(FloatProgress(value=0.0, max=2520.0), HTML(value='')))


              Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                      
2009-08-21  0.22194      0.22667      0.22194      0.21722      0.22194   
2009-08-24  0.22194      0.22194      0.22667      0.22194      0.21722   
2009-08-25  0.23611      0.22194      0.22194      0.22667      0.22194   
2009-08-26  0.23139      0.23611      0.22194      0.22194      0.22667   
2009-08-27  0.23611      0.23139      0.23611      0.22194      0.22194   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2009-08-21      0.24083      0.24556      0.24556      0.25500      0.25028   
2009-08-24      0.22194      0.24083      0.24556      0.24556      0.25500   
2009-08-25      0.21722      0.22194      0.24083      0.24556      0.24556   
2009-08-26      0.22194      0.21722      0.22194      0.24083      0.24556   

HBox(children=(FloatProgress(value=0.0, max=5032.0), HTML(value='')))


                Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                        
2000-01-31  42.062500    42.750000    43.416668    47.791668    45.291668   
2000-02-01  44.312500    42.062500    42.750000    43.416668    47.791668   
2000-02-02  35.333332    44.312500    42.062500    42.750000    43.416668   
2000-02-03  34.375000    35.333332    44.312500    42.062500    42.750000   
2000-02-04  36.500000    34.375000    35.333332    44.312500    42.062500   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2000-01-31    45.770832    49.250000    40.895832    40.333332    35.687500   
2000-02-01    45.291668    45.770832    49.250000    40.895832    40.333332   
2000-02-02    47.791668    45.291668    45.770832    49.250000    40.895832   
2000-02-03    43.416668    47.791668    45.291668    45.770832  

HBox(children=(FloatProgress(value=0.0, max=5032.0), HTML(value='')))


               Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                       
2000-01-31  10.93750     11.81250    11.890625    12.156250    11.906250   
2000-02-01  11.09375     10.93750    11.812500    11.890625    12.156250   
2000-02-02  12.62500     11.09375    10.937500    11.812500    11.890625   
2000-02-03  14.00000     12.62500    11.093750    10.937500    11.812500   
2000-02-04  13.65625     14.00000    12.625000    11.093750    10.937500   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2000-01-31    11.765625    11.031250     9.968750     8.531250     8.265625   
2000-02-01    11.906250    11.765625    11.031250     9.968750     8.531250   
2000-02-02    12.156250    11.906250    11.765625    11.031250     9.968750   
2000-02-03    11.890625    12.156250    11.906250    11.765625    11.03

HBox(children=(FloatProgress(value=0.0, max=5032.0), HTML(value='')))


                Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                        
2000-01-31  20.500000    20.859375    21.031250    22.125000    22.125000   
2000-02-01  19.796875    20.500000    20.859375    21.031250    22.125000   
2000-02-02  21.000000    19.796875    20.500000    20.859375    21.031250   
2000-02-03  22.218750    21.000000    19.796875    20.500000    20.859375   
2000-02-04  23.750000    22.218750    21.000000    19.796875    20.500000   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2000-01-31    24.093750     24.59375    22.140625    21.093750    21.218750   
2000-02-01    22.125000     24.09375    24.593750    22.140625    21.093750   
2000-02-02    22.125000     22.12500    24.093750    24.593750    22.140625   
2000-02-03    21.031250     22.12500    22.125000    24.093750  

HBox(children=(FloatProgress(value=0.0, max=5032.0), HTML(value='')))


             Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                     
2000-01-31  130.00        135.0       138.75       140.00       132.50   
2000-02-01  132.50        130.0       135.00       138.75       140.00   
2000-02-02  140.00        132.5       130.00       135.00       138.75   
2000-02-03  137.50        140.0       132.50       130.00       135.00   
2000-02-04  143.75        137.5       140.00       132.50       130.00   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2000-01-31       130.00       145.00       123.75       107.50       105.00   
2000-02-01       132.50       130.00       145.00       123.75       107.50   
2000-02-02       140.00       132.50       130.00       145.00       123.75   
2000-02-03       138.75       140.00       132.50       130.00       145.00   
2000-0

HBox(children=(FloatProgress(value=0.0, max=5032.0), HTML(value='')))


            Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                    
2000-01-31   55.0         53.0         56.0         57.0         59.0   
2000-02-01   56.0         55.0         53.0         56.0         57.0   
2000-02-02   60.0         56.0         55.0         53.0         56.0   
2000-02-03   69.0         60.0         56.0         55.0         53.0   
2000-02-04   66.0         69.0         60.0         56.0         55.0   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2000-01-31         58.0         56.0         51.0         51.0         60.0   
2000-02-01         59.0         58.0         56.0         51.0         51.0   
2000-02-02         57.0         59.0         58.0         56.0         51.0   
2000-02-03         56.0         57.0         59.0         58.0         56.0   
2000-02-04   

In [43]:
# technical indicators using only past 15 days closing price, past 15 days EMA, past 15 days RSI, and past 15 days Bollinger Bands
# past 15 days OBV, SAR
def compile_data():
    df_dict = {}
    with open('tickers.txt') as f:
        tickers = [i.strip() for i in f.readlines()]
    
    for count,ticker in enumerate(tickers):
        print(ticker)
        df = pd.read_csv('data/initial/{}.csv'.format(ticker))
        df  = df.dropna()
        
        if(ticker == 'MIJ.SI'):
            df['Close'] = df['Close'].astype(float)
            df['Volume'] = df['Volume'].astype(float)
            print(df.info())
            
        high_0 = df['High'].values
        high_1 = df['High'].shift(1).values
        high_2 = df['High'].shift(2).values
        high_3 = df['High'].shift(3).values
        high_4 = df['High'].shift(4).values
        high_5 = df['High'].shift(5).values
        high_6 = df['High'].shift(6).values
        high_7 = df['High'].shift(7).values
        high_8 = df['High'].shift(8).values
        high_9 = df['High'].shift(9).values
        high_10 = df['High'].shift(10).values
        high_11 = df['High'].shift(11).values
        high_12 = df['High'].shift(12).values
        high_13 = df['High'].shift(13).values
        high_14 = df['High'].shift(14).values
        
        low_0 = df['Low'].values
        low_1 = df['Low'].shift(1).values
        low_2 = df['Low'].shift(2).values
        low_3 = df['Low'].shift(3).values
        low_4 = df['Low'].shift(4).values
        low_5 = df['Low'].shift(5).values
        low_6 = df['Low'].shift(6).values
        low_7 = df['Low'].shift(7).values
        low_8 = df['Low'].shift(8).values
        low_9 = df['Low'].shift(9).values
        low_10 = df['Low'].shift(10).values
        low_11 = df['Low'].shift(11).values
        low_12 = df['Low'].shift(12).values
        low_13 = df['Low'].shift(13).values
        low_14 = df['Low'].shift(14).values
        
        close_0 = df['Close'].values
        close_1 = df['Close'].shift(1).values
        close_2 = df['Close'].shift(2).values
        close_3 = df['Close'].shift(3).values
        close_4 = df['Close'].shift(4).values
        close_5 = df['Close'].shift(5).values
        close_6 = df['Close'].shift(6).values
        close_7 = df['Close'].shift(7).values
        close_8 = df['Close'].shift(8).values
        close_9 = df['Close'].shift(9).values
        close_10 = df['Close'].shift(10).values
        close_11 = df['Close'].shift(11).values
        close_12 = df['Close'].shift(12).values
        close_13 = df['Close'].shift(13).values
        close_14 = df['Close'].shift(14).values
        
        volume_0 = df['Volume'].values
        volume_1 = df['Volume'].shift(1).values
        volume_2 = df['Volume'].shift(2).values
        volume_3 = df['Volume'].shift(3).values
        volume_4 = df['Volume'].shift(4).values
        volume_5 = df['Volume'].shift(5).values
        volume_6 = df['Volume'].shift(6).values
        volume_7 = df['Volume'].shift(7).values
        volume_8 = df['Volume'].shift(8).values
        volume_9 = df['Volume'].shift(9).values
        volume_10 = df['Volume'].shift(10).values
        volume_11 = df['Volume'].shift(11).values
        volume_12 = df['Volume'].shift(12).values
        volume_13 = df['Volume'].shift(13).values
        volume_14 = df['Volume'].shift(14).values

        df.drop(['High','Low','Open','Volume','Adj Close'], 1, inplace=True)
        print(df.head())
            
        df['previous_1d'] = df['Close'].shift(1)
        df['previous_2d'] = df['Close'].shift(2)
        df['previous_3d'] = df['Close'].shift(3)
        df['previous_4d'] = df['Close'].shift(4)
        df['previous_5d'] = df['Close'].shift(5)
        df['previous_6d'] = df['Close'].shift(6)
        df['previous_7d'] = df['Close'].shift(7)
        df['previous_8d'] = df['Close'].shift(8)
        df['previous_9d'] = df['Close'].shift(9)
        df['previous_10d'] = df['Close'].shift(10)
        df['previous_11d'] = df['Close'].shift(11)
        df['previous_12d'] = df['Close'].shift(12)
        df['previous_13d'] = df['Close'].shift(13)
        df['previous_14d'] = df['Close'].shift(14)

        for i in range(6,21):
            ema = EMA(close_0, timeperiod=i)
            arr = np.array(ema)
            df['EMA_{}'.format(i)] = arr

            
        for i in range(6,21):
            upperband, middleband, lowerband = BBANDS(close_0, timeperiod=i, nbdevup=2, nbdevdn=2, matype=0)
            arr_1 = np.array(upperband)
            arr_2 = np.array(middleband)
            arr_3 = np.array(lowerband)
            df['BBAND_upper_{}'.format(i)] = arr_1
            df['BBAND_middle_{}'.format(i)] = arr_2
            df['BBAND_lower_{}'.format(i)] = arr_3
         
        df_dates = df['Date']
        df.set_index('Date', inplace=True)
        
        df_padding_left = pd.DataFrame()
        df_padding_right = pd.DataFrame()
        df_padding_left['Date'] = df_dates
        df_padding_right['Date'] = df_dates
        #print(df_padding_left.info())
        for i in range(0,75):
            df_padding_left['padding_left{}'.format(i+1)] = np.zeros(len(df_padding_left))
            df_padding_right['padding_right{}'.format(i+1)] = np.zeros(len(df_padding_right))
            
        df_padding_left.set_index('Date', inplace=True)
        df_padding_right.set_index('Date', inplace=True)
        
        df_cnn = pd.concat([df_padding_left,df,df_padding_right],axis=1)
        
        print(df.head())
        print(df.info())
        
        labels = create_labels(df, 'Close', 15)
        df['labels'] = labels
        df_cnn['labels'] = labels
        df  = df.dropna()
        df_cnn = df_cnn.dropna()
        
        print(df.head())
        print(df_cnn.head())
        df.to_csv('data/selected_indicators_4/LSTM/{}_data.csv'.format(ticker))
        df_cnn.to_csv('data/selected_indicators_4/CNN/{}_data.csv'.format(ticker))
    
compile_data()

D05.SI
         Date      Close
0  2000-01-03  27.299999
1  2000-01-04  26.299999
2  2000-01-05  24.600000
3  2000-01-06  24.100000
4  2000-01-07  25.000000
                Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                        
2000-01-03  27.299999          NaN          NaN          NaN          NaN   
2000-01-04  26.299999    27.299999          NaN          NaN          NaN   
2000-01-05  24.600000    26.299999    27.299999          NaN          NaN   
2000-01-06  24.100000    24.600000    26.299999    27.299999          NaN   
2000-01-07  25.000000    24.100000    24.600000    26.299999    27.299999   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2000-01-03          NaN          NaN          NaN          NaN          NaN   
2000-01-04          NaN          NaN          NaN          NaN    

HBox(children=(FloatProgress(value=0.0, max=5066.0), HTML(value='')))


                Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                        
2000-01-28  22.799999    23.700001    22.400000    21.700001    21.400000   
2000-01-31  22.799999    22.799999    23.700001    22.400000    21.700001   
2000-02-01  22.900000    22.799999    22.799999    23.700001    22.400000   
2000-02-02  23.700001    22.900000    22.799999    22.799999    23.700001   
2000-02-03  23.799999    23.700001    22.900000    22.799999    22.799999   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2000-01-28    21.900000    22.700001    22.000000    22.600000    23.000000   
2000-01-31    21.400000    21.900000    22.700001    22.000000    22.600000   
2000-02-01    21.700001    21.400000    21.900000    22.700001    22.000000   
2000-02-02    22.400000    21.700001    21.400000    21.900000  

HBox(children=(FloatProgress(value=0.0, max=5066.0), HTML(value='')))


              Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                      
2000-01-28  7.27906      7.23053      6.89084      6.74526      6.79379   
2000-01-31  7.03642      7.27906      7.23053      6.89084      6.74526   
2000-02-01  6.79379      7.03642      7.27906      7.23053      6.89084   
2000-02-02  6.89084      6.79379      7.03642      7.27906      7.23053   
2000-02-03  6.69673      6.89084      6.79379      7.03642      7.27906   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2000-01-28      6.89084      6.79379      6.69673      6.98790      7.03642   
2000-01-31      6.79379      6.89084      6.79379      6.69673      6.98790   
2000-02-01      6.74526      6.79379      6.89084      6.79379      6.69673   
2000-02-02      6.89084      6.74526      6.79379      6.89084      6.79379   

None
Calculating labels


HBox(children=(FloatProgress(value=0.0, max=5068.0), HTML(value='')))


            Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                    
2000-01-28   13.0         13.5         13.1         12.8         13.4   
2000-01-31   12.5         13.0         13.5         13.1         12.8   
2000-02-01   12.5         12.5         13.0         13.5         13.1   
2000-02-02   12.9         12.5         12.5         13.0         13.5   
2000-02-03   12.7         12.9         12.5         12.5         13.0   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2000-01-28         14.0         14.2         13.6         14.2         14.3   
2000-01-31         13.4         14.0         14.2         13.6         14.2   
2000-02-01         12.8         13.4         14.0         14.2         13.6   
2000-02-02         13.1         12.8         13.4         14.0         14.2   
2000-02-03   

HBox(children=(FloatProgress(value=0.0, max=5032.0), HTML(value='')))


                Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                        
2000-01-28  51.125000    53.791668    52.291668    49.750000    48.666668   
2000-01-31  53.791668    51.125000    53.791668    52.291668    49.750000   
2000-02-01  55.833332    53.791668    51.125000    53.791668    52.291668   
2000-02-02  54.916668    55.833332    53.791668    51.125000    53.791668   
2000-02-03  55.458332    54.916668    55.833332    53.791668    51.125000   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2000-01-28    48.625000    49.375000    49.416668    47.333332    49.250000   
2000-01-31    48.666668    48.625000    49.375000    49.416668    47.333332   
2000-02-01    49.750000    48.666668    48.625000    49.375000    49.416668   
2000-02-02    52.291668    49.750000    48.666668    48.625000  

HBox(children=(FloatProgress(value=0.0, max=5032.0), HTML(value='')))


               Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                       
2000-01-28  22.87500     24.06250     23.56250     22.56250     22.28125   
2000-01-31  24.21875     22.87500     24.06250     23.56250     22.56250   
2000-02-01  24.50000     24.21875     22.87500     24.06250     23.56250   
2000-02-02  23.87500     24.50000     24.21875     22.87500     24.06250   
2000-02-03  23.21875     23.87500     24.50000     24.21875     22.87500   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2000-01-28     22.84375    23.539062    24.000000    24.000000    25.250000   
2000-01-31     22.28125    22.843750    23.539062    24.000000    24.000000   
2000-02-01     22.56250    22.281250    22.843750    23.539062    24.000000   
2000-02-02     23.56250    22.562500    22.281250    22.843750    23.53

HBox(children=(FloatProgress(value=0.0, max=5032.0), HTML(value='')))


               Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                       
2000-01-28  19.37500     19.50000     19.00000      18.8125      17.9375   
2000-01-31  20.00000     19.37500     19.50000      19.0000      18.8125   
2000-02-01  20.21875     20.00000     19.37500      19.5000      19.0000   
2000-02-02  19.50000     20.21875     20.00000      19.3750      19.5000   
2000-02-03  19.43750     19.50000     20.21875      20.0000      19.3750   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2000-01-28      18.3125      18.1875      18.3750      19.4375     20.84375   
2000-01-31      17.9375      18.3125      18.1875      18.3750     19.43750   
2000-02-01      18.8125      17.9375      18.3125      18.1875     18.37500   
2000-02-02      19.0000      18.8125      17.9375      18.3125     18.1

HBox(children=(FloatProgress(value=0.0, max=5032.0), HTML(value='')))


                Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                        
2000-01-28  420.93750     442.5000     435.9375     419.0625    413.90625   
2000-01-31  427.50000     420.9375     442.5000     435.9375    419.06250   
2000-02-01  436.87500     427.5000     420.9375     442.5000    435.93750   
2000-02-02  433.12500     436.8750     427.5000     420.9375    442.50000   
2000-02-03  422.34375     433.1250     436.8750     427.5000    420.93750   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2000-01-28    423.28125   439.570312   448.125000   438.750000   435.000000   
2000-01-31    413.90625   423.281250   439.570312   448.125000   438.750000   
2000-02-01    419.06250   413.906250   423.281250   439.570312   448.125000   
2000-02-02    435.93750   419.062500   413.906250   423.281250  

HBox(children=(FloatProgress(value=0.0, max=5070.0), HTML(value='')))


              Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                      
2000-01-28  0.24000      0.23833      0.23333      0.23000      0.23167   
2000-01-31  0.23500      0.24000      0.23833      0.23333      0.23000   
2000-02-01  0.23833      0.23500      0.24000      0.23833      0.23333   
2000-02-02  0.23500      0.23833      0.23500      0.24000      0.23833   
2000-02-03  0.24000      0.23500      0.23833      0.23500      0.24000   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2000-01-28      0.23833      0.23833      0.24000      0.24000      0.24167   
2000-01-31      0.23167      0.23833      0.23833      0.24000      0.24000   
2000-02-01      0.23000      0.23167      0.23833      0.23833      0.24000   
2000-02-02      0.23333      0.23000      0.23167      0.23833      0.23833   

HBox(children=(FloatProgress(value=0.0, max=149.0), HTML(value='')))


            Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                    
2019-06-28  0.195        0.195        0.190        0.198        0.200   
2019-07-01  0.190        0.195        0.195        0.190        0.198   
2019-07-02  0.190        0.190        0.195        0.195        0.190   
2019-07-03  0.190        0.190        0.190        0.195        0.195   
2019-07-04  0.190        0.190        0.190        0.190        0.195   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2019-06-28        0.200        0.200        0.200          0.2          0.2   
2019-07-01        0.200        0.200        0.200          0.2          0.2   
2019-07-02        0.198        0.200        0.200          0.2          0.2   
2019-07-03        0.190        0.198        0.200          0.2          0.2   
2019-07-04   

HBox(children=(FloatProgress(value=0.0, max=2885.0), HTML(value='')))


              Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                      
2008-07-31  0.13365      0.12933      0.12933      0.12933      0.13365   
2008-08-01  0.13365      0.13365      0.12933      0.12933      0.12933   
2008-08-04  0.12502      0.13365      0.13365      0.12933      0.12933   
2008-08-05  0.12933      0.12502      0.13365      0.13365      0.12933   
2008-08-06  0.12502      0.12933      0.12502      0.13365      0.13365   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2008-07-31      0.13365      0.14227      0.12933      0.13365      0.13796   
2008-08-01      0.13365      0.13365      0.14227      0.12933      0.13365   
2008-08-04      0.12933      0.13365      0.13365      0.14227      0.12933   
2008-08-05      0.12933      0.12933      0.13365      0.13365      0.14227   

HBox(children=(FloatProgress(value=0.0, max=2520.0), HTML(value='')))


              Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                      
2009-08-20  0.22667      0.22194      0.21722      0.22194      0.24083   
2009-08-21  0.22194      0.22667      0.22194      0.21722      0.22194   
2009-08-24  0.22194      0.22194      0.22667      0.22194      0.21722   
2009-08-25  0.23611      0.22194      0.22194      0.22667      0.22194   
2009-08-26  0.23139      0.23611      0.22194      0.22194      0.22667   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2009-08-20      0.24556      0.24556      0.25500      0.25028      0.25028   
2009-08-21      0.24083      0.24556      0.24556      0.25500      0.25028   
2009-08-24      0.22194      0.24083      0.24556      0.24556      0.25500   
2009-08-25      0.21722      0.22194      0.24083      0.24556      0.24556   

HBox(children=(FloatProgress(value=0.0, max=5032.0), HTML(value='')))


                Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                        
2000-01-28  42.750000    43.416668    47.791668    45.291668    45.770832   
2000-01-31  42.062500    42.750000    43.416668    47.791668    45.291668   
2000-02-01  44.312500    42.062500    42.750000    43.416668    47.791668   
2000-02-02  35.333332    44.312500    42.062500    42.750000    43.416668   
2000-02-03  34.375000    35.333332    44.312500    42.062500    42.750000   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2000-01-28    49.250000    40.895832    40.333332    35.687500    38.500000   
2000-01-31    45.770832    49.250000    40.895832    40.333332    35.687500   
2000-02-01    45.291668    45.770832    49.250000    40.895832    40.333332   
2000-02-02    47.791668    45.291668    45.770832    49.250000  

HBox(children=(FloatProgress(value=0.0, max=5032.0), HTML(value='')))


               Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                       
2000-01-28  11.81250    11.890625    12.156250    11.906250    11.765625   
2000-01-31  10.93750    11.812500    11.890625    12.156250    11.906250   
2000-02-01  11.09375    10.937500    11.812500    11.890625    12.156250   
2000-02-02  12.62500    11.093750    10.937500    11.812500    11.890625   
2000-02-03  14.00000    12.625000    11.093750    10.937500    11.812500   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2000-01-28    11.031250     9.968750     8.531250     8.265625     8.656250   
2000-01-31    11.765625    11.031250     9.968750     8.531250     8.265625   
2000-02-01    11.906250    11.765625    11.031250     9.968750     8.531250   
2000-02-02    12.156250    11.906250    11.765625    11.031250     9.96

HBox(children=(FloatProgress(value=0.0, max=5032.0), HTML(value='')))


                Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                        
2000-01-28  20.859375    21.031250    22.125000    22.125000    24.093750   
2000-01-31  20.500000    20.859375    21.031250    22.125000    22.125000   
2000-02-01  19.796875    20.500000    20.859375    21.031250    22.125000   
2000-02-02  21.000000    19.796875    20.500000    20.859375    21.031250   
2000-02-03  22.218750    21.000000    19.796875    20.500000    20.859375   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2000-01-28     24.59375    22.140625    21.093750    21.218750    22.000000   
2000-01-31     24.09375    24.593750    22.140625    21.093750    21.218750   
2000-02-01     22.12500    24.093750    24.593750    22.140625    21.093750   
2000-02-02     22.12500    22.125000    24.093750    24.593750  

HBox(children=(FloatProgress(value=0.0, max=5032.0), HTML(value='')))


            Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                    
2000-01-28  135.0       138.75       140.00       132.50       130.00   
2000-01-31  130.0       135.00       138.75       140.00       132.50   
2000-02-01  132.5       130.00       135.00       138.75       140.00   
2000-02-02  140.0       132.50       130.00       135.00       138.75   
2000-02-03  137.5       140.00       132.50       130.00       135.00   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2000-01-28       145.00       123.75       107.50       105.00       103.75   
2000-01-31       130.00       145.00       123.75       107.50       105.00   
2000-02-01       132.50       130.00       145.00       123.75       107.50   
2000-02-02       140.00       132.50       130.00       145.00       123.75   
2000-02-03   

HBox(children=(FloatProgress(value=0.0, max=5032.0), HTML(value='')))


            Close  previous_1d  previous_2d  previous_3d  previous_4d  \
Date                                                                    
2000-01-28   53.0         56.0         57.0         59.0         58.0   
2000-01-31   55.0         53.0         56.0         57.0         59.0   
2000-02-01   56.0         55.0         53.0         56.0         57.0   
2000-02-02   60.0         56.0         55.0         53.0         56.0   
2000-02-03   69.0         60.0         56.0         55.0         53.0   

            previous_5d  previous_6d  previous_7d  previous_8d  previous_9d  \
Date                                                                          
2000-01-28         56.0         51.0         51.0         60.0         62.0   
2000-01-31         58.0         56.0         51.0         51.0         60.0   
2000-02-01         59.0         58.0         56.0         51.0         51.0   
2000-02-02         57.0         59.0         58.0         56.0         51.0   
2000-02-03   