In [2]:
import pandas_datareader.data as web
import pandas as pd
import numpy as np
from talib import RSI, BBANDS,WILLR,WMA,SMA,EMA,TEMA,KAMA,CCI,CMO,MACD,PPO,ROC,APO,DX,MOM,STOCH,AROON,ADX,OBV
import matplotlib.pyplot as plt
import requests
import lxml
import datetime as dt
import os


## Retrieve stock ticker data

### Cyclic tickers

In [4]:
# Use the tickers saved in a text file to download each stock's price history from Yahoo

def get_data_from_yahoo(tickers_file='v6_tickers.txt',
                        out_dir='data/variation 6/initial',
                        start=None, end=None):
    """
    Download daily price history from Yahoo for every ticker in a text file.

    One CSV per ticker is written to ``out_dir`` as ``<ticker>.csv``. A ticker
    whose CSV already exists is skipped, so the function can be re-run without
    downloading everything again.

    params :
        tickers_file => text file with one ticker symbol per line
                        (blank lines are ignored)
        out_dir => folder that receives the CSV files (created if missing)
        start => first date to request, defaults to 2000-01-01
        end => last date to request, defaults to 2019-01-01
    returns : None
    """
    if start is None:
        start = dt.datetime(2000, 1, 1)
    if end is None:
        end = dt.datetime(2019, 1, 1)

    # Skip blank/whitespace-only lines: an empty symbol would otherwise be
    # sent to the data reader and written to a file named ".csv".
    with open(tickers_file) as f:
        tickers = [line.strip() for line in f if line.strip()]

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(out_dir, exist_ok=True)

    for ticker in tickers:
        print(ticker)
        csv_path = os.path.join(out_dir, '{}.csv'.format(ticker))
        if os.path.exists(csv_path):
            print('Already have {}'.format(ticker))
            continue
        df = web.DataReader(ticker, 'yahoo', start, end)
        df.to_csv(csv_path)
            
# Fetch price history for every ticker in v6_tickers.txt (tickers whose CSV already exists are skipped).
get_data_from_yahoo()

600519.SS
000858.SZ
600779.SS
NVAX
INO
VXRT
JPM
AFG
FBP
AAL
JBLU
HA


In [2]:
# Use the tickers saved in a text file to download each stock's price history from Yahoo

def get_data_from_yahoo(tickers_file='small_cyclic_tickers.txt',
                        out_dir='data/cyclic_tickers/initial',
                        start=None, end=None):
    """
    Download daily price history from Yahoo for every ticker in a text file.

    One CSV per ticker is written to ``out_dir`` as ``<ticker>.csv``. A ticker
    whose CSV already exists is skipped, so the function can be re-run without
    downloading everything again.

    params :
        tickers_file => text file with one ticker symbol per line
                        (blank lines are ignored)
        out_dir => folder that receives the CSV files (created if missing)
        start => first date to request, defaults to 2000-01-01
        end => last date to request, defaults to 2019-01-01
    returns : None
    """
    if start is None:
        start = dt.datetime(2000, 1, 1)
    if end is None:
        end = dt.datetime(2019, 1, 1)

    # Skip blank/whitespace-only lines: an empty symbol would otherwise be
    # sent to the data reader and written to a file named ".csv".
    with open(tickers_file) as f:
        tickers = [line.strip() for line in f if line.strip()]

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(out_dir, exist_ok=True)

    for ticker in tickers:
        print(ticker)
        csv_path = os.path.join(out_dir, '{}.csv'.format(ticker))
        if os.path.exists(csv_path):
            print('Already have {}'.format(ticker))
            continue
        df = web.DataReader(ticker, 'yahoo', start, end)
        df.to_csv(csv_path)
            
# Fetch price history for every ticker in small_cyclic_tickers.txt (tickers whose CSV already exists are skipped).
get_data_from_yahoo()

ANF
CTB
AXL


### Non-Cyclic tickers

In [14]:
# Use the tickers saved in a text file to download each stock's price history from Yahoo

def get_data_from_yahoo(tickers_file='noncyclic_tickers.txt',
                        out_dir='data/noncyclic_tickers/initial',
                        start=None, end=None):
    """
    Download daily price history from Yahoo for every ticker in a text file.

    One CSV per ticker is written to ``out_dir`` as ``<ticker>.csv``. A ticker
    whose CSV already exists is skipped, so the function can be re-run without
    downloading everything again.

    params :
        tickers_file => text file with one ticker symbol per line
                        (blank lines are ignored)
        out_dir => folder that receives the CSV files (created if missing)
        start => first date to request, defaults to 2000-01-01
        end => last date to request, defaults to 2019-01-01
    returns : None
    """
    if start is None:
        start = dt.datetime(2000, 1, 1)
    if end is None:
        end = dt.datetime(2019, 1, 1)

    # Skip blank/whitespace-only lines: an empty symbol would otherwise be
    # sent to the data reader and written to a file named ".csv".
    with open(tickers_file) as f:
        tickers = [line.strip() for line in f if line.strip()]

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(out_dir, exist_ok=True)

    for ticker in tickers:
        print(ticker)
        csv_path = os.path.join(out_dir, '{}.csv'.format(ticker))
        if os.path.exists(csv_path):
            print('Already have {}'.format(ticker))
            continue
        df = web.DataReader(ticker, 'yahoo', start, end)
        df.to_csv(csv_path)
            
# Fetch price history for every ticker in noncyclic_tickers.txt (tickers whose CSV already exists are skipped).
get_data_from_yahoo()

600519.SS
Already have 600519.SS
COST
Already have COST
PG
Already have PG
000858.SZ
Already have 000858.SZ
WMT
Already have WMT
INO
Already have INO
NVAX
Already have NVAX
2319.HK
Already have 2319.HK
000333.SZ
Already have 000333.SZ
TGT
Already have TGT
HD
Already have HD
KO
Already have KO
PEP


In [3]:
# Use the tickers saved in a text file to download each stock's price history from Yahoo

def get_data_from_yahoo(tickers_file='small_noncyclic_tickers.txt',
                        out_dir='data/noncyclic_tickers/initial',
                        start=None, end=None):
    """
    Download daily price history from Yahoo for every ticker in a text file.

    One CSV per ticker is written to ``out_dir`` as ``<ticker>.csv``. A ticker
    whose CSV already exists is skipped, so the function can be re-run without
    downloading everything again.

    params :
        tickers_file => text file with one ticker symbol per line
                        (blank lines are ignored)
        out_dir => folder that receives the CSV files (created if missing)
        start => first date to request, defaults to 2000-01-01
        end => last date to request, defaults to 2019-01-01
    returns : None
    """
    if start is None:
        start = dt.datetime(2000, 1, 1)
    if end is None:
        end = dt.datetime(2019, 1, 1)

    # Skip blank/whitespace-only lines: an empty symbol would otherwise be
    # sent to the data reader and written to a file named ".csv".
    with open(tickers_file) as f:
        tickers = [line.strip() for line in f if line.strip()]

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(out_dir, exist_ok=True)

    for ticker in tickers:
        print(ticker)
        csv_path = os.path.join(out_dir, '{}.csv'.format(ticker))
        if os.path.exists(csv_path):
            print('Already have {}'.format(ticker))
            continue
        df = web.DataReader(ticker, 'yahoo', start, end)
        df.to_csv(csv_path)
            
# Fetch price history for every ticker in small_noncyclic_tickers.txt (tickers whose CSV already exists are skipped).
get_data_from_yahoo()

VGR
FDP
WMK


## Function to create labels

In [5]:
import os
import re
from operator import itemgetter

import pandas as pd
import pickle
import numpy as np
from sklearn.feature_selection import SelectKBest, f_classif, mutual_info_classif
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.utils import compute_class_weight
from tqdm.auto import tqdm

def create_labels(df, col_name, window_size=15):
    """
    Label every row as BUY, SELL or HOLD using a sliding window.

    For each window of ``window_size`` consecutive rows, the row in the middle
    of the window is labelled:
        SELL (0) if the window maximum sits on the middle row,
        BUY  (1) if the window minimum sits on the middle row,
        HOLD (2) otherwise.
    Ties are resolved in favour of the earliest row in the window, and the
    maximum is checked before the minimum.

    params :
        df => Dataframe with data
        col_name => name of the numeric column used to determine the strategy
        window_size => number of rows per window (must be >= 1)
    returns : numpy float array of length len(df). Rows that are never the
              middle of a full window (the leading and trailing edges of the
              frame) are left as NaN.
    raises : ValueError if window_size is smaller than 1
    """
    if window_size < 1:
        raise ValueError("window_size must be >= 1")

    total_rows = len(df)
    labels = np.full(total_rows, np.nan)

    # Pull the column out once; indexing the frame row by row inside the
    # window loop is what made the original implementation slow.
    prices = np.asarray(df[col_name], dtype=float)

    print("Calculating labels")
    pbar = tqdm(total=total_rows)
    try:
        for window_end in range(total_rows):
            if window_end >= window_size - 1:
                window_begin = window_end - (window_size - 1)
                window_middle = (window_begin + window_end) // 2
                window = prices[window_begin:window_end + 1]

                if np.isnan(window).all():
                    # No usable price in this window: neither extreme exists.
                    min_index = max_index = -1
                else:
                    # nanargmin/nanargmax skip NaN prices and return the first
                    # occurrence, matching a strict </> scan from the left.
                    min_index = window_begin + int(np.nanargmin(window))
                    max_index = window_begin + int(np.nanargmax(window))

                if max_index == window_middle:
                    labels[window_middle] = 0
                elif min_index == window_middle:
                    labels[window_middle] = 1
                else:
                    labels[window_middle] = 2

            pbar.update(1)
    finally:
        # Always release the progress bar, even if labelling fails midway.
        pbar.close()

    return labels

## Generate technical indicators

### Cyclic indicators

In [6]:
def compile_data(tickers_file='v6_tickers.txt',
                 input_dir='data/variation 6/initial',
                 output_dir='data/variation 6/initial_indicators',
                 window_size=15):
    """
    Build the indicator + label CSV for every ticker in a text file.

    For each ticker, ``<input_dir>/<ticker>.csv`` is read, rows with missing
    values are dropped, and these columns are derived:
        previous_1d => Close of the previous row
        EMA_12      => EMA(timeperiod=12) of the lagged Close
        willr_14    => WILLR(timeperiod=14) of the lagged High/Low/Close
        OBV_7       => OBV of the lagged Close/Volume (OBV takes no period
                       here; the column name is kept as-is because it is
                       part of the written CSV)
        labels      => output of create_labels on the Close column
    All indicator inputs are shifted by one row, so a row's features are
    computed from the preceding rows only. Close is removed and rows with any
    missing value are dropped before the result is written to
    ``<output_dir>/<ticker>_data.csv``.

    params :
        tickers_file => text file with one ticker symbol per line
                        (blank lines are ignored)
        input_dir => folder holding the downloaded price CSVs
        output_dir => folder for the generated CSVs (created if missing)
        window_size => window size passed to create_labels
    returns : None
    """
    with open(tickers_file) as f:
        tickers = [line.strip() for line in f if line.strip()]

    # to_csv does not create missing folders, so make sure the target exists.
    os.makedirs(output_dir, exist_ok=True)

    for ticker in tickers:
        raw = pd.read_csv(os.path.join(input_dir, '{}.csv'.format(ticker)))
        raw = raw.dropna()

        # Lag every indicator input by one row.
        high = raw['High'].shift(1).values
        low = raw['Low'].shift(1).values
        close = raw['Close'].shift(1).values
        volume = raw['Volume'].shift(1).values

        # Keyword form: passing axis positionally fails on pandas >= 2.0.
        df = raw.drop(columns=['High', 'Low', 'Open', 'Volume', 'Adj Close'])
        print(df.head())

        df['previous_1d'] = df['Close'].shift(1)
        df['EMA_12'] = np.array(EMA(close, timeperiod=12))
        df['willr_14'] = np.array(WILLR(high, low, close, timeperiod=14))
        df['OBV_7'] = np.array(OBV(close, volume))

        df = df.set_index('Date')

        print(df.head())
        # info() prints its own report and returns None, so it is not wrapped
        # in print().
        df.info()

        df['labels'] = create_labels(df, 'Close', window_size)
        df = df.drop(columns=['Close']).dropna()

        print(df.head())
        df.to_csv(os.path.join(output_dir, '{}_data.csv'.format(ticker)))
    
# Build the indicator/label CSVs for every ticker listed in v6_tickers.txt.
compile_data()

         Date     Close
0  2001-08-27  7.074920
1  2001-08-28  7.335628
2  2001-08-29  7.240101
3  2001-08-30  7.383391
4  2001-08-31  7.365480
               Close  previous_1d  EMA_12  willr_14        OBV_7
Date                                                            
2001-08-27  7.074920          NaN     NaN       NaN          NaN
2001-08-28  7.335628     7.074920     NaN       NaN  204166311.0
2001-08-29  7.240101     7.335628     NaN       NaN  269311618.0
2001-08-30  7.383391     7.240101     NaN       NaN  242553224.0
2001-08-31  7.365480     7.383391     NaN       NaN  266678784.0
<class 'pandas.core.frame.DataFrame'>
Index: 4141 entries, 2001-08-27 to 2018-12-28
Data columns (total 5 columns):
Close          4141 non-null float64
previous_1d    4140 non-null float64
EMA_12         4129 non-null float64
willr_14       4127 non-null float64
OBV_7          4140 non-null float64
dtypes: float64(5)
memory usage: 194.1+ KB
None
Calculating labels


HBox(children=(FloatProgress(value=0.0, max=4141.0), HTML(value='')))


            previous_1d    EMA_12   willr_14        OBV_7  labels
Date                                                             
2001-09-14     7.142585  7.257483 -41.873824  204624213.0     2.0
2001-09-17     7.224180  7.252359 -51.296847  208479649.0     2.0
2001-09-18     7.132634  7.233940 -72.964156  203965392.0     2.0
2001-09-19     7.214230  7.230908 -59.609037  209378730.0     2.0
2001-09-20     7.347568  7.248855 -37.785092  213725440.0     2.0
         Date     Close
0  2000-01-04  4.053544
1  2000-01-05  4.091251
2  2000-01-06  4.255548
3  2000-01-07  4.405031
4  2000-01-10  4.418498
               Close  previous_1d  EMA_12  willr_14       OBV_7
Date                                                           
2000-01-04  4.053544          NaN     NaN       NaN         NaN
2000-01-05  4.091251     4.053544     NaN       NaN   5211144.0
2000-01-06  4.255548     4.091251     NaN       NaN  16146401.0
2000-01-07  4.405031     4.255548     NaN       NaN  29138906.0
2000-01-1

HBox(children=(FloatProgress(value=0.0, max=4533.0), HTML(value='')))


            previous_1d    EMA_12   willr_14        OBV_7  labels
Date                                                             
2000-01-24     4.396951  4.259450 -20.237909   91957609.0     2.0
2000-01-25     4.523540  4.300079 -16.033642  115224048.0     2.0
2000-01-26     4.400991  4.315604 -35.231971  105394663.0     2.0
2000-01-27     4.399644  4.328533 -51.735021   77180955.0     2.0
2000-01-28     4.302682  4.324556 -63.091500   43145413.0     2.0
         Date      Close
0  2000-01-04  13.909090
1  2000-01-05  13.787878
2  2000-01-06  14.048484
3  2000-01-07  14.424242
4  2000-01-10  14.363636
                Close  previous_1d  EMA_12  willr_14      OBV_7
Date                                                           
2000-01-04  13.909090          NaN     NaN       NaN        NaN
2000-01-05  13.787878    13.909090     NaN       NaN  1496391.0
2000-01-06  14.048484    13.787878     NaN       NaN   492604.0
2000-01-07  14.424242    14.048484     NaN       NaN  2145098.0
200

HBox(children=(FloatProgress(value=0.0, max=4552.0), HTML(value='')))


            previous_1d     EMA_12   willr_14      OBV_7  labels
Date                                                            
2000-01-24    14.078787  14.004494 -39.930588 -2698175.0     2.0
2000-01-25    14.727272  14.115691  -3.361342  2489423.0     2.0
2000-01-26    14.787878  14.219104 -35.135148  7249607.0     2.0
2000-01-27    14.284848  14.229219 -57.567561  4991464.0     2.0
2000-01-28    14.412121  14.257357 -51.891890  7816168.0     2.0
         Date  Close
0  1999-12-31  112.5
1  2000-01-03  115.0
2  2000-01-04  110.0
3  2000-01-05  107.5
4  2000-01-06  112.5
            Close  previous_1d  EMA_12  willr_14   OBV_7
Date                                                    
1999-12-31  112.5          NaN     NaN       NaN     NaN
2000-01-03  115.0        112.5     NaN       NaN   115.0
2000-01-04  110.0        115.0     NaN       NaN  1240.0
2000-01-05  107.5        110.0     NaN       NaN   630.0
2000-01-06  112.5        107.5     NaN       NaN   175.0
<class 'pandas.core

HBox(children=(FloatProgress(value=0.0, max=4780.0), HTML(value='')))


            previous_1d      EMA_12   willr_14    OBV_7  labels
Date                                                           
2000-01-21       123.75  111.044132  -0.000000    330.0     0.0
2000-01-24       145.00  116.268112  -9.090909  10530.0     2.0
2000-01-25       130.00  118.380710 -36.363636   6930.0     2.0
2000-01-26       132.50  120.552908 -31.818182   9665.0     2.0
2000-01-27       140.00  123.544769 -18.181818  11870.0     2.0
         Date  Close
0  1999-12-31   52.0
1  2000-01-03   54.0
2  2000-01-04   50.0
3  2000-01-05   49.0
4  2000-01-06   48.0
            Close  previous_1d  EMA_12  willr_14   OBV_7
Date                                                    
1999-12-31   52.0          NaN     NaN       NaN     NaN
2000-01-03   54.0         52.0     NaN       NaN    50.0
2000-01-04   50.0         54.0     NaN       NaN  1306.0
2000-01-05   49.0         50.0     NaN       NaN -1794.0
2000-01-06   48.0         49.0     NaN       NaN -6013.0
<class 'pandas.core.frame.

HBox(children=(FloatProgress(value=0.0, max=4780.0), HTML(value='')))


            previous_1d     EMA_12   willr_14    OBV_7  labels
Date                                                          
2000-01-21         51.0  54.535133 -80.952381 -17732.0     2.0
2000-01-24         56.0  54.760497 -57.142857 -10257.0     2.0
2000-01-25         58.0  55.258882 -47.619048  -8463.0     2.0
2000-01-26         59.0  55.834439 -42.857143  -4944.0     2.0
2000-01-27         57.0  56.013756 -52.380952  -5313.0     2.0
         Date   Close
0  1999-12-31  305.25
1  2000-01-03  313.50
2  2000-01-04  297.00
3  2000-01-05  330.00
4  2000-01-06  330.00
             Close  previous_1d  EMA_12  willr_14   OBV_7
Date                                                     
1999-12-31  305.25          NaN     NaN       NaN     NaN
2000-01-03  313.50       305.25     NaN       NaN  1298.0
2000-01-04  297.00       313.50     NaN       NaN  2357.0
2000-01-05  330.00       297.00     NaN       NaN  1219.0
2000-01-06  330.00       330.00     NaN       NaN  3369.0
<class 'pandas.core.

HBox(children=(FloatProgress(value=0.0, max=4780.0), HTML(value='')))


            previous_1d      EMA_12   willr_14    OBV_7  labels
Date                                                           
2000-01-21      437.250  350.035133  -6.976744  42909.0     0.0
2000-01-24      515.625  375.510497 -10.606061  67630.0     2.0
2000-01-25      424.875  383.105036 -43.939394  53407.0     2.0
2000-01-26      453.750  393.973492 -33.333333  57201.0     2.0
2000-01-27      420.750  398.092955 -50.000000  54669.0     2.0
         Date      Close
0  1999-12-31  51.791668
1  2000-01-03  48.583332
2  2000-01-04  47.250000
3  2000-01-05  46.958332
4  2000-01-06  47.625000
                Close  previous_1d  EMA_12  willr_14       OBV_7
Date                                                            
1999-12-31  51.791668          NaN     NaN       NaN         NaN
2000-01-03  48.583332    51.791668     NaN       NaN   1160400.0
2000-01-04  47.250000    48.583332     NaN       NaN -10858800.0
2000-01-05  46.958332    47.250000     NaN       NaN -22582200.0
2000-01-06 

HBox(children=(FloatProgress(value=0.0, max=4780.0), HTML(value='')))


            previous_1d     EMA_12   willr_14       OBV_7  labels
Date                                                             
2000-01-21    49.375000  48.388499 -42.592593  -2894250.0     2.0
2000-01-24    48.625000  48.424884 -39.516144  -8054850.0     2.0
2000-01-25    48.666668  48.462081 -38.709668  -1595700.0     2.0
2000-01-26    49.750000  48.660222 -17.741956   5984250.0     2.0
2000-01-27    52.291668  49.218906  -5.232557  14370900.0     2.0
         Date      Close
0  1999-12-31  17.583332
1  2000-01-03  16.875000
2  2000-01-04  16.500000
3  2000-01-05  16.083332
4  2000-01-06  16.500000
                Close  previous_1d  EMA_12  willr_14     OBV_7
Date                                                          
1999-12-31  17.583332          NaN     NaN       NaN       NaN
2000-01-03  16.875000    17.583332     NaN       NaN   48450.0
2000-01-04  16.500000    16.875000     NaN       NaN  -10500.0
2000-01-05  16.083332    16.500000     NaN       NaN  -83850.0
2000-01-0

HBox(children=(FloatProgress(value=0.0, max=4780.0), HTML(value='')))


            previous_1d     EMA_12    willr_14      OBV_7  labels
Date                                                             
2000-01-21    15.000000  16.189452 -100.000000  -550650.0     2.0
2000-01-24    15.166667  16.032100  -93.103439  -477300.0     2.0
2000-01-25    14.875000  15.854085  -92.187507  -762150.0     2.0
2000-01-26    14.708333  15.677815  -95.454544 -1036650.0     2.0
2000-01-27    14.666667  15.522254  -88.888864 -1157250.0     2.0
         Date     Close
0  1999-12-31  103.7500
1  2000-01-03  102.1875
2  2000-01-04  101.5625
3  2000-01-05  100.9375
4  2000-01-06  102.1875
               Close  previous_1d  EMA_12  willr_14    OBV_7
Date                                                        
1999-12-31  103.7500          NaN     NaN       NaN      NaN
2000-01-03  102.1875     103.7500     NaN       NaN   2520.0
2000-01-04  101.5625     102.1875     NaN       NaN   -140.0
2000-01-05  100.9375     101.5625     NaN       NaN  -5620.0
2000-01-06  102.1875     10

HBox(children=(FloatProgress(value=0.0, max=4780.0), HTML(value='')))


            previous_1d      EMA_12  willr_14    OBV_7  labels
Date                                                          
2000-01-21     100.0000  100.813609     -64.0 -21460.0     2.0
2000-01-24      97.1875  100.255746    -100.0 -28200.0     2.0
2000-01-25      96.2500   99.639478    -100.0 -32420.0     2.0
2000-01-26      93.1250   98.637250    -100.0 -42580.0     2.0
2000-01-27      94.0625   97.933443     -90.0 -36840.0     2.0
         Date      Close
0  2005-09-27  19.299999
1  2005-09-28  20.500000
2  2005-09-29  20.209999
3  2005-09-30  21.010000
4  2005-10-03  21.500000
                Close  previous_1d  EMA_12  willr_14      OBV_7
Date                                                           
2005-09-27  19.299999          NaN     NaN       NaN        NaN
2005-09-28  20.500000    19.299999     NaN       NaN   961200.0
2005-09-29  20.209999    20.500000     NaN       NaN  6709100.0
2005-09-30  21.010000    20.209999     NaN       NaN  5630900.0
2005-10-03  21.500000   

HBox(children=(FloatProgress(value=0.0, max=3338.0), HTML(value='')))


            previous_1d     EMA_12   willr_14       OBV_7  labels
Date                                                             
2005-10-17    22.270000  21.619546 -18.717939  12563700.0     2.0
2005-10-18    22.080000  21.690385 -24.210533  12099700.0     2.0
2005-10-19    21.490000  21.659557 -52.068980  10284200.0     2.0
2005-10-20    21.379999  21.616548 -57.446844   8818200.0     1.0
2005-10-21    21.020000  21.524771 -92.093018   8177300.0     2.0
         Date      Close
0  2002-04-12  13.333333
1  2002-04-15  13.395556
2  2002-04-16  13.573333
3  2002-04-17  13.362963
4  2002-04-18  13.096296
                Close  previous_1d  EMA_12  willr_14       OBV_7
Date                                                            
2002-04-12  13.333333          NaN     NaN       NaN         NaN
2002-04-15  13.395556    13.333333     NaN       NaN  47796750.0
2002-04-16  13.573333    13.395556     NaN       NaN  56012513.0
2002-04-17  13.362963    13.573333     NaN       NaN  59969363

HBox(children=(FloatProgress(value=0.0, max=4210.0), HTML(value='')))


            previous_1d     EMA_12   willr_14       OBV_7  labels
Date                                                             
2002-05-02    14.761481  13.724947  -5.141986  58104336.0     2.0
2002-05-03    14.562963  13.853872 -13.413429  57315261.0     2.0
2002-05-06    15.419259  14.094701  -0.086583  58757399.0     0.0
2002-05-07    16.148148  14.410616  -4.436861  61163774.0     2.0
2002-05-08    14.696296  14.454567 -37.883964  57784724.0     2.0
         Date   Close
0  1999-12-31  2.1250
1  2000-01-03  2.1250
2  2000-01-04  2.0625
3  2000-01-05  2.0625
4  2000-01-06  2.0000
             Close  previous_1d  EMA_12  willr_14     OBV_7
Date                                                       
1999-12-31  2.1250          NaN     NaN       NaN       NaN
2000-01-03  2.1250       2.1250     NaN       NaN  200900.0
2000-01-04  2.0625       2.1250     NaN       NaN  200900.0
2000-01-05  2.0625       2.0625     NaN       NaN  180900.0
2000-01-06  2.0000       2.0625     NaN      

HBox(children=(FloatProgress(value=0.0, max=4780.0), HTML(value='')))


            previous_1d    EMA_12    willr_14     OBV_7  labels
Date                                                           
2000-01-21       2.0625  2.055042  -66.666667  186300.0     2.0
2000-01-24       2.0000  2.046574 -100.000000  173400.0     2.0
2000-01-25       1.9375  2.029793 -100.000000  105600.0     2.0
2000-01-26       1.9375  2.015594  -75.000000  105600.0     2.0
2000-01-27       2.0000  2.013195  -50.000000  121200.0     2.0


In [5]:
def compile_data(tickers_file='small_cyclic_tickers.txt',
                 input_dir='data/cyclic_tickers/initial',
                 output_dir='data/cyclic_tickers/initial_indicators',
                 window_size=15):
    """
    Build the indicator + label CSV for every ticker in a text file.

    For each ticker, ``<input_dir>/<ticker>.csv`` is read, rows with missing
    values are dropped, and these columns are derived:
        previous_1d => Close of the previous row
        EMA_12      => EMA(timeperiod=12) of the lagged Close
        willr_14    => WILLR(timeperiod=14) of the lagged High/Low/Close
        OBV_7       => OBV of the lagged Close/Volume (OBV takes no period
                       here; the column name is kept as-is because it is
                       part of the written CSV)
        labels      => output of create_labels on the Close column
    All indicator inputs are shifted by one row, so a row's features are
    computed from the preceding rows only. Close is removed and rows with any
    missing value are dropped before the result is written to
    ``<output_dir>/<ticker>_data.csv``.

    params :
        tickers_file => text file with one ticker symbol per line
                        (blank lines are ignored)
        input_dir => folder holding the downloaded price CSVs
        output_dir => folder for the generated CSVs (created if missing)
        window_size => window size passed to create_labels
    returns : None
    """
    with open(tickers_file) as f:
        tickers = [line.strip() for line in f if line.strip()]

    # to_csv does not create missing folders, so make sure the target exists.
    os.makedirs(output_dir, exist_ok=True)

    for ticker in tickers:
        raw = pd.read_csv(os.path.join(input_dir, '{}.csv'.format(ticker)))
        raw = raw.dropna()

        # Lag every indicator input by one row.
        high = raw['High'].shift(1).values
        low = raw['Low'].shift(1).values
        close = raw['Close'].shift(1).values
        volume = raw['Volume'].shift(1).values

        # Keyword form: passing axis positionally fails on pandas >= 2.0.
        df = raw.drop(columns=['High', 'Low', 'Open', 'Volume', 'Adj Close'])
        print(df.head())

        df['previous_1d'] = df['Close'].shift(1)
        df['EMA_12'] = np.array(EMA(close, timeperiod=12))
        df['willr_14'] = np.array(WILLR(high, low, close, timeperiod=14))
        df['OBV_7'] = np.array(OBV(close, volume))

        df = df.set_index('Date')

        print(df.head())
        # info() prints its own report and returns None, so it is not wrapped
        # in print().
        df.info()

        df['labels'] = create_labels(df, 'Close', window_size)
        df = df.drop(columns=['Close']).dropna()

        print(df.head())
        df.to_csv(os.path.join(output_dir, '{}_data.csv'.format(ticker)))
    
# Build the indicator/label CSVs for every ticker listed in small_cyclic_tickers.txt.
compile_data()

         Date    Close
0  1999-12-31  26.6875
1  2000-01-03  27.5000
2  2000-01-04  25.9375
3  2000-01-05  26.6250
4  2000-01-06  25.2500
              Close  previous_1d  EMA_12  willr_14      OBV_7
Date                                                         
1999-12-31  26.6875          NaN     NaN       NaN        NaN
2000-01-03  27.5000      26.6875     NaN       NaN   480500.0
2000-01-04  25.9375      27.5000     NaN       NaN  1386600.0
2000-01-05  26.6250      25.9375     NaN       NaN   708100.0
2000-01-06  25.2500      26.6250     NaN       NaN  1480800.0
<class 'pandas.core.frame.DataFrame'>
Index: 4780 entries, 1999-12-31 to 2018-12-31
Data columns (total 5 columns):
Close          4780 non-null float64
previous_1d    4779 non-null float64
EMA_12         4768 non-null float64
willr_14       4766 non-null float64
OBV_7          4779 non-null float64
dtypes: float64(5)
memory usage: 224.1+ KB
None
Calculating labels


HBox(children=(FloatProgress(value=0.0, max=4780.0), HTML(value='')))


            previous_1d     EMA_12   willr_14       OBV_7  labels
Date                                                             
2000-01-21      21.5000  24.211693 -82.142857  -6805500.0     2.0
2000-01-24      20.2500  23.602201 -92.465753  -8986800.0     1.0
2000-01-25      20.1875  23.076863 -92.307692 -11383100.0     2.0
2000-01-26      20.8750  22.738115 -82.786885  -9392500.0     2.0
2000-01-27      22.8750  22.759174 -56.557377  -6811000.0     2.0
         Date    Close
0  1999-12-31  15.7500
1  2000-01-03  15.3125
2  2000-01-04  15.0625
3  2000-01-05  15.0625
4  2000-01-06  14.8750
              Close  previous_1d  EMA_12  willr_14     OBV_7
Date                                                        
1999-12-31  15.7500          NaN     NaN       NaN       NaN
2000-01-03  15.3125      15.7500     NaN       NaN  218300.0
2000-01-04  15.0625      15.3125     NaN       NaN   10000.0
2000-01-05  15.0625      15.0625     NaN       NaN -132300.0
2000-01-06  14.8750      15.0625 

HBox(children=(FloatProgress(value=0.0, max=4780.0), HTML(value='')))


            previous_1d     EMA_12    willr_14     OBV_7  labels
Date                                                            
2000-01-21      13.7500  14.817184  -97.297297 -827200.0     2.0
2000-01-24      14.0625  14.701079  -77.500000 -518900.0     2.0
2000-01-25      13.6875  14.545144  -92.500000 -906100.0     2.0
2000-01-26      13.7500  14.422814  -90.000000 -652700.0     2.0
2000-01-27      13.5000  14.280843 -100.000000 -848400.0     2.0
         Date    Close
0  1999-12-31  12.1250
1  2000-01-03  12.0000
2  2000-01-04  12.3125
3  2000-01-05  12.6875
4  2000-01-06  12.8750
              Close  previous_1d  EMA_12  willr_14    OBV_7
Date                                                       
1999-12-31  12.1250          NaN     NaN       NaN      NaN
2000-01-03  12.0000      12.1250     NaN       NaN  24800.0
2000-01-04  12.3125      12.0000     NaN       NaN  -1400.0
2000-01-05  12.6875      12.3125     NaN       NaN  37100.0
2000-01-06  12.8750      12.6875     NaN      

HBox(children=(FloatProgress(value=0.0, max=4780.0), HTML(value='')))


            previous_1d     EMA_12   willr_14     OBV_7  labels
Date                                                           
2000-01-21      16.0625  14.895987 -18.478261  298700.0     2.0
2000-01-24      15.8750  15.046605 -22.222222  293100.0     2.0
2000-01-25      15.7500  15.154819 -26.190476  286300.0     2.0
2000-01-26      15.7500  15.246386 -29.729730  286300.0     2.0
2000-01-27      16.0000  15.362326 -25.000000  311700.0     0.0


### Non-cyclic Indicators

In [16]:
def compile_data(ticker_file='noncyclic_tickers.txt',
                 data_dir='data/noncyclic_tickers',
                 label_window=15):
    """Build the per-ticker indicator/label CSVs for the non-cyclic tickers.

    For every ticker listed in ``ticker_file`` this reads
    ``<data_dir>/initial/<ticker>.csv``, derives the feature columns
    ``previous_1d``, ``EMA_12``, ``willr_14`` and ``OBV_7`` from the
    one-row-shifted price/volume series, stores the result of
    ``create_labels`` in a ``labels`` column, drops ``Close`` and every row
    that still contains a NaN, and writes the frame to
    ``<data_dir>/initial_indicators/<ticker>_data.csv``.

    Parameters
    ----------
    ticker_file : str
        Text file with one ticker symbol per line.
    data_dir : str
        Directory holding the ``initial`` input folder; the
        ``initial_indicators`` output folder is created inside it if missing.
    label_window : int
        Passed through as the third argument of ``create_labels``.

    Returns
    -------
    None
        Results are written to disk and previews are printed.
    """
    with open(ticker_file) as f:
        # Skip blank lines so a trailing newline does not produce an empty ticker.
        tickers = [line.strip() for line in f if line.strip()]

    # to_csv does not create missing directories, so make sure the target exists.
    out_dir = '{}/initial_indicators'.format(data_dir)
    os.makedirs(out_dir, exist_ok=True)

    for ticker in tickers:
        df = pd.read_csv('{}/initial/{}.csv'.format(data_dir, ticker)).dropna()

        # Shift every input down by one row so the indicator stored on a given
        # row is computed only from earlier rows (the first row becomes NaN).
        high = df['High'].shift(1).values
        low = df['Low'].shift(1).values
        close = df['Close'].shift(1).values
        volume = df['Volume'].shift(1).values

        # Keyword form: the positional ``axis`` argument of DataFrame.drop
        # is no longer accepted by pandas 2.x.
        df = df.drop(columns=['High', 'Low', 'Open', 'Volume', 'Adj Close'])
        print(df.head())

        df['previous_1d'] = df['Close'].shift(1)
        df['EMA_12'] = EMA(close, timeperiod=12)
        df['willr_14'] = WILLR(high, low, close, timeperiod=14)
        # OBV is called without a period; the 'OBV_7' column name is kept
        # unchanged so previously written CSVs keep the same schema.
        df['OBV_7'] = OBV(close, volume)

        df = df.set_index('Date')

        print(df.head())
        # info() prints its own report and returns None, so it is not wrapped
        # in print() (which used to emit a stray "None" line).
        df.info()

        df['labels'] = create_labels(df, 'Close', label_window)
        df = df.drop(columns=['Close']).dropna()

        print(df.head())
        df.to_csv('{}/{}_data.csv'.format(out_dir, ticker))


compile_data()

         Date     Close
0  2001-08-27  7.074920
1  2001-08-28  7.335628
2  2001-08-29  7.240101
3  2001-08-30  7.383391
4  2001-08-31  7.365480
               Close  previous_1d  EMA_12  willr_14        OBV_7
Date                                                            
2001-08-27  7.074920          NaN     NaN       NaN          NaN
2001-08-28  7.335628     7.074920     NaN       NaN  204166311.0
2001-08-29  7.240101     7.335628     NaN       NaN  269311618.0
2001-08-30  7.383391     7.240101     NaN       NaN  242553224.0
2001-08-31  7.365480     7.383391     NaN       NaN  266678784.0
<class 'pandas.core.frame.DataFrame'>
Index: 4141 entries, 2001-08-27 to 2018-12-28
Data columns (total 5 columns):
Close          4141 non-null float64
previous_1d    4140 non-null float64
EMA_12         4129 non-null float64
willr_14       4127 non-null float64
OBV_7          4140 non-null float64
dtypes: float64(5)
memory usage: 194.1+ KB
None
Calculating labels


HBox(children=(FloatProgress(value=0.0, max=4141.0), HTML(value='')))


            previous_1d    EMA_12   willr_14        OBV_7  labels
Date                                                             
2001-09-14     7.142585  7.257483 -41.873824  204624213.0     2.0
2001-09-17     7.224180  7.252359 -51.296847  208479649.0     2.0
2001-09-18     7.132634  7.233940 -72.964156  203965392.0     2.0
2001-09-19     7.214230  7.230908 -59.609037  209378730.0     2.0
2001-09-20     7.347568  7.248855 -37.785092  213725440.0     2.0
         Date      Close
0  1999-12-31  45.625000
1  2000-01-03  44.500000
2  2000-01-04  42.062500
3  2000-01-05  42.781250
4  2000-01-06  43.640625
                Close  previous_1d  EMA_12  willr_14       OBV_7
Date                                                            
1999-12-31  45.625000          NaN     NaN       NaN         NaN
2000-01-03  44.500000     45.62500     NaN       NaN   1163000.0
2000-01-04  42.062500     44.50000     NaN       NaN  -4894400.0
2000-01-05  42.781250     42.06250     NaN       NaN -10617200

HBox(children=(FloatProgress(value=0.0, max=4780.0), HTML(value='')))


            previous_1d     EMA_12   willr_14       OBV_7  labels
Date                                                             
2000-01-21      50.0000  47.379245 -27.700831  21139400.0     2.0
2000-01-24      48.5000  47.551669 -40.997230  17619200.0     2.0
2000-01-25      47.0000  47.466797 -54.293629  12613700.0     2.0
2000-01-26      49.6875  47.808444 -30.470914  16344500.0     2.0
2000-01-27      50.4375  48.212914 -27.215190  20127300.0     2.0
         Date     Close
0  1999-12-31  54.78125
1  2000-01-03  53.59375
2  2000-01-04  52.56250
3  2000-01-05  51.56250
4  2000-01-06  53.93750
               Close  previous_1d  EMA_12  willr_14       OBV_7
Date                                                           
1999-12-31  54.78125          NaN     NaN       NaN         NaN
2000-01-03  53.59375     54.78125     NaN       NaN    614200.0
2000-01-04  52.56250     53.59375     NaN       NaN  -3660800.0
2000-01-05  51.56250     52.56250     NaN       NaN  -7931600.0
2000-01-0

HBox(children=(FloatProgress(value=0.0, max=4780.0), HTML(value='')))


            previous_1d     EMA_12   willr_14       OBV_7  labels
Date                                                             
2000-01-21     56.28125  56.412106 -36.614173 -25968400.0     2.0
2000-01-24     51.34375  55.632359 -83.112583 -50347200.0     2.0
2000-01-25     51.56250  55.006226 -64.721485 -21792600.0     1.0
2000-01-26     48.75000  54.043730 -88.594164 -38890400.0     2.0
2000-01-27     50.31250  53.469695 -75.331565 -28647200.0     2.0
         Date     Close
0  2000-01-04  4.053544
1  2000-01-05  4.091251
2  2000-01-06  4.255548
3  2000-01-07  4.405031
4  2000-01-10  4.418498
               Close  previous_1d  EMA_12  willr_14       OBV_7
Date                                                           
2000-01-04  4.053544          NaN     NaN       NaN         NaN
2000-01-05  4.091251     4.053544     NaN       NaN   5211144.0
2000-01-06  4.255548     4.091251     NaN       NaN  16146401.0
2000-01-07  4.405031     4.255548     NaN       NaN  29138906.0
2000-01-1

HBox(children=(FloatProgress(value=0.0, max=4533.0), HTML(value='')))


            previous_1d    EMA_12   willr_14        OBV_7  labels
Date                                                             
2000-01-24     4.396951  4.259450 -20.237909   91957609.0     2.0
2000-01-25     4.523540  4.300079 -16.033642  115224048.0     2.0
2000-01-26     4.400991  4.315604 -35.231971  105394663.0     2.0
2000-01-27     4.399644  4.328533 -51.735021   77180955.0     2.0
2000-01-28     4.302682  4.324556 -63.091500   43145413.0     2.0
         Date    Close
0  1999-12-31  69.1250
1  2000-01-03  66.8125
2  2000-01-04  64.3125
3  2000-01-05  63.0000
4  2000-01-06  63.6875
              Close  previous_1d  EMA_12  willr_14       OBV_7
Date                                                          
1999-12-31  69.1250          NaN     NaN       NaN         NaN
2000-01-03  66.8125      69.1250     NaN       NaN   2112700.0
2000-01-04  64.3125      66.8125     NaN       NaN  -6257200.0
2000-01-05  63.0000      64.3125     NaN       NaN -13002300.0
2000-01-06  63.6875  

HBox(children=(FloatProgress(value=0.0, max=4780.0), HTML(value='')))


            previous_1d     EMA_12   willr_14       OBV_7  labels
Date                                                             
2000-01-21      63.3750  65.176128 -85.271318 -26649200.0     2.0
2000-01-24      62.4375  64.754801 -94.594595 -33601700.0     2.0
2000-01-25      59.3750  63.927139 -83.606557 -41401600.0     2.0
2000-01-26      61.1250  63.496041 -68.306011 -34997800.0     2.0
2000-01-27      61.9375  63.256265 -61.202186 -30459400.0     2.0
         Date  Close
0  1999-12-31   52.0
1  2000-01-03   54.0
2  2000-01-04   50.0
3  2000-01-05   49.0
4  2000-01-06   48.0
            Close  previous_1d  EMA_12  willr_14   OBV_7
Date                                                    
1999-12-31   52.0          NaN     NaN       NaN     NaN
2000-01-03   54.0         52.0     NaN       NaN    50.0
2000-01-04   50.0         54.0     NaN       NaN  1306.0
2000-01-05   49.0         50.0     NaN       NaN -1794.0
2000-01-06   48.0         49.0     NaN       NaN -6013.0
<class 'pand

HBox(children=(FloatProgress(value=0.0, max=4780.0), HTML(value='')))


            previous_1d     EMA_12   willr_14    OBV_7  labels
Date                                                          
2000-01-21         51.0  54.535133 -80.952381 -17732.0     2.0
2000-01-24         56.0  54.760497 -57.142857 -10257.0     2.0
2000-01-25         58.0  55.258882 -47.619048  -8463.0     2.0
2000-01-26         59.0  55.834439 -42.857143  -4944.0     2.0
2000-01-27         57.0  56.013756 -52.380952  -5313.0     2.0
         Date  Close
0  1999-12-31  112.5
1  2000-01-03  115.0
2  2000-01-04  110.0
3  2000-01-05  107.5
4  2000-01-06  112.5
            Close  previous_1d  EMA_12  willr_14   OBV_7
Date                                                    
1999-12-31  112.5          NaN     NaN       NaN     NaN
2000-01-03  115.0        112.5     NaN       NaN   115.0
2000-01-04  110.0        115.0     NaN       NaN  1240.0
2000-01-05  107.5        110.0     NaN       NaN   630.0
2000-01-06  112.5        107.5     NaN       NaN   175.0
<class 'pandas.core.frame.DataFra

HBox(children=(FloatProgress(value=0.0, max=4780.0), HTML(value='')))


            previous_1d      EMA_12   willr_14    OBV_7  labels
Date                                                           
2000-01-21       123.75  111.044132  -0.000000    330.0     0.0
2000-01-24       145.00  116.268112  -9.090909  10530.0     2.0
2000-01-25       130.00  118.380710 -36.363636   6930.0     2.0
2000-01-26       132.50  120.552908 -31.818182   9665.0     2.0
2000-01-27       140.00  123.544769 -18.181818  11870.0     2.0
         Date   Close
0  2004-06-10  2.4375
1  2004-06-11  2.3250
2  2004-06-14  2.3125
3  2004-06-15  2.2750
4  2004-06-16  2.3000
             Close  previous_1d  EMA_12  willr_14        OBV_7
Date                                                          
2004-06-10  2.4375          NaN     NaN       NaN          NaN
2004-06-11  2.3250       2.4375     NaN       NaN  741780948.0
2004-06-14  2.3125       2.3250     NaN       NaN  496140948.0
2004-06-15  2.2750       2.3125     NaN       NaN  369726948.0
2004-06-16  2.3000       2.2750     NaN  

HBox(children=(FloatProgress(value=0.0, max=3588.0), HTML(value='')))


            previous_1d    EMA_12   willr_14        OBV_7  labels
Date                                                             
2004-06-30        2.575  2.436754 -19.047606  495757896.0     2.0
2004-07-01        2.525  2.450330 -33.333298  480565876.0     2.0
2004-07-02        2.525  2.461818 -33.333298  480565876.0     2.0
2004-07-05        2.550  2.475385 -27.777775  505227876.0     2.0
2004-07-06        2.550  2.486864 -27.777775  505227876.0     2.0
         Date      Close
0  2013-09-18  11.264000
1  2013-09-23  12.320000
2  2013-09-24  11.896000
3  2013-09-25  12.013333
4  2013-09-26  11.466666
                Close  previous_1d  EMA_12  willr_14        OBV_7
Date                                                             
2013-09-18  11.264000          NaN     NaN       NaN          NaN
2013-09-23  12.320000    11.264000     NaN       NaN  145836577.0
2013-09-24  11.896000    12.320000     NaN       NaN  265280099.0
2013-09-25  12.013333    11.896000     NaN       NaN  190

HBox(children=(FloatProgress(value=0.0, max=1288.0), HTML(value='')))


            previous_1d     EMA_12   willr_14        OBV_7  labels
Date                                                              
2013-10-17    11.912000  11.900182 -34.022038  235647532.0     1.0
2013-10-18    11.896000  11.899539 -39.103542  214825024.0     2.0
2013-10-21    12.269333  11.956430 -24.093825  253903732.0     2.0
2013-10-22    12.760000  12.080056  -4.424713  304775737.0     2.0
2013-10-23    12.645333  12.167022 -15.187657  273970459.0     2.0
         Date     Close
0  1999-12-31  36.71875
1  2000-01-03  36.03125
2  2000-01-04  34.46875
3  2000-01-05  33.68750
4  2000-01-06  32.09375
               Close  previous_1d  EMA_12  willr_14      OBV_7
Date                                                          
1999-12-31  36.71875          NaN     NaN       NaN        NaN
2000-01-03  36.03125     36.71875     NaN       NaN  1152800.0
2000-01-04  34.46875     36.03125     NaN       NaN -1717600.0
2000-01-05  33.68750     34.46875     NaN       NaN -5574000.0
2000-01-

HBox(children=(FloatProgress(value=0.0, max=4780.0), HTML(value='')))


            previous_1d     EMA_12   willr_14       OBV_7  labels
Date                                                             
2000-01-21     33.84375  34.268013 -62.130178  -9160800.0     2.0
2000-01-24     32.40625  33.981588 -87.586207 -14531000.0     2.0
2000-01-25     31.75000  33.638267 -97.080292 -20160400.0     2.0
2000-01-26     32.56250  33.472764 -74.825175 -15399000.0     2.0
2000-01-27     34.06250  33.563493 -41.258741 -10312000.0     2.0
         Date    Close
0  1999-12-31  68.7500
1  2000-01-03  65.1875
2  2000-01-04  61.7500
3  2000-01-05  63.0000
4  2000-01-06  60.0000
              Close  previous_1d  EMA_12  willr_14       OBV_7
Date                                                          
1999-12-31  68.7500          NaN     NaN       NaN         NaN
2000-01-03  65.1875      68.7500     NaN       NaN   3097800.0
2000-01-04  61.7500      65.1875     NaN       NaN  -8933000.0
2000-01-05  63.0000      61.7500     NaN       NaN -17707500.0
2000-01-06  60.0000  

HBox(children=(FloatProgress(value=0.0, max=4780.0), HTML(value='')))


            previous_1d     EMA_12   willr_14       OBV_7  labels
Date                                                             
2000-01-21      60.8125  62.311914 -90.506329 -32999900.0     2.0
2000-01-24      60.0000  61.956235 -94.230769 -39682800.0     2.0
2000-01-25      58.0000  61.347584 -82.786885 -47022400.0     2.0
2000-01-26      56.1250  60.544109 -85.714286 -57274800.0     2.0
2000-01-27      57.3750  60.056554 -71.428571 -48806400.0     2.0
         Date     Close
0  1999-12-31  29.12500
1  2000-01-03  28.18750
2  2000-01-04  28.21875
3  2000-01-05  28.46875
4  2000-01-06  28.50000
               Close  previous_1d  EMA_12  willr_14      OBV_7
Date                                                          
1999-12-31  29.12500          NaN     NaN       NaN        NaN
2000-01-03  28.18750     29.12500     NaN       NaN  2829800.0
2000-01-04  28.21875     28.18750     NaN       NaN -8167200.0
2000-01-05  28.46875     28.21875     NaN       NaN  -859200.0
2000-01-06  28.

HBox(children=(FloatProgress(value=0.0, max=4780.0), HTML(value='')))


            previous_1d     EMA_12   willr_14       OBV_7  labels
Date                                                             
2000-01-21     32.93750  30.599729  -0.584795  80376600.0     0.0
2000-01-24     33.43750  31.036309  -0.000000  91762400.0     2.0
2000-01-25     32.43750  31.251877 -17.777778  80456400.0     2.0
2000-01-26     32.93750  31.511203  -9.248555  90175000.0     2.0
2000-01-27     31.53125  31.514288 -36.969697  66451200.0     2.0
         Date    Close
0  1999-12-31  35.2500
1  2000-01-03  36.8750
2  2000-01-04  35.9375
3  2000-01-05  35.0625
4  2000-01-06  36.6250
              Close  previous_1d  EMA_12  willr_14      OBV_7
Date                                                         
1999-12-31  35.2500          NaN     NaN       NaN        NaN
2000-01-03  36.8750      35.2500     NaN       NaN  1223500.0
2000-01-04  35.9375      36.8750     NaN       NaN  6521900.0
2000-01-05  35.0625      35.9375     NaN       NaN  1966200.0
2000-01-06  36.6250      35

HBox(children=(FloatProgress(value=0.0, max=4780.0), HTML(value='')))


            previous_1d     EMA_12   willr_14      OBV_7  labels
Date                                                            
2000-01-21      36.1250  36.296197 -60.606061  5362300.0     2.0
2000-01-24      36.1250  36.269859 -60.606061  5362300.0     2.0
2000-01-25      35.3125  36.122573 -80.303030  1577300.0     2.0
2000-01-26      34.3125  35.844100 -90.789474 -2779200.0     2.0
2000-01-27      33.9375  35.550777 -96.153846 -5953300.0     2.0


In [6]:
def compile_data(ticker_file='small_noncyclic_tickers.txt',
                 data_dir='data/noncyclic_tickers',
                 label_window=15):
    """Build the per-ticker indicator/label CSVs for the small non-cyclic list.

    NOTE(review): this cell redefines ``compile_data``; once it has run, any
    later call uses this definition and its defaults.

    For every ticker listed in ``ticker_file`` this reads
    ``<data_dir>/initial/<ticker>.csv``, derives the feature columns
    ``previous_1d``, ``EMA_12``, ``willr_14`` and ``OBV_7`` from the
    one-row-shifted price/volume series, stores the result of
    ``create_labels`` in a ``labels`` column, drops ``Close`` and every row
    that still contains a NaN, and writes the frame to
    ``<data_dir>/initial_indicators/<ticker>_data.csv``.

    Parameters
    ----------
    ticker_file : str
        Text file with one ticker symbol per line.
    data_dir : str
        Directory holding the ``initial`` input folder; the
        ``initial_indicators`` output folder is created inside it if missing.
    label_window : int
        Passed through as the third argument of ``create_labels``.

    Returns
    -------
    None
        Results are written to disk and previews are printed.
    """
    with open(ticker_file) as f:
        # Skip blank lines so a trailing newline does not produce an empty ticker.
        tickers = [line.strip() for line in f if line.strip()]

    # to_csv does not create missing directories, so make sure the target exists.
    out_dir = '{}/initial_indicators'.format(data_dir)
    os.makedirs(out_dir, exist_ok=True)

    for ticker in tickers:
        df = pd.read_csv('{}/initial/{}.csv'.format(data_dir, ticker)).dropna()

        # Shift every input down by one row so the indicator stored on a given
        # row is computed only from earlier rows (the first row becomes NaN).
        high = df['High'].shift(1).values
        low = df['Low'].shift(1).values
        close = df['Close'].shift(1).values
        volume = df['Volume'].shift(1).values

        # Keyword form: the positional ``axis`` argument of DataFrame.drop
        # is no longer accepted by pandas 2.x.
        df = df.drop(columns=['High', 'Low', 'Open', 'Volume', 'Adj Close'])
        print(df.head())

        df['previous_1d'] = df['Close'].shift(1)
        df['EMA_12'] = EMA(close, timeperiod=12)
        df['willr_14'] = WILLR(high, low, close, timeperiod=14)
        # OBV is called without a period; the 'OBV_7' column name is kept
        # unchanged so previously written CSVs keep the same schema.
        df['OBV_7'] = OBV(close, volume)

        df = df.set_index('Date')

        print(df.head())
        # info() prints its own report and returns None, so it is not wrapped
        # in print() (which used to emit a stray "None" line).
        df.info()

        df['labels'] = create_labels(df, 'Close', label_window)
        df = df.drop(columns=['Close']).dropna()

        print(df.head())
        df.to_csv('{}/{}_data.csv'.format(out_dir, ticker))


compile_data()

         Date     Close
0  1999-12-31  5.629787
1  2000-01-03  5.464898
2  2000-01-04  5.417786
3  2000-01-05  5.252897
4  2000-01-06  5.370675
               Close  previous_1d  EMA_12  willr_14     OBV_7
Date                                                         
1999-12-31  5.629787          NaN     NaN       NaN       NaN
2000-01-03  5.464898     5.629787     NaN       NaN  122052.0
2000-01-04  5.417786     5.464898     NaN       NaN  110643.0
2000-01-05  5.252897     5.417786     NaN       NaN  -54392.0
2000-01-06  5.370675     5.252897     NaN       NaN -180954.0
<class 'pandas.core.frame.DataFrame'>
Index: 4780 entries, 1999-12-31 to 2018-12-31
Data columns (total 5 columns):
Close          4780 non-null float64
previous_1d    4779 non-null float64
EMA_12         4768 non-null float64
willr_14       4766 non-null float64
OBV_7          4779 non-null float64
dtypes: float64(5)
memory usage: 224.1+ KB
None
Calculating labels


HBox(children=(FloatProgress(value=0.0, max=4780.0), HTML(value='')))


            previous_1d    EMA_12   willr_14     OBV_7  labels
Date                                                          
2000-01-21     5.464898  5.414302 -41.666477  -72699.0     1.0
2000-01-24     5.252897  5.389470 -75.000051 -137174.0     2.0
2000-01-25     5.276453  5.372083 -62.499937   23616.0     2.0
2000-01-26     5.606231  5.408106 -23.076911  295579.0     2.0
2000-01-27     5.936009  5.489322  -0.000000  339624.0     0.0
         Date  Close
0  1999-12-31  9.000
1  2000-01-03  9.000
2  2000-01-04  8.875
3  2000-01-05  8.500
4  2000-01-06  9.750
            Close  previous_1d  EMA_12  willr_14    OBV_7
Date                                                     
1999-12-31  9.000          NaN     NaN       NaN      NaN
2000-01-03  9.000        9.000     NaN       NaN  86100.0
2000-01-04  8.875        9.000     NaN       NaN  86100.0
2000-01-05  8.500        8.875     NaN       NaN  22100.0
2000-01-06  9.750        8.500     NaN       NaN -17300.0
<class 'pandas.core.frame.

HBox(children=(FloatProgress(value=0.0, max=4780.0), HTML(value='')))


            previous_1d    EMA_12   willr_14     OBV_7  labels
Date                                                          
2000-01-21       8.7500  8.946622 -66.666667   78200.0     2.0
2000-01-24       8.8750  8.935603 -58.333333   99900.0     2.0
2000-01-25       9.0000  8.945511 -50.000000  123600.0     2.0
2000-01-26       8.6875  8.905817 -70.833333   87300.0     2.0
2000-01-27       8.5625  8.852999 -76.000000   32400.0     2.0
         Date    Close
0  1999-12-31  43.5000
1  2000-01-03  43.0000
2  2000-01-04  42.3125
3  2000-01-05  42.8125
4  2000-01-06  43.3750
              Close  previous_1d  EMA_12  willr_14   OBV_7
Date                                                      
1999-12-31  43.5000          NaN     NaN       NaN     NaN
2000-01-03  43.0000      43.5000     NaN       NaN  6400.0
2000-01-04  42.3125      43.0000     NaN       NaN  2500.0
2000-01-05  42.8125      42.3125     NaN       NaN -4200.0
2000-01-06  43.3750      42.8125     NaN       NaN  1100.0
<class 

HBox(children=(FloatProgress(value=0.0, max=4780.0), HTML(value='')))


            previous_1d     EMA_12    willr_14    OBV_7  labels
Date                                                           
2000-01-21       41.375  42.619144  -78.481013 -43300.0     2.0
2000-01-24       40.500  42.293122  -96.202532 -61500.0     1.0
2000-01-25       38.000  41.632642 -100.000000 -81600.0     2.0
2000-01-26       40.125  41.400697  -70.689655 -63900.0     2.0
2000-01-27       40.500  41.262128  -65.517241 -56300.0     2.0


## Normalize indicators

In [8]:
import pandas as pd
from sklearn import preprocessing

In [9]:
# Min-max normalize every feature column of the cyclic tickers; the 'labels'
# column is carried over unscaled.
# NOTE(review): the scaler is fit on each ticker's full history. If the data
# is later split into train/test by date, the scaling has already seen the
# test period - confirm this is intended.

with open('cyclic_tickers.txt') as f:
    # Skip blank lines so a trailing newline does not produce an empty ticker.
    tickers = [line.strip() for line in f if line.strip()]

# to_csv does not create missing directories, so make sure the target exists.
normalized_dir = 'data/cyclic_tickers/normalized'
os.makedirs(normalized_dir, exist_ok=True)

for ticker in tickers:
    indicators = (
        pd.read_csv('data/cyclic_tickers/initial_indicators/{}_data.csv'.format(ticker))
        .dropna()
        .set_index('Date')
    )

    # Scale the features only; a fresh scaler is fit for every ticker.
    features = indicators.drop(columns=['labels'])
    scaled = preprocessing.MinMaxScaler().fit_transform(features.values)

    df_normalized = pd.DataFrame(scaled, columns=features.columns, index=features.index)
    df_normalized['labels'] = indicators['labels']
    print(df_normalized.head())

    df_normalized.to_csv('{}/{}_data.csv'.format(normalized_dir, ticker))

            previous_1d    EMA_12  willr_14     OBV_7  labels
Date                                                         
2000-01-21     0.905882  0.956964  0.625000  0.643105     2.0
2000-01-24     0.858823  0.945782  0.500000  0.628544     2.0
2000-01-25     0.835294  0.932205  0.512195  0.621903     2.0
2000-01-26     0.803922  0.915230  0.142858  0.607233     2.0
2000-01-27     0.811765  0.902239  0.178571  0.616384     2.0
            previous_1d    EMA_12  willr_14     OBV_7  labels
Date                                                         
2000-01-21     0.328349  0.314429  0.574074  0.975610     2.0
2000-01-24     0.321090  0.314799  0.604839  0.973859     2.0
2000-01-25     0.321493  0.315176  0.612903  0.976050     2.0
2000-01-26     0.331978  0.317188  0.822580  0.978622     2.0
2000-01-27     0.356578  0.322859  0.947674  0.981467     2.0
            previous_1d    EMA_12  willr_14     OBV_7  labels
Date                                                         
2000-01-

In [10]:
# Min-max normalize every feature column of the non-cyclic tickers; the
# 'labels' column is carried over unscaled.
# NOTE(review): the scaler is fit on each ticker's full history. If the data
# is later split into train/test by date, the scaling has already seen the
# test period - confirm this is intended.

with open('noncyclic_tickers.txt') as f:
    # Skip blank lines so a trailing newline does not produce an empty ticker.
    tickers = [line.strip() for line in f if line.strip()]

# to_csv does not create missing directories, so make sure the target exists.
normalized_dir = 'data/noncyclic_tickers/normalized'
os.makedirs(normalized_dir, exist_ok=True)

for ticker in tickers:
    indicators = (
        pd.read_csv('data/noncyclic_tickers/initial_indicators/{}_data.csv'.format(ticker))
        .dropna()
        .set_index('Date')
    )

    # Scale the features only; a fresh scaler is fit for every ticker.
    features = indicators.drop(columns=['labels'])
    scaled = preprocessing.MinMaxScaler().fit_transform(features.values)

    df_normalized = pd.DataFrame(scaled, columns=features.columns, index=features.index)
    df_normalized['labels'] = indicators['labels']
    print(df_normalized.head())

    df_normalized.to_csv('{}/{}_data.csv'.format(normalized_dir, ticker))

            previous_1d    EMA_12  willr_14     OBV_7  labels
Date                                                         
2001-09-14     0.002663  0.002811  0.581262  0.027844     2.0
2001-09-17     0.002765  0.002805  0.487032  0.031152     2.0
2001-09-18     0.002650  0.002781  0.270358  0.027279     2.0
2001-09-19     0.002753  0.002777  0.403910  0.031923     2.0
2001-09-20     0.002921  0.002800  0.622149  0.035653     2.0
            previous_1d    EMA_12  willr_14     OBV_7  labels
Date                                                         
2000-01-21     0.104899  0.091802  0.722992  0.376076     2.0
2000-01-24     0.097986  0.092625  0.590028  0.364892     2.0
2000-01-25     0.091072  0.092220  0.457064  0.348988     2.0
2000-01-26     0.103459  0.093850  0.695291  0.360841     2.0
2000-01-27     0.106916  0.095779  0.727848  0.372860     2.0
            previous_1d    EMA_12  willr_14     OBV_7  labels
Date                                                         
2000-01-