# Table of Content

# Introduction

## Setup and Import

As always, the first step is to import the required libraries and data. Since we do not want to run the SQL query every time, we can simply import the csv file we created in the first notebook.

In [1]:
# Import packages
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import altair as alt
import numpy as np

from ipywidgets import HTML
from io import BytesIO
import base64

from decimal import ROUND_HALF_UP, Decimal

import warnings
warnings.simplefilter("ignore")

# Turn off the max column width so the images won't be truncated
pd.set_option('display.max_colwidth', None)
# Show all Columns
pd.set_option('display.max_columns', None)
pd. set_option('display.max_rows', None)
 
# Turning off the max column will display all the data
# if gathering into sets / array we might want to restrict to a few items
pd.set_option('display.max_seq_items', 50)
pd.set_option('display.width', 1000)


In [2]:
# Import dataframes for Stock Prices
stock_price_df = pd.read_csv('../data/train_files/stock_prices.csv', parse_dates=['Date'])
sec_df = pd.read_csv('../data/train_files/secondary_stock_prices.csv', parse_dates=['Date'])
tra_df = pd.read_csv('../data/train_files/trades.csv', parse_dates=['Date'])

stock_desc_df = pd.read_csv('../data/stock_price_spec.csv')
stock_list_desc_df = pd.read_csv('../data/stock_list_spec.csv')
stock_list = pd.read_csv('../data/stock_list.csv')

In [3]:
stock_price_df.head(5)

Unnamed: 0,RowId,Date,SecuritiesCode,Open,High,Low,Close,Volume,AdjustmentFactor,ExpectedDividend,SupervisionFlag,Target
0,20170104_1301,2017-01-04,1301,2734.0,2755.0,2730.0,2742.0,31400,1.0,,False,0.00073
1,20170104_1332,2017-01-04,1332,568.0,576.0,563.0,571.0,2798500,1.0,,False,0.012324
2,20170104_1333,2017-01-04,1333,3150.0,3210.0,3140.0,3210.0,270800,1.0,,False,0.006154
3,20170104_1376,2017-01-04,1376,1510.0,1550.0,1510.0,1550.0,11300,1.0,,False,0.011053
4,20170104_1377,2017-01-04,1377,3270.0,3350.0,3270.0,3330.0,150800,1.0,,False,0.003026


Frontfill Stockprices (last known Price)

In [4]:
stock_price_df.fillna(method='ffill', inplace=True)

In [5]:
stock_price_df.head()

Unnamed: 0,RowId,Date,SecuritiesCode,Open,High,Low,Close,Volume,AdjustmentFactor,ExpectedDividend,SupervisionFlag,Target
0,20170104_1301,2017-01-04,1301,2734.0,2755.0,2730.0,2742.0,31400,1.0,,False,0.00073
1,20170104_1332,2017-01-04,1332,568.0,576.0,563.0,571.0,2798500,1.0,,False,0.012324
2,20170104_1333,2017-01-04,1333,3150.0,3210.0,3140.0,3210.0,270800,1.0,,False,0.006154
3,20170104_1376,2017-01-04,1376,1510.0,1550.0,1510.0,1550.0,11300,1.0,,False,0.011053
4,20170104_1377,2017-01-04,1377,3270.0,3350.0,3270.0,3330.0,150800,1.0,,False,0.003026


In [6]:
stock_price_df.isnull().sum() * 100 / len(stock_price_df)

RowId               0.000000
Date                0.000000
SecuritiesCode      0.000000
Open                0.000000
High                0.000000
Low                 0.000000
Close               0.000000
Volume              0.000000
AdjustmentFactor    0.000000
ExpectedDividend    0.568867
SupervisionFlag     0.000000
Target              0.000000
dtype: float64

In [7]:
stock_price_df['ExpectedDividend'] = stock_price_df['ExpectedDividend'].fillna(0)

In [8]:
stock_price_df.isnull().sum() * 100 / len(stock_price_df)

RowId               0.0
Date                0.0
SecuritiesCode      0.0
Open                0.0
High                0.0
Low                 0.0
Close               0.0
Volume              0.0
AdjustmentFactor    0.0
ExpectedDividend    0.0
SupervisionFlag     0.0
Target              0.0
dtype: float64

In [9]:
def adjust_price(price):
    """
    Args:
        price (pd.DataFrame)  : pd.DataFrame include stock_price
    Returns:
        price DataFrame (pd.DataFrame): stock_price with generated AdjustedClose
    """
    

    def generate_adjusted_close(df):
        """
        Args:
            df (pd.DataFrame)  : stock_price for a single SecuritiesCode
        Returns:
            df (pd.DataFrame): stock_price with AdjustedClose for a single SecuritiesCode
        """
        # sort data to generate CumulativeAdjustmentFactor
        df = df.sort_values("Date", ascending=False)
        # generate CumulativeAdjustmentFactor
        df.loc[:, "CumulativeAdjustmentFactor"] = df["AdjustmentFactor"].cumprod()


        
        # generate AdjustedOpen
        df.loc[:, "AdjustedOpen"] = (
            df["CumulativeAdjustmentFactor"] * df["Open"]
        ).map(lambda x: float(
            Decimal(str(x)).quantize(Decimal('0.1'), rounding=ROUND_HALF_UP)
        ))

        # generate AdjustedHigh
        df.loc[:, "AdjustedHigh"] = (
            df["CumulativeAdjustmentFactor"] * df["High"]
        ).map(lambda x: float(
            Decimal(str(x)).quantize(Decimal('0.1'), rounding=ROUND_HALF_UP)
        ))

        # generate AdjustedLow
        df.loc[:, "AdjustedLow"] = (
            df["CumulativeAdjustmentFactor"] * df["Low"]
        ).map(lambda x: float(
            Decimal(str(x)).quantize(Decimal('0.1'), rounding=ROUND_HALF_UP)
        ))

        # generate AdjustedClose
        df.loc[:, "AdjustedClose"] = (
            df["CumulativeAdjustmentFactor"] * df["Close"]
        ).map(lambda x: float(
            Decimal(str(x)).quantize(Decimal('0.1'), rounding=ROUND_HALF_UP)
        ))

        # generate AdjustedVolume
        df.loc[:, "AdjustedVolume"] = (
            df["CumulativeAdjustmentFactor"] * df["Volume"]
        ).map(lambda x: float(
            Decimal(str(x)).quantize(Decimal('0.1'), rounding=ROUND_HALF_UP)
        ))


        # reverse order
        df = df.sort_values("Date")
        # to fill Adjusted, replace 0 into np.nan
        df.loc[df["AdjustedClose"] == 0, "AdjustedClose"] = np.nan
        df.loc[df["AdjustedHigh"] == 0, "AdjustedHigh"] = np.nan
        df.loc[df["AdjustedLow"] == 0, "AdjustedLow"] = np.nan
        df.loc[df["AdjustedOpen"] == 0, "AdjustedOpen"] = np.nan
        df.loc[df["AdjustedVolume"] == 0, "AdjustedVolume"] = np.nan

        # forward fill Adjusted
        df.loc[:, "AdjustedClose"] = df.loc[:, "AdjustedClose"].ffill()
        df.loc[:, "AdjustedHigh"] = df.loc[:, "AdjustedHigh"].ffill()
        df.loc[:, "AdjustedLow"] = df.loc[:, "AdjustedLow"].ffill()
        df.loc[:, "AdjustedOpen"] = df.loc[:, "AdjustedOpen"].ffill()
        df.loc[:, "AdjustedVolume"] = df.loc[:, "AdjustedVolume"].ffill()

        # adjust target
        df.sort_values('Date')
        df['ad_Close_1'] = df['AdjustedClose'].shift(-1)
        df['ad_Close_2'] = df['AdjustedClose'].shift(-2)
        df['AdjustedTarget'] = (df['ad_Close_2']-df['ad_Close_1'])/df['ad_Close_1']
        df.drop(['ad_Close_1', 'ad_Close_2'], axis=1, inplace=True)

        return df


    # generate Adjusted
    price = price.sort_values(["SecuritiesCode", "Date"])

    price = price.groupby("SecuritiesCode").apply(generate_adjusted_close).reset_index(drop=True)

    
    return price

# transform Date column into datetime
    price.loc[: ,"Date"] = pd.to_datetime(price.loc[: ,"Date"], format="%Y-%m-%d")
    price.set_index("Date", inplace=True)

In [10]:
stock_price_df = adjust_price(stock_price_df)

In [11]:
stock_price_df.head()

Unnamed: 0,RowId,Date,SecuritiesCode,Open,High,Low,Close,Volume,AdjustmentFactor,ExpectedDividend,SupervisionFlag,Target,CumulativeAdjustmentFactor,AdjustedOpen,AdjustedHigh,AdjustedLow,AdjustedClose,AdjustedVolume,AdjustedTarget
0,20170104_1301,2017-01-04,1301,2734.0,2755.0,2730.0,2742.0,31400,1.0,0.0,False,0.00073,1.0,2734.0,2755.0,2730.0,2742.0,31400.0,0.00073
1,20170105_1301,2017-01-05,1301,2743.0,2747.0,2735.0,2738.0,17900,1.0,0.0,False,0.00292,1.0,2743.0,2747.0,2735.0,2738.0,17900.0,0.00292
2,20170106_1301,2017-01-06,1301,2734.0,2744.0,2720.0,2740.0,19900,1.0,0.0,False,-0.001092,1.0,2734.0,2744.0,2720.0,2740.0,19900.0,-0.001092
3,20170110_1301,2017-01-10,1301,2745.0,2754.0,2735.0,2748.0,24200,1.0,0.0,False,-0.0051,1.0,2745.0,2754.0,2735.0,2748.0,24200.0,-0.0051
4,20170111_1301,2017-01-11,1301,2748.0,2752.0,2737.0,2745.0,9300,1.0,0.0,False,-0.003295,1.0,2748.0,2752.0,2737.0,2745.0,9300.0,-0.003295


In [12]:
stock_price_df = stock_price_df.drop(columns=['Open','High', 'Low', 'Close', 'Volume', 'AdjustmentFactor', 'CumulativeAdjustmentFactor', 'Target'])

In [13]:
stock_price_df.rename(columns = {'AdjustedOpen':'Open', 'AdjustedHigh':'High', 'AdjustedLow':'Low', 'AdjustedClose':'Close', 'AdjustedVolume':'Volume', 'AdjustedTarget':'Target'}, inplace = True)

In [14]:
stock_price_df.head()

Unnamed: 0,RowId,Date,SecuritiesCode,ExpectedDividend,SupervisionFlag,Open,High,Low,Close,Volume,Target
0,20170104_1301,2017-01-04,1301,0.0,False,2734.0,2755.0,2730.0,2742.0,31400.0,0.00073
1,20170105_1301,2017-01-05,1301,0.0,False,2743.0,2747.0,2735.0,2738.0,17900.0,0.00292
2,20170106_1301,2017-01-06,1301,0.0,False,2734.0,2744.0,2720.0,2740.0,19900.0,-0.001092
3,20170110_1301,2017-01-10,1301,0.0,False,2745.0,2754.0,2735.0,2748.0,24200.0,-0.0051
4,20170111_1301,2017-01-11,1301,0.0,False,2748.0,2752.0,2737.0,2745.0,9300.0,-0.003295


## Additional Features

### Metrics
BOP: Balance of Power = (Close price – Open price) / (High price – Low price) <br>
av: average = (Close price + High price + Low price) / 3 <br>
vwap: Volume-weighted average price = (Close price * Volume) / Volume <br>
HLr: High-Low-Range = High price – Low price <br>
OCr: Open-Close-Range = Close price – Open price <br>
OC: Open-Close = Close price * Open price <br>
relC: relative Change = (Close price – Open price)/Open Price <br>
HL: High-Low = High price * Low price <br>
logC: logarithmic scaled Close price <br>
logR: log(Close price) - log(Open price) <br>
OHLCstd: Open, High, Low, Close, standard deviated. <br>
OHLCskew: Open, High, Low, Close, skewed. <br>
OHLCkur: Open, High, Low, Close, kurtosis. <br>
Cpos: = (Close price – Low price) / (High price – Low price) -0.5 <br>
Opos: = (Open price – Low price) / (High price – Low price) -0.5 <br>
bsforce: = Cpos * Volume <br>
    
    
### Weekdays
Weekday = Day of the Week (1-5) <br>
Monday = if Monday == 1 <br>
Tuesday = if Tuesday == 1 <br>
Wednesday = if Wednesday == 1 <br>
Thursday = if Thursday == 1 <br>
Friday = if Fryday == 1 <br>
Date = Date

In [15]:
def FE(stock_price_df):
    stock_price_df['BOP'] = (stock_price_df['Open']-stock_price_df['Close'])/(stock_price_df['High']-stock_price_df['Low'])
    stock_price_df['Cpos'] = (stock_price_df['Close']-stock_price_df['Low'])/(stock_price_df['High']-stock_price_df['Low']) -0.5
    stock_price_df['bsforce'] = stock_price_df['Cpos'] * stock_price_df['Volume']
    stock_price_df['Opos'] = (stock_price_df['Open']-stock_price_df['Low'])/(stock_price_df['High']-stock_price_df['Low']) -0.5


    
    stock_price_df['av'] = (stock_price_df['Close']+stock_price_df['High']+stock_price_df['Low'])/3
    stock_price_df['vwap'] = (stock_price_df['Close'] * stock_price_df['Volume'])/ stock_price_df['Volume']
    stock_price_df['HLr'] = stock_price_df['High'] - stock_price_df['Low']
    stock_price_df['OCr'] = stock_price_df['Open'] - stock_price_df['Close']
    stock_price_df['relC'] = (stock_price_df['Close'] - stock_price_df['Open'])/stock_price_df['Open']
    stock_price_df['OC'] = stock_price_df['Open'] * stock_price_df['Close']
    stock_price_df['HL'] = stock_price_df['High'] * stock_price_df['Low']
    stock_price_df['logC'] = np.log(stock_price_df['Close']+1)
    stock_price_df['logR'] = np.log(stock_price_df['Close'])-np.log(stock_price_df['Open'])
    stock_price_df['OHLCstd'] = stock_price_df[['Open','High','Low','Close']].std(axis=1)
    stock_price_df['OHLCskew'] = stock_price_df[['Open','High','Low','Close']].skew(axis=1)
    stock_price_df['OHLCkur'] = stock_price_df[['Open','High','Low','Close']].kurtosis(axis=1)


    
    stock_price_df['weekday'] = stock_price_df['Date'].dt.weekday+1
    stock_price_df['Monday'] = np.where(stock_price_df['weekday']==1,1,0)
    stock_price_df['Tuesday'] = np.where(stock_price_df['weekday']==2,1,0)
    stock_price_df['Wednesday'] = np.where(stock_price_df['weekday']==3,1,0)
    stock_price_df['Thursday'] = np.where(stock_price_df['weekday']==4,1,0)
    stock_price_df['Friday'] = np.where(stock_price_df['weekday']==5,1,0)
    stock_price_df['Date'] = pd.to_datetime(stock_price_df['Date'])

    #rolling mean
    stock_price_df['SMA5'] = stock_price_df.Close.rolling(5).mean()
    stock_price_df['SMA20'] = stock_price_df.Close.rolling(20).mean()
    stock_price_df['SMA50'] = stock_price_df.Close.rolling(50).mean()
    stock_price_df['SMA200'] = stock_price_df.Close.rolling(200).mean()
    stock_price_df['SMA500'] = stock_price_df.Close.rolling(500).mean()

    #Exponentially Weighted average
    stock_price_df['EMA5'] = stock_price_df.Close.ewm(span=5, adjust=False).mean()
    stock_price_df['EMA20'] = stock_price_df.Close.ewm(span=20, adjust=False).mean()
    stock_price_df['EMA50'] = stock_price_df.Close.ewm(span=50, adjust=False).mean()
    stock_price_df['EMA200'] = stock_price_df.Close.ewm(span=200, adjust=False).mean()
    stock_price_df['EMA500'] = stock_price_df.Close.ewm(span=500, adjust=False).mean()

    

    return stock_price_df
stock_price_df = FE(stock_price_df)
stock_price_df = pd.merge(stock_price_df,stock_list, on='SecuritiesCode')

In [16]:
stock_price_df.head(10)

Unnamed: 0,RowId,Date,SecuritiesCode,ExpectedDividend,SupervisionFlag,Open,High,Low,Close_x,Volume,Target,BOP,Cpos,bsforce,Opos,av,vwap,HLr,OCr,relC,OC,HL,logC,logR,OHLCstd,OHLCskew,OHLCkur,weekday,Monday,Tuesday,Wednesday,Thursday,Friday,SMA5,SMA20,SMA50,SMA200,SMA500,EMA5,EMA20,EMA50,EMA200,EMA500,EffectiveDate,Name,Section/Products,NewMarketSegment,33SectorCode,33SectorName,17SectorCode,17SectorName,NewIndexSeriesSizeCode,NewIndexSeriesSize,TradeDate,Close_y,IssuedShares,MarketCapitalization,Universe0
0,20170104_1301,2017-01-04,1301,0.0,False,2734.0,2755.0,2730.0,2742.0,31400.0,0.00073,-0.32,-0.02,-628.0,-0.34,2742.333333,2742.0,25.0,-8.0,0.002926,7496628.0,7521150.0,7.916807,0.002922,11.026483,0.94153,0.008495,3,0,0,1,0,0,,,,,,2742.0,2742.0,2742.0,2742.0,2742.0,20211230,"KYOKUYO CO.,LTD.",First Section (Domestic),Prime Market,50,"Fishery, Agriculture and Forestry",1,FOODS,7,TOPIX Small 2,20211230.0,3080.0,10928283.0,33659110000.0,True
1,20170105_1301,2017-01-05,1301,0.0,False,2743.0,2747.0,2735.0,2738.0,17900.0,0.00292,0.416667,-0.25,-4475.0,0.166667,2740.0,2738.0,12.0,5.0,-0.001823,7510334.0,7513045.0,7.915348,-0.001824,5.315073,0.198134,-2.215052,4,0,0,0,1,0,,,,,,2740.666667,2741.619048,2741.843137,2741.960199,2741.984032,20211230,"KYOKUYO CO.,LTD.",First Section (Domestic),Prime Market,50,"Fishery, Agriculture and Forestry",1,FOODS,7,TOPIX Small 2,20211230.0,3080.0,10928283.0,33659110000.0,True
2,20170106_1301,2017-01-06,1301,0.0,False,2734.0,2744.0,2720.0,2740.0,19900.0,-0.001092,-0.25,0.333333,6633.333333,0.083333,2734.666667,2740.0,24.0,-6.0,0.002195,7491160.0,7463680.0,7.916078,0.002192,10.503968,-1.16486,1.085094,5,0,0,0,0,1,,,,,,2740.444444,2741.464853,2741.770857,2741.940695,2741.976112,20211230,"KYOKUYO CO.,LTD.",First Section (Domestic),Prime Market,50,"Fishery, Agriculture and Forestry",1,FOODS,7,TOPIX Small 2,20211230.0,3080.0,10928283.0,33659110000.0,True
3,20170110_1301,2017-01-10,1301,0.0,False,2745.0,2754.0,2735.0,2748.0,24200.0,-0.0051,-0.157895,0.184211,4457.894737,0.026316,2745.666667,2748.0,19.0,-3.0,0.001093,7543260.0,7532190.0,7.918992,0.001092,7.937254,-0.703934,1.12522,2,0,1,0,0,0,,,,,,2742.962963,2742.087248,2742.015137,2742.000986,2742.000159,20211230,"KYOKUYO CO.,LTD.",First Section (Domestic),Prime Market,50,"Fishery, Agriculture and Forestry",1,FOODS,7,TOPIX Small 2,20211230.0,3080.0,10928283.0,33659110000.0,True
4,20170111_1301,2017-01-11,1301,0.0,False,2748.0,2752.0,2737.0,2745.0,9300.0,-0.003295,0.2,0.033333,310.0,0.233333,2744.666667,2745.0,15.0,3.0,-0.001092,7543260.0,7532224.0,7.917901,-0.001092,6.350853,-0.843252,0.933953,3,0,0,1,0,0,2742.6,,,,,2743.641975,2742.364653,2742.132191,2742.030827,2742.012135,20211230,"KYOKUYO CO.,LTD.",First Section (Domestic),Prime Market,50,"Fishery, Agriculture and Forestry",1,FOODS,7,TOPIX Small 2,20211230.0,3080.0,10928283.0,33659110000.0,True
5,20170112_1301,2017-01-12,1301,0.0,False,2745.0,2747.0,2703.0,2731.0,28700.0,-0.006613,0.318182,0.136364,3913.636364,0.454545,2727.0,2731.0,44.0,14.0,-0.0051,7496595.0,7425141.0,7.912789,-0.005113,20.28957,-1.354079,1.2654,4,0,0,0,1,0,2740.4,,,,,2739.427984,2741.282305,2741.695634,2741.921068,2741.968174,20211230,"KYOKUYO CO.,LTD.",First Section (Domestic),Prime Market,50,"Fishery, Agriculture and Forestry",1,FOODS,7,TOPIX Small 2,20211230.0,3080.0,10928283.0,33659110000.0,True
6,20170113_1301,2017-01-13,1301,0.0,False,2707.0,2730.0,2707.0,2722.0,19400.0,-0.006657,-0.652174,0.152174,2952.173913,-0.5,2719.666667,2722.0,23.0,-15.0,0.005541,7368454.0,7390110.0,7.909489,0.005526,11.445523,0.405505,-3.706427,5,0,0,0,0,1,2737.2,,,,,2733.618656,2739.445895,2740.923257,2741.722848,2741.888461,20211230,"KYOKUYO CO.,LTD.",First Section (Domestic),Prime Market,50,"Fishery, Agriculture and Forestry",1,FOODS,7,TOPIX Small 2,20211230.0,3080.0,10928283.0,33659110000.0,True
7,20170116_1301,2017-01-16,1301,0.0,False,2725.0,2725.0,2696.0,2704.0,20100.0,0.002978,0.724138,-0.224138,-4505.172414,0.5,2708.333333,2704.0,29.0,21.0,-0.007706,7368400.0,7346600.0,7.902857,-0.007736,14.798649,-0.246845,-4.592189,1,1,0,0,0,0,2730.0,,,,,2723.74577,2736.070095,2739.475286,2741.347496,2741.737209,20211230,"KYOKUYO CO.,LTD.",First Section (Domestic),Prime Market,50,"Fishery, Agriculture and Forestry",1,FOODS,7,TOPIX Small 2,20211230.0,3080.0,10928283.0,33659110000.0,True
8,20170117_1301,2017-01-17,1301,0.0,False,2702.0,2704.0,2682.0,2686.0,18400.0,0.001856,0.727273,-0.318182,-5854.545455,0.409091,2690.666667,2686.0,22.0,16.0,-0.005922,7257572.0,7252128.0,7.896181,-0.005939,11.120552,-0.082895,-5.211209,2,0,1,0,0,0,2717.6,,,,,2711.163847,2731.301515,2737.378216,2740.796775,2741.514705,20211230,"KYOKUYO CO.,LTD.",First Section (Domestic),Prime Market,50,"Fishery, Agriculture and Forestry",1,FOODS,7,TOPIX Small 2,20211230.0,3080.0,10928283.0,33659110000.0,True
9,20170118_1301,2017-01-18,1301,0.0,False,2689.0,2695.0,2681.0,2694.0,12100.0,0.014079,-0.357143,0.428571,5185.714286,0.071429,2690.0,2694.0,14.0,-5.0,0.001859,7244166.0,7225295.0,7.899153,0.001858,6.396614,-1.143362,0.333846,3,0,0,1,0,0,2707.4,,,,,2705.442565,2727.74899,2735.677109,2740.331135,2741.325026,20211230,"KYOKUYO CO.,LTD.",First Section (Domestic),Prime Market,50,"Fishery, Agriculture and Forestry",1,FOODS,7,TOPIX Small 2,20211230.0,3080.0,10928283.0,33659110000.0,True


In [17]:
df = stock_price_df.drop(columns=['17SectorCode','NewIndexSeriesSizeCode', '33SectorCode', 'RowId', 'Close_y'])

In [18]:
df.rename(columns={"Close_x": "Close"}, inplace=True)

In [19]:
df.head()

Unnamed: 0,Date,SecuritiesCode,ExpectedDividend,SupervisionFlag,Open,High,Low,Close,Volume,Target,BOP,Cpos,bsforce,Opos,av,vwap,HLr,OCr,relC,OC,HL,logC,logR,OHLCstd,OHLCskew,OHLCkur,weekday,Monday,Tuesday,Wednesday,Thursday,Friday,SMA5,SMA20,SMA50,SMA200,SMA500,EMA5,EMA20,EMA50,EMA200,EMA500,EffectiveDate,Name,Section/Products,NewMarketSegment,33SectorName,17SectorName,NewIndexSeriesSize,TradeDate,IssuedShares,MarketCapitalization,Universe0
0,2017-01-04,1301,0.0,False,2734.0,2755.0,2730.0,2742.0,31400.0,0.00073,-0.32,-0.02,-628.0,-0.34,2742.333333,2742.0,25.0,-8.0,0.002926,7496628.0,7521150.0,7.916807,0.002922,11.026483,0.94153,0.008495,3,0,0,1,0,0,,,,,,2742.0,2742.0,2742.0,2742.0,2742.0,20211230,"KYOKUYO CO.,LTD.",First Section (Domestic),Prime Market,"Fishery, Agriculture and Forestry",FOODS,TOPIX Small 2,20211230.0,10928283.0,33659110000.0,True
1,2017-01-05,1301,0.0,False,2743.0,2747.0,2735.0,2738.0,17900.0,0.00292,0.416667,-0.25,-4475.0,0.166667,2740.0,2738.0,12.0,5.0,-0.001823,7510334.0,7513045.0,7.915348,-0.001824,5.315073,0.198134,-2.215052,4,0,0,0,1,0,,,,,,2740.666667,2741.619048,2741.843137,2741.960199,2741.984032,20211230,"KYOKUYO CO.,LTD.",First Section (Domestic),Prime Market,"Fishery, Agriculture and Forestry",FOODS,TOPIX Small 2,20211230.0,10928283.0,33659110000.0,True
2,2017-01-06,1301,0.0,False,2734.0,2744.0,2720.0,2740.0,19900.0,-0.001092,-0.25,0.333333,6633.333333,0.083333,2734.666667,2740.0,24.0,-6.0,0.002195,7491160.0,7463680.0,7.916078,0.002192,10.503968,-1.16486,1.085094,5,0,0,0,0,1,,,,,,2740.444444,2741.464853,2741.770857,2741.940695,2741.976112,20211230,"KYOKUYO CO.,LTD.",First Section (Domestic),Prime Market,"Fishery, Agriculture and Forestry",FOODS,TOPIX Small 2,20211230.0,10928283.0,33659110000.0,True
3,2017-01-10,1301,0.0,False,2745.0,2754.0,2735.0,2748.0,24200.0,-0.0051,-0.157895,0.184211,4457.894737,0.026316,2745.666667,2748.0,19.0,-3.0,0.001093,7543260.0,7532190.0,7.918992,0.001092,7.937254,-0.703934,1.12522,2,0,1,0,0,0,,,,,,2742.962963,2742.087248,2742.015137,2742.000986,2742.000159,20211230,"KYOKUYO CO.,LTD.",First Section (Domestic),Prime Market,"Fishery, Agriculture and Forestry",FOODS,TOPIX Small 2,20211230.0,10928283.0,33659110000.0,True
4,2017-01-11,1301,0.0,False,2748.0,2752.0,2737.0,2745.0,9300.0,-0.003295,0.2,0.033333,310.0,0.233333,2744.666667,2745.0,15.0,3.0,-0.001092,7543260.0,7532224.0,7.917901,-0.001092,6.350853,-0.843252,0.933953,3,0,0,1,0,0,2742.6,,,,,2743.641975,2742.364653,2742.132191,2742.030827,2742.012135,20211230,"KYOKUYO CO.,LTD.",First Section (Domestic),Prime Market,"Fishery, Agriculture and Forestry",FOODS,TOPIX Small 2,20211230.0,10928283.0,33659110000.0,True


In [20]:
df.shape

(2332531, 53)

### Additional features with ta library

In [21]:
import ta as ta
from ta import add_all_ta_features
from ta.utils import dropna

In [22]:
df = ta.add_all_ta_features(df, open="Open", high="High", low="Low", close="Close", volume="Volume", fillna=False)

In [23]:
df.isnull().sum() * 100 / len(df)

Date                          0.000000
SecuritiesCode                0.000000
ExpectedDividend              0.000000
SupervisionFlag               0.000000
Open                          0.000000
High                          0.000000
Low                           0.000000
Close                         0.000000
Volume                        0.011318
Target                        0.171488
BOP                           0.424818
Cpos                          0.424818
bsforce                       0.436136
Opos                          0.424818
av                            0.000000
vwap                          0.011318
HLr                           0.000000
OCr                           0.000000
relC                          0.000000
OC                            0.000000
HL                            0.000000
logC                          0.000000
logR                          0.000000
OHLCstd                       0.000000
OHLCskew                      0.000000
OHLCkur                  

In [24]:
df = df.drop(columns=['momentum_kama','trend_psar_up', 'trend_psar_down'])

In [25]:
df.head()

Unnamed: 0,Date,SecuritiesCode,ExpectedDividend,SupervisionFlag,Open,High,Low,Close,Volume,Target,BOP,Cpos,bsforce,Opos,av,vwap,HLr,OCr,relC,OC,HL,logC,logR,OHLCstd,OHLCskew,OHLCkur,weekday,Monday,Tuesday,Wednesday,Thursday,Friday,SMA5,SMA20,SMA50,SMA200,SMA500,EMA5,EMA20,EMA50,EMA200,EMA500,EffectiveDate,Name,Section/Products,NewMarketSegment,33SectorName,17SectorName,NewIndexSeriesSize,TradeDate,IssuedShares,MarketCapitalization,Universe0,volume_adi,volume_obv,volume_cmf,volume_fi,volume_em,volume_sma_em,volume_vpt,volume_vwap,volume_mfi,volume_nvi,volatility_bbm,volatility_bbh,volatility_bbl,volatility_bbw,volatility_bbp,volatility_bbhi,volatility_bbli,volatility_kcc,volatility_kch,volatility_kcl,volatility_kcw,volatility_kcp,volatility_kchi,volatility_kcli,volatility_dcl,volatility_dch,volatility_dcm,volatility_dcw,volatility_dcp,volatility_atr,volatility_ui,trend_macd,trend_macd_signal,trend_macd_diff,trend_sma_fast,trend_sma_slow,trend_ema_fast,trend_ema_slow,trend_vortex_ind_pos,trend_vortex_ind_neg,trend_vortex_ind_diff,trend_trix,trend_mass_index,trend_dpo,trend_kst,trend_kst_sig,trend_kst_diff,trend_ichimoku_conv,trend_ichimoku_base,trend_ichimoku_a,trend_ichimoku_b,trend_stc,trend_adx,trend_adx_pos,trend_adx_neg,trend_cci,trend_visual_ichimoku_a,trend_visual_ichimoku_b,trend_aroon_up,trend_aroon_down,trend_aroon_ind,trend_psar_up_indicator,trend_psar_down_indicator,momentum_rsi,momentum_stoch_rsi,momentum_stoch_rsi_k,momentum_stoch_rsi_d,momentum_tsi,momentum_uo,momentum_stoch,momentum_stoch_signal,momentum_wr,momentum_ao,momentum_roc,momentum_ppo,momentum_ppo_signal,momentum_ppo_hist,momentum_pvo,momentum_pvo_signal,momentum_pvo_hist,others_dr,others_dlr,others_cr
0,2017-01-04,1301,0.0,False,2734.0,2755.0,2730.0,2742.0,31400.0,0.00073,-0.32,-0.02,-628.0,-0.34,2742.333333,2742.0,25.0,-8.0,0.002926,7496628.0,7521150.0,7.916807,0.002922,11.026483,0.94153,0.008495,3,0,0,1,0,0,,,,,,2742.0,2742.0,2742.0,2742.0,2742.0,20211230,"KYOKUYO CO.,LTD.",First Section (Domestic),Prime Market,"Fishery, Agriculture and Forestry",FOODS,TOPIX Small 2,20211230.0,10928283.0,33659110000.0,True,-1256.0,31400.0,,,,,17641.209629,,,1000.0,,,,,,0.0,0.0,,2767.333333,2717.333333,,0.493333,0.0,0.0,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,2742.5,,0.0,0.0,0.0,,2443.225507,2436.21906,,,,0.0,0.0,,,,,,,,,,,,,,,,,,11.813653,,0.0
1,2017-01-05,1301,0.0,False,2743.0,2747.0,2735.0,2738.0,17900.0,0.00292,0.416667,-0.25,-4475.0,0.166667,2740.0,2738.0,12.0,5.0,-0.001823,7510334.0,7513045.0,7.915348,-0.001824,5.315073,0.198134,-2.215052,4,0,0,0,1,0,,,,,,2740.666667,2741.619048,2741.843137,2741.960199,2741.984032,20211230,"KYOKUYO CO.,LTD.",First Section (Domestic),Prime Market,"Fishery, Agriculture and Forestry",FOODS,TOPIX Small 2,20211230.0,10928283.0,33659110000.0,True,-10206.0,13500.0,,,-100558.7,,3683.3748,,,998.541211,,,,,,0.0,0.0,,2759.666667,2722.666667,,0.414414,0.0,0.0,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,2742.5,,0.0,0.0,0.0,,2443.225507,2436.21906,,,,0.0,0.0,,,,,,,,,,,,,,,,,,-0.145879,-0.145985,-0.145879
2,2017-01-06,1301,0.0,False,2734.0,2744.0,2720.0,2740.0,19900.0,-0.001092,-0.25,0.333333,6633.333333,0.083333,2734.666667,2740.0,24.0,-6.0,0.002195,7491160.0,7463680.0,7.916078,0.002192,10.503968,-1.16486,1.085094,5,0,0,0,0,1,,,,,,2740.444444,2741.464853,2741.770857,2741.940695,2741.976112,20211230,"KYOKUYO CO.,LTD.",First Section (Domestic),Prime Market,"Fishery, Agriculture and Forestry",FOODS,TOPIX Small 2,20211230.0,10928283.0,33659110000.0,True,3060.666667,33400.0,,,-1085427.0,,-11.576169,,,998.541211,,,,,,0.0,0.0,,2759.333333,2718.666667,,0.52459,0.0,0.0,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,2737.5,,0.0,0.0,0.0,,2443.225507,2436.21906,,,,0.0,1.0,,,,,,,,,,,,,,,,,,0.073046,0.073019,-0.072939
3,2017-01-10,1301,0.0,False,2745.0,2754.0,2735.0,2748.0,24200.0,-0.0051,-0.157895,0.184211,4457.894737,0.026316,2745.666667,2748.0,19.0,-3.0,0.001093,7543260.0,7532190.0,7.918992,0.001092,7.937254,-0.703934,1.12522,2,0,1,0,0,0,,,,,,2742.962963,2742.087248,2742.015137,2742.000986,2742.000159,20211230,"KYOKUYO CO.,LTD.",First Section (Domestic),Prime Market,"Fishery, Agriculture and Forestry",FOODS,TOPIX Small 2,20211230.0,10928283.0,33659110000.0,True,11976.45614,57600.0,,,981405.0,,85.193092,,,998.541211,,,,,,0.0,0.0,,2760.666667,2720.666667,,0.683333,0.0,0.0,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,2737.5,,0.0,0.0,0.0,,2443.225507,2436.21906,,,,0.0,0.0,,,,,,,,,,,,,,,,,,0.291971,0.291545,0.218818
4,2017-01-11,1301,0.0,False,2748.0,2752.0,2737.0,2745.0,9300.0,-0.003295,0.2,0.033333,310.0,0.233333,2744.666667,2745.0,15.0,3.0,-0.001092,7543260.0,7532224.0,7.917901,-0.001092,6.350853,-0.843252,0.933953,3,0,0,1,0,0,2742.6,,,,,2743.641975,2742.364653,2742.132191,2742.030827,2742.012135,20211230,"KYOKUYO CO.,LTD.",First Section (Domestic),Prime Market,"Fishery, Agriculture and Forestry",FOODS,TOPIX Small 2,20211230.0,10928283.0,33659110000.0,True,12596.45614,48300.0,,,0.0,,60.504096,,,997.4511,,,,,,0.0,0.0,,2760.466667,2722.466667,,0.592982,0.0,0.0,,,,,,0.0,,,,,,,,,,,,,,,,,,,,,2737.5,,0.0,0.0,0.0,,2443.225507,2436.21906,,,,0.0,0.0,,,,,,,,,,,,,,,,,,-0.10917,-0.10923,0.109409


In [26]:
df.shape

(2332531, 136)

In [27]:
df.isnull().sum() * 100 / len(df)

Date                         0.000000
SecuritiesCode               0.000000
ExpectedDividend             0.000000
SupervisionFlag              0.000000
Open                         0.000000
High                         0.000000
Low                          0.000000
Close                        0.000000
Volume                       0.011318
Target                       0.171488
BOP                          0.424818
Cpos                         0.424818
bsforce                      0.436136
Opos                         0.424818
av                           0.000000
vwap                         0.011318
HLr                          0.000000
OCr                          0.000000
relC                         0.000000
OC                           0.000000
HL                           0.000000
logC                         0.000000
logR                         0.000000
OHLCstd                      0.000000
OHLCskew                     0.000000
OHLCkur                      0.000000
weekday     

In [28]:
df.fillna(method='ffill', inplace=True)

In [29]:
df.isnull().sum() * 100 / len(df)

Date                         0.000000
SecuritiesCode               0.000000
ExpectedDividend             0.000000
SupervisionFlag              0.000000
Open                         0.000000
High                         0.000000
Low                          0.000000
Close                        0.000000
Volume                       0.000000
Target                       0.000000
BOP                          0.000000
Cpos                         0.000000
bsforce                      0.000000
Opos                         0.000000
av                           0.000000
vwap                         0.000000
HLr                          0.000000
OCr                          0.000000
relC                         0.000000
OC                           0.000000
HL                           0.000000
logC                         0.000000
logR                         0.000000
OHLCstd                      0.000000
OHLCskew                     0.000000
OHLCkur                      0.000000
weekday     

In [30]:
df.fillna(method='backfill', inplace=True)

In [31]:
df.isnull().sum() * 100 / len(df)

Date                         0.0
SecuritiesCode               0.0
ExpectedDividend             0.0
SupervisionFlag              0.0
Open                         0.0
High                         0.0
Low                          0.0
Close                        0.0
Volume                       0.0
Target                       0.0
BOP                          0.0
Cpos                         0.0
bsforce                      0.0
Opos                         0.0
av                           0.0
vwap                         0.0
HLr                          0.0
OCr                          0.0
relC                         0.0
OC                           0.0
HL                           0.0
logC                         0.0
logR                         0.0
OHLCstd                      0.0
OHLCskew                     0.0
OHLCkur                      0.0
weekday                      0.0
Monday                       0.0
Tuesday                      0.0
Wednesday                    0.0
Thursday  

In [32]:
df = df.drop(columns=['EffectiveDate', 'Name', 'Section/Products', 'NewMarketSegment', '33SectorName', '17SectorName', 'NewIndexSeriesSize', 'TradeDate', 'IssuedShares', 'MarketCapitalization', 'Universe0'])

In [33]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

encoder = OneHotEncoder()

encoder.fit(df[['SupervisionFlag']])
transformed = encoder.transform(df[['SupervisionFlag']])
ohe_df = pd.DataFrame.sparse.from_spmatrix(transformed)
df = pd.concat([df.reset_index(), ohe_df], axis=1).drop(['SupervisionFlag'], axis=1)

In [34]:
df.head()

Unnamed: 0,index,Date,SecuritiesCode,ExpectedDividend,Open,High,Low,Close,Volume,Target,BOP,Cpos,bsforce,Opos,av,vwap,HLr,OCr,relC,OC,HL,logC,logR,OHLCstd,OHLCskew,OHLCkur,weekday,Monday,Tuesday,Wednesday,Thursday,Friday,SMA5,SMA20,SMA50,SMA200,SMA500,EMA5,EMA20,EMA50,EMA200,EMA500,volume_adi,volume_obv,volume_cmf,volume_fi,volume_em,volume_sma_em,volume_vpt,volume_vwap,volume_mfi,volume_nvi,volatility_bbm,volatility_bbh,volatility_bbl,volatility_bbw,volatility_bbp,volatility_bbhi,volatility_bbli,volatility_kcc,volatility_kch,volatility_kcl,volatility_kcw,volatility_kcp,volatility_kchi,volatility_kcli,volatility_dcl,volatility_dch,volatility_dcm,volatility_dcw,volatility_dcp,volatility_atr,volatility_ui,trend_macd,trend_macd_signal,trend_macd_diff,trend_sma_fast,trend_sma_slow,trend_ema_fast,trend_ema_slow,trend_vortex_ind_pos,trend_vortex_ind_neg,trend_vortex_ind_diff,trend_trix,trend_mass_index,trend_dpo,trend_kst,trend_kst_sig,trend_kst_diff,trend_ichimoku_conv,trend_ichimoku_base,trend_ichimoku_a,trend_ichimoku_b,trend_stc,trend_adx,trend_adx_pos,trend_adx_neg,trend_cci,trend_visual_ichimoku_a,trend_visual_ichimoku_b,trend_aroon_up,trend_aroon_down,trend_aroon_ind,trend_psar_up_indicator,trend_psar_down_indicator,momentum_rsi,momentum_stoch_rsi,momentum_stoch_rsi_k,momentum_stoch_rsi_d,momentum_tsi,momentum_uo,momentum_stoch,momentum_stoch_signal,momentum_wr,momentum_ao,momentum_roc,momentum_ppo,momentum_ppo_signal,momentum_ppo_hist,momentum_pvo,momentum_pvo_signal,momentum_pvo_hist,others_dr,others_dlr,others_cr,0,1
0,0,2017-01-04,1301,0.0,2734.0,2755.0,2730.0,2742.0,31400.0,0.00073,-0.32,-0.02,-628.0,-0.34,2742.333333,2742.0,25.0,-8.0,0.002926,7496628.0,7521150.0,7.916807,0.002922,11.026483,0.94153,0.008495,3,0,0,1,0,0,2742.6,2725.75,2847.54,3088.225,3377.878,2742.0,2742.0,2742.0,2742.0,2742.0,-1256.0,31400.0,0.166846,39287.798568,-100558.7,-145816.395088,17641.209629,2724.766606,34.909101,1000.0,2725.75,2759.548669,2691.951331,2.479954,0.63684,0.0,0.0,2724.3,2767.333333,2717.333333,1.666483,0.493333,0.0,0.0,2681.0,2755.0,2718.0,2.714849,0.72973,0.0,1.127384,0.169958,17.491613,14.043442,2723.833333,2726.692308,2720.375581,2729.511689,0.939103,1.016026,-0.076923,0.331956,26.148689,-39.75,102.78617,102.78617,0.0,2718.5,2718.0,2724.5,2742.5,91.753071,0.0,0.0,0.0,23.76926,2443.225507,2436.21906,16.0,36.0,-20.0,0.0,0.0,49.268322,1.0,1.0,1.0,61.746435,63.328442,62.162162,67.296063,-37.837838,90.302941,-0.802334,0.006227,0.633344,0.503976,-9.943798,6.969853,6.072601,11.813653,-0.145985,0.0,1.0,0.0
1,1,2017-01-05,1301,0.0,2743.0,2747.0,2735.0,2738.0,17900.0,0.00292,0.416667,-0.25,-4475.0,0.166667,2740.0,2738.0,12.0,5.0,-0.001823,7510334.0,7513045.0,7.915348,-0.001824,5.315073,0.198134,-2.215052,4,0,0,0,1,0,2742.6,2725.75,2847.54,3088.225,3377.878,2740.666667,2741.619048,2741.843137,2741.960199,2741.984032,-10206.0,13500.0,0.166846,39287.798568,-100558.7,-145816.395088,3683.3748,2724.766606,34.909101,998.541211,2725.75,2759.548669,2691.951331,2.479954,0.63684,0.0,0.0,2724.3,2759.666667,2722.666667,1.666483,0.414414,0.0,0.0,2681.0,2755.0,2718.0,2.714849,0.72973,0.0,1.127384,0.169958,17.491613,14.043442,2723.833333,2726.692308,2720.375581,2729.511689,0.939103,1.016026,-0.076923,0.331956,26.148689,-39.75,102.78617,102.78617,0.0,2718.5,2718.0,2724.5,2742.5,91.753071,0.0,0.0,0.0,23.76926,2443.225507,2436.21906,16.0,36.0,-20.0,0.0,0.0,49.268322,1.0,1.0,1.0,61.746435,63.328442,62.162162,67.296063,-37.837838,90.302941,-0.802334,0.006227,0.633344,0.503976,-9.943798,6.969853,6.072601,-0.145879,-0.145985,-0.145879,1.0,0.0
2,2,2017-01-06,1301,0.0,2734.0,2744.0,2720.0,2740.0,19900.0,-0.001092,-0.25,0.333333,6633.333333,0.083333,2734.666667,2740.0,24.0,-6.0,0.002195,7491160.0,7463680.0,7.916078,0.002192,10.503968,-1.16486,1.085094,5,0,0,0,0,1,2742.6,2725.75,2847.54,3088.225,3377.878,2740.444444,2741.464853,2741.770857,2741.940695,2741.976112,3060.666667,33400.0,0.166846,39287.798568,-1085427.0,-145816.395088,-11.576169,2724.766606,34.909101,998.541211,2725.75,2759.548669,2691.951331,2.479954,0.63684,0.0,0.0,2724.3,2759.333333,2718.666667,1.666483,0.52459,0.0,0.0,2681.0,2755.0,2718.0,2.714849,0.72973,0.0,1.127384,0.169958,17.491613,14.043442,2723.833333,2726.692308,2720.375581,2729.511689,0.939103,1.016026,-0.076923,0.331956,26.148689,-39.75,102.78617,102.78617,0.0,2718.5,2718.0,2724.5,2737.5,91.753071,0.0,0.0,0.0,23.76926,2443.225507,2436.21906,16.0,36.0,-20.0,0.0,1.0,49.268322,1.0,1.0,1.0,61.746435,63.328442,62.162162,67.296063,-37.837838,90.302941,-0.802334,0.006227,0.633344,0.503976,-9.943798,6.969853,6.072601,0.073046,0.073019,-0.072939,1.0,0.0
3,3,2017-01-10,1301,0.0,2745.0,2754.0,2735.0,2748.0,24200.0,-0.0051,-0.157895,0.184211,4457.894737,0.026316,2745.666667,2748.0,19.0,-3.0,0.001093,7543260.0,7532190.0,7.918992,0.001092,7.937254,-0.703934,1.12522,2,0,1,0,0,0,2742.6,2725.75,2847.54,3088.225,3377.878,2742.962963,2742.087248,2742.015137,2742.000986,2742.000159,11976.45614,57600.0,0.166846,39287.798568,981405.0,-145816.395088,85.193092,2724.766606,34.909101,998.541211,2725.75,2759.548669,2691.951331,2.479954,0.63684,0.0,0.0,2724.3,2760.666667,2720.666667,1.666483,0.683333,0.0,0.0,2681.0,2755.0,2718.0,2.714849,0.72973,0.0,1.127384,0.169958,17.491613,14.043442,2723.833333,2726.692308,2720.375581,2729.511689,0.939103,1.016026,-0.076923,0.331956,26.148689,-39.75,102.78617,102.78617,0.0,2718.5,2718.0,2724.5,2737.5,91.753071,0.0,0.0,0.0,23.76926,2443.225507,2436.21906,16.0,36.0,-20.0,0.0,0.0,49.268322,1.0,1.0,1.0,61.746435,63.328442,62.162162,67.296063,-37.837838,90.302941,-0.802334,0.006227,0.633344,0.503976,-9.943798,6.969853,6.072601,0.291971,0.291545,0.218818,1.0,0.0
4,4,2017-01-11,1301,0.0,2748.0,2752.0,2737.0,2745.0,9300.0,-0.003295,0.2,0.033333,310.0,0.233333,2744.666667,2745.0,15.0,3.0,-0.001092,7543260.0,7532224.0,7.917901,-0.001092,6.350853,-0.843252,0.933953,3,0,0,1,0,0,2742.6,2725.75,2847.54,3088.225,3377.878,2743.641975,2742.364653,2742.132191,2742.030827,2742.012135,12596.45614,48300.0,0.166846,39287.798568,0.0,-145816.395088,60.504096,2724.766606,34.909101,997.4511,2725.75,2759.548669,2691.951331,2.479954,0.63684,0.0,0.0,2724.3,2760.466667,2722.466667,1.666483,0.592982,0.0,0.0,2681.0,2755.0,2718.0,2.714849,0.72973,0.0,1.127384,0.169958,17.491613,14.043442,2723.833333,2726.692308,2720.375581,2729.511689,0.939103,1.016026,-0.076923,0.331956,26.148689,-39.75,102.78617,102.78617,0.0,2718.5,2718.0,2724.5,2737.5,91.753071,0.0,0.0,0.0,23.76926,2443.225507,2436.21906,16.0,36.0,-20.0,0.0,0.0,49.268322,1.0,1.0,1.0,61.746435,63.328442,62.162162,67.296063,-37.837838,90.302941,-0.802334,0.006227,0.633344,0.503976,-9.943798,6.969853,6.072601,-0.10917,-0.10923,0.109409,1.0,0.0


In [35]:
df.shape

(2332531, 127)

In [36]:
df.isnull().sum() * 100 / len(df)

index                        0.0
Date                         0.0
SecuritiesCode               0.0
ExpectedDividend             0.0
Open                         0.0
High                         0.0
Low                          0.0
Close                        0.0
Volume                       0.0
Target                       0.0
BOP                          0.0
Cpos                         0.0
bsforce                      0.0
Opos                         0.0
av                           0.0
vwap                         0.0
HLr                          0.0
OCr                          0.0
relC                         0.0
OC                           0.0
HL                           0.0
logC                         0.0
logR                         0.0
OHLCstd                      0.0
OHLCskew                     0.0
OHLCkur                      0.0
weekday                      0.0
Monday                       0.0
Tuesday                      0.0
Wednesday                    0.0
Thursday  

In [37]:
df = df.drop(columns=['index'])

In [None]:
df.to_csv('out.csv')