Import dependencies

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [2]:
#Set up cell


#Setup plotly template
temp = dict(layout=go.Layout(font=dict(family="Franklin Gothic", size=12), width=800))

#Setup plotly colors
colors=px.colors.qualitative.Plotly

Import and visualize a summary of the data

In [3]:
#Read data
stock_prices = pd.read_csv('../train_files/stock_prices.csv')
stock_list = pd.read_csv('../stock_list.csv')

In [4]:
#8-value descriptor of the data
display(stock_prices.describe().style.format('{:,.2f}'))

Unnamed: 0,SecuritiesCode,Open,High,Low,Close,Volume,AdjustmentFactor,ExpectedDividend,Target
count,2332531.0,2324923.0,2324923.0,2324923.0,2324923.0,2332531.0,2332531.0,18865.0,2332293.0
mean,5894.84,2594.51,2626.54,2561.23,2594.02,691936.56,1.0,22.02,0.0
std,2404.16,3577.19,3619.36,3533.49,3576.54,3911255.94,0.07,29.88,0.02
min,1301.0,14.0,15.0,13.0,14.0,0.0,0.1,0.0,-0.58
25%,3891.0,1022.0,1035.0,1009.0,1022.0,30300.0,1.0,5.0,-0.01
50%,6238.0,1812.0,1834.0,1790.0,1811.0,107100.0,1.0,15.0,0.0
75%,7965.0,3030.0,3070.0,2995.0,3030.0,402100.0,1.0,30.0,0.01
max,9997.0,109950.0,110500.0,107200.0,109550.0,643654000.0,20.0,1070.0,1.12


In [5]:
print(stock_prices.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2332531 entries, 0 to 2332530
Data columns (total 12 columns):
 #   Column            Dtype  
---  ------            -----  
 0   RowId             object 
 1   Date              object 
 2   SecuritiesCode    int64  
 3   Open              float64
 4   High              float64
 5   Low               float64
 6   Close             float64
 7   Volume            int64  
 8   AdjustmentFactor  float64
 9   ExpectedDividend  float64
 10  SupervisionFlag   bool   
 11  Target            float64
dtypes: bool(1), float64(7), int64(2), object(2)
memory usage: 198.0+ MB
None


In [6]:
stock_prices.head()

Unnamed: 0,RowId,Date,SecuritiesCode,Open,High,Low,Close,Volume,AdjustmentFactor,ExpectedDividend,SupervisionFlag,Target
0,20170104_1301,2017-01-04,1301,2734.0,2755.0,2730.0,2742.0,31400,1.0,,False,0.00073
1,20170104_1332,2017-01-04,1332,568.0,576.0,563.0,571.0,2798500,1.0,,False,0.012324
2,20170104_1333,2017-01-04,1333,3150.0,3210.0,3140.0,3210.0,270800,1.0,,False,0.006154
3,20170104_1376,2017-01-04,1376,1510.0,1550.0,1510.0,1550.0,11300,1.0,,False,0.011053
4,20170104_1377,2017-01-04,1377,3270.0,3350.0,3270.0,3330.0,150800,1.0,,False,0.003026


In [7]:
stock_prices.isna().sum()

RowId                     0
Date                      0
SecuritiesCode            0
Open                   7608
High                   7608
Low                    7608
Close                  7608
Volume                    0
AdjustmentFactor          0
ExpectedDividend    2313666
SupervisionFlag           0
Target                  238
dtype: int64

So we can get rid of ExpectedDividend

In [8]:
group_stock_prices_date_SecCod = stock_prices.groupby(['Date', 'SecuritiesCode'])
group_stock_prices_date_SecCod

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x000001DC8B2ADF10>

In [9]:
stock_list.head()

Unnamed: 0,SecuritiesCode,EffectiveDate,Name,Section/Products,NewMarketSegment,33SectorCode,33SectorName,17SectorCode,17SectorName,NewIndexSeriesSizeCode,NewIndexSeriesSize,TradeDate,Close,IssuedShares,MarketCapitalization,Universe0
0,1301,20211230,"KYOKUYO CO.,LTD.",First Section (Domestic),Prime Market,50,"Fishery, Agriculture and Forestry",1,FOODS,7,TOPIX Small 2,20211230.0,3080.0,10928280.0,33659110000.0,True
1,1305,20211230,Daiwa ETF-TOPIX,ETFs/ ETNs,,-,-,-,-,-,-,20211230.0,2097.0,3634636000.0,7621831000000.0,False
2,1306,20211230,NEXT FUNDS TOPIX Exchange Traded Fund,ETFs/ ETNs,,-,-,-,-,-,-,20211230.0,2073.5,7917718000.0,16417390000000.0,False
3,1308,20211230,Nikko Exchange Traded Index Fund TOPIX,ETFs/ ETNs,,-,-,-,-,-,-,20211230.0,2053.0,3736943000.0,7671945000000.0,False
4,1309,20211230,NEXT FUNDS ChinaAMC SSE50 Index Exchange Trade...,ETFs/ ETNs,,-,-,-,-,-,-,20211230.0,44280.0,72632.0,3216145000.0,False


In [10]:
#Data to plot
data_date = stock_prices.Date.unique()
returns = stock_prices.groupby('Date')['Target'].mean().mul(100).rename('Average Return')
close_avg = stock_prices.groupby('Date')['Close'].mean().rename('Closing Price')
vol_avg = stock_prices.groupby('Date')['Volume'].mean().rename('Volume')

In [11]:
#Use plotly make_subplots

fig = make_subplots(rows=3, cols=1,
                    shared_xaxes=True)

for i, j in enumerate([returns, close_avg, vol_avg]):
    fig.add_trace(go.Scatter(x=data_date, y=j, mode='lines',
                             name=j.name, marker_color=colors[i]), row=i+1, col=1)

fig.update_xaxes(rangeslider_visible=False,
                 rangeselector=dict(
                     buttons=list([
                         dict(count=6, label='6m', step='month', stepmode='backward'),
                         dict(count=1, label='1y', step='year', stepmode='backward'),
                         dict(count=2, label='2y', step='year', stepmode='backward'),
                         dict(step='all')])),
                         row=1, col=1)

fig.update_layout(template=temp, title='JPX Market Average Stock Return, Closing Price, and Shares Traded',
                  hovermode='x unified', height=700,
                  yaxis1=dict(title='Stock Return', ticksuffix='%'),
                  yaxis2_title='Closing Price', yaxis3_title='Shares Traded',
                  showlegend=False)

fig.show()