<div style="display:fill;
           background-color:#4F709C66;
           letter-spacing:0.5px;border-bottom: 2px solid black;">
<img src="https://raw.githubusercontent.com/IqmanS/Machine-Learning-Notebooks/main/optiver/optiver-banner.jpg">
<H2 style="padding: 20px; color:black; font-weight:600; font-family: 'Garamond', 'Lucida Sans', sans-serif; text-align: center; font-size: 38px;">📈 Optiver | Interactive Stock-Price Analysis </H2>
</div>


In [1]:
# Numerical and tabular libraries.
import numpy as np
import pandas as pd
# Plotly: `px` (plotly.express) is the API used to build the figures in the cells below.
import plotly as py
import plotly.express as px
import plotly.graph_objs as go
from plotly import tools
from plotly.subplots import make_subplots
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# Initialise Plotly's offline notebook mode (presumably so figures render inline — confirm).
init_notebook_mode(connected=True)
import warnings
# NOTE(review): this installs an "ignore" filter for every warning category, so pandas
# diagnostics such as SettingWithCopyWarning or FutureWarning are hidden in all later cells.
warnings.filterwarnings("ignore")

In [2]:
# Read only the first 200,000 rows of the competition training file; every later
# cell works on this truncated frame.
train_data = pd.read_csv(
    "/kaggle/input/optiver-trading-at-the-close/train.csv",
    nrows=200000,
)
# Preview the first rows as the cell output.
train_data.head()

Unnamed: 0,stock_id,date_id,seconds_in_bucket,imbalance_size,imbalance_buy_sell_flag,reference_price,matched_size,far_price,near_price,bid_price,bid_size,ask_price,ask_size,wap,target,time_id,row_id
0,0,0,0,3180602.69,1,0.999812,13380276.64,,,0.999812,60651.5,1.000026,8493.03,1.0,-3.029704,0,0_0_0
1,1,0,0,166603.91,-1,0.999896,1642214.25,,,0.999896,3233.04,1.00066,20605.09,1.0,-5.519986,0,0_0_1
2,2,0,0,302879.87,-1,0.999561,1819368.03,,,0.999403,37956.0,1.000298,18995.0,1.0,-8.38995,0,0_0_2
3,3,0,0,11917682.27,-1,1.000171,18389745.62,,,0.999999,2324.9,1.000214,479032.4,1.0,-4.0102,0,0_0_3
4,4,0,0,447549.96,-1,0.999532,17860614.95,,,0.999394,16485.54,1.000016,434.1,1.0,-7.349849,0,0_0_4




<div class="alert alert-block alert-info">
<h2 style="text-align:center;">About 'train.csv'</h2>
<b> stock_id -</b> A unique identifier for the stock. Not all stock IDs exist in every time bucket.
<br><b> date_id -</b> A unique identifier for the date. Date IDs are sequential & consistent across all stocks.
<br><b> imbalance_size -</b> The amount unmatched at the current reference price (in USD).
<br><b> imbalance_buy_sell_flag -</b> An indicator reflecting the direction of auction imbalance.
<br><b> reference_price -</b> The price at which paired shares are maximized, the imbalance is minimized and the distance from the bid-ask midpoint is minimized, in that order. 
<br><b> matched_size -</b> The amount that can be matched at the current reference price (in USD).
<br><b> far_price -</b> The crossing price that will maximize the number of shares matched based on auction interest only. This calculation excludes continuous market orders.
<br><b> near_price -</b> The crossing price that will maximize the number of shares matched based on auction and continuous market orders.
<br><b> [bid/ask]_price -</b> Price of the most competitive buy/sell level in the non-auction book.
<br><b> [bid/ask]_size -</b> The dollar notional amount on the most competitive buy/sell level in the non-auction book.
<br><b> wap -</b> The weighted average price in the non-auction book.
<br><b> seconds_in_bucket -</b> The number of seconds elapsed since the beginning of the day's closing auction, always starting from 0.
<br><b> target -</b> The 60-second future move in the wap of the stock, less the 60-second future move of the synthetic index.
</div>

<div>
    <h1 style="font-family:  'Garamond', 'Lucida Sans', sans-serif; text-align: center; color: #263A29; font-weight: bold; font-size: 36px;">
    I. Correlation Heatmap
    </h1>
</div>
<hr>

In [3]:
# Pairwise correlations, rounded to two decimals. Only numeric columns are used:
# `row_id` holds strings such as "0_0_0", and on pandas >= 2.0 a bare
# `DataFrame.corr()` raises instead of silently skipping such columns.
corr = train_data.select_dtypes(include="number").corr().round(2)

# Boolean mask that is True on the diagonal and above. Those cells mirror the
# lower triangle, so they are blanked out; rows left entirely empty are dropped.
mask = np.triu(np.ones(corr.shape, dtype=bool))
corr = corr.mask(mask).dropna(how="all")

# NOTE(review): `aspect=0.5` is kept from the original; plotly documents only
# 'equal', 'auto' or None for this argument — confirm the intended value.
fig = px.imshow(
    corr,
    text_auto=".2f",
    aspect=0.5,
    width=800,
    height=800,
    title="1. Correlation Plot",
)
fig.show()

<div>
    <h1 style="font-family:  'Garamond', 'Lucida Sans', sans-serif; text-align: center; color: #263A29; font-weight: bold; font-size: 36px;">
    II. Imbalance Size vs Time
    </h1>
</div>
<hr>

In [4]:
# Rows for stocks 6, 7 and 8 with a combined "<date_id>_<time_id>" label column.
# The frame is built in a single chain so the new column is added to an independent
# object; the original assigned into a `query()` slice, which is chained assignment
# (pandas' SettingWithCopyWarning was hidden by the global warnings filter).
stock678 = (
    train_data[train_data["stock_id"].isin([6, 7, 8])]
    .assign(date_time=lambda df: df["date_id"].astype(str) + "_" + df["time_id"].astype(str))
    .reset_index(drop=True)
)

# The x-axis uses the `time_id` column instead of the row position (`index // 3`).
# The positional form is only a valid time axis when every bucket holds exactly one
# row per stock, in order; a single missing row would shift every later point.
# (Assumes `time_id` orders the buckets chronologically — confirm.)
fig = px.area(
    stock678,
    x="time_id",
    y=["imbalance_size"],
    color="stock_id",
    title="2. Imbalance Size of Stock #6,#7,#8 vs Time",
    line_shape="spline",
    color_discrete_map={6: "#EFB74F", 7: "#247881", 8: "#47207D"},
)

# One dotted horizontal line per stock at its mean imbalance size; label positions
# alternate so neighbouring annotations do not sit on the same side of their line.
for sid, label_pos in [(6, "bottom right"), (7, "top right"), (8, "bottom right")]:
    fig.add_hline(
        y=stock678.loc[stock678["stock_id"] == sid, "imbalance_size"].mean(),
        line_dash="dot",
        annotation_text=f"Average Imbalance #{sid}",
        annotation_position=label_pos,
    )

fig.update_layout(
    xaxis_title="Time",
    yaxis_title="Size (in USD).",
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
)
fig.show()

In [5]:
# Rows for stocks 25, 50 and 75 with a combined "<date_id>_<time_id>" label column.
# Built in a single chain so the new column lands on an independent frame rather
# than on a `query()` slice of `train_data` (chained assignment in the original).
stock255075 = (
    train_data[train_data["stock_id"].isin([25, 50, 75])]
    .assign(date_time=lambda df: df["date_id"].astype(str) + "_" + df["time_id"].astype(str))
    .reset_index(drop=True)
)

# Plot against `time_id` instead of the row position (`index // 3`), which is only
# correct when every bucket holds exactly one row for each of the three stocks.
# (Assumes `time_id` orders the buckets chronologically — confirm.)
fig = px.area(
    stock255075,
    x="time_id",
    y=["imbalance_size"],
    color="stock_id",
    title="3. Imbalance Size of Stock #25,#50,#75 vs Time",
    line_shape="spline",
    color_discrete_map={25: "#F94C10", 50: "#900C3F", 75: "#071952"},
)

# One dotted horizontal line per stock at its mean imbalance size.
for sid, label_pos in [(25, "bottom right"), (50, "top left"), (75, "bottom right")]:
    fig.add_hline(
        y=stock255075.loc[stock255075["stock_id"] == sid, "imbalance_size"].mean(),
        line_dash="dot",
        annotation_text=f"Average Imbalance #{sid}",
        annotation_position=label_pos,
    )

fig.update_layout(
    xaxis_title="Time",
    yaxis_title="Size (in USD).",
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
)
fig.show()

<div>
    <h1 style="font-family:  'Garamond', 'Lucida Sans', sans-serif; text-align: center; color: #263A29; font-weight: bold; font-size: 36px;">
    III. Matched Size vs Time
    </h1>
</div>
<hr>

In [6]:
# Matched size over time for stocks 6, 7 and 8.
# The x-axis uses the `time_id` column instead of the row position (`index // 3`),
# which is only a valid time axis when every bucket holds exactly one row per stock.
# (Assumes `time_id` orders the buckets chronologically — confirm.)
fig = px.area(
    stock678,
    x="time_id",
    y=["matched_size"],
    color="stock_id",
    title="4. Matched Size of Stock #6,#7,#8 vs Time",
    line_shape="spline",
    color_discrete_map={6: "#EFB74F", 7: "#247881", 8: "#47207D"},
)

# Dotted mean line per stock, computed from the already-filtered `stock678` frame
# (same rows as filtering `train_data` again by stock id).
for sid, label_pos in [(6, "bottom right"), (7, "top right"), (8, "bottom right")]:
    fig.add_hline(
        y=stock678.loc[stock678["stock_id"] == sid, "matched_size"].mean(),
        line_dash="dot",
        annotation_text=f"Average Matched Size #{sid}",
        annotation_position=label_pos,
    )

fig.update_layout(
    xaxis_title="Time",
    yaxis_title="Size (in USD).",
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
)
fig.show()

In [7]:
# Matched size over time for stocks 25, 50 and 75.
# The x-axis uses the `time_id` column instead of the row position (`index // 3`),
# which is only a valid time axis when every bucket holds exactly one row per stock.
# (Assumes `time_id` orders the buckets chronologically — confirm.)
# NOTE(review): the title reuses figure number "4." from the matched-size figure for
# stocks 6/7/8; it is left unchanged because renumbering cascades to later titles.
fig = px.area(
    stock255075,
    x="time_id",
    y=["matched_size"],
    color="stock_id",
    title="4. Matched Size of Stock #25,#50,#75 vs Time",
    line_shape="spline",
    color_discrete_map={25: "#F94C10", 50: "#900C3F", 75: "#071952"},
)

# One dotted horizontal line per stock at its mean matched size.
for sid, label_pos in [(25, "bottom right"), (50, "top right"), (75, "bottom right")]:
    fig.add_hline(
        y=stock255075.loc[stock255075["stock_id"] == sid, "matched_size"].mean(),
        line_dash="dot",
        annotation_text=f"Average Matched Size #{sid}",
        annotation_position=label_pos,
    )

fig.update_layout(
    xaxis_title="Time",
    yaxis_title="Size (in USD).",
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
)
fig.show()

<div style="font-family:  'Garamond', sans-serif;  color: #263A29;">
    <h1 style="font-size: 36px;font-weight: bold;text-align: center;">
    IV. Bid Size vs Time
    </h1>
<hr>
</div>


In [8]:
# Bid size over time for stocks 6, 7 and 8.
# The x-axis uses the `time_id` column instead of the row position (`index // 3`),
# which is only a valid time axis when every bucket holds exactly one row per stock.
# (Assumes `time_id` orders the buckets chronologically — confirm.)
fig = px.area(
    stock678,
    x="time_id",
    y=["bid_size"],
    color="stock_id",
    title="5. Bid Size of Stock  #6,#7,#8 vs Time",
    line_shape="spline",
    color_discrete_map={6: "#EFB74F", 7: "#247881", 8: "#47207D"},
)

# One dotted horizontal line per stock at its mean bid size.
for sid, label_pos in [(6, "bottom right"), (7, "top right"), (8, "bottom right")]:
    fig.add_hline(
        y=stock678.loc[stock678["stock_id"] == sid, "bid_size"].mean(),
        line_dash="dot",
        annotation_text=f"Average Bid Size #{sid}",
        annotation_position=label_pos,
    )

fig.update_layout(
    xaxis_title="Time",
    yaxis_title="Size (in USD).",
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
)
fig.show()

In [9]:
# Bid size over time for stocks 25, 50 and 75.
# The x-axis uses the `time_id` column instead of the row position (`index // 3`),
# which is only a valid time axis when every bucket holds exactly one row per stock.
# (Assumes `time_id` orders the buckets chronologically — confirm.)
fig = px.area(
    stock255075,
    x="time_id",
    y=["bid_size"],
    color="stock_id",
    title="6. Bid Size of Stock #25,#50,#75 vs Time",
    line_shape="spline",
    color_discrete_map={25: "#F94C10", 50: "#900C3F", 75: "#071952"},
)

# One dotted horizontal line per stock at its mean bid size.
for sid, label_pos in [(25, "bottom right"), (50, "top right"), (75, "bottom right")]:
    fig.add_hline(
        y=stock255075.loc[stock255075["stock_id"] == sid, "bid_size"].mean(),
        line_dash="dot",
        annotation_text=f"Average Bid Size #{sid}",
        annotation_position=label_pos,
    )

fig.update_layout(
    xaxis_title="Time",
    yaxis_title="Size (in USD).",
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
)
fig.show()

<div style="font-family:  'Garamond', sans-serif;  color: #263A29;">
    <h1 style="font-size: 36px;font-weight: bold;text-align: center;">
    V. Ask Size vs Time
    </h1>
<hr>
</div>


In [10]:
# Ask size over time for stocks 6, 7 and 8.
# The x-axis uses the `time_id` column instead of the row position (`index // 3`),
# which is only a valid time axis when every bucket holds exactly one row per stock.
# (Assumes `time_id` orders the buckets chronologically — confirm.)
fig = px.area(
    stock678,
    x="time_id",
    y=["ask_size"],
    color="stock_id",
    title="7. Ask Size of Stock  #6,#7,#8 vs Time",
    line_shape="spline",
    color_discrete_map={6: "#EFB74F", 7: "#247881", 8: "#47207D"},
)

# One dotted horizontal line per stock at its mean ask size.
for sid, label_pos in [(6, "bottom right"), (7, "top right"), (8, "bottom right")]:
    fig.add_hline(
        y=stock678.loc[stock678["stock_id"] == sid, "ask_size"].mean(),
        line_dash="dot",
        annotation_text=f"Average Ask Size #{sid}",
        annotation_position=label_pos,
    )

fig.update_layout(
    xaxis_title="Time",
    yaxis_title="Size (in USD).",
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
)
fig.show()

In [11]:
# Ask size over time for stocks 25, 50 and 75.
# The x-axis uses the `time_id` column instead of the row position (`index // 3`),
# which is only a valid time axis when every bucket holds exactly one row per stock.
# (Assumes `time_id` orders the buckets chronologically — confirm.)
fig = px.area(
    stock255075,
    x="time_id",
    y=["ask_size"],
    color="stock_id",
    title="8. Ask Size of Stock  #25,#50,#75 vs Time",
    line_shape="spline",
    color_discrete_map={25: "#F94C10", 50: "#900C3F", 75: "#071952"},
)

# One dotted horizontal line per stock at its mean ask size.
for sid, label_pos in [(25, "bottom right"), (50, "top right"), (75, "bottom right")]:
    fig.add_hline(
        y=stock255075.loc[stock255075["stock_id"] == sid, "ask_size"].mean(),
        line_dash="dot",
        annotation_text=f"Average Ask Size #{sid}",
        annotation_position=label_pos,
    )

fig.update_layout(
    xaxis_title="Time",
    yaxis_title="Size (in USD).",
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
)
fig.show()

<div style="font-family:  'Garamond', sans-serif;  color: #263A29;">
    <h1 style="font-size: 36px;font-weight: bold;text-align: center;">
    VI. Count of Buy, Sell & No-Imbalance
    </h1>
<hr>
</div>


In [12]:
# Tally the rows of `train_data` by imbalance direction. The labels pair, in order,
# with the `imbalance_buy_sell_flag` values 1, -1 and 0.
count = pd.DataFrame(
    {
        "imb": ["buy-imbalance", "sell-imbalance", "no-imbalance"],
        "count": [
            len(train_data[train_data["imbalance_buy_sell_flag"] == flag])
            for flag in (1, -1, 0)
        ],
    }
)

# One bar per category, coloured by category.
fig = px.bar(
    count,
    x="imb",
    y="count",
    color="imb",
    title="9.  Count of Buy, Sell & No-Imbalance",
)
fig.show()