In [2]:
# Standard library (pickle is used for saving models)
import os
import pickle
import warnings

#Data & Frame
import pandas as pd
import numpy as np
import yfinance as yf

#Visualization
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import seaborn as sns

# For data modeling
from xgboost import XGBClassifier
from xgboost import XGBRegressor
from xgboost import plot_importance

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

# For metrics and helpful functions
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score,\
f1_score, confusion_matrix, ConfusionMatrixDisplay, classification_report
from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.tree import plot_tree


In [3]:
# Silence every warning category for the remainder of the session
warnings.filterwarnings(action="ignore")

# Lift the column cap so wide DataFrames render without truncation
pd.options.display.max_columns = None

In [4]:
# Location of the structured CSV that feeds every cell below.
# The SECTOR_DATA_CSV environment variable overrides the location so the
# notebook can run on machines that do not share this directory layout;
# when the variable is unset, the original path is used unchanged.
SECTOR_DATA_CSV = os.environ.get(
    'SECTOR_DATA_CSV',
    '/Users/xonkar/Desktop/Data_Projects/Options/Technology/Structured_Data.csv',
)

# Load the file into the DataFrame used by the rest of the notebook
# (pd.read_csv raises FileNotFoundError if the path does not exist).
sector_data = pd.read_csv(SECTOR_DATA_CSV)

# Last expression of the cell: render the frame as rich output
sector_data

Unnamed: 0,CreatedDate,CreatedTime,Symbol,Type,Volume,Price,Side,Money,CallPut,Strike,Spot,Premium,ExpirationDate,Color,ImpliedVolatility,Dte,ER,StockEtf,Sector,Uoa,Weekly,MktCap,OI,1D After,3D After,5D After,1D Price,3D Price,5D Price,1D Performance,3D Performance,5D Performance,Distance,1PN,3PN,5PN,WeekDay
0,2023-01-09,3:59:57 PM,MSFT,SWEEP,200,6.75,A,OTM,CALL,235.0,227.14,135000,2023-02-17,WHITE,0.33,39,F,STOCK,Information Technology,F,F,1676733000000,16193,2023-01-10,2023-01-12,2023-01-16,228.85,238.51,240.35,0.75,5.01,5.82,3.46,positive,positive,positive,Monday
1,2023-01-09,3:59:44 PM,BABA,BLOCK,304,3.90,B,OTM,CALL,121.0,110.84,118560,2023-02-10,YELLOW,0.58,32,F,STOCK,Information Technology,F,F,291151000000,23,2023-01-10,2023-01-12,2023-01-16,114.88,113.15,115.19,3.65,2.09,3.93,9.17,positive,positive,positive,Monday
2,2023-01-09,3:58:22 PM,NVDA,BLOCK,250,8.46,B,ITM,CALL,150.0,156.19,211500,2023-01-13,MAGENTA,0.74,4,F,STOCK,Information Technology,F,T,365531000000,3918,2023-01-10,2023-01-12,2023-01-16,159.09,165.11,177.02,1.86,5.71,13.34,-3.96,positive,positive,positive,Monday
3,2023-01-09,3:58:16 PM,BABA,SWEEP,500,14.85,B,ITM,CALL,100.0,110.92,742522,2023-02-17,WHITE,0.59,39,F,STOCK,Information Technology,F,F,291151000000,9468,2023-01-10,2023-01-12,2023-01-16,114.88,113.15,115.19,3.57,2.01,3.85,-9.84,positive,positive,positive,Monday
4,2023-01-09,3:58:05 PM,BABA,SWEEP,250,14.88,B,ITM,CALL,100.0,110.92,371961,2023-02-17,WHITE,0.59,39,F,STOCK,Information Technology,F,F,291151000000,9468,2023-01-10,2023-01-12,2023-01-16,114.88,113.15,115.19,3.57,2.01,3.85,-9.84,positive,positive,positive,Monday
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1298,2023-01-09,9:30:02 AM,CRM,BLOCK,1000,21.90,B,OTM,CALL,150.0,143.53,2190000,2024-01-19,YELLOW,0.39,375,F,STOCK,Information Technology,F,F,140510000000,755,2023-01-10,2023-01-12,2023-01-16,147.44,149.60,148.47,2.72,4.23,3.44,4.51,positive,positive,positive,Monday
1299,2023-01-09,9:30:00 AM,AAPL,ML/,999,6.15,B,OTM,CALL,145.0,130.70,614555,2023-06-16,WHITE,0.32,157,F,STOCK,Information Technology,F,F,2062010000000,13919,2023-01-10,2023-01-12,2023-01-16,130.73,133.41,135.94,0.02,2.07,4.01,10.94,positive,positive,positive,Monday
1300,2023-01-09,9:30:00 AM,AAPL,ML/,999,10.25,BB,ATM,PUT,130.0,130.70,1023805,2023-06-16,WHITE,0.33,157,F,STOCK,Information Technology,F,F,2062010000000,71667,2023-01-10,2023-01-12,2023-01-16,130.73,133.41,135.94,0.02,2.07,4.01,-0.54,positive,positive,positive,Monday
1301,2023-01-09,9:30:00 AM,AMD,ML/,998,4.78,A,ITM,PUT,70.0,66.00,477041,2023-01-20,WHITE,0.49,11,F,STOCK,Information Technology,F,F,103126000000,61125,2023-01-10,2023-01-12,2023-01-16,68.05,70.80,71.59,3.11,7.27,8.47,6.06,positive,positive,positive,Monday


## Check Stats and Corresponding Flow

In [5]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns
summary_stats = sector_data.describe()
summary_stats

Unnamed: 0,Volume,Price,Strike,Spot,Premium,ImpliedVolatility,Dte,MktCap,OI,1D Price,3D Price,5D Price,1D Performance,3D Performance,5D Performance,Distance
count,1303.0,1303.0,1303.0,1303.0,1303.0,1303.0,1303.0,1303.0,1303.0,1303.0,1303.0,1303.0,1303.0,1303.0,1303.0,1303.0
mean,831.721412,23.087475,146.280123,129.300384,1354432.0,0.591358,87.839601,600070600000.0,7317.810437,129.74317,133.201443,135.978358,0.588127,2.928281,5.270967,18.490629
std,1516.502288,34.497039,101.125413,91.405175,2848694.0,0.495703,144.4226,787302600000.0,12745.999311,90.893624,95.198603,96.410166,2.715707,3.990783,6.079894,49.565312
min,80.0,0.2,2.5,5.06,100000.0,0.0,4.0,0.0,0.0,5.75,6.16,6.18,-5.0,-15.17,-19.79,-90.77
25%,206.0,4.455,95.0,69.795,172736.0,0.35,11.0,46484000000.0,614.5,70.03,71.8,75.1,-0.96,0.94,2.605,-2.18
50%,398.0,9.75,130.0,122.4,351234.0,0.52,39.0,245899000000.0,2657.0,126.93,131.63,130.62,0.01,2.43,4.11,2.18
75%,845.5,28.24,165.0,150.86,1175628.0,0.665,101.0,1036536000000.0,8919.5,154.325,156.75,159.8,1.7,4.13,7.12,25.895
max,22046.0,303.4,925.0,917.8,31120900.0,4.91,892.0,2062010000000.0,100711.0,915.78,1024.31,1070.0,23.3,38.86,65.33,532.91


In [6]:
# Pull the 'Distance' column once and take its largest value
distance_col = sector_data['Distance']
max_distance = distance_col.max()

# Boolean mask keeps every row tied for that maximum
largest_value = sector_data[distance_col == max_distance]

# Render the matching row(s) as the cell output
largest_value

Unnamed: 0,CreatedDate,CreatedTime,Symbol,Type,Volume,Price,Side,Money,CallPut,Strike,Spot,Premium,ExpirationDate,Color,ImpliedVolatility,Dte,ER,StockEtf,Sector,Uoa,Weekly,MktCap,OI,1D After,3D After,5D After,1D Price,3D Price,5D Price,1D Performance,3D Performance,5D Performance,Distance,1PN,3PN,5PN,WeekDay
376,2023-01-09,2:33:10 PM,DOCU,ML/,290,302.3,B,ITM,PUT,360.0,56.88,8766700,2023-01-20,WHITE,0.0,11,F,STOCK,Information Technology,F,F,10966000000,400,2023-01-10,2023-01-12,2023-01-16,58.05,58.7,59.72,2.06,3.2,4.99,532.91,positive,positive,positive,Monday


## Visualize 5D Performance in Relation to Distance and Dte

In [7]:
import plotly.graph_objects as go

# Hover labels carry just the ticker symbol of each row
hover_text = sector_data['Symbol']

# Rows with negative '5D Performance' are drawn red; all others green
colors = ['green' if not p < 0 else 'red' for p in sector_data['5D Performance']]

# Marker appearance shared by every point
marker_style = {
    'size': 5,
    'color': colors,    # per-point colour derived from '5D Performance'
    'opacity': 0.6,     # semi-transparent so overlapping points stay visible
}

# One 3D scatter trace: Distance (x) against Dte (y) against 5D Performance (z)
trace = go.Scatter3d(
    x=sector_data['Distance'],
    y=sector_data['Dte'],
    z=sector_data['5D Performance'],
    mode='markers',
    marker=marker_style,
    hovertext=hover_text,
)

# Axis captions keyed by the scene axis they label
axis_titles = {'xaxis': 'Distance', 'yaxis': 'Dte', 'zaxis': 'Performance'}

# Figure layout: labelled 3D scene plus the overall title
layout = go.Layout(
    scene={axis: {'title': label} for axis, label in axis_titles.items()},
    title='3D Scatter Plot: Distance vs Dte vs Performance',
)

# Assemble the figure and display it interactively
fig = go.Figure(data=[trace], layout=layout)
fig.show()


## Scan for Specific Stock Flow 

In [9]:
# Ticker symbols to keep; edit this list to scan other stocks
desired_stocks = ['DOCU']

# Select the rows whose 'Symbol' is in the list, then renumber them from zero
symbol_mask = sector_data['Symbol'].isin(desired_stocks)
filtered_df = sector_data.loc[symbol_mask].reset_index(drop=True)

# Render the filtered rows as the cell output
filtered_df


Unnamed: 0,CreatedDate,CreatedTime,Symbol,Type,Volume,Price,Side,Money,CallPut,Strike,Spot,Premium,ExpirationDate,Color,ImpliedVolatility,Dte,ER,StockEtf,Sector,Uoa,Weekly,MktCap,OI,1D After,3D After,5D After,1D Price,3D Price,5D Price,1D Performance,3D Performance,5D Performance,Distance,1PN,3PN,5PN,WeekDay
0,2023-01-09,2:46:41 PM,DOCU,ML/,150,20.7,A,ITM,PUT,77.5,56.9,310500,2023-01-20,WHITE,0.99,11,F,STOCK,Information Technology,F,F,10966000000,372,2023-01-10,2023-01-12,2023-01-16,58.05,58.7,59.72,2.02,3.16,4.96,36.2,positive,positive,positive,Monday
1,2023-01-09,2:46:41 PM,DOCU,ML/,150,303.4,A,ITM,PUT,360.0,56.9,4551000,2023-01-20,MAGENTA,4.91,11,F,STOCK,Information Technology,F,F,10966000000,400,2023-01-10,2023-01-12,2023-01-16,58.05,58.7,59.72,2.02,3.16,4.96,532.69,positive,positive,positive,Monday
2,2023-01-09,2:46:41 PM,DOCU,ML/,230,243.12,A,ITM,PUT,300.0,56.9,5591750,2023-01-20,YELLOW,4.08,11,F,STOCK,Information Technology,F,F,10966000000,206,2023-01-10,2023-01-12,2023-01-16,58.05,58.7,59.72,2.02,3.16,4.96,427.24,positive,positive,positive,Monday
3,2023-01-09,2:46:41 PM,DOCU,ML/,230,23.25,A,ITM,PUT,80.0,56.9,534750,2023-01-20,WHITE,1.13,11,F,STOCK,Information Technology,F,F,10966000000,328,2023-01-10,2023-01-12,2023-01-16,58.05,58.7,59.72,2.02,3.16,4.96,40.6,positive,positive,positive,Monday
4,2023-01-09,2:37:44 PM,DOCU,ML/,500,43.05,B,ITM,PUT,100.0,56.88,2152500,2023-01-20,YELLOW,1.21,11,F,STOCK,Information Technology,F,F,10966000000,438,2023-01-10,2023-01-12,2023-01-16,58.05,58.7,59.72,2.06,3.2,4.99,75.81,positive,positive,positive,Monday
5,2023-01-09,2:37:44 PM,DOCU,ML/,1060,53.18,A,ITM,PUT,110.0,56.88,5637300,2023-01-20,WHITE,1.81,11,F,STOCK,Information Technology,F,F,10966000000,2131,2023-01-10,2023-01-12,2023-01-16,58.05,58.7,59.72,2.06,3.2,4.99,93.39,positive,positive,positive,Monday
6,2023-01-09,2:37:44 PM,DOCU,ML/,560,133.3,A,ITM,PUT,190.0,56.88,7464800,2023-01-20,YELLOW,3.27,11,F,STOCK,Information Technology,F,F,10966000000,500,2023-01-10,2023-01-12,2023-01-16,58.05,58.7,59.72,2.06,3.2,4.99,234.04,positive,positive,positive,Monday
7,2023-01-09,2:33:10 PM,DOCU,ML/,470,213.67,A,ITM,PUT,270.0,56.88,10042700,2024-01-19,YELLOW,1.3,375,F,STOCK,Information Technology,F,F,10966000000,425,2023-01-10,2023-01-12,2023-01-16,58.05,58.7,59.72,2.06,3.2,4.99,374.68,positive,positive,positive,Monday
8,2023-01-09,2:33:10 PM,DOCU,ML/,760,53.15,A,ITM,PUT,110.0,56.88,4039400,2023-01-20,WHITE,1.75,11,F,STOCK,Information Technology,F,F,10966000000,2131,2023-01-10,2023-01-12,2023-01-16,58.05,58.7,59.72,2.06,3.2,4.99,93.39,positive,positive,positive,Monday
9,2023-01-09,2:33:10 PM,DOCU,ML/,290,302.3,B,ITM,PUT,360.0,56.88,8766700,2023-01-20,WHITE,0.0,11,F,STOCK,Information Technology,F,F,10966000000,400,2023-01-10,2023-01-12,2023-01-16,58.05,58.7,59.72,2.06,3.2,4.99,532.91,positive,positive,positive,Monday
