### Imports

In [1]:
import pandas as pd
from datetime import timedelta
from datetime import datetime as dt
import datetime
import yfinance as yf
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import RandomizedSearchCV
from imblearn.over_sampling import ADASYN
from xgboost import XGBClassifier
import xgboost as xgb
import pytz
from sklearn.preprocessing import MinMaxScaler
from imblearn.over_sampling import BorderlineSMOTE
from sklearn.decomposition import PCA
from sklearn.metrics import roc_auc_score
from sklearn.feature_selection import SelectFromModel
import tensorflow as tf
from tensorflow import keras
from keras import layers, callbacks
from sklearn.utils.class_weight import compute_class_weight
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.pipeline import Pipeline
from tensorflow.keras.optimizers.legacy import Adam
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout
from tensorflow.keras.regularizers import l2
from sklearn.ensemble import VotingClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from keras.models import Sequential
import keras_tuner as kt
from kerastuner.tuners import RandomSearch
import joblib
import itertools
import lightgbm as lgb

  from kerastuner.tuners import RandomSearch


### Helper Functions

In [2]:
def next_four_fridays():
    # Get today's date and time in EST timezone
    est = pytz.timezone('US/Eastern')
    today = dt.now(est)
    
    # Find the next Friday
    days_until_friday = (4 - today.weekday() + 7) % 7
    next_friday = today + timedelta(days=days_until_friday)
    
    # Reset the time to 4:30 PM on the next Friday
    next_friday = next_friday.replace(hour=16, minute=30, second=0, microsecond=0)
    
    # If today is Friday and it's past 4:30 PM, consider the next week's Friday
    if today.weekday() == 4 and today.time() > next_friday.time():
        next_friday += timedelta(days=7)
    
    # Remove time from datetime for output
    next_friday = next_friday.date()
    
    # Find the Friday after the next
    following_friday = next_friday + timedelta(days=7)
    
    # Find the Friday after the following
    next_next_friday = following_friday + timedelta(days=7)
    
    # Find the Friday after the next next
    next_next_next_friday = next_next_friday + timedelta(days=7)
    
    return next_friday.strftime('%Y-%m-%d'), following_friday.strftime('%Y-%m-%d'), next_next_friday.strftime('%Y-%m-%d'), next_next_next_friday.strftime('%Y-%m-%d')

# Get the next four Fridays
friday1, friday2, friday3, friday4 = next_four_fridays()
print(f"The next four Fridays are: {friday1}, {friday2}, {friday3}, and {friday4}")

The next four Fridays are: 2023-10-27, 2023-11-03, 2023-11-10, and 2023-11-17


# Viewing option chains

In [4]:
print(friday1)

2023-10-27


In [5]:
file_viewer = pd.read_csv('Data/naked_puts_results.csv')
filtered_data = file_viewer[file_viewer['Expiration Date'] == friday1]

# Display all rows, no limit
pd.set_option('display.max_rows', None)
# Display all columns, no limit
pd.set_option('display.max_columns', None)

filtered_data['ROI (%)'] = filtered_data['ROI (%)'].astype(float)
# filter out rows with ROI (%) greater than 0.8
# filtered_data = filtered_data[filtered_data['ROI (%)'] > 0.8]
# filter out rows with strike price less than 6
# filtered_data = filtered_data[filtered_data['strike'] <= 7]

# sort by ROI (%) in ascending order
filtered_data = filtered_data.sort_values(by=['ROI (%)'], ascending=False)

# Show only the columns 'Expiration Date', 'ETF', 'Stock Price', 'bid', 'strike', 'ROI (%)', 'OTM (%)', 'Implied Volatility', 'volume', 'openInterest', 'highPrice', 'Original Stock Price', 'Original ROI (%)', 'Original OTM (%)', '50-day MA', '100-day MA', '200-day MA', 'RSI', 'MACD', 'VWAP', '52WeekHigh', and '52WeekLow'
filtered_data = filtered_data[['Expiration Date', 'ETF', 'Stock Price', 'bid', 'strike', 'ROI (%)', 'OTM (%)', 'Implied Volatility', 'volume', 'openInterest', 'highPrice', 'Original Stock Price', 'Original ROI (%)',
    'Original OTM (%)', '50-day MA', '100-day MA', '200-day MA', 'RSI', 'MACD', 'VWAP', '52WeekHigh', '52WeekLow']]

# Show only the top 10 rows
filtered_data.head(20)

Unnamed: 0,Expiration Date,ETF,Stock Price,bid,strike,ROI (%),OTM (%),Implied Volatility,volume,openInterest,highPrice,Original Stock Price,Original ROI (%),Original OTM (%),50-day MA,100-day MA,200-day MA,RSI,MACD,VWAP,52WeekHigh,52WeekLow
104,2023-10-27,SNAP,9.44,0.72,9.0,8.7,4.66,1.71,2426.0,4439.0,0.72,9.44,8.7,4.66,9.25,10.29,10.25,62.15,0.08,10.13,13.51,7.76
121,2023-10-27,WE,2.05,0.15,2.0,8.11,2.44,1.88,60.0,319.0,0.15,2.05,8.11,2.44,3.91,6.55,22.92,28.37,-0.55,10.75,122.4,2.05
21,2023-10-27,CGC,0.51,0.03,0.5,6.38,1.96,1.38,296.0,2879.0,0.03,0.51,6.38,1.96,0.72,0.63,1.28,27.39,-0.06,1.27,4.32,0.37
103,2023-10-27,SNAP,9.44,0.48,8.5,5.99,9.96,1.66,1646.0,13223.0,0.48,9.44,5.99,9.96,9.25,10.29,10.25,62.15,0.08,10.13,13.51,7.76
62,2023-10-27,NKLA,1.02,0.05,1.0,5.26,1.96,1.09,1013.0,23774.0,0.05,1.02,5.26,1.96,1.35,1.5,1.58,24.34,-0.09,1.66,3.79,0.54
60,2023-10-27,MPW,4.7,0.22,4.5,5.14,4.26,1.16,3912.0,4226.0,0.22,4.7,5.14,4.26,6.07,7.61,8.39,38.72,-0.39,8.77,12.6,4.64
12,2023-10-27,AMC,9.08,0.44,9.0,5.14,0.88,0.91,7266.0,5760.0,0.44,9.08,5.14,0.88,14.02,30.06,41.37,56.73,-1.14,25.83,81.7,7.18
61,2023-10-27,MVIS,2.01,0.09,2.0,4.71,0.5,0.97,100.0,273.0,0.09,2.01,4.71,0.5,2.33,3.36,2.96,45.05,-0.09,3.81,7.65,1.82
88,2023-10-27,QS,5.84,0.24,5.5,4.56,5.82,1.17,278.0,1239.0,0.24,5.84,4.56,5.82,6.7,7.63,7.65,40.0,-0.22,7.99,13.31,5.18
111,2023-10-27,SPWR,5.09,0.2,5.0,4.17,1.77,0.87,169.0,180.0,0.2,5.09,4.17,1.77,6.55,8.38,11.32,38.33,-0.35,12.57,24.49,5.09


In [6]:
# Read in data
df = pd.read_csv('Data/putsDataSuccessFailed.csv')

# Define function to color rows 
def color_row(row):
    color = 'lightgreen' if row['Status'] == 'Success' else 'lightcoral'
    return ['background-color: %s' % color]*len(row)

# Filter columns to keep
columns = ['ETF', 'Original Stock Price', 'Stock Price', 'strike', 'volume',  
           'openInterest', 'highPrice', 'Original ROI (%)', 'Original OTM (%)',
           '50-day MA', '100-day MA', '200-day MA', 'RSI', 'MACD', 'VWAP',
           '52WeekHigh', '52WeekLow', 'Status']

# Take top 100 rows ordered by Original ROI (%)
df = df[columns].sort_values('Original ROI (%)', ascending=False).head(100)

# Style DataFrame 
styled_df = (df.style  
             .applymap(lambda x: 'color: black') # Set text color
             .apply(color_row, axis=1) # Set row colors
             .format('{:.2f}', subset=df.select_dtypes(include=['float64']).columns))

# Display DataFrame 
styled_df

Unnamed: 0,ETF,Original Stock Price,Stock Price,strike,volume,openInterest,highPrice,Original ROI (%),Original OTM (%),50-day MA,100-day MA,200-day MA,RSI,MACD,VWAP,52WeekHigh,52WeekLow,Status
10,AGRI,2.97,2.4,2.0,1.0,0.0,1.7,566.67,32.66,5.54,8.39,26.74,30.34,-0.59,8.49,80.0,2.97,Success
118,HLGN,2.57,2.18,2.5,10.0,61.0,2.05,455.56,2.72,2.47,1.36,0.87,32.64,-0.08,0.45,8.47,0.18,Failed
254,VCSA,9.32,7.45,5.0,1.0,0.0,4.0,400.0,46.35,10.61,12.34,18.0,52.48,-0.29,27.4,78.6,8.12,Success
168,OTMO,3.92,,2.5,30.0,30.0,2.0,400.0,36.22,3.77,2.06,1.27,38.86,0.11,0.46,4.52,0.23,Failed
745,WE,4.31,3.1,1.0,1.0,27.0,0.75,300.0,76.8,7.38,9.26,31.8,45.2,-0.84,13.41,145.2,2.65,Success
83,FBIO,2.9,1.86,2.5,10.0,20.0,1.85,284.62,13.79,5.68,7.09,9.27,28.81,-0.57,9.32,16.5,2.9,Failed
7,AGLE,12.98,13.25,2.5,2.0,53.0,1.8,257.14,80.74,12.65,10.89,9.77,57.58,-0.15,14.17,38.5,2.65,Success
153,NLTX,3.61,3.28,2.5,,3.0,1.55,163.16,30.75,3.7,3.8,3.58,45.21,0.04,3.26,4.62,2.01,Success
262,WISH,4.26,3.89,1.0,2.0,3.0,0.61,156.41,76.53,4.92,6.33,10.11,39.38,-0.2,12.26,28.35,4.12,Success
12,AKA,5.2,7.83,2.5,1.0,11.0,1.45,138.1,51.92,5.54,5.4,8.44,45.05,-0.26,7.26,24.48,3.85,Success


In [7]:
# Load the CSV file into a DataFrame
df = pd.read_csv('Data/putsDataSuccessFailed.csv')

# Data Preprocessing
# Drop any columns that are not relevant for prediction
df.drop(['contractSymbol', 'Expiration Date', 'ETF'], axis=1, inplace=True)

# Encode categorical variables using one-hot encoding
df = pd.get_dummies(df, columns=['recommendationKey'], drop_first=True)

# Handle missing values with imputation
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(strategy='mean')
X = df.drop('Status', axis=1)
y = df['Status']

# Encode the target variable 'Status'
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

X_imputed = imputer.fit_transform(X)

# Address class imbalance with SMOTE
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_imputed, y_encoded)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

# Model Selection and Hyperparameter Tuning (LightGBM)
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [3, 4, 5],
    'learning_rate': [0.05, 0.1, 0.2]
}

# Use GridSearchCV for hyperparameter tuning
lgb_classifier = lgb.LGBMClassifier(random_state=42)
grid_search = GridSearchCV(lgb_classifier, param_grid, cv=10, scoring='accuracy', n_jobs=-1)
grid_search.fit(X_train, y_train)

best_lgb_model = grid_search.best_estimator_

# Model Evaluation
y_pred = best_lgb_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f'Best LightGBM Model: {best_lgb_model}')
print(f'Accuracy: {accuracy}')
print(report)


import joblib

# Dump the label_encoder to a file
label_encoder_filename = "label_encoder.joblib"
joblib.dump(label_encoder, label_encoder_filename)

# Save the trained LightGBM model and scaler to separate files
model_filename = "model.joblib"
scaler_filename = "scaler.joblib"

joblib.dump(best_lgb_model, model_filename)
joblib.dump(imputer, scaler_filename)

# Making Predictions on New Data
# Load the new data
new_data = pd.read_csv("Data/naked_puts_results.csv")

# Make a copy of the new data for predictions
new_data_with_predictions = new_data.copy()

# Apply the same preprocessing steps as before
# Drop columns not relevant for prediction
new_data.drop(['contractSymbol', 'Expiration Date', 'ETF'], axis=1, inplace=True)

# Encode categorical variables using one-hot encoding
new_data = pd.get_dummies(new_data, columns=['recommendationKey'], drop_first=True)

# Handle missing values with imputation
imputer = SimpleImputer(strategy='mean')
new_X_imputed = imputer.fit_transform(new_data)  # No need to drop 'Status'

# Load the label encoder
label_encoder = joblib.load("label_encoder.joblib")

# Load the trained LightGBM model
best_lgb_model = joblib.load("model.joblib")

# Make predictions on the new data
new_predictions = best_lgb_model.predict(new_X_imputed)

# Convert numerical predictions back to labels ("Success" or "Failed")
new_predictions_labels = label_encoder.inverse_transform(new_predictions)

# Add predictions to the new data
new_data_with_predictions["Guess"] = new_predictions_labels

# Save the new data with predictions, including 'contractSymbol', 'Expiration Date', and 'ETF'
new_data_with_predictions.to_csv("Data/new_data_with_predictions.csv", index=False)

[LightGBM] [Info] Number of positive: 542, number of negative: 524
[LightGBM] [Info] Number of positive: 542, number of negative: 525
[LightGBM] [Info] Number of positive: 542, number of negative: 524
[LightGBM] [Info] Number of positive: 541, number of negative: 525
[LightGBM] [Info] Number of positive: 541, number of negative: 525
[LightGBM] [Info] Number of positive: 542, number of negative: 524
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002646 seconds.
You can set `force_col_wise=true` to remove the overhead.[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.004818 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.

[LightGBM] [Info] Total Bins 8137
[LightGBM] [Info] Total Bins 8109
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003108 seconds.
You can set `force_col_wise=true` to r

In [8]:
# Read in data 
df = pd.read_csv('Data/new_data_with_predictions.csv')

# Sort by Original ROI column  
df = df.sort_values(by='Original ROI (%)', ascending=False)  

# Filter to top 300 rows
df = df.head(300)

# Filter columns to show
columns = ['Expiration Date', 'ETF', 'Stock Price', 'bid', 'strike', 
           'ROI (%)', 'OTM (%)', 'recommendationKey', 'targetLowPrice', 'targetMeanPrice', 'targetHighPrice', 'Guess']
df = df[columns]

# Define row color function
def color_row(row):
    color = 'lightgreen' if row['Guess'] == 'Success' else 'lightcoral'
    return ['background-color: %s' % color]*len(row)

# Apply cell and row styling
styled_df = (df.style
                .applymap(lambda x: 'color: black') # Black text
                .apply(color_row, axis=1) # Color rows
                .format('{:.2f}', subset=df.select_dtypes(include=['float64']).columns)) 

# Display styled DataFrame
styled_df

Unnamed: 0,Expiration Date,ETF,Stock Price,bid,strike,ROI (%),OTM (%),recommendationKey,targetLowPrice,targetMeanPrice,targetHighPrice,Guess
104,2023-10-27,SNAP,9.44,0.72,9.0,8.7,4.66,hold,6.0,9.65,15.0,Success
121,2023-10-27,WE,2.05,0.15,2.0,8.11,2.44,hold,2.5,7.25,12.0,Success
21,2023-10-27,CGC,0.51,0.03,0.5,6.38,1.96,underperform,0.48,1.34,3.3,Success
103,2023-10-27,SNAP,9.44,0.48,8.5,5.99,9.96,hold,6.0,9.65,15.0,Success
62,2023-10-27,NKLA,1.02,0.05,1.0,5.26,1.96,hold,1.0,2.42,5.0,Failed
60,2023-10-27,MPW,4.7,0.22,4.5,5.14,4.26,hold,5.5,8.94,15.0,Success
12,2023-10-27,AMC,9.08,0.44,9.0,5.14,0.88,underperform,4.75,10.75,19.0,Success
61,2023-10-27,MVIS,2.01,0.09,2.0,4.71,0.5,hold,4.0,4.0,4.0,Success
88,2023-10-27,QS,5.84,0.24,5.5,4.56,5.82,hold,2.0,6.78,10.0,Success
111,2023-10-27,SPWR,5.09,0.2,5.0,4.17,1.77,hold,4.5,7.69,13.0,Success
