### Imports

In [2]:
import pandas as pd
from datetime import timedelta
from datetime import datetime as dt
import datetime
import yfinance as yf
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import RandomizedSearchCV
from imblearn.over_sampling import ADASYN
from xgboost import XGBClassifier
import xgboost as xgb
import pytz
from sklearn.preprocessing import MinMaxScaler
from imblearn.over_sampling import BorderlineSMOTE
from sklearn.decomposition import PCA
from sklearn.metrics import roc_auc_score
from sklearn.feature_selection import SelectFromModel
import tensorflow as tf
from tensorflow import keras
from keras import layers, callbacks
from sklearn.utils.class_weight import compute_class_weight
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.pipeline import Pipeline
from tensorflow.keras.optimizers.legacy import Adam
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout
from tensorflow.keras.regularizers import l2
from sklearn.ensemble import VotingClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from keras.models import Sequential
import keras_tuner as kt
from kerastuner.tuners import RandomSearch
import joblib
import itertools
import lightgbm as lgb

### Helper Functions

In [3]:
def next_four_fridays():
    # Get today's date and time in EST timezone
    est = pytz.timezone('US/Eastern')
    today = dt.now(est)
    
    # Find the next Friday
    days_until_friday = (4 - today.weekday() + 7) % 7
    next_friday = today + timedelta(days=days_until_friday)
    
    # Reset the time to 4:30 PM on the next Friday
    next_friday = next_friday.replace(hour=16, minute=30, second=0, microsecond=0)
    
    # If today is Friday and it's past 4:30 PM, consider the next week's Friday
    if today.weekday() == 4 and today.time() > next_friday.time():
        next_friday += timedelta(days=7)
    
    # Remove time from datetime for output
    next_friday = next_friday.date()
    
    # Find the Friday after the next
    following_friday = next_friday + timedelta(days=7)
    
    # Find the Friday after the following
    next_next_friday = following_friday + timedelta(days=7)
    
    # Find the Friday after the next next
    next_next_next_friday = next_next_friday + timedelta(days=7)
    
    return next_friday.strftime('%Y-%m-%d'), following_friday.strftime('%Y-%m-%d'), next_next_friday.strftime('%Y-%m-%d'), next_next_next_friday.strftime('%Y-%m-%d')

# Get the next four Fridays
friday1, friday2, friday3, friday4 = next_four_fridays()
print(f"The next four Fridays are: {friday1}, {friday2}, {friday3}, and {friday4}")

The next four Fridays are: 2023-09-29, 2023-10-06, 2023-10-13, and 2023-10-20


### Assuming Profits

In [4]:
# Initialize the variables
initial_amount = 1327  # Starting amount in dollars
roi_rate = 5

# Number of weeks for which the ROI is to be calculated
weeks = 38

# Initialize a variable to hold the current amount
current_amount = initial_amount

# Loop through each week to calculate and print the ROI
for week in range(1, weeks + 1):
    # Calculate the number of $100 chunks in the current amount
    chunks = current_amount // 100  # Use integer division to get whole chunks
    
    # Calculate the ROI for this week
    weekly_roi = chunks * (roi_rate)  # 1% of each $100 chunk is $1
    
    # Print the details for this week
    print(f"Week {week}: {current_amount} +{weekly_roi}")
    
    # Update the current amount for the next week
    current_amount += weekly_roi


Week 1: 1327 +65
Week 2: 1392 +65
Week 3: 1457 +70
Week 4: 1527 +75
Week 5: 1602 +80
Week 6: 1682 +80
Week 7: 1762 +85
Week 8: 1847 +90
Week 9: 1937 +95
Week 10: 2032 +100
Week 11: 2132 +105
Week 12: 2237 +110
Week 13: 2347 +115
Week 14: 2462 +120
Week 15: 2582 +125
Week 16: 2707 +135
Week 17: 2842 +140
Week 18: 2982 +145
Week 19: 3127 +155
Week 20: 3282 +160
Week 21: 3442 +170
Week 22: 3612 +180
Week 23: 3792 +185
Week 24: 3977 +195
Week 25: 4172 +205
Week 26: 4377 +215
Week 27: 4592 +225
Week 28: 4817 +240
Week 29: 5057 +250
Week 30: 5307 +265
Week 31: 5572 +275
Week 32: 5847 +290
Week 33: 6137 +305
Week 34: 6442 +320
Week 35: 6762 +335
Week 36: 7097 +350
Week 37: 7447 +370
Week 38: 7817 +390


# Viewing option chains

In [5]:
print(friday1)

2023-09-29


In [6]:
file_viewer = pd.read_csv('naked_puts_results.csv')
filtered_data = file_viewer[file_viewer['Expiration Date'] == friday1]

# Display all rows, no limit
pd.set_option('display.max_rows', None)
# Display all columns, no limit
pd.set_option('display.max_columns', None)

filtered_data['ROI (%)'] = filtered_data['ROI (%)'].astype(float)
# filter out rows with ROI (%) greater than 0.8
# filtered_data = filtered_data[filtered_data['ROI (%)'] > 0.8]
# filter out rows with strike price less than 6
# filtered_data = filtered_data[filtered_data['strike'] <= 7]

# sort by ROI (%) in ascending order
filtered_data = filtered_data.sort_values(by=['ROI (%)'], ascending=False)

# Show only the columns 'Expiration Date', 'ETF', 'Stock Price', 'bid', 'strike', 'ROI (%)', 'OTM (%)', 'Implied Volatility', 'volume', 'openInterest', 'highPrice', 'Original Stock Price', 'Original ROI (%)', 'Original OTM (%)', '50-day MA', '100-day MA', '200-day MA', 'RSI', 'MACD', 'VWAP', '52WeekHigh', and '52WeekLow'
filtered_data = filtered_data[['Expiration Date', 'ETF', 'Stock Price', 'bid', 'strike', 'ROI (%)', 'OTM (%)', 'Implied Volatility', 'volume', 'openInterest', 'highPrice', 'Original Stock Price', 'Original ROI (%)',
    'Original OTM (%)', '50-day MA', '100-day MA', '200-day MA', 'RSI', 'MACD', 'VWAP', '52WeekHigh', '52WeekLow']]

# Show only the top 10 rows
filtered_data.head(20)

Unnamed: 0,Expiration Date,ETF,Stock Price,bid,strike,ROI (%),OTM (%),Implied Volatility,volume,openInterest,highPrice,Original Stock Price,Original ROI (%),Original OTM (%),50-day MA,100-day MA,200-day MA,RSI,MACD,VWAP,52WeekHigh,52WeekLow
107,2023-09-29,WE,3.1,0.3,3.0,11.11,3.23,2.08,7198.0,3593.0,0.3,3.1,11.11,3.23,6.7,8.6,29.15,41.31,-0.78,12.76,124.8,2.65
40,2023-09-29,GRWG,3.01,0.15,3.0,5.26,0.33,1.02,48.0,140.0,0.15,3.01,5.26,0.33,3.27,3.47,3.84,42.86,0.02,4.44,8.08,2.61
69,2023-09-29,NVAX,7.13,0.34,7.0,5.11,1.82,0.99,354.0,1422.0,0.34,7.13,5.11,1.82,8.02,7.89,8.78,42.01,-0.26,10.54,25.04,5.76
3,2023-09-29,AAOI,9.58,0.45,9.5,4.97,0.84,1.0,3.0,15.0,0.45,9.58,4.97,0.84,11.03,7.37,4.84,18.04,-0.68,8.95,15.48,1.64
9,2023-09-29,AMC,7.62,0.35,7.5,4.9,1.57,0.94,4249.0,7986.0,0.35,7.62,4.9,1.57,29.63,38.75,45.57,22.65,-6.12,32.4,81.7,7.18
22,2023-09-29,CHPT,5.02,0.21,5.0,4.38,0.4,0.78,462.0,17781.0,0.21,5.02,4.38,0.4,7.23,7.95,9.15,12.65,-0.63,9.77,16.31,5.02
58,2023-09-29,LUMN,1.51,0.06,1.5,4.17,0.66,0.78,136.0,3315.0,0.06,1.51,4.17,0.66,1.65,1.89,2.92,44.78,-0.04,3.65,8.13,1.37
46,2023-09-29,HE,12.59,0.5,12.5,4.17,0.71,0.77,92.0,602.0,0.5,12.59,4.17,0.71,23.13,29.52,34.15,25.88,-2.37,17.54,41.6,9.66
17,2023-09-29,BB,5.16,0.2,5.0,4.17,3.1,0.93,366.0,1660.0,0.2,5.16,4.17,3.1,4.91,5.0,4.51,41.06,0.08,4.53,5.6,3.18
71,2023-09-29,OPEN,2.57,0.09,2.5,3.73,2.72,0.92,250.0,548.0,0.09,2.57,3.73,2.72,3.75,3.29,2.45,5.41,-0.24,2.39,5.27,0.97


In [7]:
# Read in data
df = pd.read_csv('Data/putsDataSuccessFailed.csv')

# Define function to color rows 
def color_row(row):
    color = 'lightgreen' if row['Status'] == 'Success' else 'lightcoral'
    return ['background-color: %s' % color]*len(row)

# Filter columns to keep
columns = ['ETF', 'Original Stock Price', 'Stock Price', 'strike', 'volume',  
           'openInterest', 'highPrice', 'Original ROI (%)', 'Original OTM (%)',
           '50-day MA', '100-day MA', '200-day MA', 'RSI', 'MACD', 'VWAP',
           '52WeekHigh', '52WeekLow', 'Status']

# Take top 100 rows ordered by Original ROI (%)
df = df[columns].sort_values('Original ROI (%)', ascending=False).head(100)

# Style DataFrame 
styled_df = (df.style  
             .applymap(lambda x: 'color: black') # Set text color
             .apply(color_row, axis=1) # Set row colors
             .format('{:.2f}', subset=df.select_dtypes(include=['float64']).columns))

# Display DataFrame 
styled_df

Unnamed: 0,ETF,Original Stock Price,Stock Price,strike,volume,openInterest,highPrice,Original ROI (%),Original OTM (%),50-day MA,100-day MA,200-day MA,RSI,MACD,VWAP,52WeekHigh,52WeekLow,Status
112,WE,4.31,3.1,1.0,1.0,27.0,0.75,300.0,76.8,7.38,9.26,31.8,45.2,-0.84,13.41,145.2,2.65,Success
139,AMC,7.18,8.36,7.0,38486.0,6893.0,0.67,10.58,2.51,36.74,43.15,48.95,7.36,-9.28,39.54,102.2,7.18,Success
140,AMPX,3.48,3.95,3.0,1023.0,985.0,0.25,9.09,13.79,6.42,7.53,7.35,16.12,-0.74,9.84,12.94,3.48,Success
148,ATNM,6.53,6.58,5.0,,1.0,0.4,8.7,23.43,6.75,7.6,9.02,57.39,-0.08,10.31,14.41,5.91,Success
251,MESO,1.51,1.42,1.5,11.0,91.0,0.1,7.14,0.66,2.83,3.2,3.25,62.81,-0.32,2.76,4.83,1.23,Failed
92,SFIX,3.23,3.12,3.0,750.0,740.0,0.19,6.76,7.12,4.14,3.88,4.12,38.08,-0.18,4.12,5.6,2.77,Success
90,RUM,6.79,4.86,6.5,930.0,131.0,0.4,6.56,4.27,7.85,8.66,8.68,40.14,-0.27,9.93,16.81,5.95,Failed
62,NKLA,1.19,1.25,1.0,29482.0,15265.0,0.06,6.38,15.97,1.81,1.36,1.71,43.64,-0.22,1.78,5.12,0.54,Success
362,WKHS,0.58,0.6,0.5,2795.0,2496.0,0.03,6.38,13.79,0.96,0.95,1.37,25.43,-0.09,1.37,3.27,0.58,Success
387,CGC,0.55,0.93,0.5,4595.0,3721.0,0.03,6.38,9.09,0.45,0.75,1.66,69.0,0.01,1.6,4.32,0.37,Success


In [16]:
# Load the CSV file into a DataFrame
df = pd.read_csv('Data/putsDataSuccessFailed.csv')

# Data Preprocessing
# Drop any columns that are not relevant for prediction
df.drop(['contractSymbol', 'Expiration Date', 'ETF'], axis=1, inplace=True)

# Encode categorical variables using one-hot encoding
df = pd.get_dummies(df, columns=['recommendationKey'], drop_first=True)

# Handle missing values with imputation
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(strategy='mean')
X = df.drop('Status', axis=1)
y = df['Status']

# Encode the target variable 'Status'
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

X_imputed = imputer.fit_transform(X)

# Address class imbalance with SMOTE
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_imputed, y_encoded)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

# Model Selection and Hyperparameter Tuning (LightGBM)
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [3, 4, 5],
    'learning_rate': [0.05, 0.1, 0.2]
}

# Use GridSearchCV for hyperparameter tuning
lgb_classifier = lgb.LGBMClassifier(random_state=42)
grid_search = GridSearchCV(lgb_classifier, param_grid, cv=10, scoring='accuracy', n_jobs=-1)
grid_search.fit(X_train, y_train)

best_lgb_model = grid_search.best_estimator_

# Model Evaluation
y_pred = best_lgb_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f'Best LightGBM Model: {best_lgb_model}')
print(f'Accuracy: {accuracy}')
print(report)


import joblib

# Dump the label_encoder to a file
label_encoder_filename = "label_encoder.joblib"
joblib.dump(label_encoder, label_encoder_filename)

# Save the trained LightGBM model and scaler to separate files
model_filename = "model.joblib"
scaler_filename = "scaler.joblib"

joblib.dump(best_lgb_model, model_filename)
joblib.dump(imputer, scaler_filename)

# Making Predictions on New Data
# Load the new data
new_data = pd.read_csv("naked_puts_results.csv")

# Make a copy of the new data for predictions
new_data_with_predictions = new_data.copy()

# Apply the same preprocessing steps as before
# Drop columns not relevant for prediction
new_data.drop(['contractSymbol', 'Expiration Date', 'ETF'], axis=1, inplace=True)

# Encode categorical variables using one-hot encoding
new_data = pd.get_dummies(new_data, columns=['recommendationKey'], drop_first=True)

# Handle missing values with imputation
imputer = SimpleImputer(strategy='mean')
new_X_imputed = imputer.fit_transform(new_data)  # No need to drop 'Status'

# Load the label encoder
label_encoder = joblib.load("label_encoder.joblib")

# Load the trained LightGBM model
best_lgb_model = joblib.load("model.joblib")

# Make predictions on the new data
new_predictions = best_lgb_model.predict(new_X_imputed)

# Convert numerical predictions back to labels ("Success" or "Failed")
new_predictions_labels = label_encoder.inverse_transform(new_predictions)

# Add predictions to the new data
new_data_with_predictions["Guess"] = new_predictions_labels

# Save the new data with predictions, including 'contractSymbol', 'Expiration Date', and 'ETF'
new_data_with_predictions.to_csv("Data/new_data_with_predictions.csv", index=False)

[LightGBM] [Info] Number of positive: 206, number of negative: 204
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001410 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3105
[LightGBM] [Info] Number of data points in the train set: 410, number of used features: 35
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.502439 -> initscore=0.009756
[LightGBM] [Info] Start training from score 0.009756
[LightGBM] [Info] Number of positive: 206, number of negative: 204
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000899 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3054
[LightGBM] [Info] Number of data points in the train set: 410, number of used features: 34
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.502439 -> initscore=0.009756
[LightGBM] [Info] Start training from score 0.009756
[LightGBM] [Info] Number of 

In [20]:
# Read in data 
df = pd.read_csv('Data/new_data_with_predictions.csv')

# Sort by Original ROI column  
df = df.sort_values(by='Original ROI (%)', ascending=False)  

# Filter to top 300 rows
df = df.head(300)

# Filter columns to show
columns = ['Expiration Date', 'ETF', 'Stock Price', 'bid', 'strike', 
           'ROI (%)', 'OTM (%)', 'recommendationKey', 'targetLowPrice', 'targetMeanPrice', 'targetHighPrice', 'Guess']
df = df[columns]

# Define row color function
def color_row(row):
    color = 'lightgreen' if row['Guess'] == 'Success' else 'lightcoral'
    return ['background-color: %s' % color]*len(row)

# Apply cell and row styling
styled_df = (df.style
                .applymap(lambda x: 'color: black') # Black text
                .apply(color_row, axis=1) # Color rows
                .format('{:.2f}', subset=df.select_dtypes(include=['float64']).columns)) 

# Display styled DataFrame
styled_df

Unnamed: 0,Expiration Date,ETF,Stock Price,bid,strike,ROI (%),OTM (%),recommendationKey,targetLowPrice,targetMeanPrice,targetHighPrice,Guess
107,2023-09-29,WE,3.1,0.3,3.0,11.11,3.23,hold,8.0,10.0,12.0,Success
40,2023-09-29,GRWG,3.01,0.15,3.0,5.26,0.33,buy,3.5,4.96,8.0,Failed
69,2023-09-29,NVAX,7.13,0.34,7.0,5.11,1.82,hold,5.0,20.6,38.0,Failed
3,2023-09-29,AAOI,9.58,0.45,9.5,4.97,0.84,buy,11.5,15.75,20.0,Failed
9,2023-09-29,AMC,7.62,0.35,7.5,4.9,1.57,underperform,4.41,17.72,39.71,Success
22,2023-09-29,CHPT,5.02,0.21,5.0,4.38,0.4,buy,7.0,10.88,17.0,Failed
58,2023-09-29,LUMN,1.51,0.06,1.5,4.17,0.66,underperform,1.5,2.45,4.0,Failed
46,2023-09-29,HE,12.59,0.5,12.5,4.17,0.71,hold,8.0,9.17,11.0,Success
17,2023-09-29,BB,5.16,0.2,5.0,4.17,3.1,hold,4.5,5.63,7.0,Success
71,2023-09-29,OPEN,2.57,0.09,2.5,3.73,2.72,hold,1.0,3.0,5.0,Success
