### Imports

In [1]:
import pandas as pd
from datetime import timedelta
from datetime import datetime as dt
import datetime
import yfinance as yf
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import RandomizedSearchCV
from imblearn.over_sampling import ADASYN
from xgboost import XGBClassifier
import xgboost as xgb
import pytz
from sklearn.preprocessing import MinMaxScaler
from imblearn.over_sampling import BorderlineSMOTE
from sklearn.decomposition import PCA
from sklearn.metrics import roc_auc_score
from sklearn.feature_selection import SelectFromModel
import tensorflow as tf
from tensorflow import keras
from keras import layers, callbacks
from sklearn.utils.class_weight import compute_class_weight
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.pipeline import Pipeline
from tensorflow.keras.optimizers.legacy import Adam
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout
from tensorflow.keras.regularizers import l2
from sklearn.ensemble import VotingClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier

### Helper Functions

In [2]:
def next_four_fridays():
    # Get today's date and time in EST timezone
    est = pytz.timezone('US/Eastern')
    today = dt.now(est)
    
    # Find the next Friday
    days_until_friday = (4 - today.weekday() + 7) % 7
    next_friday = today + timedelta(days=days_until_friday)
    
    # Reset the time to 4:30 PM on the next Friday
    next_friday = next_friday.replace(hour=16, minute=30, second=0, microsecond=0)
    
    # If today is Friday and it's past 4:30 PM, consider the next week's Friday
    if today.weekday() == 4 and today.time() > next_friday.time():
        next_friday += timedelta(days=7)
    
    # Remove time from datetime for output
    next_friday = next_friday.date()
    
    # Find the Friday after the next
    following_friday = next_friday + timedelta(days=7)
    
    # Find the Friday after the following
    next_next_friday = following_friday + timedelta(days=7)
    
    # Find the Friday after the next next
    next_next_next_friday = next_next_friday + timedelta(days=7)
    
    return next_friday.strftime('%Y-%m-%d'), following_friday.strftime('%Y-%m-%d'), next_next_friday.strftime('%Y-%m-%d'), next_next_next_friday.strftime('%Y-%m-%d')

# Get the next four Fridays
friday1, friday2, friday3, friday4 = next_four_fridays()
print(f"The next four Fridays are: {friday1}, {friday2}, {friday3}, and {friday4}")

The next four Fridays are: 2023-09-15, 2023-09-22, 2023-09-29, and 2023-10-06


### Assuming Profits

In [3]:
# Initialize the variables
initial_amount = 1327  # Starting amount in dollars
roi_rate = 5

# Number of weeks for which the ROI is to be calculated
weeks = 38

# Initialize a variable to hold the current amount
current_amount = initial_amount

# Loop through each week to calculate and print the ROI
for week in range(1, weeks + 1):
    # Calculate the number of $100 chunks in the current amount
    chunks = current_amount // 100  # Use integer division to get whole chunks
    
    # Calculate the ROI for this week
    weekly_roi = chunks * (roi_rate)  # 1% of each $100 chunk is $1
    
    # Print the details for this week
    print(f"Week {week}: {current_amount} +{weekly_roi}")
    
    # Update the current amount for the next week
    current_amount += weekly_roi


Week 1: 1327 +65
Week 2: 1392 +65
Week 3: 1457 +70
Week 4: 1527 +75
Week 5: 1602 +80
Week 6: 1682 +80
Week 7: 1762 +85
Week 8: 1847 +90
Week 9: 1937 +95
Week 10: 2032 +100
Week 11: 2132 +105
Week 12: 2237 +110
Week 13: 2347 +115
Week 14: 2462 +120
Week 15: 2582 +125
Week 16: 2707 +135
Week 17: 2842 +140
Week 18: 2982 +145
Week 19: 3127 +155
Week 20: 3282 +160
Week 21: 3442 +170
Week 22: 3612 +180
Week 23: 3792 +185
Week 24: 3977 +195
Week 25: 4172 +205
Week 26: 4377 +215
Week 27: 4592 +225
Week 28: 4817 +240
Week 29: 5057 +250
Week 30: 5307 +265
Week 31: 5572 +275
Week 32: 5847 +290
Week 33: 6137 +305
Week 34: 6442 +320
Week 35: 6762 +335
Week 36: 7097 +350
Week 37: 7447 +370
Week 38: 7817 +390


# Viewing option chains

In [4]:
print(friday1)

2023-09-15


In [5]:
file_viewer = pd.read_csv('naked_puts_results.csv')
filtered_data = file_viewer[file_viewer['Expiration Date'] == friday1]

# Display all rows, no limit
pd.set_option('display.max_rows', None)
# Display all columns, no limit
pd.set_option('display.max_columns', None)

filtered_data['ROI (%)'] = filtered_data['ROI (%)'].astype(float)
# filter out rows with ROI (%) greater than 0.8
# filtered_data = filtered_data[filtered_data['ROI (%)'] > 0.8]
# filter out rows with strike price less than 6
# filtered_data = filtered_data[filtered_data['strike'] <= 7]

# sort by ROI (%) in ascending order
filtered_data = filtered_data.sort_values(by=['ROI (%)'], ascending=False)

# Show only the columns 'Expiration Date', 'ETF', 'Stock Price', 'bid', 'strike', 'ROI (%)', 'OTM (%)', 'Implied Volatility', 'volume', 'openInterest', 'highPrice', 'Original Stock Price', 'Original ROI (%)', 'Original OTM (%)', '50-day MA', '100-day MA', '200-day MA', 'RSI', 'MACD', 'VWAP', '52WeekHigh', and '52WeekLow'
filtered_data = filtered_data[['Expiration Date', 'ETF', 'Stock Price', 'bid', 'strike', 'ROI (%)', 'OTM (%)', 'Implied Volatility', 'volume', 'openInterest', 'highPrice', 'Original Stock Price', 'Original ROI (%)',
    'Original OTM (%)', '50-day MA', '100-day MA', '200-day MA', 'RSI', 'MACD', 'VWAP', '52WeekHigh', '52WeekLow']]

# Show only the top 10 rows
filtered_data.head(20)

Unnamed: 0,Expiration Date,ETF,Stock Price,bid,strike,ROI (%),OTM (%),Implied Volatility,volume,openInterest,highPrice,Original Stock Price,Original ROI (%),Original OTM (%),50-day MA,100-day MA,200-day MA,RSI,MACD,VWAP,52WeekHigh,52WeekLow
18,2023-09-15,AMC,7.18,0.67,7.0,10.58,2.51,1.84,38486.0,6893.0,0.67,7.18,10.58,2.51,36.74,43.15,48.95,7.36,-9.28,39.54,102.2,7.18
19,2023-09-15,AMPX,3.48,0.25,3.0,9.09,13.79,2.52,1023.0,985.0,0.25,3.48,9.09,13.79,6.42,7.53,7.35,16.12,-0.74,9.84,12.94,3.48
27,2023-09-15,ATNM,6.53,0.4,5.0,8.7,23.43,4.83,,1.0,0.4,6.53,8.7,23.43,6.75,7.6,9.02,57.39,-0.08,10.31,14.41,5.91
130,2023-09-15,MESO,1.51,0.1,1.5,7.14,0.66,2.34,11.0,91.0,0.1,1.51,7.14,0.66,2.83,3.2,3.25,62.81,-0.32,2.76,4.83,1.23
242,2023-09-15,WKHS,0.58,0.03,0.5,6.38,13.79,2.19,2795.0,2496.0,0.03,0.58,6.38,13.79,0.96,0.95,1.37,25.43,-0.09,1.37,3.27,0.58
80,2023-09-15,FNGR,6.59,0.35,6.0,6.19,8.95,1.84,371.0,241.0,0.35,6.59,6.19,8.95,5.01,3.46,3.13,74.4,0.34,5.46,8.95,0.62
154,2023-09-15,NVAX,8.97,0.48,8.5,5.99,5.24,1.38,400.0,310.0,0.48,8.97,5.99,5.24,8.15,7.95,9.25,64.27,0.3,11.22,33.0,5.76
17,2023-09-15,AMC,7.18,0.32,6.0,5.63,16.43,2.02,11640.0,7002.0,0.32,7.18,5.63,16.43,36.74,43.15,48.95,7.36,-9.28,39.54,102.2,7.18
227,2023-09-15,TLRY,3.02,0.16,3.0,5.63,0.66,0.98,1912.0,18060.0,0.16,3.02,5.63,0.66,2.25,2.17,2.58,63.57,0.2,2.85,4.68,1.52
199,2023-09-15,SEEL,1.02,0.05,1.0,5.26,1.96,2.69,22.0,853.0,0.05,1.02,5.26,1.96,1.29,1.1,0.95,40.38,-0.09,1.02,1.64,0.59


# Read in data
df = pd.read_csv('Data/putsDataSuccessFailed.csv')

# Define function to color rows 
def color_row(row):
    color = 'lightgreen' if row['Status'] == 'Success' else 'lightcoral'
    return ['background-color: %s' % color]*len(row)

# Filter columns to keep
columns = ['ETF', 'Original Stock Price', 'Stock Price', 'strike', 'volume',  
           'openInterest', 'highPrice', 'Original ROI (%)', 'Original OTM (%)',
           '50-day MA', '100-day MA', '200-day MA', 'RSI', 'MACD', 'VWAP',
           '52WeekHigh', '52WeekLow', 'Status']

# Take top 100 rows ordered by Original ROI (%)
df = df[columns].sort_values('Original ROI (%)', ascending=False).head(100)

# Style DataFrame 
styled_df = (df.style  
             .applymap(lambda x: 'color: black') # Set text color
             .apply(color_row, axis=1) # Set row colors
             .format('{:.2f}', subset=df.select_dtypes(include=['float64']).columns))

# Display DataFrame 
styled_df

In [6]:
# # Load your dataset
# data = pd.read_csv("Data/putsDataSuccessFailed.csv")

# # Preprocessing Steps
# # -------------------
# # 1. Fill missing values with median
# # Fill missing values with median only for numeric columns
# for col in data.select_dtypes(include=['float64']).columns:
#     data[col].fillna(data[col].median(), inplace=True)

# # 2. Label Encode 'Status' column
# label_encoder = LabelEncoder()
# data['Status'] = label_encoder.fit_transform(data['Status'])

# # 3. Feature selection
# # selected_columns = ['strike', 'volume', 'openInterest', 'Implied Volatility', 'Original Stock Price', 'Original ROI (%)', 'Original OTM (%)', 'Original Implied Volatility', '50-day MA', '100-day MA', '200-day MA', 'RSI', 'MACD', 'VWAP', 'Delta', 'Gamma', 'Theta', 'Vega', 'Rho', '52WeekHigh', '52WeekLow', 'targetHighPrice', 'targetLowPrice', 'targetMeanPrice', 'targetMedianPrice', 'recommendationMean']
# selected_columns = ['Original ROI (%)', 'Delta', 'openInterest', 'Gamma', 'Original OTM (%)', 'VWAP', 'MACD', '200-day MA', '52WeekHigh', 'volume']
# X = data[selected_columns]
# y = data['Status']

# # 4. Feature Scaling
# scaler = StandardScaler()
# X_scaled = scaler.fit_transform(X)

# # Split the data
# X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=42)

# # Apply SMOTE to the training data
# smote = SMOTE(random_state=42)
# X_train_balanced, y_train_balanced = smote.fit_resample(X_train, y_train)

# # Initialize the model
# random_forest_model = RandomForestClassifier(random_state=42)

# rf_param_grid = {
#     'n_estimators': [50, 100, 150, 200],
#     'max_depth': [None, 10, 20, 30, 40],
#     'min_samples_split': [2, 5, 10, 15],
#     'min_samples_leaf': [1, 2, 4, 8],
#     'max_features': ['auto', 'sqrt', 'log2', None],  # Add 'None' as a valid value
#     'bootstrap': [True, False],
#     'criterion': ['gini', 'entropy']
# }

# rf_grid_search = GridSearchCV(estimator=random_forest_model, param_grid=rf_param_grid,
#                               cv=3, n_jobs=-1, verbose=2, scoring='accuracy')

# rf_grid_search.fit(X_train_balanced, y_train_balanced)

# # Get the best parameters and update the model
# best_params = rf_grid_search.best_params_
# random_forest_model = RandomForestClassifier(**best_params, random_state=42)

# # Train the model with best parameters
# random_forest_model.fit(X_train_balanced, y_train_balanced)

# # Evaluate the model
# y_pred = random_forest_model.predict(X_test)
# accuracy = accuracy_score(y_test, y_pred)
# print(f"Model Accuracy with Best Parameters: {accuracy * 100:.2f}%")

# # Making Predictions on New Data
# # ------------------------------
# # Load new data
# new_data = pd.read_csv("naked_puts_results.csv")

# # Apply the same preprocessing steps
# for col in new_data.select_dtypes(include=['float64']).columns:
#     new_data[col].fillna(new_data[col].median(), inplace=True)

# new_X = new_data[selected_columns]
# new_X_scaled = scaler.transform(new_X)

# # Make predictions
# new_predictions = random_forest_model.predict(new_X_scaled)

# # Convert numerical predictions back to labels ("Success" or "Failed")
# new_predictions_labels = label_encoder.inverse_transform(new_predictions)

# # Add predictions to the new data
# new_data["Guess"] = new_predictions_labels

# # Save the new data with predictions
# new_data.to_csv("Data/new_data_with_predictions.csv", index=False)









# Load your dataset
data = pd.read_csv("Data/putsDataSuccessFailed.csv")

# Preprocessing Steps
# -------------------

# 1. Fill missing values with median for numeric columns
for col in data.select_dtypes(include=['float64']).columns:
    data[col].fillna(data[col].median(), inplace=True)

# 2. Label Encode 'Status' column
label_encoder = LabelEncoder()
data['Status'] = label_encoder.fit_transform(data['Status'])

# 3. Feature selection (You can experiment with different feature sets)
selected_columns = ['strike', 'volume', 'openInterest', 'Implied Volatility', 'Original Stock Price', 'Original ROI (%)', 'Original OTM (%)', 'Original Implied Volatility', '50-day MA', '100-day MA', '200-day MA', 'RSI', 'MACD', 'VWAP', 'Delta', 'Gamma', 'Theta', 'Vega', 'Rho', '52WeekHigh', '52WeekLow', 'targetHighPrice', 'targetLowPrice', 'targetMeanPrice', 'targetMedianPrice', 'recommendationMean']
X = data[selected_columns]
y = data['Status']

# 4. Feature Scaling
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=42, stratify=y)

# Initialize a more complex model with increased regularization
input_layer = keras.Input(shape=(X_train.shape[1],))
x = layers.Dense(256, activation='relu', kernel_regularizer=l2(0.001))(input_layer)
x = layers.BatchNormalization()(x)
x = layers.Dropout(0.5)(x)
x = layers.Dense(128, activation='relu', kernel_regularizer=l2(0.001))(x)
x = layers.BatchNormalization()(x)
x = layers.Dropout(0.5)(x)
x = layers.Dense(64, activation='relu', kernel_regularizer=l2(0.001))(x)
x = layers.BatchNormalization()(x)
x = layers.Dropout(0.5)(x)
x = layers.Dense(32, activation='relu', kernel_regularizer=l2(0.001))(x)
x = layers.BatchNormalization()(x)
x = layers.Dropout(0.5)(x)
output_layer = layers.Dense(1, activation='sigmoid')(x)

model = keras.Model(inputs=input_layer, outputs=output_layer)

# Compile the model with custom optimizer and learning rate
custom_optimizer = Adam(learning_rate=0.001)  # Experiment with the learning rate
model.compile(optimizer=Adam(learning_rate=0.001), 
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Define a learning rate scheduler
lr_scheduler = callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,       # Reduce learning rate by half
    patience=5,       # Wait for 5 epochs with no improvement
    min_lr=1e-7,      # Minimum learning rate
    verbose=1
)

# Define early stopping
early_stopping = callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,      # Stop if no improvement for 10 epochs
    restore_best_weights=True
)

# Implement k-fold cross-validation for more robust training
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
accuracies = []

for train_idx, val_idx in kfold.split(X_train, y_train):
    X_train_fold, X_val_fold = X_train[train_idx], X_train[val_idx]
    y_train_fold, y_val_fold = y_train.iloc[train_idx], y_train.iloc[val_idx]
    
    # Apply SMOTE to the training data for each fold
    smote = SMOTE(random_state=42)
    X_train_fold_balanced, y_train_fold_balanced = smote.fit_resample(X_train_fold, y_train_fold)

    # Calculate class weights for each fold
    class_weights_fold = compute_class_weight('balanced', classes=np.unique(y_train_fold_balanced), y=y_train_fold_balanced.ravel())
    class_weight_dict_fold = {i: weight for i, weight in enumerate(class_weights_fold)}


    # Train the model for this fold
    history = model.fit(X_train_fold_balanced, y_train_fold_balanced,
                        epochs=100,
                        batch_size=128,  # Increased batch size for stability
                        validation_data=(X_val_fold, y_val_fold),
                        callbacks=[lr_scheduler, early_stopping],
                        class_weight=class_weight_dict_fold,
                        verbose=0)  # Suppress verbose output

    # Evaluate the model on the validation fold
    y_val_pred_prob = model.predict(X_val_fold)
    y_val_pred_binary = (y_val_pred_prob > 0.5).astype(int)
    val_accuracy = accuracy_score(y_val_fold, y_val_pred_binary)
    accuracies.append(val_accuracy)

# Calculate the mean accuracy over all folds
mean_accuracy = np.mean(accuracies)
print(f"Mean Validation Accuracy: {mean_accuracy * 100:.2f}%")

# Retrain the model on the full training set
smote = SMOTE(random_state=42)
X_train_balanced, y_train_balanced = smote.fit_resample(X_train, y_train)

# Calculate class weights for the full training set
class_weights = compute_class_weight('balanced', classes=np.unique(y_train_balanced), y=y_train_balanced.ravel())
class_weight_dict = {i: weight for i, weight in enumerate(class_weights)}

# Train the final model
history = model.fit(X_train_balanced, y_train_balanced,
                    epochs=100,
                    batch_size=128,
                    validation_split=0.2,
                    callbacks=[lr_scheduler, early_stopping],
                    class_weight=class_weight_dict)

# Evaluate the model on the test set
y_test_pred_prob = model.predict(X_test)
y_test_pred_binary = (y_test_pred_prob > 0.5).astype(int)
test_accuracy = accuracy_score(y_test, y_test_pred_binary)
print(f"Test Set Accuracy: {test_accuracy * 100:.2f}%")

# Print classification report for the test set
print(classification_report(y_test, y_test_pred_binary, target_names=['Failed', 'Success']))

# Making Predictions on New Data
# ------------------------------
# Load new data
new_data = pd.read_csv("naked_puts_results.csv")

# Apply the same preprocessing steps
for col in new_data.select_dtypes(include=['float64']).columns:
    new_data[col].fillna(new_data[col].median(), inplace=True)

new_X = new_data[selected_columns]
new_X_scaled = scaler.transform(new_X)

# Make predictions
new_predictions = model.predict(new_X_scaled)

# Convert numerical predictions to binary labels ("Success" or "Failed") using a threshold of 0.5
new_predictions_binary = (new_predictions > 0.5).astype(int)

# Convert binary labels to original labels using the label encoder
new_predictions_labels = label_encoder.inverse_transform(new_predictions_binary)

# Add predictions to the new data
new_data["Guess"] = new_predictions_labels

# Save the new data with predictions
new_data.to_csv("Data/new_data_with_predictions.csv", index=False)

  return dispatch_target(*args, **kwargs)



Epoch 15: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.

Epoch 21: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.

Epoch 31: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.

Epoch 36: ReduceLROnPlateau reducing learning rate to 6.25000029685907e-05.

Epoch 43: ReduceLROnPlateau reducing learning rate to 3.125000148429535e-05.

Epoch 48: ReduceLROnPlateau reducing learning rate to 1.5625000742147677e-05.

Epoch 53: ReduceLROnPlateau reducing learning rate to 7.812500371073838e-06.

Epoch 58: ReduceLROnPlateau reducing learning rate to 3.906250185536919e-06.

Epoch 63: ReduceLROnPlateau reducing learning rate to 1.9531250927684596e-06.

Epoch 68: ReduceLROnPlateau reducing learning rate to 9.765625463842298e-07.

Epoch 73: ReduceLROnPlateau reducing learning rate to 4.882812731921149e-07.

Epoch 78: ReduceLROnPlateau reducing learning rate to 2.4414063659605745e-07.

Epoch 83: ReduceLROnPlateau reducing learning rate to 1.22070

  y = column_or_1d(y, warn=True)


In [7]:
# Read in data 
df = pd.read_csv('Data/new_data_with_predictions.csv')

# Define row color function
def color_row(row):
    color = 'lightgreen' if row['Guess'] == 'Success' else 'lightcoral'
    return ['background-color: %s' % color]*len(row)

# Sort by Original ROI column  
df = df.sort_values(by='Original ROI (%)', ascending=False)  

# Filter to top 300 rows
df = df.head(300)

# Filter columns to show
columns = ['Expiration Date', 'ETF', 'Stock Price', 'bid', 'strike', 
           'ROI (%)', 'OTM (%)', 'Guess']
df = df[columns]

# Apply cell and row styling 
styled_df = (df.style
                .applymap(lambda x: 'color: black') # Black text
                .apply(color_row, axis=1) # Color rows
                .format('{:.2f}', subset=df.select_dtypes(include=['float64']).columns)) 

# Display styled DataFrame
styled_df

Unnamed: 0,Expiration Date,ETF,Stock Price,bid,strike,ROI (%),OTM (%),Guess
18,2023-09-15,AMC,7.18,0.67,7.0,10.58,2.51,Failed
19,2023-09-15,AMPX,3.48,0.25,3.0,9.09,13.79,Failed
27,2023-09-15,ATNM,6.53,0.4,5.0,8.7,23.43,Failed
130,2023-09-15,MESO,1.51,0.1,1.5,7.14,0.66,Failed
242,2023-09-15,WKHS,0.58,0.03,0.5,6.38,13.79,Success
80,2023-09-15,FNGR,6.59,0.35,6.0,6.19,8.95,Failed
154,2023-09-15,NVAX,8.97,0.48,8.5,5.99,5.24,Failed
17,2023-09-15,AMC,7.18,0.32,6.0,5.63,16.43,Failed
227,2023-09-15,TLRY,3.02,0.16,3.0,5.63,0.66,Success
199,2023-09-15,SEEL,1.02,0.05,1.0,5.26,1.96,Failed
