In [1]:
# Install data manipulation libraries
!pip install numpy pandas pandas_datareader

# Install visualization libraries
!pip install matplotlib seaborn plotly

# Install financial data libraries
!pip install yfinance yoptions

# Install scientific computing libraries
!pip install scipy scikit-learn statsmodels



In [2]:
# Data manipulation libraries
import pandas as pd
import numpy as np
from pandas_datareader import data as pdr

# Visualization libraries
import matplotlib.pyplot as plt
import seaborn as sns

# Financial data libraries
import yfinance as yf

# Date and time libraries
from datetime import datetime

# Scientific computing libraries
from scipy.stats import linregress, skew, kurtosis
from scipy.optimize import minimize
from math import pi

# Machine learning libraries
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

import pandas as pd
import numpy as np
import os
from datetime import datetime, timedelta
import yfinance as yf
from scipy.stats import kurtosis, skew

# Warnings
import warnings
# Suppress the specific FutureWarning
warnings.filterwarnings("ignore", category=FutureWarning, message="The 'unit' keyword in TimedeltaIndex construction is deprecated and will be removed in a future version. Use pd.to_timedelta instead.")

# Thoughts
How would a quant interpret the data, to determine whether to buy share, or trade these options to hedge. For example, the Kelly fraction could be used to determine the % of a portfolio to DCA into individual shares or fractional shares. Low volatility and low probability of positive returns could mean a bearish call spread relative to % return, and high volatility and very low positive returns could be a bearish call ladder opportunity. I also could be entirely wrong about those examples, you tell me.

1. Kelly Fraction
Interpretation: The Kelly Fraction is a measure of how much of a portfolio should be allocated to an investment to maximize growth over the long term, balancing risk and reward. A higher Kelly Fraction suggests higher confidence in positive returns.
Usage:
High Kelly Fraction: Indicates favorable market conditions. This could signal that it’s a good time to buy shares or to enter a bull put spread as a way to generate income, expecting prices to stay above a certain level.
Low or Negative Kelly Fraction: Suggests caution, as the expected returns do not justify the risk. In such cases, a bear call spread might be appropriate to profit from expected declines or stagnation without risking too much capital.

2. Volatility (Standard Deviation)
Interpretation: High volatility means large price swings, which can offer more opportunities for options traders but also comes with higher risk. Low volatility suggests a stable market, which may favor more conservative strategies.
Usage:
High Volatility: Ideal for strategies that benefit from large price movements, like bear call ladders or bull put ladders. These strategies can profit from significant movement in either direction and are more suitable when large price swings are expected.
Low Volatility: Indicates stability, which might be better for strategies like bull put spreads where limited movement is expected. Here, you are betting that the price will not drop significantly below a certain level.

3. Positive Return Percentage
Interpretation: A high positive return percentage suggests that the stock has been frequently increasing in value, signaling bullish sentiment. A low percentage indicates bearish sentiment or market corrections.
Usage:
High Positive Return Percentage: A favorable outlook for buying shares or using a bull put spread. The expectation is that the price will remain stable or rise, allowing the puts sold at a higher strike to expire worthless, yielding profit.
Low Positive Return Percentage: May indicate a bearish outlook. Strategies like a bear call spread could be used to capitalize on the likelihood of the stock not surpassing a certain price level.

4. Skewness
Interpretation: Measures the asymmetry of return distribution. Positive skewness means more small losses and occasional large gains, while negative skewness implies more small gains with rare large losses.
Usage:
Positive Skewness: Suggests potential for occasional large gains. This could align with strategies like bull put ladders where you benefit from large upward movements while having limited downside risk.
Negative Skewness: Indicates frequent small gains but vulnerability to large drops. A bear call spread or bear call ladder might be suitable to protect against significant declines.

5. Kurtosis
Interpretation: High kurtosis indicates a lot of outliers—extreme returns, both positive and negative. Low kurtosis suggests returns are closer to a normal distribution.
Usage:
High Kurtosis: High risk of large moves. Strategies like ladders (bear call or bull put) are appropriate as they can capitalize on large price movements while capping potential losses.
Low Kurtosis: Less risk of extreme moves. More stable environments where simple spread strategies might be preferable, such as a bull put spread for a bullish outlook or a bear call spread for a bearish outlook.

Example Application:

Scenario 1: If the Kelly Fraction is high, volatility is low, and the positive return percentage is high, a quant might prefer dollar-cost averaging (DCA) into shares, as this indicates a strong, stable bullish trend. Alternatively, a bull put spread could be used to generate income while maintaining limited risk.

Scenario 2: If volatility is high, Kelly Fraction is low, and skewness is negative (suggesting risk of large downward moves), a bear call ladder might be used. This strategy could profit from large downward movements while limiting potential losses.

Scenario 3: If skewness and kurtosis are both high, indicating frequent small gains but risk of large losses, a bear call spread could be a good defensive strategy, protecting against sharp declines while allowing some profit if the price remains below a certain level.

In [3]:
# Analyze the existing data for monthly performance
import pandas as pd

# Load the data from the CSV file (assuming it has been uploaded previously)
data = pd.read_csv('./spy_analyzed_output.csv', parse_dates=['Date'], index_col='Date')

# Create a new column for the month to categorize data
data['Month'] = data.index.month

# Function to categorize months
def categorize_month(month):
    if month in [8, 9]:  # August, September
        return 'Bearish'
    elif month in [1, 4, 11, 12]:  # January, April, November, December
        return 'Bullish'
    else:
        return 'Neutral'

# Apply the categorization function
data['Month_Category'] = data['Month'].apply(categorize_month)

# Calculate average metrics for each month category
monthly_stats = data.groupby('Month_Category').agg({
    'Kelly_Ratio': 'mean',
    'Kelly_Fraction': 'mean',
    'Std_Dev': 'mean',
    'Positive_Return_Percentage': 'mean',
    'Skewness': 'mean',
    'Kurtosis': 'mean'
})

monthly_stats


Unnamed: 0_level_0,Kelly_Ratio,Kelly_Fraction,Std_Dev,Positive_Return_Percentage,Skewness,Kurtosis
Month_Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Bearish,0.063645,0.031823,0.008252,54.100529,-0.313048,0.215383
Bullish,0.075662,0.037831,0.009476,54.689609,0.100874,-0.044679
Neutral,0.075093,0.037546,0.009919,54.661687,-0.199662,0.06937



Strategy: The combination of lower Kelly Ratio, lower positive return percentage, negative skewness, and positive kurtosis suggests that bearish strategies are appropriate. A Bear Call Spread would be ideal to capitalize on limited upside while protecting against potential declines. For higher volatility within these months, a Bear Call Ladder could be considered, especially if kurtosis and skewness values indicate the potential for large price swings.

Bullish Months (January, April, November, December):

Kelly Ratio: 0.0757, Kelly Fraction: 0.0378 – Higher, suggesting more confidence in positive returns.
Volatility (Std_Dev): 0.0095 – Moderate volatility, indicating some movement but within a bullish trend.
Positive Return Percentage: 54.69% – Higher, consistent with bullish sentiment.
Skewness: 0.1009 – Positive skewness implies more frequent small losses and fewer large gains.
Kurtosis: -0.0447 – Negative kurtosis suggests less likelihood of extreme movements.
Strategy: Higher Kelly ratios, positive return percentage, and moderate volatility suggest bullish strategies. A Bull Put Spread is appropriate, benefiting from a stable or rising market. If there are expectations of larger upward movements (based on specific market events or economic data), a Bull Put Ladder can capture larger gains with controlled risk.

Neutral or Mixed Months (Remaining Months):

Kelly Ratio: 0.0751, Kelly Fraction: 0.0375 – Similar to bullish months but requires more careful monitoring.
Volatility (Std_Dev): 0.0099 – Highest among categories, indicating unpredictability.
Positive Return Percentage: 54.66% – Comparable to bullish months, suggesting mixed sentiment.
Skewness: -0.1997 – Slightly negative, indicating a tendency towards losses.
Kurtosis: 0.0694 – Slightly positive, indicating some risk of extreme movements.
Strategy: In neutral or mixed months, where volatility is highest and sentiment is unclear, non-directional strategies such as an Iron Condor are appropriate. These strategies profit from limited price movement and can be adjusted if the market starts to trend.

Conclusion
By applying quantitative metrics to historical data, we can align option trading strategies with expected market conditions:

Bearish Months: Favor bear call spreads or ladders, expecting limited upside.
Bullish Months: Favor bull put spreads or ladders, expecting price stability or rise.
Neutral Months: Favor non-directional strategies like iron condors, expecting limited movement.


In [4]:
# Define a class to analyze seasonality on a monthly and weekly basis
class SPYSeasonalityAnalyzer:
    def __init__(self, data):
        self.data = data

    def monthly_analysis(self):
        # Add a 'Month' column
        self.data['Month'] = self.data.index.month

        # Group by 'Month' and calculate average metrics
        monthly_stats = self.data.groupby('Month').agg({
            'Kelly_Ratio': 'mean',
            'Kelly_Fraction': 'mean',
            'Std_Dev': 'mean',
            'Positive_Return_Percentage': 'mean',
            'Skewness': 'mean',
            'Kurtosis': 'mean'
        })
        return monthly_stats

    def weekly_analysis(self):
        # Add a 'Week' column (52 weeks in a year)
        self.data['Week'] = self.data.index.isocalendar().week

        # Group by 'Week' and calculate average metrics
        weekly_stats = self.data.groupby('Week').agg({
            'Kelly_Ratio': 'mean',
            'Kelly_Fraction': 'mean',
            'Std_Dev': 'mean',
            'Positive_Return_Percentage': 'mean',
            'Skewness': 'mean',
            'Kurtosis': 'mean'
        })
        return weekly_stats

# Load the data from the uploaded CSV file
data = pd.read_csv('./spy_analyzed_output.csv', parse_dates=['Date'], index_col='Date')

# Create an instance of the seasonality analyzer class
seasonality_analyzer = SPYSeasonalityAnalyzer(data)

# Perform monthly and weekly analyses
monthly_stats = seasonality_analyzer.monthly_analysis()
weekly_stats = seasonality_analyzer.weekly_analysis()

monthly_stats, weekly_stats


(       Kelly_Ratio  Kelly_Fraction   Std_Dev  Positive_Return_Percentage  \
 Month                                                                      
 1         0.085357        0.042679  0.008656                   55.164835   
 2         0.150978        0.075489  0.010151                   58.381503   
 3        -0.035461       -0.017730  0.013209                   49.242424   
 4         0.061594        0.030797  0.011832                   54.000000   
 5         0.036838        0.018419  0.009581                   52.786458   
 6         0.105079        0.052539  0.008951                   56.131579   
 7         0.203527        0.101764  0.008114                   60.957447   
 8         0.077955        0.038977  0.007567                   54.801980   
 9         0.047222        0.023611  0.009039                   53.295455   
 10        0.005365        0.002682  0.009395                   51.243655   
 11        0.108866        0.054433  0.008832                   56.317204   

# TODO 
Create an "investment calendar" with all of these values with input of capital, and output of % in shares and % in options. 

# Thoughts:
All of the previous code is great for working with with individual shares. But I also need to start working with options and for that, I need to identify potential strikes using math. One way I want to consider doing that is getting the bollinger band values.So the upper band and the lower band about the twenty day moving average and the fifty two week.Uh, bowling drill bands and then converting those to strikes, which means rounding those numbers to a whole number. Are there any other similar methods?

Methods to Identify Potential Strike Prices Using Bollinger Bands and Other Similar Methods
Bollinger Bands:

20-Day Bollinger Bands: Use the 20-day moving average as the basis and calculate the upper and lower bands as two standard deviations above and below the moving average. Round these bands to the nearest whole number to determine potential strike prices.
52-Week Bollinger Bands: Similarly, use the 52-week moving average with two standard deviations to define upper and lower bands. Again, rounding these to whole numbers can provide strike prices.
Other Similar Methods:

Keltner Channels: Like Bollinger Bands, Keltner Channels use a moving average (typically the exponential moving average, EMA) and average true range (ATR) to set upper and lower bands. These can also be rounded to determine strike prices.
Standard Deviation Channels: Similar to Bollinger Bands, these use standard deviation to create bands around a moving average but with fixed channels rather than expanding/contracting bands.
Pivot Points: Calculate potential support and resistance levels based on high, low, and close prices from the previous trading period. Pivot points and their derivatives (like R1, R2 for resistance, and S1, S2 for support) can be rounded and used as strike prices.
Fibonacci Retracements: Using Fibonacci ratios, retracement levels can be identified within a trend. These levels can provide potential strike prices as they often coincide with support and resistance.
Volatility-Based Methods: Implied volatility can be used to estimate the expected range of price movement. Calculate potential high and low prices based on current price, implied volatility, and time to expiration.


# Applied Options Pricing (Non-theoretical)
This sectionn is highly experimental and should not be used for real life trades. The purpose of this section is to brain storm on practical ways to use math, code, technicals, seasonal probabilities and risk management techniques such as the kelly criterion to generate bidirectional income on index funds and etf by **selling** options to the gamblers in the market.

In [5]:
import pandas as pd
import numpy as np
from scipy.stats import norm

class EnhancedOptionsStrategy:
    def __init__(self, data):
        self.data = data

    def calculate_bollinger_bands(self, period=20, num_std=2):
        """
        Calculate Bollinger Bands for a given period and standard deviation.
        """
        moving_avg = self.data['Close'].rolling(window=period).mean()
        moving_std = self.data['Close'].rolling(window=period).std()
        upper_band = moving_avg + (moving_std * num_std)
        lower_band = moving_avg - (moving_std * num_std)
        return upper_band, lower_band

    def calculate_keltner_channels(self, period=20, atr_multiplier=2):
        """
        Calculate Keltner Channels using ATR for a given period.
        """
        ema = self.data['Close'].ewm(span=period, adjust=False).mean()
        high_low = self.data['High'] - self.data['Low']
        high_close = np.abs(self.data['High'] - self.data['Close'].shift())
        low_close = np.abs(self.data['Low'] - self.data['Close'].shift())
        tr = pd.concat([high_low, high_close, low_close], axis=1).max(axis=1)
        atr = tr.rolling(window=period).mean()
        upper_band = ema + (atr * atr_multiplier)
        lower_band = ema - (atr * atr_multiplier)
        return upper_band, lower_band


    def identify_strike_prices(self):
        """
        Identify potential strike prices using multiple band calculations.
        """
        # Calculate bands
        upper_band_20, lower_band_20 = self.calculate_bollinger_bands(period=20)
        upper_band_52, lower_band_52 = self.calculate_bollinger_bands(period=252)
        upper_keltner, lower_keltner = self.calculate_keltner_channels(period=20)

        # Prepare a DataFrame with potential strike prices
        strike_prices = self.data[['Close']].copy()
        strike_prices['20_day_upper_strike'] = upper_band_20.round()
        strike_prices['20_day_lower_strike'] = lower_band_20.round()
        strike_prices['52_week_upper_strike'] = upper_band_52.round()
        strike_prices['52_week_lower_strike'] = lower_band_52.round()
        strike_prices['keltner_upper_strike'] = upper_keltner.round()
        strike_prices['keltner_lower_strike'] = lower_keltner.round()

        return strike_prices.dropna()

    def calculate_option_probabilities(self, current_price, strike_price, volatility, time_to_expiration, risk_free_rate=0.01):
        """
        Calculate the probability of an option expiring worthless.
        """
        d1 = (np.log(current_price / strike_price) + (risk_free_rate + 0.5 * volatility ** 2) * time_to_expiration) / (volatility * np.sqrt(time_to_expiration))
        return norm.cdf(d1)

    def execute_strategy(self, data, strategy_type, volatility, time_to_expiration, save_csv=False):
        """
        Execute a specified options strategy (spread or ladder) based on the provided data.
        """
        strategies = {
            'bear_call_spread': self.generate_bear_call_spreads,
            'bull_put_spread': self.generate_bull_put_spreads,
            'bear_call_ladder': self.generate_bear_call_ladders,
            'bull_put_ladder': self.generate_bull_put_ladders
        }

        if strategy_type not in strategies:
            raise ValueError("Invalid strategy type. Choose from: 'bear_call_spread', 'bull_put_spread', 'bear_call_ladder', 'bull_put_ladder'.")

        result = strategies[strategy_type](data, volatility, time_to_expiration)

        if save_csv:
            result.to_csv(f'{strategy_type}.csv', index=False)

        return result

    def generate_bear_call_spreads(self, data, volatility, time_to_expiration):
        """
        Generate Bear Call Spreads based on strike prices.
        """
        return self._generate_spreads(data, volatility, time_to_expiration, ['20_day_upper_strike', '52_week_upper_strike', 'keltner_upper_strike'], 'bear')

    def generate_bull_put_spreads(self, data, volatility, time_to_expiration):
        """
        Generate Bull Put Spreads based on strike prices.
        """
        return self._generate_spreads(data, volatility, time_to_expiration, ['20_day_lower_strike', '52_week_lower_strike', 'keltner_lower_strike'], 'bull')

    def generate_bear_call_ladders(self, data, volatility, time_to_expiration):
        """
        Generate Bear Call Ladders for high volatility environments.
        """
        return self._generate_ladders(data, volatility, time_to_expiration, ['20_day_upper_strike', '52_week_upper_strike', 'keltner_upper_strike'], 'bear')

    def generate_bull_put_ladders(self, data, volatility, time_to_expiration):
        """
        Generate Bull Put Ladders for capturing larger upward movements.
        """
        return self._generate_ladders(data, volatility, time_to_expiration, ['20_day_lower_strike', '52_week_lower_strike', 'keltner_lower_strike'], 'bull')

    def _generate_spreads(self, data, volatility, time_to_expiration, bands, spread_type):
        """
        Helper method to generate credit spreads based on strike prices.
        """
        spreads = []
        for index, row in data.iterrows():
            current_price = row['Close']
            for band in bands:
                strike_price = row[band]
                if not pd.isna(strike_price):
                    prob = self.calculate_option_probabilities(current_price, strike_price, volatility, time_to_expiration)
                    spread = {'Date': index, 'Spread Type': spread_type, 'Strike Price': strike_price, 'Probability of Expiring Worthless': prob}
                    spreads.append(spread)
        return pd.DataFrame(spreads)

    def _generate_ladders(self, data, volatility, time_to_expiration, bands, ladder_type):
        """
        Helper method to generate ladders based on strike prices.
        """
        ladders = []
        for index, row in data.iterrows():
            current_price = row['Close']
            for i in range(len(bands) - 1):
                lower_strike = row[bands[i]]
                upper_strike = row[bands[i + 1]]
                if not pd.isna(lower_strike) and not pd.isna(upper_strike):
                    prob_lower = self.calculate_option_probabilities(current_price, lower_strike, volatility, time_to_expiration)
                    prob_upper = self.calculate_option_probabilities(current_price, upper_strike, volatility, time_to_expiration)
                    ladder = {'Date': index, 'Ladder Type': ladder_type, 'Lower Strike': lower_strike, 'Upper Strike': upper_strike, 
                              'Probability Lower Expiry': prob_lower, 'Probability Upper Expiry': prob_upper}
                    ladders.append(ladder)
        return pd.DataFrame(ladders)
# Load the data from the uploaded CSV file
data = pd.read_csv('./spy_analyzed_output.csv', parse_dates=['Date'], index_col='Date')

# Create an instance of the EnhancedOptionsStrategy class
strategy = EnhancedOptionsStrategy(data)



# Identify potential strike prices
potential_strikes = strategy.identify_strike_prices()


# print(potential_strikes)

# Execute different strategies and analyze spreads/ladders
volatility = 0.2  # Example volatility
time_to_expiration = 1/12  # Example: 1 month to expiration

# Example: Generate bear call spreads
bear_call_spreads = strategy.execute_strategy(potential_strikes, 'bear_call_spread', volatility, time_to_expiration, save_csv=True)
print("Bear Call Spreads:")
print(bear_call_spreads.tail() )

# Example: Generate bull put ladders
bull_put_ladders = strategy.execute_strategy(potential_strikes, 'bull_put_ladder', volatility, time_to_expiration, save_csv=True)
print("\nBull Put Ladders:")
print(bull_put_ladders.tail() )

# Example: Generate bull put spreads
bull_put_spreads = strategy.execute_strategy(potential_strikes, 'bull_put_spread', volatility, time_to_expiration, save_csv=True)
print("\nBull Put Spreads:")
print(bull_put_spreads.tail() )

# Example: Generate bear call ladders
bear_call_ladders = strategy.execute_strategy(potential_strikes, 'bear_call_ladder', volatility, time_to_expiration, save_csv=True)
print("\nBear Call Ladders:")
print(bear_call_ladders.tail() )


Bear Call Spreads:
           Date Spread Type  Strike Price  Probability of Expiring Worthless
6013 2024-08-29        bear         580.0                           0.269077
6014 2024-08-29        bear         569.0                           0.388220
6015 2024-08-30        bear         579.0                           0.336819
6016 2024-08-30        bear         581.0                           0.315298
6017 2024-08-30        bear         569.0                           0.452478

Bull Put Ladders:
           Date Ladder Type  Lower Strike  Upper Strike  \
4007 2024-08-28        bull         409.0         534.0   
4008 2024-08-29        bull         514.0         410.0   
4009 2024-08-29        bull         410.0         535.0   
4010 2024-08-30        bull         516.0         410.0   
4011 2024-08-30        bull         410.0         537.0   

      Probability Lower Expiry  Probability Upper Expiry  
4007                  1.000000                  0.792199  
4008                  0.930

In [6]:
import pandas as pd
import numpy as np
from scipy.stats import norm
import datetime
import os

# --- Helper Functions ---

def calculate_d1_d2(current_price, strike_price, volatility, time_to_expiration, risk_free_rate):
    """Calculates d1 and d2 for Black-Scholes."""
    d1 = (np.log(current_price / strike_price) + (risk_free_rate + 0.5 * volatility ** 2) * time_to_expiration) / (
        volatility * np.sqrt(time_to_expiration)
    )
    d2 = d1 - volatility * np.sqrt(time_to_expiration)
    return d1, d2

def calculate_option_greeks(current_price, strike_price, volatility, time_to_expiration, risk_free_rate=0.01, option_type='call'):
    """Calculates option Greeks using Black-Scholes."""
    d1, d2 = calculate_d1_d2(current_price, strike_price, volatility, time_to_expiration, risk_free_rate)

    if option_type == 'call':
        delta = norm.cdf(d1)
    elif option_type == 'put':
        delta = norm.cdf(d1) - 1 
    else:
        raise ValueError("Invalid option type. Choose 'call' or 'put'.")

    gamma = norm.pdf(d1) / (current_price * volatility * np.sqrt(time_to_expiration))
    theta = (- (current_price * volatility * norm.pdf(d1)) / (2 * np.sqrt(time_to_expiration)) - 
             risk_free_rate * strike_price * np.exp(-risk_free_rate * time_to_expiration) * norm.cdf(d2)) / 365
    vega = current_price * np.sqrt(time_to_expiration) * norm.pdf(d1) / 100 

    return delta, gamma, theta, vega

# --- Option Strategy Classes ---

class SpreadStrategy:
    def __init__(self, strategy_type):
        self.strategy_type = strategy_type
        self.signals = None

    def calculate_probability_of_profit(self, df, volatility, time_to_expiration, risk_free_rate):
        df['Probability of Profit'] = 0.0
        for i in range(len(df)):
            current_price = df['Underlying Price'].iloc[i]
            if self.strategy_type == 'bull_put_spread':
                df['Probability of Profit'].iloc[i] = norm.cdf((np.log(df['Strike A'].iloc[i] / current_price) - (risk_free_rate + 0.5 * volatility**2) * time_to_expiration) / (volatility * np.sqrt(time_to_expiration)))
            elif self.strategy_type == 'bear_call_spread':
                df['Probability of Profit'].iloc[i] = 1 - norm.cdf((np.log(df['Strike B'].iloc[i] / current_price) - (risk_free_rate + 0.5 * volatility**2) * time_to_expiration) / (volatility * np.sqrt(time_to_expiration)))
        return df

    def calculate_maximum_profit_loss(self, df):
        df['Max Profit'] = 0.0
        df['Max Loss'] = 0.0
        for i in range(len(df)):
            if self.strategy_type == 'bull_put_spread':
                df['Max Profit'].iloc[i] = df['Strike A'].iloc[i] - df['Strike B'].iloc[i]
                df['Max Loss'].iloc[i] = df['Strike B'].iloc[i] - df['Strike A'].iloc[i] 
            elif self.strategy_type == 'bear_call_spread':
                df['Max Profit'].iloc[i] = df['Strike B'].iloc[i] - df['Strike A'].iloc[i]
                df['Max Loss'].iloc[i] = df['Strike A'].iloc[i] - df['Strike B'].iloc[i] 
        return df

    def generate_signals(self, options_chain_data, strategy_type, underlying_price, risk_free_rate=0.01, volatility=0.2, days_to_expiration=30):
        self.strategy_type = strategy_type
        options_chain_data['Expiry'] = pd.to_datetime(options_chain_data['Expiry'])

        valid_combinations = []

        if self.strategy_type == 'bull_put_spread':
            options_chain_data = options_chain_data[options_chain_data['Type'] == 'put']
            options_chain_data = options_chain_data.sort_values(by='Strike')
            for i in range(len(options_chain_data) - 1):
                strike_a = options_chain_data['Strike'].iloc[i]
                strike_b = options_chain_data['Strike'].iloc[i + 1]
                valid_combinations.append({'Strike A': strike_a, 'Strike B': strike_b})

        elif self.strategy_type == 'bear_call_spread':
            options_chain_data = options_chain_data[options_chain_data['Type'] == 'call']
            options_chain_data = options_chain_data.sort_values(by='Strike', ascending=False)
            for i in range(len(options_chain_data) - 1):
                strike_a = options_chain_data['Strike'].iloc[i]
                strike_b = options_chain_data['Strike'].iloc[i + 1]
                valid_combinations.append({'Strike A': strike_a, 'Strike B': strike_b})

        else:
            raise ValueError("Invalid spread strategy type.")

        if valid_combinations:
            df = pd.DataFrame(valid_combinations)
            df['Underlying Price'] = underlying_price
            df['Time to Expiration'] = days_to_expiration / 365.25
            df = self.calculate_probability_of_profit(df.copy(), volatility, days_to_expiration / 365.25, risk_free_rate)
            df = self.calculate_maximum_profit_loss(df.copy())

            # Calculate Greeks
            for strike in ['A', 'B']:
                df[f'Delta {strike}'] = df.apply(
                    lambda row: calculate_option_greeks(
                        row['Underlying Price'], row[f'Strike {strike}'], volatility,
                        row['Time to Expiration'], risk_free_rate,
                        option_type='put' if self.strategy_type == 'bull_put_spread' else 'call'
                    )[0],
                    axis=1
                )
                df[f'Gamma {strike}'] = df.apply(
                    lambda row: calculate_option_greeks(
                        row['Underlying Price'], row[f'Strike {strike}'], volatility,
                        row['Time to Expiration'], risk_free_rate,
                        option_type='put' if self.strategy_type == 'bull_put_spread' else 'call'
                    )[1],
                    axis=1
                )
                df[f'Theta {strike}'] = df.apply(
                    lambda row: calculate_option_greeks(
                        row['Underlying Price'], row[f'Strike {strike}'], volatility,
                        row['Time to Expiration'], risk_free_rate,
                        option_type='put' if self.strategy_type == 'bull_put_spread' else 'call'
                    )[2],
                    axis=1
                )
                df[f'Vega {strike}'] = df.apply(
                    lambda row: calculate_option_greeks(
                        row['Underlying Price'], row[f'Strike {strike}'], volatility,
                        row['Time to Expiration'], risk_free_rate,
                        option_type='put' if self.strategy_type == 'bull_put_spread' else 'call'
                    )[3],
                    axis=1
                )

            self.signals = df.copy()
            return self.signals
        else:
            print(f"No valid {self.strategy_type} combinations found in the options chain data.")
            return None

    def export_signals_to_csv(self, filename):
        """Exports the generated trading signals to a CSV file."""
        if self.signals is not None:
            # Ensure the directory exists
            os.makedirs('options_data', exist_ok=True)
            # Save the CSV in the specified directory
            self.signals.to_csv(os.path.join('options_data', filename), index=False)
            print(f"Signals exported to options_data/{filename} successfully!")
        else:
            print("No signals to export. Generate signals first.")

class LadderStrategy:
    def __init__(self, strategy_type):
        self.strategy_type = strategy_type
        self.signals = None

    def calculate_probability_of_profit(self, df, volatility, time_to_expiration, risk_free_rate):
        """
        Calculates the probability of profit (POP) for the ladder.
        """
        df['Probability of Profit'] = 0.0

        for i in range(len(df)):
            current_price = df['Underlying Price'].iloc[i]
            strike_a = df['Strike A'].iloc[i]
            strike_b = df['Strike B'].iloc[i]
            strike_c = df['Strike C'].iloc[i]

            if self.strategy_type == 'bear_call_ladder':
                # For a bear call ladder, profit is realized if the underlying price is below Strike B.
                df['Probability of Profit'].iloc[i] = norm.cdf(
                    (np.log(strike_b / current_price) - (risk_free_rate + 0.5 * volatility ** 2) * time_to_expiration)
                    / (volatility * np.sqrt(time_to_expiration))
                )
            elif self.strategy_type == 'bull_put_ladder':
                # For a bull put ladder, profit is realized if the underlying price is above Strike B.
                df['Probability of Profit'].iloc[i] = 1 - norm.cdf(
                    (np.log(strike_b / current_price) - (risk_free_rate + 0.5 * volatility ** 2) * time_to_expiration)
                    / (volatility * np.sqrt(time_to_expiration))
                )
        return df


    def calculate_maximum_profit_loss(self, df):
        df['Max Profit'] = 0.0
        df['Max Loss'] = 0.0
        for i in range(len(df)):
            if self.strategy_type == 'bear_call_ladder':
                df['Max Profit'].iloc[i] = (df['Strike A'].iloc[i] - df['Strike B'].iloc[i]) + (df['Strike B'].iloc[i] - df['Strike C'].iloc[i])  
                df['Max Loss'].iloc[i] = df['Strike C'].iloc[i] - df['Strike A'].iloc[i] 
            elif self.strategy_type == 'bull_put_ladder':
                df['Max Profit'].iloc[i] = (df['Strike C'].iloc[i] - df['Strike B'].iloc[i]) + (df['Strike B'].iloc[i] - df['Strike A'].iloc[i])  
                df['Max Loss'].iloc[i] = df['Strike A'].iloc[i] - df['Strike C'].iloc[i] 
        return df

    def generate_signals(self, options_chain_data, strategy_type, underlying_price, risk_free_rate=0.01, volatility=0.2, days_to_expiration=30):
        self.strategy_type = strategy_type
        options_chain_data['Expiry'] = pd.to_datetime(options_chain_data['Expiry'])

        valid_combinations = []

        if self.strategy_type == 'bear_call_ladder':
            options_chain_data = options_chain_data[options_chain_data['Type'] == 'call']
            options_chain_data = options_chain_data.sort_values(by='Strike', ascending=False)
            for i in range(len(options_chain_data) - 2):
                strike_a = options_chain_data['Strike'].iloc[i]
                strike_b = options_chain_data['Strike'].iloc[i + 1]
                strike_c = options_chain_data['Strike'].iloc[i + 2]
                valid_combinations.append({'Strike A': strike_a, 'Strike B': strike_b, 'Strike C': strike_c})

        elif self.strategy_type == 'bull_put_ladder':
            options_chain_data = options_chain_data[options_chain_data['Type'] == 'put']
            options_chain_data = options_chain_data.sort_values(by='Strike')
            for i in range(len(options_chain_data) - 2):
                strike_a = options_chain_data['Strike'].iloc[i]
                strike_b = options_chain_data['Strike'].iloc[i + 1]
                strike_c = options_chain_data['Strike'].iloc[i + 2]
                valid_combinations.append({'Strike A': strike_a, 'Strike B': strike_b, 'Strike C': strike_c})
        else:
            raise ValueError("Invalid ladder strategy type.")

        if valid_combinations:
            df = pd.DataFrame(valid_combinations)
            df['Underlying Price'] = underlying_price
            df['Time to Expiration'] = days_to_expiration / 365.25
            df = self.calculate_probability_of_profit(df.copy(), volatility, days_to_expiration / 365.25, risk_free_rate)
            df = self.calculate_maximum_profit_loss(df.copy())

            # Calculate Greeks
            for strike in ['A', 'B', 'C']:
                df[f'Delta {strike}'] = df.apply(
                    lambda row: calculate_option_greeks(
                        row['Underlying Price'], row[f'Strike {strike}'], volatility,
                        row['Time to Expiration'], risk_free_rate,
                        option_type='call' if self.strategy_type == 'bear_call_ladder' else 'put'
                    )[0],
                    axis=1
                )
                df[f'Gamma {strike}'] = df.apply(
                    lambda row: calculate_option_greeks(
                        row['Underlying Price'], row[f'Strike {strike}'], volatility,
                        row['Time to Expiration'], risk_free_rate,
                        option_type='call' if self.strategy_type == 'bear_call_ladder' else 'put'
                    )[1],
                    axis=1
                )
                df[f'Theta {strike}'] = df.apply(
                    lambda row: calculate_option_greeks(
                        row['Underlying Price'], row[f'Strike {strike}'], volatility,
                        row['Time to Expiration'], risk_free_rate,
                        option_type='call' if self.strategy_type == 'bear_call_ladder' else 'put'
                    )[2],
                    axis=1
                )
                df[f'Vega {strike}'] = df.apply(
                    lambda row: calculate_option_greeks(
                        row['Underlying Price'], row[f'Strike {strike}'], volatility,
                        row['Time to Expiration'], risk_free_rate,
                        option_type='call' if self.strategy_type == 'bear_call_ladder' else 'put'
                    )[3],
                    axis=1
                )

            self.signals = df.copy()
            return self.signals
        else:
            print(f"No valid {self.strategy_type} combinations found in the options chain data.")
            return None

    def export_signals_to_csv(self, filename):
        """Exports the generated trading signals to a CSV file."""
        if self.signals is not None:
            # Ensure the directory exists
            os.makedirs('options_data', exist_ok=True)
            # Save the CSV in the specified directory
            self.signals.to_csv(os.path.join('options_data', filename), index=False)
            print(f"Signals exported to options_data/{filename} successfully!")
        else:
            print("No signals to export. Generate signals first.")

# --- Example Usage ---
# Sample Options Chain Data (Replace with API Data)
options_data = {
    'Strike': [440, 445, 450, 455, 460],
    'Expiry': ['2023-12-15', '2023-12-15', '2023-12-15', '2023-12-15', '2023-12-15'],
    'Type': ['call', 'call', 'call', 'put', 'put'],  # Include both call and put options
    'Underlying': ['SPY', 'SPY', 'SPY', 'SPY', 'SPY'],
    'Last Price': [10.50, 8.20, 6.10, 4.30, 2.80]
    # ... Add other relevant columns ...
}
options_df = pd.DataFrame(options_data)

# Parameters
underlying_price = 447.50  # Example current price of SPY
risk_free_rate = 0.01
volatility = 0.2  
days_to_expiration = 30  

# Create strategy objects
bull_put_spread = SpreadStrategy('bull_put_spread')
bear_call_spread = SpreadStrategy('bear_call_spread')
bear_call_ladder = LadderStrategy('bear_call_ladder')
bull_put_ladder = LadderStrategy('bull_put_ladder')

# Generate Signals
bull_put_spread_signals = bull_put_spread.generate_signals(options_df.copy(), 'bull_put_spread', underlying_price, risk_free_rate, volatility, days_to_expiration)
bear_call_spread_signals = bear_call_spread.generate_signals(options_df.copy(), 'bear_call_spread', underlying_price, risk_free_rate, volatility, days_to_expiration)
bear_call_ladder_signals = bear_call_ladder.generate_signals(options_df.copy(), 'bear_call_ladder', underlying_price, risk_free_rate, volatility, days_to_expiration)
bull_put_ladder_signals = bull_put_ladder.generate_signals(options_df.copy(), 'bull_put_ladder', underlying_price, risk_free_rate, volatility, days_to_expiration)


# Print and export signals
print("Bull Put Spread Signals:")
print(bull_put_spread_signals)
bull_put_spread.export_signals_to_csv('bull_put_spread_signals.csv')

print("\nBear Call Spread Signals:")
print(bear_call_spread_signals)
bear_call_spread.export_signals_to_csv('bear_call_spread_signals.csv')

print("\nBear Call Ladder Signals:")
print(bear_call_ladder_signals)
bear_call_ladder.export_signals_to_csv('bear_call_ladder_signals.csv')

print("\nBull Put Ladder Signals:")
print(bull_put_ladder_signals)
bull_put_ladder.export_signals_to_csv('bull_put_ladder_signals.csv')

No valid bull_put_ladder combinations found in the options chain data.
Bull Put Spread Signals:
   Strike A  Strike B  Underlying Price  Time to Expiration  \
0       455       460             447.5            0.082136   

   Probability of Profit  Max Profit  Max Loss  Delta A   Gamma A   Theta A  \
0                0.59754        -5.0       5.0 -0.59754  0.015086 -0.170277   

     Vega A   Delta B   Gamma B   Theta B    Vega B  
0  0.496275 -0.669183  0.014133 -0.158986  0.464917  
Signals exported to options_data/bull_put_spread_signals.csv successfully!

Bear Call Spread Signals:
   Strike A  Strike B  Underlying Price  Time to Expiration  \
0       450       445             447.5            0.082136   
1       445       440             447.5            0.082136   

   Probability of Profit  Max Profit  Max Loss   Delta A  Gamma A   Theta A  \
0               0.555958        -5.0       5.0  0.478386  0.01553 -0.176027   
1               0.632267        -5.0       5.0  0.555958  0.

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Probability of Profit'].iloc[i] = norm.cdf((np.log(df['Strike A'].iloc[i] / current_price) - (risk_free_rate + 0.5 * volatility**2) * time_to_expiration) / (volatility * np.sqrt(time_to_expiration)))
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Max Profit'].iloc[i] = df['Strike A'].iloc[i] - df['Strike B'].iloc[i]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Max Loss'].iloc[i] = df['Strike B'].iloc[i] - df['Strike A'].iloc[i]
A value is trying to be 

In [7]:
import yfinance as yf

# Fetch options data for SPY using yfinance
ticker = 'SPY'
spy = yf.Ticker(ticker)

# Get options expiration dates
expirations = spy.options
print(f"Available expiration dates for {ticker}: {expirations}")

# Select the nearest expiration date for the example
nearest_expiration = expirations[0]
print(f"Nearest expiration date: {nearest_expiration}")

# Fetch the options chain for the nearest expiration date
options_chain = spy.option_chain(nearest_expiration)

# Extract calls and puts data
calls = options_chain.calls
puts = options_chain.puts

# Display the first few rows of calls and puts
print("Calls data:")
print(calls.head())

print("\nPuts data:")
print(puts.head())

# Example: Filter calls with strike prices close to the current underlying price
current_price = spy.history(period='1d')['Close'].iloc[-1]
strike_range = 10  # Define a range around the current price
filtered_calls = calls[(calls['strike'] >= current_price - strike_range) & (calls['strike'] <= current_price + strike_range)]

print(f"\nFiltered calls around the current price ({current_price}):")
print(filtered_calls)

# Example: Filter puts with strike prices close to the current underlying price
filtered_puts = puts[(puts['strike'] >= current_price - strike_range) & (puts['strike'] <= current_price + strike_range)]

print(f"\nFiltered puts around the current price ({current_price}):")
print(filtered_puts)

# Evaluate Bullish and Bearish Strategies using real options data
underlying_price = current_price
risk_free_rate = 0.01
volatility = 0.2  
days_to_expiration = (pd.to_datetime(nearest_expiration) - pd.to_datetime('today')).days

# Create strategy objects
bull_call_spread = SpreadStrategy('bull_call_spread')
bear_put_spread = SpreadStrategy('bear_put_spread')
bull_call_ladder = LadderStrategy('bull_call_ladder')
bear_put_ladder = LadderStrategy('bear_put_ladder')

# Generate Signals
bull_call_spread_signals = bull_call_spread.generate_signals(filtered_calls.copy(), 'bull_call_spread', underlying_price, risk_free_rate, volatility, days_to_expiration)
bear_put_spread_signals = bear_put_spread.generate_signals(filtered_puts.copy(), 'bear_put_spread', underlying_price, risk_free_rate, volatility, days_to_expiration)
bull_call_ladder_signals = bull_call_ladder.generate_signals(filtered_calls.copy(), 'bull_call_ladder', underlying_price, risk_free_rate, volatility, days_to_expiration)
bear_put_ladder_signals = bear_put_ladder.generate_signals(filtered_puts.copy(), 'bear_put_ladder', underlying_price, risk_free_rate, volatility, days_to_expiration)

# Print and export signals
print("Bull Call Spread Signals:")
print(bull_call_spread_signals)
bull_call_spread.export_signals_to_csv('bull_call_spread_signals.csv')

print("\nBear Put Spread Signals:")
print(bear_put_spread_signals)
bear_put_spread.export_signals_to_csv('bear_put_spread_signals.csv')

print("\nBull Call Ladder Signals:")
print(bull_call_ladder_signals)
bull_call_ladder.export_signals_to_csv('bull_call_ladder_signals.csv')

print("\nBear Put Ladder Signals:")
print(bear_put_ladder_signals)
bear_put_ladder.export_signals_to_csv('bear_put_ladder_signals.csv')


Available expiration dates for SPY: ('2024-09-06', '2024-09-09', '2024-09-10', '2024-09-11', '2024-09-12', '2024-09-13', '2024-09-20', '2024-09-27', '2024-09-30', '2024-10-04', '2024-10-11', '2024-10-18', '2024-10-31', '2024-11-15', '2024-11-29', '2024-12-20', '2024-12-31', '2025-01-17', '2025-01-31', '2025-02-28', '2025-03-21', '2025-03-31', '2025-04-17', '2025-06-20', '2025-06-30', '2025-08-15', '2025-09-19', '2025-12-19', '2026-01-16', '2026-06-18', '2026-12-18')
Nearest expiration date: 2024-09-06
Calls data:
       contractSymbol             lastTradeDate  strike  lastPrice     bid  \
0  SPY240906C00340000 2024-09-05 19:25:24+00:00   340.0     209.10  209.31   
1  SPY240906C00370000 2024-08-05 14:02:40+00:00   370.0     154.00  180.11   
2  SPY240906C00375000 2024-08-27 17:41:14+00:00   375.0     187.09  174.32   
3  SPY240906C00380000 2024-08-27 17:41:14+00:00   380.0     182.10  169.30   
4  SPY240906C00385000 2024-09-05 16:06:59+00:00   385.0     163.32  164.33   

      ask   

KeyError: 'Expiry'

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Assuming dataframes bear_call_spread, bull_put_ladder, bull_put_spread, bear_call_ladder are already loaded

# 1. Summary Statistics
def summary_stats(data):
    return data.describe()

# 2. Plot Histograms of Strike Prices and Probabilities
def plot_histograms(data, strategy_type):
    data['Strike Price'].hist(bins=20, alpha=0.5, label='Strike Price')
    data['Probability of Expiring Worthless'].hist(bins=20, alpha=0.5, label='Probability')
    plt.title(f'Histogram for {strategy_type}')
    plt.xlabel('Value')
    plt.ylabel('Frequency')
    plt.legend()
    plt.show()

# 3. Correlation Analysis for Ladder Strategies
def correlation_analysis(data, strategy_type):
    corr = data[['Lower Strike', 'Upper Strike']].corr()
    print(f'Correlation Matrix for {strategy_type}:\n', corr)

# 4. Plot Time-Series Analysis
def plot_time_series(data, strategy_type):
    data.plot(x='Date', y=['Strike Price', 'Probability of Expiring Worthless'])
    plt.title(f'Time-Series for {strategy_type}')
    plt.xlabel('Date')
    plt.ylabel('Value')
    plt.show()

# Run analyses
print("Summary Statistics for Bear Call Spread:")
print(summary_stats(bear_call_spreads))

# print("\nSummary Statistics for Bull Put Ladder:")
# print(summary_stats(bull_put_ladder))

# print("\nSummary Statistics for Bull Put Spread:")
# print(summary_stats(bull_put_spread))

# print("\nSummary Statistics for Bear Call Ladder:")
# print(summary_stats(bear_call_ladder))

plot_histograms(bear_call_spreads, 'Bear Call Spread')
# plot_histograms(bull_put_spreads, 'Bull Put Spread')

# correlation_analysis(bull_put_ladders, 'Bull Put Ladder')
# correlation_analysis(bear_call_ladders, 'Bear Call Ladder')

# plot_time_series(bear_call_spreads, 'Bear Call Spread')
# plot_time_series(bull_put_ladders, 'Bull Put Ladder')


In [None]:
import pandas as pd
import numpy as np

class OptionsDataAnalyzer:
    """
    A class to analyze and process options data for various strategies.
    """
    def __init__(self, bear_call_spread_file, bull_put_ladder_file, bull_put_spread_file, bear_call_ladder_file):
        """
        Initialize the OptionsDataAnalyzer with file paths for different strategies.
        
        Parameters:
        bear_call_spread_file (str): Path to the CSV file for bear call spread data.
        bull_put_ladder_file (str): Path to the CSV file for bull put ladder data.
        bull_put_spread_file (str): Path to the CSV file for bull put spread data.
        bear_call_ladder_file (str): Path to the CSV file for bear call ladder data.
        """
        self.bear_call_spread = pd.read_csv(bear_call_spread_file)
        self.bull_put_ladder = pd.read_csv(bull_put_ladder_file)
        self.bull_put_spread = pd.read_csv(bull_put_spread_file)
        self.bear_call_ladder = pd.read_csv(bear_call_ladder_file)

    def fetch_historical_data(self, row):
        """
        Simulate fetching historical options data for a given row.
        In practice, this would fetch from an API or database.
        
        Parameters:
        row (pd.Series): A row from the DataFrame containing options data.
        
        Returns:
        dict: A dictionary containing historical data for the given row.
        """
        # Placeholder values
        underlying = "SPY"  # Simplification
        expiration_date = row['Date']  # Simplification
        premium = np.random.uniform(1, 5)  # Random premium value
        underlying_price = np.random.uniform(300, 500)  # Random underlying price
        profit_loss = premium * 100  # Simplification for profit/loss calculation
        
        return {
            "Underlying": underlying,
            "Expiration Date": expiration_date,
            "Premium": premium,
            "Underlying Price": underlying_price,
            "Profit/Loss": profit_loss
        }

    def add_historical_data(self, df):
        """
        Add historical options data to the DataFrame.
        
        Parameters:
        df (pd.DataFrame): DataFrame containing options data.
        
        Returns:
        pd.DataFrame: DataFrame with added historical data.
        """
        historical_data = df.apply(self.fetch_historical_data, axis=1, result_type='expand')
        return pd.concat([df, historical_data], axis=1)

    def analyze_data(self):
        """
        Analyze and return data with added historical context.
        
        Returns:
        dict: A dictionary containing DataFrames with added historical data for each strategy.
        """
        self.bear_call_spread = self.add_historical_data(self.bear_call_spread)
        self.bull_put_ladder = self.add_historical_data(self.bull_put_ladder)
        self.bull_put_spread = self.add_historical_data(self.bull_put_spread)
        self.bear_call_ladder = self.add_historical_data(self.bear_call_ladder)
        
        return {
            'Bear Call Spread': self.bear_call_spread,
            'Bull Put Ladder': self.bull_put_ladder,
            'Bull Put Spread': self.bull_put_spread,
            'Bear Call Ladder': self.bear_call_ladder
        }

# Paths to the CSV files (assuming they are uploaded and accessible)
bear_call_spread_file = './bear_call_spread.csv'
bull_put_ladder_file = './bull_put_ladder.csv'
bull_put_spread_file = './bull_put_spread.csv'
bear_call_ladder_file = './bear_call_ladder.csv'

# Initialize the analyzer with the file paths
analyzer = OptionsDataAnalyzer(bear_call_spread_file, bull_put_ladder_file, bull_put_spread_file, bear_call_ladder_file)

# Analyze the data
analyzed_data = analyzer.analyze_data()

# Example: Display the first few rows of the analyzed bear call spread data
# analyzed_data['Bear Call Spread'].tail()
# analyzed_data['Bear Call Ladder'].tail()
# Repeat the analysis for each strategy
for strategy_name, strategy_data in analyzed_data.items():
    print(f"Analyzing {strategy_name} strategy...")
    # Display the first few rows of the analyzed data for each strategy
    print(strategy_data.tail())
    print("\n")


Analyzing Bear Call Spread strategy...
            Date Spread Type  Strike Price  Probability of Expiring Worthless  \
6013  2024-08-29        bear         580.0                           0.269077   
6014  2024-08-29        bear         569.0                           0.388220   
6015  2024-08-30        bear         579.0                           0.336819   
6016  2024-08-30        bear         581.0                           0.315298   
6017  2024-08-30        bear         569.0                           0.452478   

     Underlying Expiration Date   Premium  Underlying Price  Profit/Loss  
6013        SPY      2024-08-29  3.122054        390.302821   312.205426  
6014        SPY      2024-08-29  4.780681        373.057037   478.068107  
6015        SPY      2024-08-30  3.106257        406.414011   310.625663  
6016        SPY      2024-08-30  4.653206        388.532152   465.320646  
6017        SPY      2024-08-30  2.208064        487.191959   220.806401  


Analyzing Bull Put Lad

In [None]:
def fetch_and_display_statistics(ticker, start_date, end_date, output='print', csv_path=None):
    # Fetch historical data
    df = yf.download(ticker, start=start_date, end=end_date)
    
    # Calculate Technical Indicators
    df['ATR'] = pd.concat([
        df['High'] - df['Low'],
        np.abs(df['High'] - df['Close'].shift()),
        np.abs(df['Low'] - df['Close'].shift())
    ], axis=1).max(axis=1).rolling(window=14).mean()
    
    delta = df['Close'].diff()
    gain = delta.clip(lower=0).rolling(window=14).mean()
    loss = -delta.clip(upper=0).rolling(window=14).mean()
    df['RSI'] = 100 - (100 / (1 + gain / loss))
    
    df['20 Day MA'] = df['Close'].rolling(window=20).mean()
    df['20 Day STD'] = df['Close'].rolling(window=20).std()
    df['Upper Band'] = df['20 Day MA'] + 2 * df['20 Day STD']
    df['Lower Band'] = df['20 Day MA'] - 2 * df['20 Day STD']
    
    df['tenkan_sen'] = (df['High'].rolling(window=9).max() + df['Low'].rolling(window=9).min()) / 2
    df['kijun_sen'] = (df['High'].rolling(window=26).max() + df['Low'].rolling(window=26).min()) / 2
    df['senkou_span_a'] = ((df['tenkan_sen'] + df['kijun_sen']) / 2).shift(26)
    df['senkou_span_b'] = ((df['High'].rolling(window=52).max() + df['Low'].rolling(window=52).min()) / 2).shift(26)
    df['chikou_span'] = df['Close'].shift(-26)
    
    # Resample data to monthly
    df_monthly = df.resample('M').agg({
        'Open': 'first',
        'High': 'max',
        'Low': 'min',
        'Close': 'last',
        'Volume': 'sum',
        'ATR': 'mean',
        'RSI': 'mean',
        '20 Day MA': 'mean',
        'Upper Band': 'mean',
        'Lower Band': 'mean',
        'tenkan_sen': 'mean',
        'kijun_sen': 'mean',
        'senkou_span_a': 'mean',
        'senkou_span_b': 'mean',
        'chikou_span': 'mean'
    })
    
    df_monthly['Return'] = df_monthly['Close'].pct_change() * 100
    
    # Generate statistics
    months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    statistics = []

    for month in range(1, 13):
        monthly_data = df_monthly[df_monthly.index.month == month]
        if monthly_data.empty:
            continue

        monthly_stats = {
            'Month': months[month-1],
            'Mean Return (%)': monthly_data['Return'].mean(),
            'Std Return (%)': monthly_data['Return'].std(),
            'Highest Return (%)': monthly_data['Return'].max(),
            'Highest Return Year': monthly_data['Return'].idxmax().year,
            'Lowest Return (%)': monthly_data['Return'].min(),
            'Lowest Return Year': monthly_data['Return'].idxmin().year,
            'Mean Volume': monthly_data['Volume'].mean(),
            'Highest Volume': monthly_data['Volume'].max(),
            'Highest Volume Year': monthly_data['Volume'].idxmax().year,
            'Lowest Volume': monthly_data['Volume'].min(),
            'Lowest Volume Year': monthly_data['Volume'].idxmin().year,
            'Mean ATR': monthly_data['ATR'].mean(),
            'Mean RSI': monthly_data['RSI'].mean(),
            'Mean 20 Day MA': monthly_data['20 Day MA'].mean(),
            'Mean Upper Band': monthly_data['Upper Band'].mean(),
            'Mean Lower Band': monthly_data['Lower Band'].mean(),
            'Mean Tenkan-sen': monthly_data['tenkan_sen'].mean(),
            'Mean Kijun-sen': monthly_data['kijun_sen'].mean(),
            'Mean Senkou Span A': monthly_data['senkou_span_a'].mean(),
            'Mean Senkou Span B': monthly_data['senkou_span_b'].mean(),
            'Mean Chikou Span': monthly_data['chikou_span'].mean(),
            'Close > RSI Days': (monthly_data['Close'] > monthly_data['RSI']).sum(),
            'Close > Upper Band Days': (monthly_data['Close'] > monthly_data['Upper Band']).sum(),
            'High > ATR Days': (monthly_data['High'] > monthly_data['ATR']).sum()
        }

        statistics.append(monthly_stats)

    df_statistics = pd.DataFrame(statistics)

    # Output based on the selected option
    if output == 'print':
        for index, row in df_statistics.iterrows():
            print(f"{ticker} - {row['Month']} Statistics")
            print(f"Mean {row['Month']} Return: {row['Mean Return (%)']:.2f}%")
            print(f"Standard Deviation of {row['Month']} Returns: {row['Std Return (%)']:.2f}%")
            print(f"Highest {row['Month']} Return: {row['Highest Return (%)']:.2f}% in {row['Highest Return Year']}")
            print(f"Lowest {row['Month']} Return: {row['Lowest Return (%)']:.2f}% in {row['Lowest Return Year']}")
            print(f"Mean {row['Month']} Volume: {row['Mean Volume']:.2f}")
            print(f"Highest {row['Month']} Volume: {row['Highest Volume']:.2f} in {row['Highest Volume Year']}")
            print(f"Lowest {row['Month']} Volume: {row['Lowest Volume']:.2f} in {row['Lowest Volume Year']}")
            print(f"Mean {row['Month']} ATR: {row['Mean ATR']:.2f}")
            print(f"Mean {row['Month']} RSI: {row['Mean RSI']:.2f}")
            print(f"Mean {row['Month']} 20 Day MA: {row['Mean 20 Day MA']:.2f}")
            print(f"Mean {row['Month']} Upper Band: {row['Mean Upper Band']:.2f}")
            print(f"Mean {row['Month']} Lower Band: {row['Mean Lower Band']:.2f}")
            print(f"Mean {row['Month']} Tenkan-sen: {row['Mean Tenkan-sen']:.2f}")
            print(f"Mean {row['Month']} Kijun-sen: {row['Mean Kijun-sen']:.2f}")
            print(f"Mean {row['Month']} Senkou Span A: {row['Mean Senkou Span A']:.2f}")
            print(f"Mean {row['Month']} Senkou Span B: {row['Mean Senkou Span B']:.2f}")
            print(f"Mean {row['Month']} Chikou Span: {row['Mean Chikou Span']:.2f}")
            print(f"Number of days Close > RSI in {row['Month']}: {row['Close > RSI Days']}")
            print(f"Number of days Close > Upper Bollinger Band in {row['Month']}: {row['Close > Upper Band Days']}")
            print(f"Number of days High > ATR in {row['Month']}: {row['High > ATR Days']}")
            print()

    elif output == 'csv':
        if csv_path is None:
            raise ValueError("CSV path must be provided if output is set to 'csv'")
        df_statistics.to_csv(csv_path, index=False)
        print(f"Statistics exported to {csv_path}")
    
    elif output == 'df':
        return df_statistics

# Examples of usage:

# 1. Print the monthly statistics
fetch_and_display_statistics('SPY', '2020-01-01', '2023-01-01', output='print')

# 2. Export the monthly statistics to a CSV file
fetch_and_display_statistics('SPY', '2020-01-01', '2023-01-01', output='csv', csv_path='SPY_monthly_statistics.csv')

# 3. Return the monthly statistics as a DataFrame
df_stats = fetch_and_display_statistics('SPY', '2020-01-01', '2023-01-01', output='df')


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

SPY - Jan Statistics
Mean Jan Return: -3.15%
Standard Deviation of Jan Returns: 3.01%
Highest Jan Return: -1.02% in 2021
Lowest Jan Return: -5.27% in 2022
Mean Jan Volume: 1759812333.33
Highest Jan Volume: 2485167800.00 in 2022
Lowest Jan Volume: 1392003800.00 in 2020
Mean Jan ATR: 4.76
Mean Jan RSI: 51.61
Mean Jan 20 Day MA: 389.13
Mean Jan Upper Band: 400.40
Mean Jan Lower Band: 377.86
Mean Jan Tenkan-sen: 387.67
Mean Jan Kijun-sen: 416.15
Mean Jan Senkou Span A: 409.88
Mean Jan Senkou Span B: 397.89
Mean Jan Chikou Span: 379.68
Number of days Close > RSI in Jan: 3
Number of days Close > Upper Bollinger Band in Jan: 0
Number of days High > ATR in Jan: 3

SPY - Feb Statistics
Mean Feb Return: -2.70%
Standard Deviation of Feb Returns: 5.35%
Highest Feb Return: 2.78% in 2021
Lowest Feb Return: -7.92% in 2020
Mean Feb Volume: 1905332066.67
Highest Feb Volume: 2297975100.00 in 2022
Lowest Feb Volume: 1307806200.00 in 2021
Mean Feb ATR: 6.14
Mean Feb RSI: 52.06
Mean Feb 20 Day MA: 386.60
M




In [None]:
# Load historical SPY data
spy_data = yf.download('SPY', start='2000-01-01', end='2023-12-31')

# Resample to monthly data
spy_data['Return'] = spy_data['Adj Close'].pct_change()
spy_data['Month'] = spy_data.index.month
spy_data['Year'] = spy_data.index.year
spy_data_monthly = spy_data.resample('M').apply({
    'Open': 'first',
    'High': 'max',
    'Low': 'min',
    'Close': 'last',
    'Adj Close': 'last',
    'Volume': 'sum',
    'Return': 'sum'
})



spy_data = fetch_and_display_statistics(ticker='SPY', start_date='2000-01-01', end_date='2023-12-31', output='df')

# Define the strategy
def apply_strategy(df):
    initial_capital = 5000  # Starting with $5,000
    capital = initial_capital
    position = 0  # No initial position
    returns = []

    for i in range(1, len(df)):
        month = df.index[i].month
        if month in [4, 7, 11]:  # Buy in April, July, and November
            if position == 0:
                position = capital / df['Adj Close'].iloc[i]
                capital = 0
        elif month in [6, 8, 9]:  # Sell in June, August, and September
            if position > 0:
                capital = position * df['Adj Close'].iloc[i]
                position = 0
        else:  # Hedge or hold
            if position > 0:
                capital = position * df['Adj Close'].iloc[i]
                position = 0
            position = capital / df['Adj Close'].iloc[i]
            capital = 0
        
        # Calculate monthly return
        total_value = capital + position * df['Adj Close'].iloc[i]
        monthly_return = (total_value - initial_capital) / initial_capital
        returns.append(monthly_return)

    # Final capital
    if position > 0:
        capital = position * df['Adj Close'].iloc[-1]

    return returns, capital

# Backtest the strategy
monthly_returns, final_capital = apply_strategy(spy_data_monthly)

# Calculate performance metrics
cumulative_returns = (1 + np.array(monthly_returns)).cumprod()
total_return = (final_capital - 5000) / 5000
annualized_return = np.mean(monthly_returns) * 12
annualized_volatility = np.std(monthly_returns) * np.sqrt(12)
sharpe_ratio = annualized_return / annualized_volatility

# Print performance metrics
print(f"Final Portfolio Value: ${final_capital:.2f}")
print(f"Total Return: {total_return * 100:.2f}%")
print(f"Annualized Return: {annualized_return * 100:.2f}%")
print(f"Annualized Volatility: {annualized_volatility * 100:.2f}%")
print(f"Sharpe Ratio: {sharpe_ratio:.2f}")

# Ensure x and y have the same length
spy_data_monthly = spy_data_monthly.iloc[1:]  # Exclude the first month for alignment

# Plot cumulative returns
plt.figure(figsize=(10, 6))
plt.plot(spy_data_monthly.index, cumulative_returns, label='Strategy')
plt.plot(spy_data_monthly.index, (spy_data_monthly['Adj Close'] / spy_data_monthly['Adj Close'].iloc[0]), label='SPY')
plt.title('Strategy vs SPY Cumulative Returns')
plt.xlabel('Year')
plt.ylabel('Cumulative Return')
plt.legend()
plt.grid(True)
plt.show()


In [8]:

# Example Kelly data for each week (replace with actual Kelly fractions)
kelly_fractions = pd.Series(0.2, index=spy_data_weekly.index)  # Placeholder values, use real data here

# Define the strategy
def apply_strategy(df, kelly_fractions):
    initial_capital = 25000  # Starting with $25,000
    capital = initial_capital
    position = 0  # No initial position
    returns = []

    for i in range(1, len(df)):
        week = df['Week'].iloc[i]
        kelly_fraction = kelly_fractions.iloc[i]
        
        if week in [7, 10, 12]:  # Buy in week 7, 10, and 12
            if position == 0:
                position_size = capital * kelly_fraction
                position = position_size / df['Adj Close'].iloc[i]
                capital -= position_size
        elif week in [24, 37]:  # Sell or hedge in weeks 24 and 37
            if position > 0:
                capital += position * df['Adj Close'].iloc[i]
                position = 0
        else:  # Hold position or implement a hedge
            if position > 0:
                capital += position * df['Adj Close'].iloc[i]
                position = 0
            position_size = capital * kelly_fraction
            position = position_size / df['Adj Close'].iloc[i]
            capital -= position_size
        
        # Calculate weekly return
        total_value = capital + position * df['Adj Close'].iloc[i]
        weekly_return = (total_value - initial_capital) / initial_capital
        returns.append(weekly_return)

    # Final capital
    if position > 0:
        capital += position * df['Adj Close'].iloc[-1]

    return returns, capital

# Backtest the strategy
weekly_returns, final_capital = apply_strategy(spy_data_weekly, kelly_fractions)

# Calculate performance metrics
cumulative_returns = (1 + np.array(weekly_returns)).cumprod()
total_return = (final_capital - 25000) / 25000
annualized_return = np.mean(weekly_returns) * 52
annualized_volatility = np.std(weekly_returns) * np.sqrt(52)
sharpe_ratio = annualized_return / annualized_volatility

# Print performance metrics
print(f"Final Portfolio Value: ${final_capital:.2f}")
print(f"Total Return: {total_return * 100:.2f}%")
print(f"Annualized Return: {annualized_return * 100:.2f}%")
print(f"Annualized Volatility: {annualized_volatility * 100:.2f}%")
print(f"Sharpe Ratio: {sharpe_ratio:.2f}")

# Ensure x and y have the same length
spy_data_weekly = spy_data_weekly.iloc[1:]  # Exclude the first week for alignment

# Plot cumulative returns
plt.figure(figsize=(10, 6))
plt.plot(spy_data_weekly.index, cumulative_returns, label='Strategy')
plt.plot(spy_data_weekly.index, (spy_data_weekly['Adj Close'] / spy_data_weekly['Adj Close'].iloc[0]), label='SPY')
plt.title('Strategy vs SPY Cumulative Returns')
plt.xlabel('Year')
plt.ylabel('Cumulative Return')
plt.legend()
plt.grid(True)
plt.show()


NameError: name 'spy_data_weekly' is not defined