# Load all the libraries and frameworks

In [1]:
import yfinance as yf
import yoptions as yo
import optionlab as ol
import pandas as pd
from pandas_datareader import data as pdr
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import scipy as si
from scipy import stats
from statsmodels.tsa.seasonal import seasonal_decompose
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, mean_absolute_error, mean_squared_error
import concurrent.futures
import backtrader as bt
import quandl
import QuantLib as ql
import quantstats as qs
from datetime import datetime

# Import custom functions
from data_collection.load_data import load_price_data, fetch_financial_data
from data_collection.resample_data import resample_to_monthly
from data_collection.technicals import bollinger_bands, macd, rsi, woodie_pivots, obv, atr, stochastic_oscillator, ichimoku_cloud
# from data_collection.fundamentals import fetch_fundamentals, calculate_fair_value
from data_collection.fetch_options import get_options_data, get_spy_options

# Import functions from the provided files
from analysis.seasonality_analysis import seasonality_analysis, display_seasonality_stats, plot_seasonality, create_seasonality_table, display_all_monthly_statistics
from analysis.forecasting import forecast_and_plot_complete


In [2]:
# Set the default matplotlib figure size (width, height) used by every plot created after this cell
plt.rcParams['figure.figsize'] = (8, 6)  # Change these values to your desired size


# Create a CSV for a list of ETFs and leveraged ETFs

In [3]:
    # Create summary CSV for the ticker list over the given date range.
    # NOTE(review): create_summary_csv is neither defined nor imported before
    # this cell runs (the only definition in this file is commented out), so
    # the call below fails with the NameError shown in the cell output.
    tickers = ['SPY', 'QQQ', 'TQQQ', 'SQQQ', 'SOXL', 'TSLL', 'NVDL']
    start_date = '2000-01-01'
    end_date = '2024-01-01'
    create_summary_csv(tickers, start_date, end_date)

NameError: name 'create_summary_csv' is not defined

# Get the initial seasonality of each asset ('SPY', 'QQQ', 'TQQQ', 'SQQQ', 'SOXL', 'TSLL', 'NVDL')

In [None]:

def calculate_returns(df):
    """Add a 'Return' column with the row-to-row percent change of 'Adj Close'.

    The column is written onto ``df`` itself (the frame is modified in place)
    and that same frame is returned. Values are expressed in percent, so a
    move from 100 to 110 yields 10.0. The first row has no predecessor and
    is therefore NaN.
    """
    adj_close = df['Adj Close']
    fractional_change = adj_close.pct_change()
    df['Return'] = fractional_change * 100
    return df

def create_seasonality_table(df):
    """Build the seasonality table from a frame that has a 'Return' column.

    Rows whose 'Return' is missing are dropped, the remaining rows are passed
    through ``resample_to_monthly``, and a 'Monthly Return' column (percent
    change of the monthly 'Adj Close', times 100) is added. The function
    returns whatever ``seasonality_analysis`` produces for that monthly frame.
    """
    rows_with_return = df.dropna(subset=['Return'])
    monthly = resample_to_monthly(rows_with_return)
    monthly_pct_change = monthly['Adj Close'].pct_change() * 100
    monthly['Monthly Return'] = monthly_pct_change
    return seasonality_analysis(monthly)

def visualize_seasonality_table(seasonality_table, title):
    """Render ``seasonality_table`` as an annotated heatmap and show it.

    The heatmap uses the red-yellow-green colormap centered on zero, is
    given ``title`` as its plot title, and is displayed immediately via
    ``plt.show()``. Nothing is returned.
    """
    heatmap_options = {
        'annot': True,      # print each cell's value on the map
        'cmap': 'RdYlGn',
        'center': 0,        # anchor the colormap midpoint at zero
    }
    sns.heatmap(seasonality_table, **heatmap_options)
    plt.title(title)
    plt.show()

def display_all_monthly_statistics(df):
    """Show the monthly statistics for a price DataFrame.

    The frame is passed through ``resample_to_monthly``, a 'Monthly Return'
    column (percent change of the monthly 'Adj Close', times 100) is added,
    and the monthly frame is handed to ``display_seasonality_stats``.
    Nothing is returned.
    """
    monthly = resample_to_monthly(df)
    monthly_pct_change = monthly['Adj Close'].pct_change() * 100
    monthly['Monthly Return'] = monthly_pct_change
    display_seasonality_stats(monthly)

def analyze_ticker(ticker, start_date, end_date):
    """Run the full seasonality workflow for a single ticker.

    Steps: load prices with ``load_price_data``, make sure an 'Adj Close'
    column is available, compute returns, plot the seasonality heatmap and
    display the monthly statistics. If 'Adj Close' is missing, a message
    listing the available columns is printed and the function returns
    early without plotting.
    """
    df = load_price_data(ticker, start=start_date, end=end_date)

    # A bare Series is promoted to a one-column frame named 'Adj Close'.
    if isinstance(df, pd.Series):
        df = df.to_frame(name='Adj Close')

    has_adj_close = 'Adj Close' in df.columns
    if not has_adj_close:
        print(f"Column 'Adj Close' not found in the data for {ticker}. Available columns: {df.columns}")
        return

    with_returns = calculate_returns(df)

    table = create_seasonality_table(with_returns)
    heatmap_title = f'Seasonality of {ticker} Returns'
    visualize_seasonality_table(table, heatmap_title)
    display_all_monthly_statistics(with_returns)

def main(tickers=None, start_date='2000-01-01', end_date='2024-01-01'):
    """Run the seasonality analysis for each requested ticker.

    Called with no arguments this behaves exactly as before: it analyzes the
    notebook's standard ETF list from 2000-01-01 to 2024-01-01.

    Args:
        tickers: Iterable of ticker symbols, or a single symbol as a string.
            ``None`` (the default) selects the standard ETF list. An empty
            iterable analyzes nothing.
        start_date: Start of the price history, forwarded to ``analyze_ticker``.
        end_date: End of the price history, forwarded to ``analyze_ticker``.
    """
    if tickers is None:
        tickers = ['SPY', 'QQQ', 'TQQQ', 'SQQQ', 'SOXL', 'TSLL', 'NVDL']
    elif isinstance(tickers, str):
        # A bare string would otherwise be iterated character by character.
        tickers = [tickers]

    for ticker in tickers:
        analyze_ticker(ticker, start_date, end_date)

# Entry point: run the analysis only when this code executes as the main module, not when it is imported.
if __name__ == "__main__":
    main()
    


### Explanation of Results and Interpretation for Each Asset in the Notebook

The notebook includes analysis for multiple assets, specifically SPY, QQQ, TQQQ, SQQQ, SOXL, TSLL, and NVDL. Each function follows a similar pattern:
1. Load price data.
2. Calculate returns.
3. Create a seasonality table.
4. Visualize the seasonality table.
5. Display monthly statistics.

#### General Approach:
1. **Load Price Data**:
   - Using `yfinance` to load adjusted closing prices for the specified period.
2. **Calculate Returns**:
   - Daily returns are calculated as the percentage change in adjusted closing prices.
3. **Create Seasonality Table**:
   - Monthly returns are calculated and aggregated to show mean, standard deviation, count of observations, and the probability of positive returns.
4. **Visualize Seasonality Table**:
   - A heatmap is used to visualize the seasonality statistics.
5. **Display Monthly Statistics**:
   - Monthly mean returns and other statistics are printed.

#### Metrics Explained:
- **Mean Monthly Return**: This is the average return for a particular month across all years in the dataset. A positive mean indicates that the asset generally performs well in that month, while a negative mean suggests poorer performance.
- **Standard Deviation (std)**: This measures the volatility of returns for a particular month. A higher standard deviation indicates more variability and hence higher risk.
- **Count**: This is the number of observations or data points available for that particular month. A higher count improves the reliability of the mean and standard deviation.
- **Positive Probability**: This is the probability that the returns for a given month are positive. It is calculated as the proportion of months with positive returns to the total number of months. A higher positive probability suggests more consistent positive performance in that month.

### SPY (S&P 500 ETF)
- **Mean Monthly Return**: Generally positive, with notable highs in April (2.0%) and November (2.4%).
- **Standard Deviation**: Moderate volatility, with the highest standard deviation in October (5.8%).
- **Positive Probability**: High probability of positive returns in April and November (75%).

### QQQ (Nasdaq-100 ETF)
- **Mean Monthly Return**: Positive overall, with high returns in November (2.9%) and January (0.92%).
- **Standard Deviation**: High volatility in February (8.1%) and October (8.0%).
- **Positive Probability**: Higher probability of positive returns in May (75%) and November (62%).

### TQQQ (Triple-Leveraged QQQ ETF)
- **Mean Monthly Return**: Extremely high in some months, e.g., July (10.9%) and April (11.8%).
- **Standard Deviation**: Extremely high volatility, particularly in February (24.9%) and November (18.1%).
- **Positive Probability**: High probability of positive returns in April (67%) and July (62%).

### SQQQ (Triple-Leveraged Inverse QQQ ETF)
- **Mean Monthly Return**: Negative in most months, reflecting the inverse nature of the ETF. Most negative mean return in July (-13%).
- **Standard Deviation**: Volatile, especially in November (9.8%) and January (17%).
- **Positive Probability**: Low probability of positive returns, with 50% probability in June and August being the highest.

### SOXL (Triple-Leveraged Semiconductor ETF)
- **Mean Monthly Return**: High returns in certain months, e.g., November (7.0%) and January (4.9%).
- **Standard Deviation**: High volatility, particularly in March (26.7%) and November (16.2%).
- **Positive Probability**: High probability of positive returns in October (62%) and November (67%).

### TSLL (Leveraged Single-Stock Tesla ETF)
- **Mean Monthly Return**: Volatile, with high returns in May (11.5%) and November (8.6%).
- **Standard Deviation**: Extremely high volatility in February (40.2%) and October (27.6%).
- **Positive Probability**: High probability of positive returns in May (64%) and November (67%).

### NVDL (Leveraged Single-Stock Nvidia ETF)
- **Mean Monthly Return**: Volatile, with high returns in April (10.8%) and November (8.7%).
- **Standard Deviation**: High volatility, especially in March (24.5%) and November (26.8%).
- **Positive Probability**: High probability of positive returns in April (67%) and November (62%).

### Interpretation:
1. **Seasonality Trends**:
   - Some ETFs exhibit clear seasonality patterns, such as higher returns in certain months.
   - Leveraged ETFs (e.g., TQQQ, SOXL, TSLL) show extreme returns and volatility, emphasizing the high risk-reward nature.
2. **Investment Strategy**:
   - Investors could use this seasonality information to time entries and exits.
   - For instance, historically strong months might be preferred for initiating long positions.
3. **Risk Management**:
   - Higher standard deviations indicate periods of higher risk, necessitating careful risk management.
   - Leveraged and inverse ETFs, due to their high volatility, should be approached with caution.

### Position Sizing and Risk Management Methods:
#### Kelly Criterion:
The Kelly Criterion is a formula used to determine the optimal size of a series of bets so as to maximize the expected logarithm of wealth (equivalently, the long-run growth rate of the portfolio). It balances risk and reward by considering the probability of winning and the payoff.

\[ f^* = \frac{bp - q}{b} \]

Where:
- \( f^* \) is the fraction of the portfolio to bet.
- \( b \) is the net odds received on the bet (the profit per unit staked if the bet wins).
- \( p \) is the probability of winning.
- \( q \) is the probability of losing, which is \( 1 - p \).

#### Fixed Ratio Method:
This method involves increasing position size based on the amount of profit accumulated. It's commonly used in futures and options trading.

1. Determine the base position size.
2. Increase the position size by a fixed amount after reaching a certain profit threshold.

#### Fixed Fractional Method:
This method involves risking a fixed percentage of the portfolio on each trade. It's simple and helps in preserving capital.

1. Decide the percentage of the portfolio to risk (e.g., 2%).
2. Calculate the dollar risk per trade based on the stop loss.

#### Managing Margin:
Managing margin involves maintaining enough funds in your account to cover the margin requirements for leveraged positions. This can prevent margin calls and forced liquidation.

- **Initial Margin**: The amount required to open a position.
- **Maintenance Margin**: The minimum amount that must be maintained in the account.

#### Hedging:
Hedging involves taking an offsetting position in a related security to mitigate risk. Common hedging strategies include using options and futures.

- **Options**: Buying puts to protect against downside risk.
- **Futures**: Shorting futures contracts to hedge against a potential decline in the asset's price.

#### Technical Analysis:
Technical analysis involves using historical price data and technical indicators to forecast future price movements. Common tools include:

- **Moving Averages**: Used to smooth out price data to identify trends.
- **Relative Strength Index (RSI)**: Measures the speed and change of price movements.
- **Bollinger Bands**: Provides a relative definition of high and low prices.

#### Fundamental Analysis:
Fundamental analysis involves evaluating an asset's intrinsic value based on economic and financial factors. Key elements include:

- **Earnings Reports**: Assessing a company's profitability.
- **Economic Indicators**: Analyzing GDP growth, unemployment rates, etc.
- **Valuation Ratios**: Using P/E ratio, P/B ratio, etc., to determine if an asset is overvalued or undervalued.

By understanding these trends and applying appropriate position sizing and risk management strategies, investors can make more informed decisions, potentially leveraging seasonal patterns to optimize returns and manage risks.


In [None]:
# import yfinance as yf
# import pandas as pd
# import numpy as np
# import matplotlib.pyplot as plt
# import seaborn as sns
# from sklearn.model_selection import train_test_split, GridSearchCV
# from sklearn.ensemble import RandomForestClassifier
# from sklearn.metrics import accuracy_score
# import backtrader as bt
# from datetime import datetime
# from statsmodels.tsa.seasonal import seasonal_decompose
# from arch import arch_model
# from statsmodels.tsa.arima.model import ARIMA

# # Import custom functions from data_collection
# from data_collection.load_data import load_price_data
# from data_collection.technicals import (
#     bollinger_bands, macd, rsi, woodie_pivots, obv, atr, stochastic_oscillator, ichimoku_cloud
# )

# # Import custom functions from analysis
# from analysis.seasonality_analysis import (
#     seasonality_analysis, display_seasonality_stats, plot_seasonality,
#     calculate_returns, create_seasonality_table, visualize_seasonality_table, display_all_monthly_statistics
# )

# # Import custom functions from forecasting
# from analysis.forecasting import forecast_and_plot_complete

# # Import custom functions from fundamentals
# from data_collection.fundementals import (
#     fetch_fundamentals, calculate_fair_value, get_cost_of_equity, calculate_wacc
# )

# # Example usage
# def main():
#     # Set the parameters
#     tickers = ['SPY', 'QQQ', 'TQQQ', 'SQQQ', 'SOXL', 'TSLL', 'NVDL']
#     start_date = '2000-01-01'
#     end_date = '2024-01-01'
    
#     # Analyze each ticker
#     for ticker in tickers:
#         analyze_ticker(ticker, start_date, end_date)

#     # Create a summary CSV
#     create_summary_csv(tickers, start_date, end_date)

# def analyze_ticker(ticker, start_date, end_date):
#     df = load_price_data(ticker, start=start_date, end=end_date)
    
#     if isinstance(df, pd.Series):
#         df = df.to_frame(name='Adj Close')
    
#     if 'Adj Close' not in df.columns:
#         print(f"Column 'Adj Close' not found in the data for {ticker}. Available columns: {df.columns}")
#         return
    
#     # Calculate returns
#     df = calculate_returns(df)
    
#     # Add technical indicators
#     df = add_technical_indicators(df)
    
#     # Create and visualize the seasonality table
#     seasonality_table = create_seasonality_table(df)
#     visualize_seasonality_table(seasonality_table, f'Seasonality of {ticker} Returns')
#     display_all_monthly_statistics(df)
    
#     # Perform machine learning analysis
#     model, accuracy = machine_learning_analysis(df)
    
#     # Perform fundamental analysis
#     pe_ratio, pb_ratio, debt_to_equity = get_fundamental_ratios(ticker)
#     print(f"P/E Ratio: {pe_ratio}, P/B Ratio: {pb_ratio}, Debt to Equity: {debt_to_equity}")
    
#     # Perform ARIMA and GARCH forecasts
#     arima_forecast(df)
#     garch_forecast(df)
    
#     # Backtest strategy
#     backtest_strategy(df)

# def add_technical_indicators(df):
#     df = bollinger_bands(df)
#     df = macd(df)
#     df = rsi(df)
#     df = woodie_pivots(df)
#     df = obv(df)
#     df = atr(df)
#     df = stochastic_oscillator(df)
#     df = ichimoku_cloud(df)
#     return df

# def create_summary_csv(tickers, start_date, end_date, filename='summary.csv'):
#     summary_data = []
    
#     for ticker in tickers:
#         df = load_price_data(ticker, start=start_date, end=end_date)
        
#         if isinstance(df, pd.Series):
#             df = df.to_frame(name='Adj Close')
        
#         if 'Adj Close' not in df.columns:
#             print(f"Column 'Adj Close' not found in the data for {ticker}. Available columns: {df.columns}")
#             continue
        
#         df = calculate_returns(df)
#         seasonality_table = create_seasonality_table(df)
        
#         for month, stats in seasonality_table.iterrows():
#             mean_return = stats['mean']
#             std_dev = stats['std']
#             count = stats['count']
#             positive_prob = stats['positive_prob']
#             kelly_size = apply_kelly_method(mean_return, std_dev, positive_prob)
            
#             summary_data.append({
#                 'Ticker': ticker,
#                 'Month': month,
#                 'Mean Return': mean_return,
#                 'Standard Deviation': std_dev,
#                 'Count': count,
#                 'Positive Probability': positive_prob,
#                 'Kelly Size': kelly_size
#             })
    
#     summary_df = pd.DataFrame(summary_data)
#     summary_df.to_csv(filename, index=False)
#     print(f"Summary CSV created: {filename}")

# def apply_kelly_method(mean_return, std_dev, win_prob):
#     b = mean_return / std_dev  # Assuming b is the edge ratio
#     kelly_fraction = win_prob - ((1 - win_prob) / b)
#     return kelly_fraction

# def machine_learning_analysis(df):
#     df['Target'] = (df['Return'] > 0).astype(int)
#     features = ['Adj Close', 'Return']
#     X = df[features].shift(1).dropna()
#     y = df['Target'].shift(1).dropna()
#     X, y = X.align(y, join='inner', axis=0)
    
#     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
#     model = RandomForestClassifier(n_estimators=100, random_state=42)
#     model.fit(X_train, y_train)
#     y_pred = model.predict(X_test)
#     accuracy = accuracy_score(y_test, y_pred)

#     print(f"Model Accuracy: {accuracy:.2f}")
#     return model, accuracy

# def get_fundamental_ratios(ticker):
#     stock = yf.Ticker(ticker)
#     pe_ratio = stock.info['trailingPE']
#     pb_ratio = stock.info['priceToBook']
#     debt_to_equity = stock.info['debtToEquity']
#     return pe_ratio, pb_ratio, debt_to_equity

# def arima_forecast(df, column='Adj Close', order=(5, 1, 0)):
#     model = ARIMA(df[column], order=order)
#     model_fit = model.fit()
#     forecast = model_fit.forecast(steps=10)
#     return forecast

# def garch_forecast(df, column='Adj Close'):
#     model = arch_model(df[column], vol='Garch', p=1, q=1)
#     model_fit = model.fit()
#     forecast = model_fit.forecast(horizon=10)
#     return forecast

# def backtest_strategy(df):
#     class MyStrategy(bt.Strategy):
#         params = (('maperiod', 15),)

#         def __init__(self):
#             self.dataclose = self.datas[0].close
#             self.order = None
#             self.sma = bt.indicators.SimpleMovingAverage(self.datas[0], period=self.params.maperiod)

#         def next(self):
#             if self.order:
#                 return

#             if not self.position:
#                 if self.dataclose[0] > self.sma[0]:
#                     self.order = self.buy()
#             else:
#                 if self.dataclose[0] < self.sma[0]:
#                     self.order = self.sell()

#     cerebro = bt.Cerebro()
#     cerebro.addstrategy(MyStrategy)
#     data = bt.feeds.PandasData(dataname=df)
#     cerebro.adddata(data)
#     cerebro.run()
#     cerebro.plot()

# if __name__ == "__main__":
#     main()
