# Load all the libraries and frameworks

In [31]:
import yfinance as yf
import pandas as pd
from pandas_datareader import data as pdr
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import scipy as si
from scipy import stats
from statsmodels.tsa.seasonal import seasonal_decompose
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, mean_absolute_error, mean_squared_error
import concurrent.futures
import backtrader as bt
import quandl
import QuantLib as ql
import quantstats as qs
from datetime import datetime

from data_collection.data_forecasting import *
from data_collection.data_seasonality import *
from data_collection.data_visualization import *

# # Import custom functions
# from data_collection.data_processing    import (
#     load_data, load_price_data, 
#     half_kelly_criterion, calculate_half_kelly_fractions, position_size_half_kelly,
#     # calculate_returns, 
#     create_summary_csv, analyze_ticker,
#     bollinger_bands, macd, rsi, woodie_pivots, atr, stochastic_oscillator,
#     # create_seasonality_table, resample_to_monthly, fetch_fundamentals,
#     # half_kelly_criterion, calculate_half_kelly_fractions, position_size_half_kelly, 
#     # backtest_strategy_with_half_kelly, get_fundamentals_data
# )
# # from data_collection.data_processing import calculate_returns

# # def calculate_returns(df):
# #     """Calculate the daily returns."""
# #     df['Return'] = df['Adj Close'].pct_change() * 100
# #     return df

# from data_collection.data_visualization import (
#     plot_technical_indicators, plot_monthly_technical_indicators, plot_with_macro_data, 
#     plot_spreads, plot_ghost_candles, plot_cumulative_returns_with_half_kelly
# )


# from data_collection.data_forecasting import (
#     forecast_and_plot, download_stock_data, forecast_future, scale_data, create_sequences, build_and_train_model,
#     calculate_metrics, plot_forecasts,plot_ghost_candles, machine_learning_analysis, get_fundamental_ratios, arima_forecast, garch_forecast, backtest_strategy
# )

# from data_collection.data_seasonality import (
#     create_seasonality_table
# )

# from seasonality_analysis import (
#     seasonality_analysis, display_seasonality_stats, display_all_monthly_statistics,
#     visualize_seasonality_table
# )


In [32]:

def calculate_returns(df):
    """Add a ``Return`` column with the row-over-row percentage change.

    The change is computed from the ``Adj Close`` column and expressed in
    percent (a 1% move is stored as ``1.0``).  The frame is modified in
    place and the very same object is returned.
    """
    adjusted_close = df['Adj Close']
    pct_move = adjusted_close.pct_change()
    df['Return'] = pct_move * 100
    return df

# Load AAPL prices through the project helper, passing the two date strings
# positionally.
data = load_price_data('AAPL', '2010-01-01', '2021-01-01')

# Calculate the returns
# NOTE: calculate_returns adds the 'Return' column to `data` in place and
# returns that same frame, so `returns` and `data` refer to one object.
returns = calculate_returns(data)
print(returns)
# daily_technical = get_daily_woodies_pivots_with_bollinger(data, 20, 2, 2)
# print(daily_technical)

  df.index += _pd.TimedeltaIndex(dst_error_hours, 'h')
[*********************100%%**********************]  1 of 1 completed

                  Open        High         Low       Close   Adj Close  \
Date                                                                     
2010-01-04    7.622500    7.660714    7.585000    7.643214    6.461977   
2010-01-05    7.664286    7.699643    7.616071    7.656429    6.473150   
2010-01-06    7.656429    7.686786    7.526786    7.534643    6.370186   
2010-01-07    7.562500    7.571429    7.466071    7.520714    6.358409   
2010-01-08    7.510714    7.571429    7.466429    7.570714    6.400682   
...                ...         ...         ...         ...         ...   
2020-12-24  131.320007  133.460007  131.100006  131.970001  129.339020   
2020-12-28  133.990005  137.339996  133.509995  136.690002  133.964920   
2020-12-29  138.050003  138.789993  134.339996  134.869995  132.181198   
2020-12-30  135.580002  135.990005  133.399994  133.720001  131.054138   
2020-12-31  134.080002  134.740005  131.720001  132.690002  130.044647   

               Volume    Return  
Dat




In [33]:
# Set the default (width, height) used for every matplotlib figure created
# after this cell runs.
plt.rcParams['figure.figsize'] = (8, 6)  # Change these values to your desired size


In [34]:

def main():
    """Run ``analyze_ticker`` once per ETF over the 2000-2024 window."""
    window_start, window_end = '2000-01-01', '2024-01-01'
    universe = ['SPY', 'QQQ', 'TQQQ', 'SQQQ', 'SOXL', 'TSLL', 'NVDL']

    for symbol in universe:
        analyze_ticker(symbol, window_start, window_end)

    # # Create summary CSV
    # create_summary_csv(universe, window_start, window_end)

if __name__ == "__main__":
    main()


  df.index += _pd.TimedeltaIndex(dst_error_hours, 'h')
[*********************100%%**********************]  1 of 1 completed


NameError: name 'arima_forecast' is not defined

### Explanation of Results and Interpretation for Each Asset in the Notebook

The notebook includes analysis for multiple assets, specifically SPY, QQQ, TQQQ, SQQQ, SOXL, TSLL, and NVDL. Each function follows a similar pattern:
1. Load price data.
2. Calculate returns.
3. Create a seasonality table.
4. Visualize the seasonality table.
5. Display monthly statistics.

#### General Approach:
1. **Load Price Data**:
   - Using `yfinance` to load adjusted closing prices for the specified period.
2. **Calculate Returns**:
   - Daily returns are calculated as the percentage change in adjusted closing prices.
3. **Create Seasonality Table**:
   - Monthly returns are calculated and aggregated to show mean, standard deviation, count of observations, and the probability of positive returns.
4. **Visualize Seasonality Table**:
   - A heatmap is used to visualize the seasonality statistics.
5. **Display Monthly Statistics**:
   - Monthly mean returns and other statistics are printed.

#### Metrics Explained:
- **Mean Monthly Return**: This is the average return for a particular month across all years in the dataset. A positive mean indicates that the asset generally performs well in that month, while a negative mean suggests poorer performance.
- **Standard Deviation (std)**: This measures the volatility of returns for a particular month. A higher standard deviation indicates more variability and hence higher risk.
- **Count**: This is the number of observations or data points available for that particular month. A higher count improves the reliability of the mean and standard deviation.
- **Positive Probability**: This is the probability that the returns for a given month are positive. It is calculated as the proportion of months with positive returns to the total number of months. A higher positive probability suggests more consistent positive performance in that month.

### SPY (S&P 500 ETF)
- **Mean Monthly Return**: Generally positive, with notable highs in April (2.0%) and November (2.4%).
- **Standard Deviation**: Moderate volatility, with the highest standard deviation in October (5.8%).
- **Positive Probability**: High probability of positive returns in April and November (75%).

### QQQ (Nasdaq-100 ETF)
- **Mean Monthly Return**: Positive overall, with high returns in November (2.9%) and January (0.92%).
- **Standard Deviation**: High volatility in February (8.1%) and October (8.0%).
- **Positive Probability**: Higher probability of positive returns in May (75%) and November (62%).

### TQQQ (Triple-Leveraged QQQ ETF)
- **Mean Monthly Return**: Extremely high in some months, e.g., July (10.9%) and April (11.8%).
- **Standard Deviation**: Extremely high volatility, particularly in February (24.9%) and November (18.1%).
- **Positive Probability**: High probability of positive returns in April (67%) and July (62%).

### SQQQ (Triple-Leveraged Inverse QQQ ETF)
- **Mean Monthly Return**: Negative in most months, reflecting the inverse nature of the ETF. The most negative mean return is in July (-13%).
- **Standard Deviation**: Volatile, especially in November (9.8%) and January (17%).
- **Positive Probability**: Low probability of positive returns, with 50% probability in June and August being the highest.

### SOXL (Triple-Leveraged Semiconductor ETF)
- **Mean Monthly Return**: High returns in certain months, e.g., November (7.0%) and January (4.9%).
- **Standard Deviation**: High volatility, particularly in March (26.7%) and November (16.2%).
- **Positive Probability**: High probability of positive returns in October (62%) and November (67%).

### TSLL (Leveraged Single-Stock Tesla ETF)
- **Mean Monthly Return**: Volatile, with high returns in May (11.5%) and November (8.6%).
- **Standard Deviation**: Extremely high volatility in February (40.2%) and October (27.6%).
- **Positive Probability**: High probability of positive returns in May (64%) and November (67%).

### NVDL (Leveraged Single-Stock Nvidia ETF)
- **Mean Monthly Return**: Volatile, with high returns in April (10.8%) and November (8.7%).
- **Standard Deviation**: High volatility, especially in March (24.5%) and November (26.8%).
- **Positive Probability**: High probability of positive returns in April (67%) and November (62%).

### Interpretation:
1. **Seasonality Trends**:
   - Some ETFs exhibit clear seasonality patterns, such as higher returns in certain months.
   - Leveraged ETFs (e.g., TQQQ, SOXL, TSLL) show extreme returns and volatility, emphasizing the high risk-reward nature.
2. **Investment Strategy**:
   - Investors could use this seasonality information to time entries and exits.
   - For instance, historically strong months might be preferred for initiating long positions.
3. **Risk Management**:
   - Higher standard deviations indicate periods of higher risk, necessitating careful risk management.
   - Leveraged and inverse ETFs, due to their high volatility, should be approached with caution.

### Position Sizing and Risk Management Methods:
#### Kelly Criterion:
The Kelly Criterion is a formula used to determine the optimal size of a series of bets to maximize the logarithm of wealth. It balances risk and reward by considering the probability of winning and the payoff.

\[ f^* = \frac{bp - q}{b} \]

Where:
- \( f^* \) is the fraction of the portfolio to bet.
- \( b \) is the odds received on the bet.
- \( p \) is the probability of winning.
- \( q \) is the probability of losing, which is \( 1 - p \).

#### Fixed Ratio Method:
This method involves increasing position size based on the amount of profit accumulated. It's commonly used in futures and options trading.

1. Determine the base position size.
2. Increase the position size by a fixed amount after reaching a certain profit threshold.

#### Fixed Fractional Method:
This method involves risking a fixed percentage of the portfolio on each trade. It's simple and helps in preserving capital.

1. Decide the percentage of the portfolio to risk (e.g., 2%).
2. Calculate the dollar risk per trade based on the stop loss.

#### Managing Margin:
Managing margin involves maintaining enough funds in your account to cover the margin requirements for leveraged positions. This can prevent margin calls and forced liquidation.

- **Initial Margin**: The amount required to open a position.
- **Maintenance Margin**: The minimum amount that must be maintained in the account.

#### Hedging:
Hedging involves taking an offsetting position in a related security to mitigate risk. Common hedging strategies include using options and futures.

- **Options**: Buying puts to protect against downside risk.
- **Futures**: Shorting futures contracts to hedge against a potential decline in the asset's price.

#### Technical Analysis:
Technical analysis involves using historical price data and technical indicators to forecast future price movements. Common tools include:

- **Moving Averages**: Used to smooth out price data to identify trends.
- **Relative Strength Index (RSI)**: Measures the speed and change of price movements.
- **Bollinger Bands**: Provides a relative definition of high and low prices.

#### Fundamental Analysis:
Fundamental analysis involves evaluating an asset's intrinsic value based on economic and financial factors. Key elements include:

- **Earnings Reports**: Assessing a company's profitability.
- **Economic Indicators**: Analyzing GDP growth, unemployment rates, etc.
- **Valuation Ratios**: Using P/E ratio, P/B ratio, etc., to determine if an asset is overvalued or undervalued.

By understanding these trends and applying appropriate position sizing and risk management strategies, investors can make more informed decisions, potentially leveraging seasonal patterns to optimize returns and manage risks.


In [None]:



def main():
    """Write the summary CSV for the ETF universe over the 2000-2024 window."""
    window_start, window_end = '2000-01-01', '2024-01-01'
    universe = ['SPY', 'QQQ', 'TQQQ', 'SQQQ', 'SOXL', 'TSLL', 'NVDL']

    # for symbol in universe:
    #     analyze_ticker(symbol, window_start, window_end)

    # Create summary CSV
    create_summary_csv(universe, window_start, window_end)

if __name__ == "__main__":
    main()


In [None]:


def main():
    """Analyze every ETF in the study list, one ticker at a time."""
    symbols = ['SPY', 'QQQ', 'TQQQ', 'SQQQ', 'SOXL', 'TSLL', 'NVDL']
    first_day = '2000-01-01'
    last_day = '2024-01-01'

    for symbol in symbols:
        analyze_ticker(symbol, first_day, last_day)

if __name__ == "__main__":
    main()


In [None]:

# Import custom functions
from data_collection.data_processing    import (
    load_data, load_price_data, 
    analyze_ticker,
    half_kelly_criterion, calculate_half_kelly_fractions, position_size_half_kelly,
    # calculate_return
    # create_seasonality_table, resample_to_monthly, fetch_fundamentals,
    bollinger_bands, macd, rsi, woodie_pivots, atr, stochastic_oscillator,
    # half_kelly_criterion, calculate_half_kelly_fractions, position_size_half_kelly, 
    # backtest_strategy_with_half_kelly, get_fundamentals_data
)
# from data_collection.data_processing import calculate_returns

# ]
# from data_collection.data_visualization import (
#     plot_technical_indicators, plot_monthly_technical_indicators, plot_with_macro_data, 
#     plot_spreads, plot_ghost_candles, plot_cumulative_returns_with_half_kelly
# )


# from data_collection.data_forecasting import (
#     forecast_and_plot, download_stock_data, forecast_future, scale_data, create_sequences, build_and_train_model,
#     calculate_metrics, plot_forecasts,plot_ghost_candles, machine_learning_analysis, get_fundamental_ratios, arima_forecast, garch_forecast, backtest_strategy
# )

# from seasonality_analysis import (
#     seasonality_analysis, display_seasonality_stats, display_all_monthly_statistics,
#     visualize_seasonality_table
# )


In [None]:
# Set the default (width, height) used for every matplotlib figure created
# after this cell runs.
plt.rcParams['figure.figsize'] = (8, 6)  # Change these values to your desired size


In [35]:

def main():
    """Run ``analyze_ticker`` once per ETF over the 2000-2024 window."""
    window_start, window_end = '2000-01-01', '2024-01-01'
    universe = ['SPY', 'QQQ', 'TQQQ', 'SQQQ', 'SOXL', 'TSLL', 'NVDL']

    for symbol in universe:
        analyze_ticker(symbol, window_start, window_end)

    # # Create summary CSV
    # create_summary_csv(universe, window_start, window_end)

if __name__ == "__main__":
    main()


  df.index += _pd.TimedeltaIndex(dst_error_hours, 'h')
[*********************100%%**********************]  1 of 1 completed


NameError: name 'arima_forecast' is not defined

### Explanation of Results and Interpretation for Each Asset in the Notebook

The notebook includes analysis for multiple assets, specifically SPY, QQQ, TQQQ, SQQQ, SOXL, TSLL, and NVDL. Each function follows a similar pattern:
1. Load price data.
2. Calculate returns.
3. Create a seasonality table.
4. Visualize the seasonality table.
5. Display monthly statistics.

#### General Approach:
1. **Load Price Data**:
   - Using `yfinance` to load adjusted closing prices for the specified period.
2. **Calculate Returns**:
   - Daily returns are calculated as the percentage change in adjusted closing prices.
3. **Create Seasonality Table**:
   - Monthly returns are calculated and aggregated to show mean, standard deviation, count of observations, and the probability of positive returns.
4. **Visualize Seasonality Table**:
   - A heatmap is used to visualize the seasonality statistics.
5. **Display Monthly Statistics**:
   - Monthly mean returns and other statistics are printed.

#### Metrics Explained:
- **Mean Monthly Return**: This is the average return for a particular month across all years in the dataset. A positive mean indicates that the asset generally performs well in that month, while a negative mean suggests poorer performance.
- **Standard Deviation (std)**: This measures the volatility of returns for a particular month. A higher standard deviation indicates more variability and hence higher risk.
- **Count**: This is the number of observations or data points available for that particular month. A higher count improves the reliability of the mean and standard deviation.
- **Positive Probability**: This is the probability that the returns for a given month are positive. It is calculated as the proportion of months with positive returns to the total number of months. A higher positive probability suggests more consistent positive performance in that month.

### SPY (S&P 500 ETF)
- **Mean Monthly Return**: Generally positive, with notable highs in April (2.0%) and November (2.4%).
- **Standard Deviation**: Moderate volatility, with the highest standard deviation in October (5.8%).
- **Positive Probability**: High probability of positive returns in April and November (75%).

### QQQ (Nasdaq-100 ETF)
- **Mean Monthly Return**: Positive overall, with high returns in November (2.9%) and January (0.92%).
- **Standard Deviation**: High volatility in February (8.1%) and October (8.0%).
- **Positive Probability**: Higher probability of positive returns in May (75%) and November (62%).

### TQQQ (Triple-Leveraged QQQ ETF)
- **Mean Monthly Return**: Extremely high in some months, e.g., July (10.9%) and April (11.8%).
- **Standard Deviation**: Extremely high volatility, particularly in February (24.9%) and November (18.1%).
- **Positive Probability**: High probability of positive returns in April (67%) and July (62%).

### SQQQ (Triple-Leveraged Inverse QQQ ETF)
- **Mean Monthly Return**: Negative in most months, reflecting the inverse nature of the ETF. The most negative mean return is in July (-13%).
- **Standard Deviation**: Volatile, especially in November (9.8%) and January (17%).
- **Positive Probability**: Low probability of positive returns, with 50% probability in June and August being the highest.

### SOXL (Triple-Leveraged Semiconductor ETF)
- **Mean Monthly Return**: High returns in certain months, e.g., November (7.0%) and January (4.9%).
- **Standard Deviation**: High volatility, particularly in March (26.7%) and November (16.2%).
- **Positive Probability**: High probability of positive returns in October (62%) and November (67%).

### TSLL (Leveraged Single-Stock Tesla ETF)
- **Mean Monthly Return**: Volatile, with high returns in May (11.5%) and November (8.6%).
- **Standard Deviation**: Extremely high volatility in February (40.2%) and October (27.6%).
- **Positive Probability**: High probability of positive returns in May (64%) and November (67%).

### NVDL (Leveraged Single-Stock Nvidia ETF)
- **Mean Monthly Return**: Volatile, with high returns in April (10.8%) and November (8.7%).
- **Standard Deviation**: High volatility, especially in March (24.5%) and November (26.8%).
- **Positive Probability**: High probability of positive returns in April (67%) and November (62%).

### Interpretation:
1. **Seasonality Trends**:
   - Some ETFs exhibit clear seasonality patterns, such as higher returns in certain months.
   - Leveraged ETFs (e.g., TQQQ, SOXL, TSLL) show extreme returns and volatility, emphasizing the high risk-reward nature.
2. **Investment Strategy**:
   - Investors could use this seasonality information to time entries and exits.
   - For instance, historically strong months might be preferred for initiating long positions.
3. **Risk Management**:
   - Higher standard deviations indicate periods of higher risk, necessitating careful risk management.
   - Leveraged and inverse ETFs, due to their high volatility, should be approached with caution.

### Position Sizing and Risk Management Methods:
#### Kelly Criterion:
The Kelly Criterion is a formula used to determine the optimal size of a series of bets to maximize the logarithm of wealth. It balances risk and reward by considering the probability of winning and the payoff.

\[ f^* = \frac{bp - q}{b} \]

Where:
- \( f^* \) is the fraction of the portfolio to bet.
- \( b \) is the odds received on the bet.
- \( p \) is the probability of winning.
- \( q \) is the probability of losing, which is \( 1 - p \).

#### Fixed Ratio Method:
This method involves increasing position size based on the amount of profit accumulated. It's commonly used in futures and options trading.

1. Determine the base position size.
2. Increase the position size by a fixed amount after reaching a certain profit threshold.

#### Fixed Fractional Method:
This method involves risking a fixed percentage of the portfolio on each trade. It's simple and helps in preserving capital.

1. Decide the percentage of the portfolio to risk (e.g., 2%).
2. Calculate the dollar risk per trade based on the stop loss.

#### Managing Margin:
Managing margin involves maintaining enough funds in your account to cover the margin requirements for leveraged positions. This can prevent margin calls and forced liquidation.

- **Initial Margin**: The amount required to open a position.
- **Maintenance Margin**: The minimum amount that must be maintained in the account.

#### Hedging:
Hedging involves taking an offsetting position in a related security to mitigate risk. Common hedging strategies include using options and futures.

- **Options**: Buying puts to protect against downside risk.
- **Futures**: Shorting futures contracts to hedge against a potential decline in the asset's price.

#### Technical Analysis:
Technical analysis involves using historical price data and technical indicators to forecast future price movements. Common tools include:

- **Moving Averages**: Used to smooth out price data to identify trends.
- **Relative Strength Index (RSI)**: Measures the speed and change of price movements.
- **Bollinger Bands**: Provides a relative definition of high and low prices.

#### Fundamental Analysis:
Fundamental analysis involves evaluating an asset's intrinsic value based on economic and financial factors. Key elements include:

- **Earnings Reports**: Assessing a company's profitability.
- **Economic Indicators**: Analyzing GDP growth, unemployment rates, etc.
- **Valuation Ratios**: Using P/E ratio, P/B ratio, etc., to determine if an asset is overvalued or undervalued.

By understanding these trends and applying appropriate position sizing and risk management strategies, investors can make more informed decisions, potentially leveraging seasonal patterns to optimize returns and manage risks.


In [None]:



def main():
    """Write the summary CSV for the ETF universe over the 2000-2024 window."""
    window_start, window_end = '2000-01-01', '2024-01-01'
    universe = ['SPY', 'QQQ', 'TQQQ', 'SQQQ', 'SOXL', 'TSLL', 'NVDL']

    # for symbol in universe:
    #     analyze_ticker(symbol, window_start, window_end)

    # Create summary CSV
    create_summary_csv(universe, window_start, window_end)

if __name__ == "__main__":
    main()


In [None]:
# import yfinance as yf
# import yoptions as yo
# import optionlab as ol
# import pandas as pd
# from pandas_datareader import data as pdr
# import matplotlib.pyplot as plt
# import seaborn as sns
# import numpy as np
# import scipy as si
# from scipy import stats
# from statsmodels.tsa.seasonal import seasonal_decompose
# from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
# from sklearn.linear_model import LinearRegression
# from sklearn.model_selection import train_test_split, GridSearchCV
# from sklearn.metrics import accuracy_score
# import concurrent.futures
# import backtrader as bt
# import quandl
# import QuantLib as ql
# import quantstats as qs

# # Import custom functions
# from data_collection.load_data import load_price_data
# from data_collection.resample_data import resample_to_monthly
# from data_collection.technicals import bollinger_bands, macd, rsi, woodie_pivots, obv, atr, stochastic_oscillator

# # Import functions from the provided files
# from analysis.seasonality_analysis import seasonality_analysis, display_seasonality_stats, plot_seasonality

# # Set default figure size
# plt.rcParams['figure.figsize'] = (8, 6)

# Technical Indicators
def ichimoku_cloud(df, tenkan_window=9, kijun_window=26, senkou_b_window=52,
                   displacement=26):
    """Add the five Ichimoku Cloud lines to ``df`` and return it.

    Columns written (in place): ``tenkan_sen``, ``kijun_sen``,
    ``senkou_span_a``, ``senkou_span_b`` and ``chikou_span``.

    Args:
        df: Price frame with ``High`` and ``Low`` columns plus a closing
            price in ``Adj Close`` (preferred) or ``Close``.
        tenkan_window: Rolling window of the conversion line.
        kijun_window: Rolling window of the base line.
        senkou_b_window: Rolling window of leading span B.
        displacement: Rows the leading spans are shifted forward and the
            lagging span is shifted backward.

    Returns:
        The same ``df`` object.  If a required column is missing a message
        is printed and ``df`` is returned without any new columns.
    """
    if 'High' not in df.columns or 'Low' not in df.columns:
        print("Data does not contain 'High' or 'Low' columns necessary for Ichimoku Cloud.")
        return df

    # Resolve the closing-price column before writing anything, so a frame
    # lacking 'Adj Close' is not left half-populated by a late KeyError.
    if 'Adj Close' in df.columns:
        close_column = 'Adj Close'
    elif 'Close' in df.columns:
        close_column = 'Close'
    else:
        print("Data does not contain 'Adj Close' or 'Close' columns necessary for Ichimoku Cloud.")
        return df

    def midpoint(window):
        # Average of the rolling highest high and the rolling lowest low.
        highest = df['High'].rolling(window=window).max()
        lowest = df['Low'].rolling(window=window).min()
        return (highest + lowest) / 2

    df['tenkan_sen'] = midpoint(tenkan_window)
    df['kijun_sen'] = midpoint(kijun_window)

    # Leading spans are pushed forward by `displacement` rows.
    df['senkou_span_a'] = ((df['tenkan_sen'] + df['kijun_sen']) / 2).shift(displacement)
    df['senkou_span_b'] = midpoint(senkou_b_window).shift(displacement)

    # Lagging span: the close pulled back by `displacement` rows.
    df['chikou_span'] = df[close_column].shift(-displacement)

    return df

def add_technical_indicators(df):
    """Pass ``df`` through each technical-indicator helper in sequence.

    Each helper receives the frame returned by the previous one.  When a
    helper raises ``KeyError`` the error is printed, the remaining helpers
    are skipped, and ``df`` (as last successfully reassigned) is returned.
    Any other exception type propagates to the caller.
    """
    try:
        df = bollinger_bands(df)
        df = macd(df)
        df = rsi(df)
        df = woodie_pivots(df)
        # NOTE(review): `obv` is not in any explicit import list visible in
        # this notebook - presumably supplied by a star import; confirm.
        df = obv(df)
        df = atr(df)
        df = stochastic_oscillator(df)
    except KeyError as e:
        # Best effort: report the missing column and keep what was computed.
        print(f"Missing column for technical indicator calculation: {e}")
    return df

# Advanced Statistical Models
def arima_forecast(df, column='Adj Close', order=(5, 1, 0), steps=10):
    """Fit an ARIMA model to one column of ``df`` and forecast ahead.

    Args:
        df: Frame holding the series to model.
        column: Name of the column to fit.
        order: ARIMA ``(p, d, q)`` order handed to the model.
        steps: Number of out-of-sample steps to forecast (default 10,
            matching the previously hard-coded horizon).

    Returns:
        The result of the fitted model's ``forecast(steps=steps)`` call.
    """
    # Imported locally because no cell visible in this notebook brings
    # ``ARIMA`` into scope explicitly; without it the call below can raise
    # NameError depending on what the star imports happen to export.
    from statsmodels.tsa.arima.model import ARIMA

    fitted = ARIMA(df[column], order=order).fit()
    return fitted.forecast(steps=steps)

def garch_forecast(df, column='Adj Close'):
    """Fit a GARCH(1, 1) model to ``df[column]`` and forecast 10 steps ahead.

    Returns whatever the fitted model's ``forecast(horizon=10)`` call yields.
    """
    # NOTE(review): ``arch_model`` is not imported explicitly in any cell
    # visible here - presumably it arrives via a star import; confirm.
    series = df[column]
    fitted = arch_model(series, vol='Garch', p=1, q=1).fit()
    return fitted.forecast(horizon=10)

# Fundamental Analysis
def get_fundamental_ratios(ticker):
    """Look up three fundamental ratios for ``ticker`` through yfinance.

    Returns a ``(pe_ratio, pb_ratio, debt_to_equity)`` tuple read from the
    ``trailingPE``, ``priceToBook`` and ``debtToEquity`` entries of the
    ticker's ``info`` mapping; an absent entry yields ``None``.
    """
    instrument = yf.Ticker(ticker)
    wanted_keys = ('trailingPE', 'priceToBook', 'debtToEquity')
    ratios = tuple(instrument.info.get(key) for key in wanted_keys)
    return ratios

# Hyperparameter Optimization
def optimize_model_hyperparameters(X, y):
    """Grid-search a random forest classifier and return the best estimator.

    The search covers ``n_estimators``, ``max_depth`` and
    ``min_samples_split`` with 5-fold cross-validation; the forest itself
    is seeded with ``random_state=42``.
    """
    search_space = dict(
        n_estimators=[50, 100, 200],
        max_depth=[None, 10, 20, 30],
        min_samples_split=[2, 5, 10],
    )
    searcher = GridSearchCV(
        estimator=RandomForestClassifier(random_state=42),
        param_grid=search_space,
        cv=5,
    )
    searcher.fit(X, y)
    return searcher.best_estimator_

# Backtesting Framework
class MyStrategy(bt.Strategy):
    """Price-versus-SMA strategy for backtrader.

    Buys when flat and the close is above the simple moving average;
    sells when in the market and the close is below it.  Only one order
    may be outstanding at a time.
    """

    # maperiod: look-back length of the simple moving average.
    params = (
        ('maperiod', 15),
    )

    def __init__(self):
        self.dataclose = self.datas[0].close
        # Order currently outstanding, or None when free to trade.
        self.order = None
        self.sma = bt.indicators.SimpleMovingAverage(self.datas[0], period=self.params.maperiod)

    def notify_order(self, order):
        """Release the pending-order guard once an order is no longer live.

        Without this reset ``self.order`` keeps the first order forever and
        ``next`` returns early on every subsequent bar.
        """
        if order.status in [order.Submitted, order.Accepted]:
            # Still working at the broker: keep blocking new orders.
            return
        # Completed, canceled, margin-rejected, etc.: allow new orders.
        self.order = None

    def next(self):
        # An order is still pending; do not stack a second one.
        if self.order:
            return

        if not self.position:
            if self.dataclose[0] > self.sma[0]:
                self.order = self.buy()
        else:
            if self.dataclose[0] < self.sma[0]:
                self.order = self.sell()

def backtest_strategy(df):
    """Run ``MyStrategy`` over ``df`` in a fresh Cerebro engine, then plot.

    ``df`` is wrapped in a backtrader ``PandasData`` feed.  Nothing is
    returned.
    """
    engine = bt.Cerebro()
    engine.addstrategy(MyStrategy)
    engine.adddata(bt.feeds.PandasData(dataname=df))
    engine.run()
    engine.plot()

# Main Analysis Function
def analyze_ticker(ticker, start_date, end_date):
    """Run the per-ticker pipeline: indicators, forecasts, ML and backtest.

    Args:
        ticker: Symbol passed to ``load_price_data`` and to the
            fundamentals lookup.
        start_date: Start of the price window, forwarded as ``start``.
        end_date: End of the price window, forwarded as ``end``.

    Returns:
        None.  Results are printed or plotted.  The function returns early,
        after printing a message, when required price columns are missing.
    """
    df = load_price_data(ticker, start=start_date, end=end_date)

    # A bare Series is treated as the adjusted close.
    if isinstance(df, pd.Series):
        df = df.to_frame(name='Adj Close')

    if 'Adj Close' not in df.columns:
        print(f"Column 'Adj Close' not found in the data for {ticker}. Available columns: {df.columns}")
        return

    if 'Close' not in df.columns or 'High' not in df.columns or 'Low' not in df.columns:
        print(f"Columns 'Close', 'High', and 'Low' are required. Available columns: {df.columns}")
        return

    df['Return'] = df['Adj Close'].pct_change() * 100
    df = ichimoku_cloud(df)
    df = add_technical_indicators(df)

    # ARIMA and GARCH Forecasts (return values are currently not used)
    arima_forecast(df)
    garch_forecast(df)

    # Fundamental Ratios
    pe_ratio, pb_ratio, debt_to_equity = get_fundamental_ratios(ticker)
    print(f"P/E Ratio: {pe_ratio}, P/B Ratio: {pb_ratio}, Debt to Equity: {debt_to_equity}")

    # Machine Learning with Hyperparameter Optimization
    df['Target'] = (df['Return'] > 0).astype(int)
    features = ['Adj Close', 'Return']
    # Lag only the features: each row then pairs the previous row's values
    # with the current row's direction.  Lagging the target as well would
    # make it a direct function of the lagged 'Return' feature.
    X = df[features].shift(1).dropna()
    # Select the target by label; DataFrame.align against a Series needs an
    # explicit axis and raises ValueError without one.
    y = df.loc[X.index, 'Target']
    # Chronological hold-out (no shuffling) so the reported accuracy is
    # measured on rows the model was not fitted on.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, shuffle=False
    )
    best_model = optimize_model_hyperparameters(X_train, y_train)
    y_pred = best_model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Optimized Model Accuracy: {accuracy:.2f}")

    # Backtest Strategy
    backtest_strategy(df)

def main():
    """Analyze every ETF in the study list, one ticker at a time."""
    symbols = ['SPY', 'QQQ', 'TQQQ', 'SQQQ', 'SOXL', 'TSLL', 'NVDL']
    first_day = '2000-01-01'
    last_day = '2024-01-01'

    for symbol in symbols:
        analyze_ticker(symbol, first_day, last_day)

if __name__ == "__main__":
    main()


# Step 1: Data Processing

In [None]:
# Import custom functions
from data_collection.data_processing import load_price_data, fetch_financial_data, load_data
from data_collection.data_processing import resample_to_monthly
from data_collection.data_processing import bollinger_bands, macd, rsi, woodie_pivots, atr, stochastic_oscillator, ichimoku_cloud

# Step 2: Data Analysis of Seasonality

In [None]:
# Import functions from the provided files
from analysis.seasonality_analysis import seasonality_analysis, display_seasonality_stats, plot_seasonality, create_seasonality_table, display_all_monthly_statistics

# Import functions from the provided files
from analysis.seasonality_analysis import seasonality_analysis, display_seasonality_stats, plot_seasonality, create_seasonality_table, display_all_monthly_statistics

# Step 3: Forecasting with ARIMA, SARIMA, and SARIMAX

In [None]:

from QuantitativeFinance.Studies.Seasonality.data_collection.data_forecasting import forecast_and_plot_complete, forecast_and_plot
# forecast_and_plot_with_confidence_intervals

# Create a CSV of historical technical data from a given asset

# Machine Learning, Feature Importance and Feature Engineering

In [None]:
# # Create summary CSV
# tickers = ['SPY', 'QQQ', 'TQQQ', 'SQQQ', 'SOXL', 'TSLL', 'NVDL']
# start_date = '2000-01-01'
# end_date = '2024-01-01'
# create_summary_csv(tickers, start_date, end_date)

# Get the initial seasonality of each asset ('SPY', 'QQQ', 'TQQQ', 'SQQQ', 'SOXL', 'TSLL', 'NVDL')

In [None]:

def main():
    """Run ``analyze_ticker`` once per ETF over the 2000-2024 window."""
    window_start, window_end = '2000-01-01', '2024-01-01'
    universe = ['SPY', 'QQQ', 'TQQQ', 'SQQQ', 'SOXL', 'TSLL', 'NVDL']

    for symbol in universe:
        analyze_ticker(symbol, window_start, window_end)

    # # Create summary CSV
    # create_summary_csv(universe, window_start, window_end)

if __name__ == "__main__":
    main()
