In [28]:
import yfinance as yf
import pandas as pd
import numpy as np
import plotly.graph_objects as go

## Commodity Futures Data Retrieval

This code utilizes the `yfinance` library to download commodity futures contract data for a specified time period, with the option to choose the interval (default is daily). The function `get_data` returns a pandas dataframe with the downloaded data and drops any rows with missing values.

In [9]:
# Get the data for the commodity futures contracts
def get_data(ticker, start_date, end_date, interval='1d'):
    """Download price history for one ticker and drop incomplete rows.

    Args:
        ticker: Symbol passed to ``yf.download`` (e.g. ``'GC=F'``).
        start_date: Start of the requested period, forwarded as ``start``.
        end_date: End of the requested period, forwarded as ``end``.
        interval: Bar size forwarded to ``yf.download``; defaults to ``'1d'``.

    Returns:
        The object returned by ``yf.download`` with every row that contains
        a missing value removed.
    """
    request = {'start': start_date, 'end': end_date, 'interval': interval}
    return yf.download(ticker, **request).dropna()

## Test `get_data` Function

The code below tests the get_data function by downloading data for Gold Futures from January 1, 2019 to December 31, 2020 and plotting it using plotly.graph_objects.

In [10]:
# Smoke-test get_data on two years of Gold Futures
ticker = 'GC=F'
start_date, end_date = '2019-01-01', '2020-12-31'
data = get_data(ticker, start_date, end_date)

# Draw the closing price as a single line
close_trace = go.Scatter(x=data.index, y=data['Close'], name='Close')
gold_layout = {'title': 'Gold Futures', 'xaxis_title': 'Date', 'yaxis_title': 'Price'}
fig = go.Figure(data=[close_trace])
fig.update_layout(**gold_layout)
fig.show()

[*********************100%***********************]  1 of 1 completed


## Portfolio Data Retrieval

The code below defines a list of commodity futures contract tickers and retrieves data for each ticker from the past 20 years, saving it into one dataframe.

In [11]:
from datetime import datetime, timedelta
# Define a portfolio of commodity futures contracts.
# Each ticker is listed exactly once: a repeated ticker would be downloaded again
# and simply overwrite its own column below.
portfolio = ['GC=F', 'SI=F', 'CL=F', 'NG=F', 'ZC=F', 'ZS=F', 'ZM=F', 'ZW=F', 'ZL=F', 'ZB=F', 'ZN=F', 'ZT=F', 'ZQ=F']

# Get the past 20 years of data for the portfolio and save into one dataframe
# (one 'Close' column per ticker)
end_date = datetime.today()
start_date = end_date - timedelta(days=20*365)
data = pd.DataFrame()
for ticker in portfolio:
    data[ticker] = get_data(ticker, start_date, end_date)['Close']

print(data.head())

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

## Plot Data for Corn Futures

The code below plots data for Corn Futures (/ZC=F) using plotly.graph_objects. This is used for testing purposes.

In [19]:
# Line chart of the /ZC=F (Corn Futures) closing prices
corn_close_trace = go.Scatter(x=data.index, y=data['ZC=F'], name='Close')
corn_layout = {'title': '/ZC=F (Corn Futures)', 'xaxis_title': 'Date', 'yaxis_title': 'Price'}
fig = go.Figure(data=[corn_close_trace])
fig.update_layout(**corn_layout)
fig.show()


## Corn Futures Seasonality Analysis Test

This code retrieves corn futures data using a pandas dataframe, calculates the daily percent change, and groups the data by day of the year for each year. The resulting dataframe is saved as a .csv file and plotted using plotly.graph_objects.

In [20]:
import pandas as pd

# Define the start and end years
start_year = 2003
end_year = 2023

# Corn closes and their day-over-day % change
corn = pd.DataFrame()
corn['ZC=F'] = data['ZC=F']
corn['ZC=F % Chg.'] = corn['ZC=F'].pct_change(1)

# Convert index to "year" - "day of year" format
corn.index = corn.index.strftime('%Y-%j')

# Build one row per year with the day of the year as columns.
# Days without a quote become NaN via Series.get, so only a missing key is
# tolerated and any other error still surfaces (the old bare `except` hid them).
# Rows are collected in a list and assembled once instead of concatenating
# inside the loop.
corn_pct_change = corn['ZC=F % Chg.']
yearly_rows = []
for year in range(start_year, end_year + 1):
    date_range = pd.date_range(start=f"{year}-01-01", end=f"{year}-12-31", freq="D")
    yearly_rows.append(pd.Series(
        {day: corn_pct_change.get(f"{year}-{day:03d}", np.nan) for day in date_range.dayofyear},
        name=year,
    ))
yearly_data = pd.DataFrame(yearly_rows)

# Print the resulting DataFrame
print(yearly_data)

yearly_data.to_csv('yearly_data.csv')

      1         2         3         4         5         6         7    \
2003  NaN       NaN       NaN       NaN       NaN       NaN       NaN   
2004  NaN       NaN       NaN       NaN  0.028455 -0.005929 -0.005964   
2005  NaN       NaN -0.014652 -0.003717  0.002488   0.03598 -0.009581   
2006  NaN       NaN  0.019699 -0.007955   -0.0126   -0.0058       NaN   
2007  NaN       0.0 -0.050609 -0.022267  0.016563       NaN       NaN   
2008  NaN  0.015368  0.007568  0.001609       NaN       NaN -0.001071   
2009  NaN  0.012899       NaN       NaN -0.002426  0.039514 -0.025731   
2010  NaN       NaN       NaN   0.00965  0.000597  0.007164 -0.010077   
2011  NaN       NaN -0.013514 -0.019339  0.017666 -0.027856 -0.011628   
2012  NaN       NaN  0.018561       0.0 -0.022779       0.0       NaN   
2013  NaN -0.010741 -0.002172 -0.013058       NaN       NaN  0.007718   
2014  NaN -0.003555  0.007134       NaN       NaN  0.010035 -0.004091   
2015  NaN -0.003149       NaN       NaN    0.0259 -

In [21]:
# Cumulative sum of the per-day mean % change over the last 15 and last 5 rows (years);
# gaps are forward-filled so the lines are continuous
average_15 = yearly_data.iloc[-15:].mean().cumsum()
average_15 = average_15.ffill()
average_5 = yearly_data.iloc[-5:].mean().cumsum()
average_5 = average_5.ffill()

# Cumulative % change of the second-to-last row (last year) and the last row (this year);
# this year's missing days are dropped rather than filled
last_year = yearly_data.iloc[-2].cumsum()
last_year = last_year.ffill()
this_year = yearly_data.iloc[-1].cumsum()
this_year = this_year.dropna()

# Plot 15 Year Avg, 5 Year Avg, Last Year, and This Year
fig = go.Figure()
fig.add_trace(go.Scatter(x=average_15.index, y=average_15, name='15 Year Avg'))
fig.add_trace(go.Scatter(x=average_5.index, y=average_5, name='5 Year Avg'))
fig.add_trace(go.Scatter(x=last_year.index, y=last_year, name='Last Year'))
fig.add_trace(go.Scatter(x=this_year.index, y=this_year, name='This Year'))
# The traces are cumulative % changes, not prices, so label the y-axis accordingly
fig.update_layout(title='/ZC=F (Corn Futures) Seasonality', xaxis_title='Day of Year', yaxis_title='% Change')
fig.show()


## Example Strategizing: Corn Futures
1. Find the longest streaks of positive and negative returns in the 15 and 5 year averages
2. Find statistics for the 15 and 5 year averages when they are bought and sold during the longest positive and negative streaks

To accomplish this, we must first find the streaks and then highlight them. The code below finds the longest streaks of positive and negative returns in the 15 and 5 year averages and highlights them on the seasonality plot.

In [22]:
# Find the longest positive number streak in the 15 Year Avg
longest_stretch = 0
current_stretch = 0
pos_start_day_15 = 0
pos_end_day_15 = 0

# Mean daily % change per day of year over the last 15 rows (not cumulated)
average_15_notsum = yearly_data.iloc[-15:].mean()

# Convert to a df
average_15_notsum = pd.DataFrame(average_15_notsum, columns=['% Chg.'])

# Add a column to the df with whether the value is positive
average_15_notsum['Positive'] = average_15_notsum['% Chg.'] > 0

# Loop over the values in the "Positive" column and keep track of the longest streak and what day it starts and ends
for i in average_15_notsum['Positive'].index:
    # `i` is a 1-based day-of-year label but is used as a 0-based position in
    # `.index[...]` below, so label 366 would index one past the end. Skip it,
    # exactly as seasonality() does.
    if i == 366:
        continue
    if average_15_notsum['Positive'][i]:
        current_stretch += 1
    else:
        current_stretch = 0
    if current_stretch > longest_stretch:
        longest_stretch = current_stretch
        # Positional lookup with a label: the stored days are one higher than
        # the real day of year, which is why every consumer subtracts 1.
        pos_start_day_15 = average_15_notsum.index[i - longest_stretch + 1]
        pos_end_day_15 = average_15_notsum.index[i]

print(f"The longest stretch of positive % changes in the 15 Year Avg is {longest_stretch} days and spans from {pos_start_day_15-1} to {pos_end_day_15-1}")


The longest stretch of positive % changes in the 15 Year Avg is 12 days and spans from 342 to 353


In [23]:
# Find the longest positive number streak in the 5 Year Avg
longest_stretch = 0
current_stretch = 0
pos_start_day_5 = 0
pos_end_day_5 = 0

# Mean daily % change per day of year over the last 5 rows (not cumulated)
average_5_notsum = yearly_data.iloc[-5:].mean()

# Convert to a df
average_5_notsum = pd.DataFrame(average_5_notsum, columns=['% Chg.'])

# Add a column to the df with whether the value is positive
average_5_notsum['Positive'] = average_5_notsum['% Chg.'] > 0

# Loop over the values in the "Positive" column and keep track of the longest streak and what day it starts and ends
for i in average_5_notsum['Positive'].index:
    # `i` is a 1-based day-of-year label but is used as a 0-based position in
    # `.index[...]` below, so label 366 would index one past the end. Skip it,
    # exactly as seasonality() does.
    if i == 366:
        continue
    if average_5_notsum['Positive'][i]:
        current_stretch += 1
    else:
        current_stretch = 0
    if current_stretch > longest_stretch:
        longest_stretch = current_stretch
        # Positional lookup with a label: the stored days are one higher than
        # the real day of year, which is why every consumer subtracts 1.
        pos_start_day_5 = average_5_notsum.index[i - longest_stretch + 1]
        pos_end_day_5 = average_5_notsum.index[i]

print(f"The longest stretch of positive % changes in the 5 Year Avg is {longest_stretch} days and spans from {pos_start_day_5-1} to {pos_end_day_5-1}")


The longest stretch of positive % changes in the 5 Year Avg is 14 days and spans from 348 to 361


In [24]:
# Plot the 15 Year Avg and the 5 Year Avg with the longest positive number streak highlighted
fig = go.Figure()
fig.add_trace(go.Scatter(x=average_15.index, y=average_15, name='15 Year Avg'))
fig.add_trace(go.Scatter(x=average_5.index, y=average_5, name='5 Year Avg'))

# The stored start/end days are one higher than the real day of year, hence the -1
for streak_start, streak_end in ((pos_start_day_15, pos_end_day_15), (pos_start_day_5, pos_end_day_5)):
    fig.add_shape(type="rect", xref="x", yref="paper", x0=streak_start-1, y0=0, x1=streak_end-1, y1=1, fillcolor="lightgreen", opacity=0.25, line_width=0)

# The traces are cumulative % changes, not prices, so label the y-axis accordingly
fig.update_layout(title='/ZC=F (Corn Futures) Seasonality', xaxis_title='Day of Year', yaxis_title='% Change')
fig.show()

In [25]:
# Find the longest negative number streak in the 15 Year Avg
longest_stretch = 0
current_stretch = 0
neg_start_day_15 = 0
neg_end_day_15 = 0

# Mean daily % change per day of year over the last 15 rows (not cumulated)
average_15_notsum = yearly_data.iloc[-15:].mean()

# Convert to a df
average_15_notsum = pd.DataFrame(average_15_notsum, columns=['% Chg.'])

# Add a column to the df with whether the value is negative
average_15_notsum['Negative'] = average_15_notsum['% Chg.'] < 0

# Loop over the values in the "Negative" column and keep track of the longest streak and what day it starts and ends
for i in average_15_notsum['Negative'].index:
    # `i` is a 1-based day-of-year label but is used as a 0-based position in
    # `.index[...]` below, so label 366 would index one past the end. Skip it,
    # exactly as seasonality() does.
    if i == 366:
        continue
    if average_15_notsum['Negative'][i]:
        current_stretch += 1
    else:
        current_stretch = 0
    if current_stretch > longest_stretch:
        longest_stretch = current_stretch
        # Positional lookup with a label: the stored days are one higher than
        # the real day of year, which is why every consumer subtracts 1.
        neg_start_day_15 = average_15_notsum.index[i - longest_stretch + 1]
        neg_end_day_15 = average_15_notsum.index[i]

print(f"The longest stretch of negative % changes in the 15 Year Avg is {longest_stretch} days and spans from {neg_start_day_15-1} to {neg_end_day_15-1}")


The longest stretch of negative % changes in the 15 Year Avg is 6 days and spans from 84 to 89


In [26]:
# Find the longest negative number streak in the 5 Year Avg
longest_stretch = 0
current_stretch = 0
neg_start_day_5 = 0
neg_end_day_5 = 0

# Mean daily % change per day of year over the last 5 rows (not cumulated)
average_5_notsum = yearly_data.iloc[-5:].mean()

# Convert to a df
average_5_notsum = pd.DataFrame(average_5_notsum, columns=['% Chg.'])

# Add a column to the df with whether the value is negative
average_5_notsum['Negative'] = average_5_notsum['% Chg.'] < 0

# Loop over the values in the "Negative" column and keep track of the longest streak and what day it starts and ends
for i in average_5_notsum['Negative'].index:
    # `i` is a 1-based day-of-year label but is used as a 0-based position in
    # `.index[...]` below, so label 366 would index one past the end. Skip it,
    # exactly as seasonality() does.
    if i == 366:
        continue
    if average_5_notsum['Negative'][i]:
        current_stretch += 1
    else:
        current_stretch = 0
    if current_stretch > longest_stretch:
        longest_stretch = current_stretch
        # Positional lookup with a label: the stored days are one higher than
        # the real day of year, which is why every consumer subtracts 1.
        neg_start_day_5 = average_5_notsum.index[i - longest_stretch + 1]
        neg_end_day_5 = average_5_notsum.index[i]

print(f"The longest stretch of negative % changes in the 5 Year Avg is {longest_stretch} days and spans from {neg_start_day_5-1} to {neg_end_day_5-1}")


The longest stretch of negative % changes in the 5 Year Avg is 6 days and spans from 264 to 269


In [27]:
# Plot the 15 Year Avg and the 5 Year Avg with the longest positive and negative streaks highlighted
fig = go.Figure()
fig.add_trace(go.Scatter(x=average_15.index, y=average_15, name='15 Year Avg'))
fig.add_trace(go.Scatter(x=average_5.index, y=average_5, name='5 Year Avg'))

# (start, end, fill colour) for each streak: positive streaks in green, negative in red.
# The stored start/end days are one higher than the real day of year, hence the -1.
highlighted_streaks = [
    (pos_start_day_15, pos_end_day_15, "lightgreen"),
    (pos_start_day_5, pos_end_day_5, "lightgreen"),
    (neg_start_day_15, neg_end_day_15, "lightcoral"),
    (neg_start_day_5, neg_end_day_5, "lightcoral"),
]
for streak_start, streak_end, fill in highlighted_streaks:
    fig.add_shape(type="rect", xref="x", yref="paper", x0=streak_start-1, y0=0, x1=streak_end-1, y1=1, fillcolor=fill, opacity=0.25, line_width=0)

# Update the Title and Axis Labels and show the plot.
# The traces are cumulative % changes, not prices, so label the y-axis accordingly.
fig.update_layout(title='/ZC=F (Corn Futures) Seasonality', xaxis_title='Day of Year', yaxis_title='% Change')
fig.show()

## Putting it all Together into One Function:

Now that we have all of the pieces, we can put them together into one function that will take a list of tickers and return the best day of the year to buy and sell each ticker.

We can also perform a walk-forward-style analysis to see how the results would have performed over however many past years the user chooses, by creating a `nextTrade` dataframe that contains the best days to buy and sell for the following year.

In [None]:
import os

# Make a function to do all of the above for each ticker in the portfolio
# Row labels of the per-ticker statistics table. The blank-named last row repeats the
# column names and acts as the header row for the next ticker once tables are stacked.
_STATS_ROWS = ['Asset', 'Start Day', 'End Day', 'Longest Streak', '% Correct', 'Expected Value (%)',
               'Max DD From Entry (%)', 'Risk Reward Ratio', 'Sharpe Ratio', ' ']


def _yearly_pct_change_table(close, start_year, end_year):
    """Pivot a daily close series into a year x day-of-year table of daily % changes.

    Rows are the years ``start_year..end_year``; columns are the day-of-year numbers.
    Days with no quote are NaN.
    """
    pct_change = close.pct_change(1)
    # Key every observation as "YYYY-DDD" so a calendar day can be looked up directly
    pct_change.index = pct_change.index.strftime('%Y-%j')

    rows = []
    for year in range(start_year, end_year + 1):
        days = pd.date_range(start=f"{year}-01-01", end=f"{year}-12-31", freq="D").dayofyear
        # Series.get only tolerates a missing key; any other error still surfaces
        rows.append(pd.Series(
            {day: pct_change.get(f"{year}-{day:03d}", np.nan) for day in days},
            name=year,
        ))
    return pd.DataFrame(rows)


def _longest_streak(flags):
    """Return ``(start_day, end_day)`` of the first longest run of True values, or None.

    ``flags`` is a boolean Series indexed by day of year. Day 366 is skipped, as in
    the previous implementation; it only has data in leap years.
    """
    best_length = 0
    run_length = 0
    run_start = None
    best = None
    for day, flag in flags.items():
        if day == 366:
            continue
        if flag:
            if run_length == 0:
                run_start = day
            run_length += 1
        else:
            run_length = 0
        # Strict comparison keeps the earliest streak when several share the maximum length
        if run_length > best_length:
            best_length = run_length
            best = (run_start, day)
    return best


def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator``, or NaN when the denominator is zero."""
    if denominator == 0:
        return np.nan
    return numerator / denominator


def _streak_stats(yearly_data, streak, direction, ticker, label):
    """Build one column of the stats table (row label -> value) for a single streak.

    ``direction`` is +1 for a long (positive) streak and -1 for a short (negative)
    one, so every statistic is expressed from the trade's point of view. When
    ``streak`` is None only the 'Asset' and header rows are filled.
    """
    column = {'Asset': ticker, ' ': label}
    if streak is None:
        return column

    start_day, end_day = streak
    # Day d sits at column position d - 1, so this slice covers start_day..end_day inclusive
    window = yearly_data.iloc[:, start_day - 1:end_day]
    # Per-year summed % change at the end of the window (forward-filled over trailing gaps)
    final_returns = window.cumsum(axis=1).ffill(axis=1).iloc[:, -1] * direction

    expected = final_returns.mean()
    worst = final_returns.min()
    column.update({
        'Start Day': datetime.strptime(str(start_day), "%j").strftime("%m-%d"),
        'End Day': datetime.strptime(str(end_day), "%j").strftime("%m-%d"),
        'Longest Streak': end_day - start_day + 1,
        # Years without data count as incorrect, as before (denominator is all years)
        '% Correct': (final_returns > 0).sum() / len(final_returns),
        'Expected Value (%)': expected,
        'Max DD From Entry (%)': worst,
        'Risk Reward Ratio': abs(_safe_ratio(expected, worst)),
        'Sharpe Ratio': _safe_ratio(expected, final_returns.std()),
    })
    return column


# Make a function to do all of the above for each ticker in the portfolio
def seasonality(portfolio, start_date=None, end_date=None, output_dir="E:/path/of/your/choice/"):
    """Run the seasonality analysis for every ticker and collect streak statistics.

    For each ticker this downloads daily closes, builds a year x day-of-year table of
    % changes, finds the longest positive and negative streaks in the 15-year and
    5-year daily averages, writes an HTML chart, and computes per-streak statistics.

    Args:
        portfolio: Iterable of ticker symbols accepted by ``get_data``.
        start_date: First date of the window (datetime). Defaults to 20*365 days
            before ``end_date``, resolved at call time.
        end_date: Last date of the window (datetime). Defaults to
            ``datetime.today()``, resolved at call time.
        output_dir: Base directory for the HTML charts; a ``<start_year+5>-<end_year>``
            sub-folder is created inside it.

    Returns:
        ``(portfolio_stats, nextTrade)``: the stacked statistics tables of all tickers,
        and the stacked columns whose '% Correct' and 'Sharpe Ratio' are both >= 0.7
        (without the trailing header row). Either is an empty DataFrame when nothing
        was collected.
    """
    # Resolve defaults at call time; the old signature froze them when the cell was run
    # and read a global `end_date` that had to exist already.
    if end_date is None:
        end_date = datetime.today()
    if start_date is None:
        start_date = end_date - timedelta(days=20*365)

    start_year = start_date.year
    end_year = end_date.year

    # Folder for the HTML charts, created if it doesn't exist
    folder_path = os.path.join(output_dir, str(start_year + 5) + "-" + str(end_year))
    os.makedirs(folder_path, exist_ok=True)

    stats_frames = []
    trade_frames = []

    # NOTE(review): assigning each series into this shared frame aligns every later
    # ticker to the first ticker's dates; kept as-is to preserve existing results.
    data = pd.DataFrame()
    for ticker in portfolio:
        data[ticker] = get_data(ticker, start_date, end_date)['Close']
        yearly_data = _yearly_pct_change_table(data[ticker], start_year, end_year)

        # Mean daily % change per day of year over the last 15 / 5 years
        daily_avg_15 = yearly_data.iloc[-15:].mean()
        daily_avg_5 = yearly_data.iloc[-5:].mean()

        # Cumulative curves for the chart
        average_15 = daily_avg_15.cumsum().ffill()
        average_5 = daily_avg_5.cumsum().ffill()
        last_year = yearly_data.iloc[-2].cumsum().ffill()
        this_year = yearly_data.iloc[-1].cumsum().dropna()

        # Column label -> (streak, trade direction, highlight colour)
        streaks = {
            '15 Year Positive Streak': (_longest_streak(daily_avg_15 > 0), 1, "green"),
            '5 Year Positive Streak': (_longest_streak(daily_avg_5 > 0), 1, "lightgreen"),
            '15 Year Negative Streak': (_longest_streak(daily_avg_15 < 0), -1, "coral"),
            '5 Year Negative Streak': (_longest_streak(daily_avg_5 < 0), -1, "lightcoral"),
        }

        # Plot the averages, last year, this year and the 20D moving averages
        fig = go.Figure()
        fig.add_trace(go.Scatter(x=average_15.index, y=average_15, name='15 Year Avg'))
        fig.add_trace(go.Scatter(x=average_5.index, y=average_5, name='5 Year Avg'))
        fig.add_trace(go.Scatter(x=last_year.index, y=last_year, name='Last Year'))
        fig.add_trace(go.Scatter(x=this_year.index, y=this_year, name='This Year'))
        fig.add_trace(go.Scatter(x=average_15.index, y=average_15.rolling(20).mean(), name='15 Year Avg 20D MA'))
        fig.add_trace(go.Scatter(x=average_5.index, y=average_5.rolling(20).mean(), name='5 Year Avg 20D MA'))

        # Highlight each streak once (the positive ones used to be drawn twice)
        for streak, _, fill in streaks.values():
            if streak is not None:
                fig.add_shape(type="rect", xref="x", yref="paper", x0=streak[0], y0=0, x1=streak[1], y1=1,
                              fillcolor=fill, opacity=0.25, line_width=0)

        fig.update_layout(title=f'{ticker} Seasonality for {start_year} to {end_year}', xaxis_title='Day of Year', yaxis_title='% Change')

        # Save HTML of plot
        fig.write_html(os.path.join(folder_path, ticker + "_seasonality.html"))

        # One column per streak type, rows in the fixed _STATS_ROWS order
        stats = pd.DataFrame(
            {label: _streak_stats(yearly_data, streak, direction, ticker, label)
             for label, (streak, direction, _) in streaks.items()},
            index=_STATS_ROWS,
        )
        stats_frames.append(stats)

        # Keep the streaks whose % correct and Sharpe ratio are both at least 0.7,
        # dropping the trailing header row
        is_good = (stats.loc['% Correct'] >= 0.7) & (stats.loc['Sharpe Ratio'] >= 0.7)
        goodTrades = stats.loc[:, is_good].iloc[:-1, :]
        if not goodTrades.empty:
            trade_frames.append(goodTrades)

    # Stack everything once at the end instead of concatenating inside the loop
    portfolio_stats = pd.concat(stats_frames) if stats_frames else pd.DataFrame()
    nextTrade = pd.concat(trade_frames) if trade_frames else pd.DataFrame()

    # Return the portfolio stats and next trade df
    return portfolio_stats, nextTrade


# Define a portfolio of commodity futures contracts
portfolio = ['GC=F', 'SI=F', 'HG=F', 'CL=F', 'NG=F', 'ZC=F', 'ZS=F', 'ZM=F', 'ZW=F', 'ZL=F', 'ZB=F', 'ZN=F', 'ZT=F', 'ZQ=F']

# Make a range of years to test
years = range(2015, 2024)

# Run seasonality for each year as a walk forward test to create a portfolio of the best trades for the next year
for year in years:
    # 20-year window ending on January 1st of the test year
    start_date = datetime(year - 20, 1, 1)
    end_date = datetime(year, 1, 1)
    portfolio_stats, nextTrade = seasonality(portfolio, start_date=start_date, end_date=end_date)

    # The CSVs go to a folder relative to the working directory. seasonality() only
    # creates its own HTML folder, so make sure this one exists before writing.
    csv_folder = f'{start_date.year + 5}-{end_date.year}'
    os.makedirs(csv_folder, exist_ok=True)

    file_suffix = f'{start_date.year + 5}_{end_date.year}'
    portfolio_stats.to_csv(f'{csv_folder}/seasonality_{file_suffix}.csv')
    nextTrade.to_csv(f'{csv_folder}/nextTrade_{file_suffix}.csv')
