In [None]:
'''

Looping through multiple tickers

Trying pycaret time series with daily S&P features (with beta and rf).
1200 rows
target = 'M1_forward_return'
fold_strategy = 'expanding'
fold=5
fh=21

Dropped Mkt_M1_forward_return

SUCCESSFUL Out of sample forecast

Backtesting out of sample forecast strategy to see if it outperforms a buy and hold strategy
Strategy in this notebook is to buy if 1M predicted return is positive and have no position if 1M predicted return is negative, rebalanced daily.
This notebook looks at stocks one at a time and does not consider portfolio performance

Get beta and risk-free (rf) from SP500 betas_expectedreturns_MOM_MR_strategies
Get S&P features from S&P500 Features
'''

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pycaret
#from pycaret.regression import *
from pycaret.time_series import *

from pycaret.utils.time_series import clean_time_index

# Yahoo finance
import yfinance as yf

# Datetime
import datetime as dt
from datetime import datetime

In [None]:
# Universe of ticker symbols that the rest of the notebook loops over, one at a time
Ticker_list = [
    "NVDA", "META", "TSLA", "AMZN", "GOOG", "MSFT", "AAPL",
    "GS", "JPM", "NFLX", "NKE", "AAL", "LUV", "GM",
]

In [None]:
# Settings used when pulling price history from yfinance
start = pd.to_datetime('2023-01-01', format='%Y-%m-%d')   # first date requested
today_date = f"{datetime.today():%Y-%m-%d}"               # end date, as a YYYY-MM-DD string
period = 256       # lookback period for calculation of beta, realized returns
period_1m = 21     # sessions in the one-month window

In [None]:
# Build one yfinance Tickers object covering every symbol in Ticker_list
tickers = yf.Tickers(Ticker_list)

In [None]:
# Download daily bars for all tickers between `start` and `today_date`.
# NOTE(review): yfinance documents `end` as exclusive — confirm today's bar is not expected here.
df = tickers.download(start=start,end=today_date,interval='1D',)   #['Adj Close']
#df contains raw data downloaded from yfinance
# Keep only the 'Close' section of the download; the return calculations use df1
df1=df['Close']

In [None]:
# Inspect the closing prices selected from the yfinance download
df1

In [None]:
# Daily log returns for every ticker column: ln(close_t / close_{t-1}).
# The first row is NaN because it has no previous close to compare against.
previous_close = df1.shift(1)
returns_df = np.log(df1 / previous_close)

In [None]:
# Inspect the daily log returns (np.log of close over previous close)
returns_df

In [None]:
# One two-column frame per ticker: 'Price' (close) next to 'Returns' (daily log return).
# The date index of the download is kept, and the first row keeps its NaN return.
Yahoo_data = {
    symbol: pd.concat(
        [df1[symbol], returns_df[symbol]],
        axis=1,
        keys=['Price', 'Returns'],
    )
    for symbol in returns_df.columns
}

In [None]:
# Spot-check: show the 'Returns' column stored for META
Yahoo_data["META"]["Returns"]

In [None]:
# Read the feature CSV of every ticker into a dict keyed by ticker symbol.
# NOTE: the directory is an absolute, machine-specific path; adjust it before running elsewhere.
dfs = {
    ticker: pd.read_csv(f'/Users/bennyang/Downloads/Ticker Features/{ticker} features.csv')
    for ticker in Ticker_list
}

In [None]:
# Convert the 'Date' column of each feature table to pandas datetimes (updates the tables in place)
for features_df in dfs.values():
    features_df['Date'] = pd.to_datetime(features_df['Date'])

In [None]:
# Location of the beta / risk-free CSV (absolute, machine-specific path)
beta_rf_path = '/Users/bennyang/Downloads/beta&rf.csv'

# Load the CSV into a DataFrame; later cells filter it by its 'Ticker' column
Beta_rf = pd.read_csv(beta_rf_path)

# Echo the table to verify it was read correctly
print(Beta_rf)

In [None]:
#Attach each ticker's beta / risk-free rows to its feature table, then drop the
#market one-month forward return so it is not used as a feature.
#
#The per-ticker rows are built as a fresh frame (filter -> assign -> drop) instead of
#assigning into and dropping from a filtered slice of Beta_rf, which is chained
#assignment and raises SettingWithCopyWarning.
#
#NOTE: this cell is not idempotent. Re-running it after a successful run merges the
#beta columns a second time and then fails on the drop, because
#'Mkt_M1_forward_return' is already gone. Re-run the CSV-loading cells first.
for key in dfs:
    beta = (
        Beta_rf.loc[Beta_rf['Ticker'] == key]
        .assign(Date=lambda rows: pd.to_datetime(rows['Date']))
        .drop(columns='Ticker')
    )
    dfs[key] = (
        dfs[key]
        .merge(beta, how='left', on='Date')        #left join keeps every feature row
        .drop(columns='Mkt_M1_forward_return')     #drop market 1 month forward return
    )

In [None]:
# Disabled cell: the code below sits inside a string literal, so it does not run.
# If enabled it would index every frame by 'Date' and set its frequency to 'B'.
'''
#Index the dates and set frequency
for key,value in dfs.items():
    dfs[key].set_index("Date", inplace=True)
    dfs[key] = dfs[key].asfreq('B')

'''

In [None]:
#Split every ticker's table into a training window and a held-out test window.
#  Latest_Features   : training rows passed to PyCaret setup (target column included)
#  Test_Features     : held-out rows WITHOUT 'M1_forward_return', because
#                      PyCaret predict_model needs exogenous features only
#  M1_forward_return : the held-out actuals plus 'Date', kept for plotting later
#
#The window sizes are positional offsets from the end of each table.
#Edit them as necessary so the windows avoid NA values.
TRAIN_WINDOW_START = -1400   #training window begins this many rows before the end
TEST_ROWS = 221              #number of trailing rows held out for testing

Latest_Features = {}
Test_Features = {}
M1_forward_return = {}

for key, features in dfs.items():
    holdout = features.iloc[-TEST_ROWS:]
    Latest_Features[key] = features.iloc[TRAIN_WINDOW_START:-TEST_ROWS]
    #Used for plotting the chart of actual one-month forward returns later
    M1_forward_return[key] = holdout[['M1_forward_return', 'Date']]
    #Non-mutating drop: avoids the in-place drop on a slice of dfs[key]
    #(SettingWithCopyWarning) and leaves the source table untouched
    Test_Features[key] = holdout.drop(columns='M1_forward_return')

In [None]:
# Spot-check: held-out 'M1_forward_return' and 'Date' columns for TSLA
M1_forward_return['TSLA']

In [None]:
# Disabled cell: the code below sits inside a string literal, so it does not run.
# If enabled it would compare models (excluding auto_arima), blend the top 3
# and plot a 21-session forecast for every ticker.
'''

#Using top 3 models to forecast
for key,value in Latest_Features.items():
    
    model = setup(Latest_Features[key], target = 'M1_forward_return',fold_strategy = 'expanding', fold=5, fh=21, n_jobs=-1,
                  scale_target = 'zscore', scale_exogenous= 'zscore',
                  session_id=123, numeric_imputation_target="ffill", numeric_imputation_exogenous="ffill",
                  log_experiment=False, experiment_name=f'{key} experiment')

    best_3model = compare_models(n_select = 3,exclude = "auto_arima")
    blend = blend_models(best_3model)
    # plot forecast for 21 sessions in future
    plot_model(blend, plot = 'forecast', data_kwargs = {'fh' : 21})

'''    

In [None]:
#Using solely ARIMA to forecast, then backtesting the forecast against buy and hold.
#For every ticker: fit ARIMA on the training window, predict the held-out window,
#plot predicted vs actual forward returns, then plot the cumulative return of a
#long/flat strategy driven by the prediction next to buy and hold.

def _compound_log_returns(log_returns):
    """Convert a Series of daily log returns into cumulative simple returns.

    Log returns add up over time, so the cumulative simple return after each
    day is exp(running sum of log returns) - 1. Compounding them as
    (1 + r).cumprod() - 1 is only valid for simple returns. NaN entries are
    skipped by cumsum and stay NaN in the result.
    """
    return np.exp(log_returns.cumsum()) - 1


Ticker_backtest = {}   #per-ticker backtest table, indexed by Date
y_pred = {}            #per-ticker out-of-sample predictions from predict_model
y_pred1 = {}           #same predictions with their index exposed as an 'index' column

for key in Latest_Features:

    #Configure the PyCaret time-series experiment on this ticker's training window
    model = setup(Latest_Features[key], target = 'M1_forward_return',fold_strategy = 'expanding', fold=5, fh=21, n_jobs=-1,
                  scale_target = 'zscore', scale_exogenous= 'zscore',
                  session_id=123, numeric_imputation_target="ffill", numeric_imputation_exogenous="ffill",
                  log_experiment=False, experiment_name=f'{key} experiment')

    #Fit ARIMA, plot its forecast for 21 sessions in future, then finalize the model
    arima = create_model('arima')
    plot_model(arima, plot = 'forecast', data_kwargs = {'fh' : 21})
    final_model = finalize_model(arima)

    #Predict across the whole held-out window from its exogenous features
    test_length = Test_Features[key].shape[0]
    y_pred[key] = predict_model(final_model, X = Test_Features[key], fh=test_length)

    #Set up a temporary df to plot the predicted vs actual forward returns with dates
    temp_df = pd.merge(y_pred[key], M1_forward_return[key], left_index=True, right_index=True)
    temp_df.set_index('Date', inplace=True)

    plt.figure()
    plt.plot(temp_df['y_pred'], color='green', label=f'{key} Prediction')
    plt.plot(temp_df['M1_forward_return'], color='red', label=f'{key} Actual')
    plt.title(f'{key} Actual vs predicted 1 month ahead returns')
    plt.xlabel("Session")
    plt.ylabel("Return 1 month ahead")
    plt.legend()
    plt.show()

    #Matching date with index number: the held-out row labels link prices to predictions
    Date_df1 = Test_Features[key]["Date"]
    Date_df2 = Date_df1.reset_index()       #columns: 'index' (row label) and 'Date'

    backtest = pd.merge(Yahoo_data[key], Date_df2, how='left', left_on=['Date'], right_on=['Date'])
    y_pred1[key] = y_pred[key].reset_index()
    backtest = pd.merge(backtest, y_pred1[key], how='left', left_on=['index'], right_on=['index'])

    #Keep only the dates that have both a row label and a prediction
    backtest.dropna(axis=0, subset=['index', 'y_pred'], inplace=True)

    #Signal: hold the stock (1) when the predicted 1-month return is positive,
    #otherwise have no position (0). y_pred has no NaN after the dropna above.
    backtest['Signal'] = (backtest['y_pred'] > 0).astype(float)

    #Yesterday's signal earns today's return, so the strategy never trades on
    #information from the same bar it is evaluated on
    backtest['Signal Returns'] = backtest['Returns'] * backtest['Signal'].shift(1)

    #'Returns' are daily LOG returns, so compound them with exp(cumsum) - 1
    backtest['CumulativeStrategyReturns'] = _compound_log_returns(backtest['Signal Returns'])
    backtest['CumulativeBuyHoldReturns'] = _compound_log_returns(backtest['Returns'])

    backtest.set_index('Date', inplace=True)
    Ticker_backtest[key] = backtest

    plt.figure()
    plt.plot(backtest['CumulativeStrategyReturns'], color='green', label='CumulativeStrategyReturns')
    plt.plot(backtest['CumulativeBuyHoldReturns'], color='red', label='CumulativeBuyHoldReturns')
    plt.title(f'{key} Returns')
    plt.xlabel("Time")
    plt.ylabel("Returns")
    plt.legend()
    #Show inside the loop so each chart appears next to its own ticker's output
    plt.show()