In [1]:
# %pip install pycaret

In [2]:
import pandas as pd
import numpy as np
from pycaret.time_series import *
import plotly.express as px

In [3]:
# Load your stock data
csv_filename = 'Data/SP500_last_decade.csv'
df = pd.read_csv(csv_filename)
df.columns = map(str.lower, df.columns)

In [4]:
df['date'] = pd.to_datetime(df['date'])
df.drop(['adj_close','open','high','low','volume'], axis=1, inplace=True)
df

Unnamed: 0,date,close
0,2014-11-26,2072.830078
1,2014-11-28,2067.560059
2,2014-12-01,2053.439941
3,2014-12-02,2066.550049
4,2014-12-03,2074.330078
...,...,...
2510,2024-11-18,5893.620117
2511,2024-11-19,5916.979980
2512,2024-11-20,5917.109863
2513,2024-11-21,5948.709961


In [5]:
df['ma50'] = df['close'].rolling(window=50).mean()
df['ma100'] = df['close'].rolling(window=100).mean()
df['ma200'] = df['close'].rolling(window=200).mean()

df_ma = df.dropna(subset=['ma50','ma100','ma200'])

# Plot the stock prices along with the moving averages
fig = px.line(df, x="date", y=["close", "ma50", "ma100", "ma200"], template='plotly_dark',
              title="Stock Prices with 50-Day, 100-Day, and 200-Day Moving Averages")
fig.show()

In [6]:
# extract month and year from dates**
df['month'] = [i.month for i in df['date']]
df['year'] = [i.year for i in df['date']]
df['day'] = [i.day for i in df['date']]

# create a sequence of numbers
df['series'] = np.arange(1,len(df)+1)

#drop unnecessary columns and re-arrange
df.drop(['date','ma50','ma100','ma200'], axis=1, inplace=True)
data = df[['series', 'year', 'month','day','close']] 

# check the head of the dataset**
data.head()

Unnamed: 0,series,year,month,day,close
0,1,2014,11,26,2072.830078
1,2,2014,11,28,2067.560059
2,3,2014,12,1,2053.439941
3,4,2014,12,2,2066.550049
4,5,2014,12,3,2074.330078


In [7]:
df.head()

Unnamed: 0,close,month,year,day,series
0,2072.830078,11,2014,26,1
1,2067.560059,11,2014,28,2
2,2053.439941,12,2014,1,3
3,2066.550049,12,2014,2,4
4,2074.330078,12,2014,3,5


In [8]:
s = setup(data = df, target = 'close', fold = 5, fh=90)


Unnamed: 0,Description,Value
0,session_id,1652
1,Target,close
2,Approach,Univariate
3,Exogenous Variables,Present
4,Original data shape,"(2515, 5)"
5,Transformed data shape,"(2515, 5)"
6,Transformed train set shape,"(2425, 5)"
7,Transformed test set shape,"(90, 5)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


In [9]:
# Compare multiple time series forecasting models
best = compare_models(n_select=5)


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2,TT (Sec)
knn_cds_dt,K Neighbors w/ Cond. Deseasonalize & Detrending,2.5603,2.0764,175.1264,208.0871,0.0378,0.0388,-0.7631,1.288
ridge_cds_dt,Ridge w/ Cond. Deseasonalize & Detrending,2.9269,2.3165,200.4304,232.1927,0.043,0.0444,-1.0488,1.08
huber_cds_dt,Huber w/ Cond. Deseasonalize & Detrending,2.9428,2.2647,200.8767,226.9775,0.0446,0.045,-1.4278,0.82
en_cds_dt,Elastic Net w/ Cond. Deseasonalize & Detrending,2.9429,2.3246,201.4917,233.0041,0.0432,0.0447,-1.0566,2.79
br_cds_dt,Bayesian Ridge w/ Cond. Deseasonalize & Detrending,2.9462,2.3278,201.7043,233.3203,0.0433,0.0447,-1.0607,0.788
arima,ARIMA,2.9535,2.3619,201.6481,236.5689,0.0438,0.0454,-1.1526,2.742
llar_cds_dt,Lasso Least Angular Regressor w/ Cond. Deseasonalize & Detrending,2.9555,2.3311,202.3258,233.6494,0.0435,0.0449,-1.0633,0.796
lasso_cds_dt,Lasso w/ Cond. Deseasonalize & Detrending,2.9556,2.3312,202.3308,233.6548,0.0435,0.0449,-1.0634,0.932
lr_cds_dt,Linear w/ Cond. Deseasonalize & Detrending,2.9598,2.3434,202.5661,234.8662,0.0436,0.045,-1.1057,2.848
rf_cds_dt,Random Forest w/ Cond. Deseasonalize & Detrending,3.0058,2.3486,205.3254,235.2347,0.0446,0.0462,-1.1763,1.958


Processing:   0%|          | 0/89 [00:00<?, ?it/s]

In [12]:
#forecast = predict_model(best, fh=90)

# Step 4: Plot the forecast
plot_model(best, plot='forecast')

In [11]:
plot_model(best[1], plot='ts')