In [12]:
# %pip install pycaret

In [13]:
import pandas as pd
import numpy as np
from pycaret.time_series import *
import plotly.express as px

In [14]:
# Load your stock data
csv_filename = 'Data/META_last_decade.csv'
df = pd.read_csv(csv_filename)

In [15]:
df['Date'] = pd.to_datetime(df['Date'])
#df.set_index('Date', inplace=True)
df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2014-10-08,76.180000,77.709999,75.099998,77.519997,77.287292,32976000
1,2014-10-09,77.279999,77.760002,75.849998,75.910004,75.682137,32487000
2,2014-10-10,75.589996,76.500000,72.760002,72.910004,72.691139,52146000
3,2014-10-13,73.230003,74.779999,72.519997,72.989998,72.770897,43938000
4,2014-10-14,74.000000,74.180000,72.050003,73.589996,73.369095,50299000
...,...,...,...,...,...,...,...
2510,2024-09-30,567.700012,574.679993,564.799988,572.440002,572.440002,12792300
2511,2024-10-01,577.979980,583.039978,570.099976,576.469971,576.469971,15259300
2512,2024-10-02,574.859985,576.000000,569.340027,572.809998,572.809998,6524700
2513,2024-10-03,570.150024,583.359985,568.729980,582.770020,582.770020,11581000


In [16]:
df['MA50'] = df['Close'].rolling(window=50).mean()
df['MA100'] = df['Close'].rolling(window=100).mean()
df['MA200'] = df['Close'].rolling(window=200).mean()

# Plot the stock prices along with the moving averages
fig = px.line(df, x="Date", y=["Close", "MA50", "MA100", "MA200"], template='plotly_dark',
              title="Stock Prices with 50-Day, 100-Day, and 200-Day Moving Averages")
fig.show()

In [17]:
df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,MA50,MA100,MA200
0,2014-10-08,76.180000,77.709999,75.099998,77.519997,77.287292,32976000,,,
1,2014-10-09,77.279999,77.760002,75.849998,75.910004,75.682137,32487000,,,
2,2014-10-10,75.589996,76.500000,72.760002,72.910004,72.691139,52146000,,,
3,2014-10-13,73.230003,74.779999,72.519997,72.989998,72.770897,43938000,,,
4,2014-10-14,74.000000,74.180000,72.050003,73.589996,73.369095,50299000,,,
...,...,...,...,...,...,...,...,...,...,...
2510,2024-09-30,567.700012,574.679993,564.799988,572.440002,572.440002,12792300,517.215999,504.676200,474.72490
2511,2024-10-01,577.979980,583.039978,570.099976,576.469971,576.469971,15259300,518.997398,505.714899,475.93355
2512,2024-10-02,574.859985,576.000000,569.340027,572.809998,572.809998,6524700,520.679798,506.688799,477.13175
2513,2024-10-03,570.150024,583.359985,568.729980,582.770020,582.770020,11581000,523.109799,507.754499,478.37100


In [18]:
# extract month and year from dates**
df['Month'] = [i.month for i in df['Date']]
df['Year'] = [i.year for i in df['Date']]
df['Day'] = [i.day for i in df['Date']]

# create a sequence of numbers
df['Series'] = np.arange(1,len(df)+1)

# drop unnecessary columns and re-arrange
df.drop(['Date', 'MA50','MA100','MA200'], axis=1, inplace=True)
data = df[['Series', 'Year', 'Month', 'Close']] 

# check the head of the dataset**
data.head()

Unnamed: 0,Series,Year,Month,Close
0,1,2014,10,77.519997
1,2,2014,10,75.910004
2,3,2014,10,72.910004
3,4,2014,10,72.989998
4,5,2014,10,73.589996


In [19]:
s = setup(data = df, target = 'Close', fold = 5, fh=90)


Unnamed: 0,Description,Value
0,session_id,963
1,Target,Close
2,Approach,Univariate
3,Exogenous Variables,Present
4,Original data shape,"(2515, 10)"
5,Transformed data shape,"(2515, 10)"
6,Transformed train set shape,"(2425, 10)"
7,Transformed test set shape,"(90, 10)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


In [20]:
# Compare multiple time series forecasting models
best = compare_models(n_select=5)


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2,TT (Sec)
arima,ARIMA,0.0087,0.0069,0.0779,0.0897,0.0002,0.0002,1.0,1.122
auto_arima,Auto ARIMA,0.0087,0.0069,0.0779,0.0897,0.0002,0.0002,1.0,16.448
lr_cds_dt,Linear w/ Cond. Deseasonalize & Detrending,0.0441,0.0342,0.3759,0.434,0.0016,0.0016,0.9997,0.93
ridge_cds_dt,Ridge w/ Cond. Deseasonalize & Detrending,0.0441,0.0342,0.3758,0.4339,0.0016,0.0016,0.9997,0.938
br_cds_dt,Bayesian Ridge w/ Cond. Deseasonalize & Detrending,0.0441,0.0342,0.3758,0.4339,0.0016,0.0016,0.9997,0.916
llar_cds_dt,Lasso Least Angular Regressor w/ Cond. Deseasonalize & Detrending,0.0443,0.0342,0.3768,0.4339,0.0017,0.0017,0.9997,0.984
lasso_cds_dt,Lasso w/ Cond. Deseasonalize & Detrending,0.1446,0.1259,1.2388,1.6021,0.005,0.005,0.9965,0.916
en_cds_dt,Elastic Net w/ Cond. Deseasonalize & Detrending,0.1529,0.1396,1.3109,1.7782,0.0052,0.0052,0.9957,0.916
et_cds_dt,Extra Trees w/ Cond. Deseasonalize & Detrending,3.3518,2.676,28.5995,33.9489,0.1268,0.113,-0.8182,1.904
xgboost_cds_dt,Extreme Gradient Boosting w/ Cond. Deseasonalize & Detrending,3.4591,2.7039,29.4596,34.2479,0.1317,0.1242,-0.691,1.196


Processing:   0%|          | 0/89 [00:00<?, ?it/s]

In [21]:
forecast = predict_model(best[1], fh=90)

# Step 4: Plot the forecast
plot_model(best, plot='forecast')

Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2
0,Auto ARIMA,0.0577,0.0443,0.5516,0.6374,0.0011,0.0011,0.9996
