In [15]:
# %pip install pycaret

In [16]:
import pandas as pd
import numpy as np
from pycaret.time_series import *
import plotly.express as px

In [17]:
# Load your stock data
csv_filename = 'Data/MSFT_last_decade.csv'
df = pd.read_csv(csv_filename)

In [18]:
df['Date'] = pd.to_datetime(df['Date'])
#df.set_index('Date', inplace=True)
df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2014-10-08,45.480000,46.889999,45.340000,46.779999,39.997784,33031000
1,2014-10-09,46.500000,46.799999,45.740002,45.849998,39.202618,34422800
2,2014-10-10,45.599998,46.119999,43.950001,44.029999,37.646488,51978100
3,2014-10-13,43.820000,44.560001,43.490002,43.650002,37.321583,37100200
4,2014-10-14,43.869999,44.380001,43.560001,43.730000,37.389980,38115700
...,...,...,...,...,...,...,...
2510,2024-09-30,428.209991,430.420013,425.369995,430.299988,430.299988,16807300
2511,2024-10-01,428.450012,428.480011,418.809998,420.690002,420.690002,19092900
2512,2024-10-02,422.579987,422.820007,416.709991,417.130005,417.130005,16582300
2513,2024-10-03,417.630005,419.549988,414.290009,416.540009,416.540009,13686400


In [19]:
df['MA50'] = df['Close'].rolling(window=50).mean()
df['MA100'] = df['Close'].rolling(window=100).mean()
df['MA200'] = df['Close'].rolling(window=200).mean()

# Plot the stock prices along with the moving averages
fig = px.line(df, x="Date", y=["Close", "MA50", "MA100", "MA200"], template='plotly_dark',
              title="Stock Prices with 50-Day, 100-Day, and 200-Day Moving Averages")
fig.show()

In [20]:
df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,MA50,MA100,MA200
0,2014-10-08,45.480000,46.889999,45.340000,46.779999,39.997784,33031000,,,
1,2014-10-09,46.500000,46.799999,45.740002,45.849998,39.202618,34422800,,,
2,2014-10-10,45.599998,46.119999,43.950001,44.029999,37.646488,51978100,,,
3,2014-10-13,43.820000,44.560001,43.490002,43.650002,37.321583,37100200,,,
4,2014-10-14,43.869999,44.380001,43.560001,43.730000,37.389980,38115700,,,
...,...,...,...,...,...,...,...,...,...,...
2510,2024-09-30,428.209991,430.420013,425.369995,430.299988,430.299988,16807300,419.773999,428.498099,415.66890
2511,2024-10-01,428.450012,428.480011,418.809998,420.690002,420.690002,19092900,419.328999,428.599599,415.90050
2512,2024-10-02,422.579987,422.820007,416.709991,417.130005,417.130005,16582300,418.774599,428.647699,416.15650
2513,2024-10-03,417.630005,419.549988,414.290009,416.540009,416.540009,13686400,418.527399,428.665699,416.38555


In [21]:
# extract month and year from dates**
df['Month'] = [i.month for i in df['Date']]
df['Year'] = [i.year for i in df['Date']]
df['Day'] = [i.day for i in df['Date']]

# create a sequence of numbers
df['Series'] = np.arange(1,len(df)+1)

# drop unnecessary columns and re-arrange
df.drop(['Date', 'MA50','MA100','MA200'], axis=1, inplace=True)
data = df[['Series', 'Year', 'Month', 'Close']] 

# check the head of the dataset**
data.head()

Unnamed: 0,Series,Year,Month,Close
0,1,2014,10,46.779999
1,2,2014,10,45.849998
2,3,2014,10,44.029999
3,4,2014,10,43.650002
4,5,2014,10,43.73


In [22]:
s = setup(data = df, target = 'Close', fold = 5, fh=90)


Unnamed: 0,Description,Value
0,session_id,8489
1,Target,Close
2,Approach,Univariate
3,Exogenous Variables,Present
4,Original data shape,"(2515, 10)"
5,Transformed data shape,"(2515, 10)"
6,Transformed train set shape,"(2425, 10)"
7,Transformed test set shape,"(90, 10)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


In [23]:
# Compare multiple time series forecasting models
best = compare_models(n_select=5)


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2,TT (Sec)
arima,ARIMA,0.1295,0.0954,0.6838,0.7825,0.0021,0.0021,0.996,3.258
auto_arima,Auto ARIMA,0.2265,0.1681,1.1695,1.3578,0.0039,0.0038,0.9901,63.364
lr_cds_dt,Linear w/ Cond. Deseasonalize & Detrending,0.3901,0.2648,2.0238,2.144,0.0066,0.0066,0.9761,3.416
ridge_cds_dt,Ridge w/ Cond. Deseasonalize & Detrending,0.3906,0.265,2.0266,2.1464,0.0066,0.0066,0.976,0.914
br_cds_dt,Bayesian Ridge w/ Cond. Deseasonalize & Detrending,0.3906,0.2651,2.0271,2.1467,0.0066,0.0066,0.976,0.884
en_cds_dt,Elastic Net w/ Cond. Deseasonalize & Detrending,0.4204,0.3451,2.1725,2.788,0.0072,0.0072,0.9571,1.84
lasso_cds_dt,Lasso w/ Cond. Deseasonalize & Detrending,0.4241,0.3476,2.1907,2.8074,0.0072,0.0072,0.9567,0.902
llar_cds_dt,Lasso Least Angular Regressor w/ Cond. Deseasonalize & Detrending,0.4599,0.3074,2.395,2.4953,0.0077,0.0077,0.9667,0.906
lightgbm_cds_dt,Light Gradient Boosting w/ Cond. Deseasonalize & Detrending,4.0194,2.9127,20.7249,23.462,0.0674,0.0649,-3.2318,1.832
rf_cds_dt,Random Forest w/ Cond. Deseasonalize & Detrending,4.2046,3.0785,21.1821,24.3768,0.0738,0.0707,-1.8524,2.316


Processing:   0%|          | 0/89 [00:00<?, ?it/s]

In [24]:
forecast = predict_model(best[1], fh=90)

# Step 4: Plot the forecast
plot_model(best, plot='forecast')

Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2
0,Auto ARIMA,0.1922,0.1593,1.1246,1.4135,0.0027,0.0026,0.9934


In [25]:
evaluate_model(best)

NameError: name 'evaluate_model' is not defined