In [None]:
# Only enable critical logging (Optional)
import os
os.environ["PYCARET_CUSTOM_LOGGING_LEVEL"] = "CRITICAL"

In [None]:
def what_is_installed():
    from pycaret import show_versions
    show_versions()

try:
    what_is_installed()
except ModuleNotFoundError:
    !pip install pycaret
    what_is_installed()

Collecting pycaret
  Downloading pycaret-3.3.2-py3-none-any.whl.metadata (17 kB)
Collecting scipy<=1.11.4,>=1.6.1 (from pycaret)
  Downloading scipy-1.11.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.4/60.4 kB[0m [31m886.2 kB/s[0m eta [36m0:00:00[0m
[?25hCollecting joblib<1.4,>=1.2.0 (from pycaret)
  Downloading joblib-1.3.2-py3-none-any.whl.metadata (5.4 kB)
Collecting scikit-learn>1.4.0 (from pycaret)
  Downloading scikit_learn-1.5.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Collecting pyod>=1.1.3 (from pycaret)
  Downloading pyod-2.0.2.tar.gz (165 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m165.8/165.8 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting category-encoders>=2.4.0 (from pycaret)
  Downloading category_encoders-2.6.3-py2.py3-none-any.whl.metadata 

In [None]:
import time
import numpy as np
import pandas as pd

from pycaret.datasets import get_data
from pycaret.time_series import TSForecastingExperiment

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
file_path='/content/drive/My Drive/pycaret_timeseries/StarbucksDataset.csv'

In [None]:
data=pd.read_csv(file_path)

This dataset provides a comprehensive record of Starbucks' stock price changes over the years. It includes crucial columns such as the date, opening price, highest price of the day, lowest price of the day, closing price, adjusted closing price, and trading volume.

In [None]:
data.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,1992-06-26,0.328125,0.347656,0.320313,0.335938,0.260703,224358400
1,1992-06-29,0.339844,0.367188,0.332031,0.359375,0.278891,58732800
2,1992-06-30,0.367188,0.371094,0.34375,0.347656,0.269797,34777600
3,1992-07-01,0.351563,0.359375,0.339844,0.355469,0.27586,18316800
4,1992-07-02,0.359375,0.359375,0.347656,0.355469,0.27586,13996800


In [None]:
data['Date'] = pd.to_datetime(data['Date'])
data.set_index('Date', inplace=True)
data = data.asfreq('M')

In [None]:
# Fill missing values using forward fill
data.fillna(method='ffill', inplace=True)

# Display the data to verify
print(data.isnull().sum())  # This should return 0 if all missing values are filled


Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64


In [None]:
from pycaret.time_series import *

# Set up PyCaret for time series forecasting
exp_name = setup(data=data, target='Adj Close', session_id=123)

# Compare different models to find the best one
best_model = compare_models()


Unnamed: 0,Description,Value
0,session_id,123
1,Target,Adj Close
2,Approach,Univariate
3,Exogenous Variables,Present
4,Original data shape,"(383, 6)"
5,Transformed data shape,"(383, 6)"
6,Transformed train set shape,"(382, 6)"
7,Transformed test set shape,"(1, 6)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,TT (Sec)
auto_arima,Auto ARIMA,0.1294,0.0703,0.3711,0.3711,0.004,0.004,13.61
arima,ARIMA,0.1896,0.1031,0.5438,0.5438,0.0058,0.0058,3.2533
lightgbm_cds_dt,Light Gradient Boosting w/ Cond. Deseasonalize & Detrending,0.1947,0.1057,0.5581,0.5581,0.006,0.006,0.47
ridge_cds_dt,Ridge w/ Cond. Deseasonalize & Detrending,0.263,0.1431,0.7548,0.7548,0.008,0.0079,0.1467
lr_cds_dt,Linear w/ Cond. Deseasonalize & Detrending,0.3132,0.1703,0.8987,0.8987,0.0096,0.0095,0.33
croston,Croston,0.4457,0.2418,1.2772,1.2772,0.0138,0.0137,0.0433
br_cds_dt,Bayesian Ridge w/ Cond. Deseasonalize & Detrending,0.5114,0.2777,1.4662,1.4662,0.0158,0.0156,0.2133
dt_cds_dt,Decision Tree w/ Cond. Deseasonalize & Detrending,0.5814,0.3159,1.6675,1.6675,0.0177,0.0174,0.2467
et_cds_dt,Extra Trees w/ Cond. Deseasonalize & Detrending,0.5899,0.3201,1.6906,1.6906,0.0183,0.018,0.3333
gbr_cds_dt,Gradient Boosting w/ Cond. Deseasonalize & Detrending,0.6415,0.3485,1.8397,1.8397,0.0196,0.0194,0.3033


Processing:   0%|          | 0/85 [00:00<?, ?it/s]

In [None]:
from pycaret.time_series import *

# Manually create an ARIMA model (no exogenous variables)
best_model = create_model('arima')

# Forecast the next 6 periods without exogenous variables
future_forecast = predict_model(best_model)

# Display the forecasted values
print(future_forecast)

# Plot the forecast to visualize future trends
plot_model(best_model, plot='forecast')


Unnamed: 0,cutoff,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE
0,2023-12,0.0009,0.0005,0.0027,0.0027,0.0,0.0
1,2024-01,0.2779,0.151,0.7969,0.7969,0.0085,0.0085
2,2024-02,0.2899,0.1577,0.8319,0.8319,0.0088,0.0088
Mean,NaT,0.1896,0.1031,0.5438,0.5438,0.0058,0.0058
SD,NaT,0.1335,0.0726,0.3829,0.3829,0.0041,0.0041


Processing:   0%|          | 0/4 [00:00<?, ?it/s]

Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE
0,ARIMA,0.175,0.0953,0.5027,0.5027,0.0057,0.0057


          y_pred
2024-04  88.3264


In [None]:
# Plot residuals to check for errors in the model's predictions
plot_model(best_model, plot='residuals')

# Check diagnostics for model performance
plot_model(best_model, plot='diagnostics')


In [None]:
# Save the best model
save_model(best_model, 'best_time_series_model')


Transformation Pipeline and Model Successfully Saved


(ForecastingPipeline(steps=[('forecaster',
                             TransformedTargetForecaster(steps=[('model',
                                                                 ARIMA(seasonal_order=(0,
                                                                                       1,
                                                                                       0,
                                                                                       4)))]))]),
 'best_time_series_model.pkl')

In [None]:
# Load the saved model
loaded_model = load_model('best_time_series_model')

# Use the loaded model to forecast future values
future_forecast = predict_model(loaded_model)


Transformation Pipeline and Model Successfully Loaded


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE
0,ARIMA,0.175,0.0953,0.5027,0.5027,0.0057,0.0057


In [None]:
# Export forecasted values to a CSV file
future_forecast.to_csv('forecasted_values.csv')
