In [1]:
from pycaret.time_series import *
import pandas as pd

In [3]:
# Read the long-term BTC OHLC history from CSV. The first column (the
# timestamp) becomes the index and is parsed as datetimes.
btc_5y_df = pd.read_csv(
    '../backend/data/BTC-past-5y-ohlc.csv',
    parse_dates=True,
    index_col=0,
)

# Preview the first five rows as a markdown table.
print(btc_5y_df.head(5).to_markdown())

| timestamp           | symbol   |    open |    high |     low |   close |   volume |
|:--------------------|:---------|--------:|--------:|--------:|--------:|---------:|
| 2020-03-13 00:00:00 | BTCUSDT  | 4800.01 | 5955    | 3782.13 | 5578.6  |   402202 |
| 2020-03-14 00:00:00 | BTCUSDT  | 5576.05 | 5640.52 | 5055.13 | 5172.06 |   136910 |
| 2020-03-15 00:00:00 | BTCUSDT  | 5172.48 | 5940    | 5093.1  | 5361.3  |   139916 |
| 2020-03-16 00:00:00 | BTCUSDT  | 5360.33 | 5365.42 | 4442.12 | 5028.97 |   227277 |
| 2020-03-17 00:00:00 | BTCUSDT  | 5028.86 | 5525    | 4921.45 | 5312.64 |   150090 |


In [4]:
# Earlier experiment, kept for reference:
# arima_experiment = setup(btc_5y_df.loc[:,'close'], fh = 3, fold = 5, session_id = 123)

# Univariate target: the 'close' column as a Series (despite the `_df` suffix),
# still indexed by timestamp.
btc_5y_close_df = btc_5y_df['close']
btc_5y_close_df.info()

<class 'pandas.core.series.Series'>
DatetimeIndex: 1825 entries, 2020-03-13 to 2025-03-11
Series name: close
Non-Null Count  Dtype  
--------------  -----  
1825 non-null   float64
dtypes: float64(1)
memory usage: 28.5 KB


In [5]:
print("\n--- Setting up Data Split ---")

# Keep the final 30 rows (about a month of daily closes) aside for testing.
# split_frame is the timestamp of the last training row, i.e. the 31st row
# counted from the end of the series.
split_frame = btc_5y_close_df.index[-31]
btc_train = btc_5y_close_df[btc_5y_close_df.index <= split_frame].copy()
btc_test = btc_5y_close_df[btc_5y_close_df.index > split_frame].copy()
print(f"Training data: {len(btc_train)} days")
print(f"Testing data: {len(btc_test)} days")


--- Setting up Data Split ---
Training data: 1795 days
Testing data: 30 days


In [6]:
print("\n--- Setting up PyCaret Environment ---")
# Initialise the PyCaret time-series experiment on the full close-price series.
# PyCaret carves out its own hold-out from the tail of the data based on `fh`.
s = setup(
    data=btc_5y_close_df,
    target='close',
    transform_target='log',  # fit on the log of the closing price
    fh=30,                   # forecast horizon: 30 steps (also the hold-out length)
    seasonal_period='D',     # daily alias; see PyCaret docs for the period it maps to
    fold=5,                  # number of cross-validation folds
    session_id=123,          # fixed seed so runs are repeatable
)


--- Setting up PyCaret Environment ---


Unnamed: 0,Description,Value
0,session_id,123
1,Target,close
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(1825, 1)"
5,Transformed data shape,"(1825, 1)"
6,Transformed train set shape,"(1795, 1)"
7,Transformed test set shape,"(30, 1)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


In [7]:
# Diagnostic plots for the experiment's series, in order: decomposition with
# default settings, decomposition with the multiplicative type, ACF, PACF.
diagnostic_plots = (
    {'plot': 'decomp'},
    {'plot': 'decomp', 'data_kwargs': {'type': 'multiplicative'}},
    {'plot': 'acf'},
    {'plot': 'pacf'},
)
for plot_args in diagnostic_plots:
    s.plot_model(**plot_args)

In [8]:
print("\n --- Create Prophet model --- ")

# Train a Prophet model inside the current experiment; PyCaret displays the
# per-fold cross-validation scores as a side effect of this call.
Prophet = create_model(estimator='prophet')

# Banner followed by the estimator's repr, each on its own line.
print("--- model parameters: ---", Prophet, sep="\n")


 --- Create Prophet model --- 


Unnamed: 0,cutoff,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2
0,2024-09-12,2.7642,1.9764,6216.8012,6649.8657,0.0987,0.1046,-11.1309
1,2024-10-12,1.2768,1.1638,2885.1479,3914.6577,0.0395,0.04,0.4006
2,2024-11-11,10.101,6.8614,23194.7642,23506.9399,0.2423,0.2764,-37.246
3,2024-12-11,3.0326,2.3052,7127.8595,8140.543,0.0726,0.0721,-4.0141
4,2025-01-10,6.1405,4.5386,14613.8747,16164.9801,0.1476,0.1353,-19.5984
Mean,NaT,4.663,3.3691,10807.6895,11675.3973,0.1202,0.1257,-14.3177
SD,NaT,3.1459,2.0736,7284.3769,7185.0121,0.0706,0.0818,13.3157


--- model parameters: ---
ProphetPeriodPatched()


In [9]:
# Tune the Prophet model's hyperparameters with PyCaret's default search
# settings; the tuned estimator is kept under a separate name so the baseline
# `Prophet` model stays available for comparison.
print("\n--- Tuning Prophet model... ---")
tuned_prophet = tune_model(Prophet)
print("Tuned Prophet model parameters:")
print(tuned_prophet)



--- Tuning Prophet model... ---


Unnamed: 0,cutoff,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2
0,2024-09-12,1.1059,0.8812,2487.2996,2964.9524,0.0402,0.0394,-1.4116
1,2024-10-12,3.2196,2.3768,7275.1346,7994.6679,0.1043,0.0983,-1.4997
2,2024-11-11,5.6995,3.9001,13087.5946,13361.4994,0.1365,0.1469,-11.3568
3,2024-12-11,3.0399,2.391,7145.2405,8443.4365,0.0722,0.0736,-4.3941
4,2025-01-10,3.3871,2.978,8061.1378,10606.8274,0.0823,0.0769,-7.8686
Mean,NaT,3.2904,2.5054,7611.2814,8674.2767,0.0871,0.087,-5.3062
SD,NaT,1.4596,0.9835,3369.7999,3429.7623,0.0322,0.0354,3.8383


Fitting 5 folds for each of 10 candidates, totalling 50 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 10 concurrent workers.
[Parallel(n_jobs=-1)]: Done  30 tasks      | elapsed:    4.2s
[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed:    5.4s finished


Tuned Prophet model parameters:
ProphetPeriodPatched(changepoint_prior_scale=0.006204765906220318,
                     holidays_prior_scale=0.7814989008992038,
                     seasonality_prior_scale=2.142956309076899)


In [10]:
# Plot the tuned model's forecast.
plot_model(tuned_prophet, plot='forecast')

# pull() fetches PyCaret's most recently displayed score grid, so it must stay
# directly after the plot_model call to pick up that call's grid (presumably
# the hold-out metrics for the tuned model — TODO confirm).
print(pull().to_markdown(index=False))


| Model   |   MASE |   RMSSE |     MAE |    RMSE |   MAPE |   SMAPE |       R2 |
|:--------|-------:|--------:|--------:|--------:|-------:|--------:|---------:|
| Prophet | 15.708 | 10.9503 | 37894.1 | 39394.4 |  0.425 |  0.3444 | -42.4886 |


In [11]:
print("\n--- Finalizing Model and Making Predictions ---")
# Refit the tuned model with finalize_model (documented by PyCaret as training
# on the entire dataset, hold-out included).
final_prophet = finalize_model(tuned_prophet)

# Forecast with the finalized model. NOTE: these are out-of-sample forecasts
# for dates AFTER the last observation in the data, not predictions for the
# hold-out/test window, so they cannot be scored against btc_test as-is.
predictions = predict_model(final_prophet)
print("Prediction results (first 5 rows):")
display(predictions.head())

# Plot the finalized model's forecast.
plot_model(final_prophet, plot='forecast')


--- Finalizing Model and Making Predictions ---
Prediction results (first 5 rows):


Unnamed: 0,y_pred
2025-03-12,121361.259
2025-03-13,122524.1502
2025-03-14,123728.3033
2025-03-15,125127.5093
2025-03-16,126452.6607


In [12]:
# NOTE(review): disabled scoring cell, kept for reference. As written it would
# score `predictions` against the last 30 *training* rows (btc_train[-30:]),
# whose dates do not match the forecast dates — align the two series on their
# index before re-enabling.
# from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
# print("\n--- Model Test Scoring ---")
# # Calculate evaluation metrics

# # y_true = btc_test
# # y_pred = predictions
# mae = mean_absolute_error(btc_train[-30:], predictions)
# mse = mean_squared_error(btc_train[-30:], predictions)
# r2 = r2_score(btc_train[-30:], predictions)

# # btc_train_last_30 = btc_train[-30:]
# # print(btc_train_last_30.index)
# # print(predictions.index)

# print(f"Mean squared error: {mse:.2f}")
# print(f"Mean absolute error: {mae:.2f}")
# print(f"R-squared score : {r2:.2f}") 

In [13]:
# NOTE(review): disabled plotting cell, kept for reference. It pairs the dates
# and values of btc_test[-30:] with the raw values of `predictions` by position;
# confirm both cover the same dates before re-enabling, otherwise the
# "actual" and "predicted" lines are not comparable.
# print("\n--- Plot against validation set ---")

# import plotly.express as px

# # create a dataframe for visualisation
# btc_vis = pd.DataFrame(
#     {
#         "date" : btc_test[-30:].index,
#         "actual" : btc_test[-30:].values,
#         "predicted" : predictions.values.flatten(),
#     }
# )

# # print(len(btc_test[-30:].index))

# # print(len(btc_test[-30:].values))
# # print(len(predictions.values.flatten()))
# # melt the dataframe into long format so each series gets its own line in the plot
# btc_vis = btc_vis.melt(id_vars=['date'], var_name="type", value_name='close')

# # plot validation set with predictions
# fig = px.line(
#     btc_vis,
#     x='date',
#     y='close',
#     color='type',
#     title="Prophet Forecast vs Actual",
#     labels=dict(close='close price', date='date')
# )

# fig.show()

In [14]:
# NOTE(review): disabled evaluation cell, kept for reference. It scores
# `predictions` against btc_test by position; confirm that both series cover
# the same dates before re-enabling, otherwise the metrics are meaningless.
# from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
# print("\n--- Model Evaluation ---")
# # Calculate evaluation metrics

# # y_true = btc_test
# # y_pred = predictions
# mae = mean_absolute_error(btc_test,predictions)
# mse = mean_squared_error(btc_test, predictions)
# r2 = r2_score(btc_test, predictions)

# print(f"Mean squared error: {mse:.2f}")
# print(f"Mean absolute error: {mae:.2f}")
# print(f"R-squared score : {r2:.2f}") 

In [15]:
from IPython.display import Markdown, display

# Render the experiment's summary statistics and statistical tests
# (white noise, stationarity, normality) as a markdown table.
display(Markdown(s.check_stats().to_markdown()))


|    | Test         | Test Name   | Data        | Property            | Setting                  |           Value |
|---:|:-------------|:------------|:------------|:--------------------|:-------------------------|----------------:|
|  0 | Summary      | Statistics  | Transformed | Length              |                          |  1825           |
|  1 | Summary      | Statistics  | Transformed | # Missing Values    |                          |     0           |
|  2 | Summary      | Statistics  | Transformed | Mean                |                          |    10.3939      |
|  3 | Summary      | Statistics  | Transformed | Median              |                          |    10.4841      |
|  4 | Summary      | Statistics  | Transformed | Standard Deviation  |                          |     0.663089    |
|  5 | Summary      | Statistics  | Transformed | Variance            |                          |     0.439687    |
|  6 | Summary      | Statistics  | Transformed | Kurtosis            |                          |    -0.35432     |
|  7 | Summary      | Statistics  | Transformed | Skewness            |                          |    -0.496181    |
|  8 | Summary      | Statistics  | Transformed | # Distinct Values   |                          |  1825           |
|  9 | White Noise  | Ljung-Box   | Transformed | Test Statictic      | {'alpha': 0.05, 'K': 24} | 40159.8         |
| 10 | White Noise  | Ljung-Box   | Transformed | Test Statictic      | {'alpha': 0.05, 'K': 48} | 73389.7         |
| 11 | White Noise  | Ljung-Box   | Transformed | p-value             | {'alpha': 0.05, 'K': 24} |     0           |
| 12 | White Noise  | Ljung-Box   | Transformed | p-value             | {'alpha': 0.05, 'K': 48} |     0           |
| 13 | White Noise  | Ljung-Box   | Transformed | White Noise         | {'alpha': 0.05, 'K': 24} |     0           |
| 14 | White Noise  | Ljung-Box   | Transformed | White Noise         | {'alpha': 0.05, 'K': 48} |     0           |
| 15 | Stationarity | ADF         | Transformed | Stationarity        | {'alpha': 0.05}          |     0           |
| 16 | Stationarity | ADF         | Transformed | p-value             | {'alpha': 0.05}          |     0.12407     |
| 17 | Stationarity | ADF         | Transformed | Test Statistic      | {'alpha': 0.05}          |    -2.4656      |
| 18 | Stationarity | ADF         | Transformed | Critical Value 1%   | {'alpha': 0.05}          |    -3.43394     |
| 19 | Stationarity | ADF         | Transformed | Critical Value 5%   | {'alpha': 0.05}          |    -2.86313     |
| 20 | Stationarity | ADF         | Transformed | Critical Value 10%  | {'alpha': 0.05}          |    -2.56761     |
| 21 | Stationarity | KPSS        | Transformed | Trend Stationarity  | {'alpha': 0.05}          |     0           |
| 22 | Stationarity | KPSS        | Transformed | p-value             | {'alpha': 0.05}          |     0.01        |
| 23 | Stationarity | KPSS        | Transformed | Test Statistic      | {'alpha': 0.05}          |     0.639423    |
| 24 | Stationarity | KPSS        | Transformed | Critical Value 10%  | {'alpha': 0.05}          |     0.119       |
| 25 | Stationarity | KPSS        | Transformed | Critical Value 5%   | {'alpha': 0.05}          |     0.146       |
| 26 | Stationarity | KPSS        | Transformed | Critical Value 2.5% | {'alpha': 0.05}          |     0.176       |
| 27 | Stationarity | KPSS        | Transformed | Critical Value 1%   | {'alpha': 0.05}          |     0.216       |
| 28 | Normality    | Shapiro     | Transformed | Normality           | {'alpha': 0.05}          |     0           |
| 29 | Normality    | Shapiro     | Transformed | p-value             | {'alpha': 0.05}          |     1.06847e-19 |