In [16]:
from pycaret.time_series import *
import pandas as pd
from IPython.display import Markdown, display

In [17]:
# Plot rendering options: static PNG output at 1000x700.
# Only referenced from a commented-out `fig_kwargs=` argument in the setup() call.
fig_kwargs = dict(renderer="png", width=1000, height=700)

In [18]:
# Load the long-term BTC OHLC dataset.
# The first CSV column (timestamp) becomes a parsed datetime index.
BTC_OHLC_CSV = '../backend/data/BTC-past-5y-ohlc.csv'
btc_5y_df = pd.read_csv(BTC_OHLC_CSV, index_col=0, parse_dates=True)

# Preview the first rows as a Markdown table.
print(btc_5y_df.head().to_markdown())

| timestamp           | symbol   |    open |    high |   low |   close |   volume |
|:--------------------|:---------|--------:|--------:|------:|--------:|---------:|
| 2020-04-20 00:00:00 | BTCUSDT  | 7121.4  | 7220    |  6751 | 6826.83 |  90149.5 |
| 2020-04-21 00:00:00 | BTCUSDT  | 6828.98 | 6940    |  6762 | 6841.37 |  60109.7 |
| 2020-04-22 00:00:00 | BTCUSDT  | 6841.36 | 7156.38 |  6818 | 7125.14 |  61486.4 |
| 2020-04-23 00:00:00 | BTCUSDT  | 7125.12 | 7738    |  7020 | 7482.39 | 102774   |
| 2020-04-24 00:00:00 | BTCUSDT  | 7483.96 | 7615.96 |  7388 | 7505    |  60182.1 |


In [19]:
# Keep only the closing price. Despite the `_df` suffix this is a pandas
# Series (see the `.info()` output), indexed by the same timestamps.
btc_5y_close_df = btc_5y_df['close']
btc_5y_close_df.info()

<class 'pandas.core.series.Series'>
DatetimeIndex: 1825 entries, 2020-04-20 to 2025-04-18
Series name: close
Non-Null Count  Dtype  
--------------  -----  
1825 non-null   float64
dtypes: float64(1)
memory usage: 28.5 KB


In [20]:
print("\n--- Setting up Data Split ---")

# Hold out the last month (30 daily observations) for testing.
# NOTE: setup() is called with the full series and fh=30, so PyCaret performs
# its own split; these two variables are only for manual checks.
HOLDOUT_DAYS = 30
close_index = btc_5y_close_df.index
split_frame = close_index[-(HOLDOUT_DAYS + 1)]  # last timestamp kept in training

btc_train = btc_5y_close_df.loc[close_index <= split_frame].copy()
btc_test = btc_5y_close_df.loc[close_index > split_frame].copy()

print(f"Training data: {btc_train.shape[0]} days")
print(f"Testing data: {btc_test.shape[0]} days")


--- Setting up Data Split ---
Training data: 1795 days
Testing data: 30 days


In [21]:
print("\n--- Setting up PyCaret Environment ---")

# Initialise the PyCaret time-series experiment on the full close-price series.
s = setup(
    data=btc_5y_close_df,
    target='close',
    transform_target='log',  # model the log of the close price
    fh=30,                   # forecast horizon; the setup summary shows a 30-row test set
    seasonal_period='D',     # presumably PyCaret's daily alias - TODO confirm the period it maps to
    fold=5,                  # five cross-validation folds
    session_id=123,          # fixed session id so runs are repeatable
    # fig_kwargs=fig_kwargs,   (currently disabled)
)


--- Setting up PyCaret Environment ---


Unnamed: 0,Description,Value
0,session_id,123
1,Target,close
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(1825, 1)"
5,Transformed data shape,"(1825, 1)"
6,Transformed train set shape,"(1795, 1)"
7,Transformed test set shape,"(30, 1)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


In [22]:
# Exploratory diagnostics, drawn in order: classical decomposition,
# multiplicative decomposition, autocorrelation, partial autocorrelation.
diagnostic_plots = [
    {'plot': 'decomp'},
    {'plot': 'decomp', 'data_kwargs': {'type': 'multiplicative'}},
    {'plot': 'acf'},
    {'plot': 'pacf'},
]
for plot_kwargs in diagnostic_plots:
    s.plot_model(**plot_kwargs)

In [23]:
# Fit a baseline Prophet forecaster with default hyperparameters.
# create_model() cross-validates over the folds configured in setup() and
# displays a per-fold metrics table (MASE, RMSSE, MAE, RMSE, MAPE, SMAPE, R2).
print("\n --- Create Prophet model --- ")

# NOTE: the capitalised name `Prophet` holds the fitted estimator; the tuning
# cell reads it, so the name must stay in sync with that cell.
Prophet = create_model('prophet')

# Show the estimator's repr (its constructor parameters).
print("--- model parameters: ---")
print(Prophet)


 --- Create Prophet model --- 


Unnamed: 0,cutoff,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2
0,2024-10-20,3.0916,2.7976,7156.6706,9552.0531,0.0863,0.0914,-0.0435
1,2024-11-19,7.3566,5.0065,17511.9287,17818.3319,0.1769,0.1946,-28.1198
2,2024-12-19,7.3373,5.2478,17611.1997,18776.0119,0.1826,0.1654,-36.5834
3,2025-01-18,8.5686,6.9687,20896.6412,25227.9249,0.2136,0.185,-56.2286
4,2025-02-17,15.6954,11.025,38455.1042,40010.9478,0.4469,0.3592,-52.5875
Mean,NaT,8.4099,6.2091,20326.3089,22277.0539,0.2213,0.1991,-34.7126
SD,NaT,4.0902,2.7488,10175.5631,10171.1531,0.1205,0.0878,20.1562


--- model parameters: ---
ProphetPeriodPatched()


In [24]:
# Tune the Prophet model created above (this cell tunes Prophet, not ARIMA).
# tune_model() is called with its defaults; the recorded log shows 10 candidate
# configurations evaluated over the 5 cross-validation folds.
print("\n--- Tuning Prophet model... ---")
tuned_prophet = tune_model(Prophet)
# Show the hyperparameters selected by the search.
print("Tuned Prophet model parameters:")
print(tuned_prophet)



--- Tuning Prophet model... ---


Unnamed: 0,cutoff,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,R2
0,2024-10-20,3.1116,2.6978,7202.976,9211.323,0.0881,0.0922,0.0296
1,2024-11-19,8.5598,5.7919,20376.2234,20613.8423,0.2061,0.2302,-37.9738
2,2024-12-19,1.9737,1.5557,4737.4002,5566.0624,0.0495,0.0482,-2.3028
3,2025-01-18,5.1143,4.5414,12472.5503,16440.7347,0.1281,0.1154,-23.3048
4,2025-02-17,17.081,11.9316,41849.8882,43301.0128,0.4858,0.3848,-61.7628
Mean,NaT,7.1681,5.3037,17327.8076,19026.595,0.1915,0.1742,-25.0629
SD,NaT,5.438,3.6218,13380.8025,13236.1542,0.156,0.1213,23.0838


Fitting 5 folds for each of 10 candidates, totalling 50 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 10 concurrent workers.
[Parallel(n_jobs=-1)]: Done  30 tasks      | elapsed:    3.3s
[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed:    4.6s finished


Tuned Prophet model parameters:
ProphetPeriodPatched(changepoint_prior_scale=0.013367098097447634,
                     holidays_prior_scale=0.9207970566887815,
                     seasonality_prior_scale=0.048393796358465295)


In [31]:
# Plot the tuned model's forecast.
plot_model(tuned_prophet, plot='forecast')

# pull() returns the most recently produced score grid, and plot_model() does
# not produce one. Without the explicit predict_model() call below, a
# top-to-bottom run would print the cross-validation grid left by tune_model()
# instead of the single-row hold-out metrics this cell is meant to report.
# predict_model() on the non-finalized model scores it on the hold-out window.
holdout_forecast = predict_model(tuned_prophet)

print(pull().to_markdown(index=False))


| Model   |    MASE |   RMSSE |     MAE |    RMSE |   MAPE |   SMAPE |       R2 |
|:--------|--------:|--------:|--------:|--------:|-------:|--------:|---------:|
| Prophet | 17.2175 | 11.6892 | 42868.2 | 43149.7 | 0.5147 |   0.408 | -290.345 |


In [26]:
# Refit the tuned model with finalize_model() (per the PyCaret docs this trains
# on the whole series, hold-out included) and forecast with it.
print("\n--- Finalizing Model and Making Predictions ---")
final_prophet = finalize_model(tuned_prophet)

# Forecast with the finalized model. These are future, out-of-sample
# predictions, not test-set predictions: the recorded output starts at
# 2025-04-19, the day after the last observation (2025-04-18), so there are no
# actuals in the dataset to score them against.
predictions = predict_model(final_prophet)
print("Prediction results (first 5 rows):")
display(predictions.head())

# Plot the finalized model's forecast.
plot_model(final_prophet, plot='forecast')


--- Finalizing Model and Making Predictions ---
Prediction results (first 5 rows):


Unnamed: 0,y_pred
2025-04-19,98325.437
2025-04-20,98163.9654
2025-04-21,98188.4027
2025-04-22,98012.1829
2025-04-23,98355.7024


In [27]:
# from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
# print("\n--- Model Test Scoring ---")
# # Calculate evaluation metrics

# # y_true = btc_test
# # y_pred = predictions
# mae = mean_absolute_error(btc_train[-30:], predictions)
# mse = mean_squared_error(btc_train[-30:], predictions)
# r2 = r2_score(btc_train[-30:], predictions)

# # btc_train_last_30 = btc_train[-30:]
# # print(btc_train_last_30.index)
# # print(predictions.index)

# print(f"Mean squared error: {mse:.2f}")
# print(f"Mean absolute error: {mae:.2f}")
# print(f"R-squared score : {r2:.2f}") 

In [28]:
# print("\n--- Plot against validation set ---")

# import plotly.express as px

# # create a dataframe for visualisation
# btc_vis = pd.DataFrame(
#     {
#         "date" : btc_test[-30:].index,
#         "actual" : btc_test[-30:].values,
#         "predicted" : predictions.values.flatten(),
#     }
# )

# # print(len(btc_test[-30:].index))

# # print(len(btc_test[-30:].values))
# # print(len(predictions.values.flatten()))
# # melt the dataframe for easier handling of multiple series for plotly
# btc_vis = btc_vis.melt(id_vars=['date'], var_name="type", value_name='close')

# # plot validation set with predictions
# fig = px.line(
#     btc_vis,
#     x='date',
#     y='close',
#     color='type',
#     title="Prophet Forecast vs Actual",
#     labels=dict(close='close price', date='date')
# )

# fig.show()

In [29]:
# from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
# print("\n--- Model Evaluation ---")
# # Calculate evaluation metrics

# # y_true = btc_test
# # y_pred = predictions
# mae = mean_absolute_error(btc_test,predictions)
# mse = mean_squared_error(btc_test, predictions)
# r2 = r2_score(btc_test, predictions)

# print(f"Mean squared error: {mse:.2f}")
# print(f"Mean absolute error: {mae:.2f}")
# print(f"R-squared score : {r2:.2f}") 

In [30]:
# Render PyCaret's summary statistics and statistical tests for the
# transformed series (white noise, stationarity, normality) as a Markdown table.
# `Markdown` and `display` are imported in the notebook's top import cell, so
# every dependency is declared before first use (`display` is already called
# in the finalize/predict cell above).
display(Markdown(s.check_stats().to_markdown()))


|    | Test         | Test Name   | Data        | Property            | Setting                  |           Value |
|---:|:-------------|:------------|:------------|:--------------------|:-------------------------|----------------:|
|  0 | Summary      | Statistics  | Transformed | Length              |                          |  1825           |
|  1 | Summary      | Statistics  | Transformed | # Missing Values    |                          |     0           |
|  2 | Summary      | Statistics  | Transformed | Mean                |                          |    10.4473      |
|  3 | Summary      | Statistics  | Transformed | Median              |                          |    10.5268      |
|  4 | Summary      | Statistics  | Transformed | Standard Deviation  |                          |     0.632688    |
|  5 | Summary      | Statistics  | Transformed | Variance            |                          |     0.400294    |
|  6 | Summary      | Statistics  | Transformed | Kurtosis            |                          |    -0.535463    |
|  7 | Summary      | Statistics  | Transformed | Skewness            |                          |    -0.401146    |
|  8 | Summary      | Statistics  | Transformed | # Distinct Values   |                          |  1825           |
|  9 | White Noise  | Ljung-Box   | Transformed | Test Statictic      | {'alpha': 0.05, 'K': 24} | 40517           |
| 10 | White Noise  | Ljung-Box   | Transformed | Test Statictic      | {'alpha': 0.05, 'K': 48} | 74866.6         |
| 11 | White Noise  | Ljung-Box   | Transformed | p-value             | {'alpha': 0.05, 'K': 24} |     0           |
| 12 | White Noise  | Ljung-Box   | Transformed | p-value             | {'alpha': 0.05, 'K': 48} |     0           |
| 13 | White Noise  | Ljung-Box   | Transformed | White Noise         | {'alpha': 0.05, 'K': 24} |     0           |
| 14 | White Noise  | Ljung-Box   | Transformed | White Noise         | {'alpha': 0.05, 'K': 48} |     0           |
| 15 | Stationarity | ADF         | Transformed | Stationarity        | {'alpha': 0.05}          |     0           |
| 16 | Stationarity | ADF         | Transformed | p-value             | {'alpha': 0.05}          |     0.411878    |
| 17 | Stationarity | ADF         | Transformed | Test Statistic      | {'alpha': 0.05}          |    -1.73755     |
| 18 | Stationarity | ADF         | Transformed | Critical Value 1%   | {'alpha': 0.05}          |    -3.43396     |
| 19 | Stationarity | ADF         | Transformed | Critical Value 5%   | {'alpha': 0.05}          |    -2.86313     |
| 20 | Stationarity | ADF         | Transformed | Critical Value 10%  | {'alpha': 0.05}          |    -2.56762     |
| 21 | Stationarity | KPSS        | Transformed | Trend Stationarity  | {'alpha': 0.05}          |     0           |
| 22 | Stationarity | KPSS        | Transformed | p-value             | {'alpha': 0.05}          |     0.01        |
| 23 | Stationarity | KPSS        | Transformed | Test Statistic      | {'alpha': 0.05}          |     0.634062    |
| 24 | Stationarity | KPSS        | Transformed | Critical Value 10%  | {'alpha': 0.05}          |     0.119       |
| 25 | Stationarity | KPSS        | Transformed | Critical Value 5%   | {'alpha': 0.05}          |     0.146       |
| 26 | Stationarity | KPSS        | Transformed | Critical Value 2.5% | {'alpha': 0.05}          |     0.176       |
| 27 | Stationarity | KPSS        | Transformed | Critical Value 1%   | {'alpha': 0.05}          |     0.216       |
| 28 | Normality    | Shapiro     | Transformed | Normality           | {'alpha': 0.05}          |     0           |
| 29 | Normality    | Shapiro     | Transformed | p-value             | {'alpha': 0.05}          |     3.91754e-19 |