In [7]:
import pycaret.time_series
from pycaret.time_series import TSForecastingExperiment
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [8]:
weather_data = pd.read_csv('weather_data.csv')
weather_data['Date_Time'] = pd.to_datetime(weather_data['Date_Time'])


In [9]:
split_ratio = 0.8  # 80% training, 20% testing
split_index = int(len(weather_data) * split_ratio)
train = weather_data[:split_index]  
test = weather_data[split_index:]   

print(f"Training set shape: {train.shape}")
print(f"Test set shape: {test.shape}")


Training set shape: (800000, 6)
Test set shape: (200000, 6)


In [27]:
import pandas as pd
from pycaret.time_series import TSForecastingExperiment

# Load and preprocess data
weather_data = pd.read_csv('weather_data.csv')
weather_data['Date_Time'] = pd.to_datetime(weather_data['Date_Time'])

# Extract date only (ignore time), then aggregate data across all locations by date
weather_data['Date'] = weather_data['Date_Time'].dt.date
daily_data = weather_data.groupby('Date').agg({
    'Temperature_C': 'mean'
}).reset_index()

# Convert Date column to datetime and set it as a DatetimeIndex with daily frequency
daily_data['Date'] = pd.to_datetime(daily_data['Date'])
daily_data = daily_data.set_index('Date').asfreq('D')

# Fill missing values by interpolating
daily_data = daily_data.interpolate(method='time')

# Split the data into train and test
split_ratio = 0.8  # 80% training, 20% testing
split_index = int(len(daily_data) * split_ratio)
train = daily_data.iloc[:split_index].copy()
test = daily_data.iloc[split_index:].copy()

print(f"Training set shape: {train.shape}")
print(f"Test set shape: {test.shape}")

# Initialize and set up the PyCaret time series experiment
exp = TSForecastingExperiment()
exp.setup(
    data=train,
    target='Temperature_C',
    numeric_imputation_target='mean',  # Impute target missing values
    session_id=123
)

# Train and compare models
best = exp.compare_models(sort='MAE')

# Define forecast horizon as a list
forecast_horizon = list(range(1, len(test) + 1))

# Forecast for the length of the test set without using exogenous variables
forecast = exp.predict_model(best, fh=forecast_horizon)  # Explicitly set `fh`

print("Forecast:")
print(forecast)


Training set shape: (111, 1)
Test set shape: (28, 1)


Unnamed: 0,Description,Value
0,session_id,123
1,Target,Temperature_C
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(111, 1)"
5,Transformed data shape,"(111, 1)"
6,Transformed train set shape,"(110, 1)"
7,Transformed test set shape,"(1, 1)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,TT (Sec)
arima,ARIMA,0.0486,0.04,0.0112,0.0112,0.0008,0.0008,0.14
naive,Naive Forecaster,0.4943,0.4064,0.1142,0.1142,0.0077,0.0076,4.0667
et_cds_dt,Extra Trees w/ Cond. Deseasonalize & Detrending,0.6543,0.5375,0.1505,0.1505,0.0102,0.01,0.2133
grand_means,Grand Means Forecaster,0.7575,0.6228,0.1751,0.1751,0.0117,0.0118,2.67
croston,Croston,0.8001,0.6576,0.1844,0.1844,0.0124,0.0123,0.0667
catboost_cds_dt,CatBoost Regressor w/ Cond. Deseasonalize & Detrending,0.8043,0.6609,0.1852,0.1852,0.0125,0.0124,1.1433
xgboost_cds_dt,Extreme Gradient Boosting w/ Cond. Deseasonalize & Detrending,0.8136,0.6684,0.1871,0.1871,0.0126,0.0125,0.4067
dt_cds_dt,Decision Tree w/ Cond. Deseasonalize & Detrending,0.8251,0.6779,0.1898,0.1898,0.0128,0.0126,0.1633
rf_cds_dt,Random Forest w/ Cond. Deseasonalize & Detrending,0.8343,0.6856,0.192,0.192,0.0129,0.0128,0.1967
ada_cds_dt,AdaBoost w/ Cond. Deseasonalize & Detrending,0.8994,0.7392,0.2073,0.2073,0.0139,0.0138,0.1733


Forecast:
             y_pred
2024-04-20  14.7686
2024-04-21  14.7494
2024-04-22  14.7370
2024-04-23  14.7290
2024-04-24  14.7239
2024-04-25  14.7206
2024-04-26  14.7185
2024-04-27  14.7171
2024-04-28  14.7163
2024-04-29  14.7157
2024-04-30  14.7153
2024-05-01  14.7151
2024-05-02  14.7149
2024-05-03  14.7149
2024-05-04  14.7148
2024-05-05  14.7148
2024-05-06  14.7147
2024-05-07  14.7147
2024-05-08  14.7147
2024-05-09  14.7147
2024-05-10  14.7147
2024-05-11  14.7147
2024-05-12  14.7147
2024-05-13  14.7147
2024-05-14  14.7147
2024-05-15  14.7147
2024-05-16  14.7147
2024-05-17  14.7147


In [None]:
final_model = finalize_model(best_model)

# Forecast future values
future_forecast = predict_model(final_model, fh=30)  # Forecasting for the next 30 days


Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,12.5503,210.0454,14.4929,-0.0003,0.9183,7.2909
1,12.5163,209.3696,14.4696,-0.0,0.9168,8.0271
2,12.5264,209.5865,14.4771,-0.0001,0.9143,10.9525
Mean,12.531,209.6672,14.4799,-0.0001,0.9165,8.7568
Std,0.0142,0.2817,0.0097,0.0001,0.0016,1.5814


Fitting 3 folds for each of 10 candidates, totalling 30 fits


In [4]:
plot_model(final_model, plot='forecast', data_kwargs={'fh': len(test) + 30})

NameError: name 'final_model' is not defined