In [None]:
import pandas as pd
from pycaret.time_series import TSForecastingExperiment

### Load and preprocess data

In [13]:
def loadAndPreprocessData(filePath):
    """Read a weather CSV and return a gap-free daily mean temperature frame.

    Parameters
    ----------
    filePath : path or file-like
        Passed straight to ``pd.read_csv``. The file must contain a
        ``Date_Time`` column parseable by ``pd.to_datetime`` and a
        ``Temperature_C`` column.

    Returns
    -------
    pandas.DataFrame
        One ``Temperature_C`` column holding the per-day mean, indexed by a
        daily-frequency ``DatetimeIndex`` named ``Date``. Days absent from the
        input are inserted and filled by time-weighted interpolation.
    """
    raw = pd.read_csv(filePath)

    # Calendar day of every reading; this is the grouping key.
    dayKeys = pd.to_datetime(raw['Date_Time']).dt.date.rename('Date')

    # Average all readings that fall on the same day.
    dailyMeans = raw['Temperature_C'].groupby(dayKeys).mean().to_frame()

    # The keys are plain date objects; asfreq needs a DatetimeIndex.
    dailyMeans.index = pd.to_datetime(dailyMeans.index)

    # Insert missing days as NaN rows, then fill them from their neighbours.
    return dailyMeans.asfreq('D').interpolate(method='time')

### Splitting the data into training and testing sets

In [14]:
def splitData(data, splitRatio=0.8):
    """Split a time-ordered frame into leading train rows and trailing test rows.

    The split is positional (no shuffling), so chronological order is kept:
    the first ``int(len(data) * splitRatio)`` rows become the training set and
    the remainder becomes the test set. Both pieces are independent copies.

    Parameters
    ----------
    data : pandas.DataFrame or pandas.Series
        Rows in the order they should be split.
    splitRatio : float, default 0.8
        Fraction of rows assigned to the training set; must lie in [0, 1].

    Returns
    -------
    tuple
        ``(trainData, testData)``.

    Raises
    ------
    ValueError
        If ``splitRatio`` is outside [0, 1]. Without this check a negative
        ratio would slice from the end of the frame and a ratio above 1
        would silently yield an empty test set.
    """
    if not 0 <= splitRatio <= 1:
        raise ValueError(
            f"splitRatio must be between 0 and 1 (inclusive), got {splitRatio!r}"
        )

    splitIndex = int(len(data) * splitRatio)
    trainData = data.iloc[:splitIndex].copy()
    testData = data.iloc[splitIndex:].copy()
    print(f"Training set shape: {trainData.shape}")
    print(f"Test set shape: {testData.shape}")
    return trainData, testData

### PyCaret Setup

In [15]:
def setupExperiment(trainData, targetColumn, sessionId=123, forecastHorizon=1):
    """Create and configure a PyCaret time-series forecasting experiment.

    Parameters
    ----------
    trainData : pandas.DataFrame
        Training data handed to ``TSForecastingExperiment.setup`` as ``data``.
    targetColumn : str
        Name of the column to forecast.
    sessionId : int, default 123
        Forwarded as ``session_id`` so repeated runs are reproducible.
    forecastHorizon : int or array-like, default 1
        Forwarded as ``fh``. The default of 1 matches what ``setup`` uses
        when ``fh`` is omitted, so existing callers see no change. With a
        horizon of 1 the internal test split is a single row, which makes
        MAE and RMSE identical; pass a larger value (e.g. the length of the
        held-out set) to evaluate multi-step forecasts.

    Returns
    -------
    TSForecastingExperiment
        The experiment object after ``setup`` has been called on it.
    """
    experiment = TSForecastingExperiment()
    experiment.setup(
        data=trainData,
        target=targetColumn,
        fh=forecastHorizon,
        numeric_imputation_target='mean',
        session_id=sessionId
    )
    return experiment

### Comparing models & Extracting best model

In [16]:
def trainBestModel(experiment):
    """Run the experiment's model comparison and return its top pick.

    Parameters
    ----------
    experiment : object
        A configured experiment exposing ``compare_models``.

    Returns
    -------
    object
        Whatever ``experiment.compare_models(sort='MAE')`` returns.
    """
    # Rank the candidates by MAE and hand back the result directly.
    return experiment.compare_models(sort='MAE')

In [None]:
# Load the raw readings and reduce them to one interpolated mean value per day.
dailyData = loadAndPreprocessData('weather_data.csv')

# Chronological 80/20 split (splitData's default ratio).
# NOTE(review): testData is not used again in this cell.
trainData, testData = splitData(dailyData)

# Configure the forecasting experiment on the training portion only.
exp = setupExperiment(trainData, targetColumn='Temperature_C')

# Compare the candidate models (ranked by MAE) and keep the result.
bestModel = trainBestModel(exp)


Training set shape: (111, 1)
Test set shape: (28, 1)


Unnamed: 0,Description,Value
0,session_id,123
1,Target,Temperature_C
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(111, 1)"
5,Transformed data shape,"(111, 1)"
6,Transformed train set shape,"(110, 1)"
7,Transformed test set shape,"(1, 1)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,TT (Sec)
arima,ARIMA,0.0486,0.04,0.0112,0.0112,0.0008,0.0008,0.19
naive,Naive Forecaster,0.4943,0.4064,0.1142,0.1142,0.0077,0.0076,3.6067
et_cds_dt,Extra Trees w/ Cond. Deseasonalize & Detrending,0.6543,0.5375,0.1505,0.1505,0.0102,0.01,0.11
grand_means,Grand Means Forecaster,0.7575,0.6228,0.1751,0.1751,0.0117,0.0118,2.2067
croston,Croston,0.8001,0.6576,0.1844,0.1844,0.0124,0.0123,0.0333
catboost_cds_dt,CatBoost Regressor w/ Cond. Deseasonalize & Detrending,0.8043,0.6609,0.1852,0.1852,0.0125,0.0124,0.7867
xgboost_cds_dt,Extreme Gradient Boosting w/ Cond. Deseasonalize & Detrending,0.8136,0.6684,0.1871,0.1871,0.0126,0.0125,0.1433
dt_cds_dt,Decision Tree w/ Cond. Deseasonalize & Detrending,0.8251,0.6779,0.1898,0.1898,0.0128,0.0126,0.1733
rf_cds_dt,Random Forest w/ Cond. Deseasonalize & Detrending,0.8343,0.6856,0.192,0.192,0.0129,0.0128,0.13
ada_cds_dt,AdaBoost w/ Cond. Deseasonalize & Detrending,0.8994,0.7392,0.2073,0.2073,0.0139,0.0138,0.1067
