In [7]:
import pandas as pd
from pycaret.time_series import TSForecastingExperiment

### Load and preprocess data

In [8]:
def loadAndPreprocessData(filePath, dateColumn='Date_Time', targetColumn='Humidity_pct'):
    """Load a CSV of timestamped readings and reduce it to a daily series.

    All readings that fall on the same calendar date are averaged, the
    result is re-indexed to a strict daily frequency, and any calendar days
    missing from the file are filled by time-weighted linear interpolation.

    Args:
        filePath: Path (or any object accepted by ``pd.read_csv``) of the CSV.
        dateColumn: Name of the column holding the timestamps.
        targetColumn: Name of the numeric column to average per day.

    Returns:
        A single-column DataFrame (``targetColumn``) indexed by a daily
        ``DatetimeIndex`` named ``'Date'``.

    Raises:
        KeyError: If ``dateColumn`` or ``targetColumn`` is not in the CSV.
    """
    raw = pd.read_csv(filePath)

    # Group on the calendar date so every reading of a day collapses into one mean.
    dayKey = pd.to_datetime(raw[dateColumn]).dt.date.rename('Date')
    dailyData = raw.groupby(dayKey).agg({targetColumn: 'mean'})

    # The group keys are plain date objects; asfreq needs a real DatetimeIndex.
    dailyData.index = pd.to_datetime(dailyData.index).rename('Date')

    # asfreq inserts NaN rows for absent days; interpolate then fills them.
    dailyData = dailyData.asfreq('D')
    return dailyData.interpolate(method='time')

### Splitting the data into training & testing sets

In [9]:
def splitData(data, splitRatio=0.8):
    """Cut ``data`` positionally into a leading training part and a trailing test part.

    Rows are not shuffled: the first ``int(len(data) * splitRatio)`` rows go
    to the training set and the remaining rows to the test set. Both parts
    are independent copies, and their shapes are printed.

    Args:
        data: Object supporting ``len`` and ``.iloc`` slicing (e.g. a DataFrame).
        splitRatio: Fraction of rows assigned to the training part.

    Returns:
        Tuple ``(trainData, testData)``.
    """
    cutoff = int(splitRatio * len(data))
    trainPart, testPart = data.iloc[:cutoff].copy(), data.iloc[cutoff:].copy()

    print(f"Training set shape: {trainPart.shape}")
    print(f"Test set shape: {testPart.shape}")

    return trainPart, testPart

### PyCaret setup

In [10]:
def setupExperiment(trainData, targetColumn, sessionId=123, forecastHorizon=1):
    """Create and configure a PyCaret time-series forecasting experiment.

    Args:
        trainData: Data handed to ``TSForecastingExperiment.setup`` as ``data``.
        targetColumn: Name of the column to forecast.
        sessionId: Seed forwarded as ``session_id`` for reproducible runs.
        forecastHorizon: Forwarded as ``fh``. The default of 1 matches what
            ``setup`` uses when ``fh`` is omitted, so existing callers are
            unaffected. With a horizon of 1, the recorded run of this
            notebook held out a single observation (110 train / 1 test), so
            every model was scored on one point; pass a larger horizon to
            evaluate on a longer window.

    Returns:
        The configured ``TSForecastingExperiment`` instance.
    """
    experiment = TSForecastingExperiment()
    experiment.setup(
        data=trainData,
        target=targetColumn,
        fh=forecastHorizon,
        # Missing target values are replaced by the series mean.
        numeric_imputation_target='mean',
        session_id=sessionId,
    )
    return experiment

### Comparing models & extracting the best model

In [11]:
def trainBestModel(experiment, sortMetric='MAE'):
    """Run the experiment's model comparison and return its result.

    Args:
        experiment: An already set-up experiment exposing ``compare_models``.
        sortMetric: Metric name forwarded as ``sort`` to rank the candidates.
            Defaults to ``'MAE'``, the value that was previously hard-coded.

    Returns:
        Whatever ``experiment.compare_models`` returns for the given metric
        (per the PyCaret docs, the top-ranked model).
    """
    return experiment.compare_models(sort=sortMetric)

In [12]:
# Build the gap-free daily mean humidity series from the raw CSV.
dailyData = loadAndPreprocessData('weather_data.csv')

# Chronological (unshuffled) split using splitData's default 80/20 ratio.
trainData, testData = splitData(dailyData)

# Only the training portion is handed to PyCaret.
# NOTE(review): testData is not referenced again in the cells shown here —
# confirm that the hold-out set is evaluated further down the notebook.
exp = setupExperiment(trainData, targetColumn='Humidity_pct')

# Rank the candidate forecasters by MAE and keep the result.
bestModel = trainBestModel(exp)


Training set shape: (111, 1)
Test set shape: (28, 1)


Unnamed: 0,Description,Value
0,session_id,123
1,Target,Humidity_pct
2,Approach,Univariate
3,Exogenous Variables,Not Present
4,Original data shape,"(111, 1)"
5,Transformed data shape,"(111, 1)"
6,Transformed train set shape,"(110, 1)"
7,Transformed test set shape,"(1, 1)"
8,Rows with missing values,0.0%
9,Fold Generator,ExpandingWindowSplitter


Unnamed: 0,Model,MASE,RMSSE,MAE,RMSE,MAPE,SMAPE,TT (Sec)
grand_means,Grand Means Forecaster,0.4235,0.3276,0.0881,0.0881,0.0015,0.0015,1.1733
auto_arima,Auto ARIMA,0.4235,0.3276,0.0881,0.0881,0.0015,0.0015,0.3133
ada_cds_dt,AdaBoost w/ Cond. Deseasonalize & Detrending,0.4254,0.3291,0.0886,0.0886,0.0015,0.0015,0.07
arima,ARIMA,0.4285,0.3315,0.0891,0.0891,0.0015,0.0015,0.0867
lightgbm_cds_dt,Light Gradient Boosting w/ Cond. Deseasonalize & Detrending,0.4332,0.3351,0.0901,0.0901,0.0015,0.0015,0.12
polytrend,Polynomial Trend Forecaster,0.4657,0.3604,0.0969,0.0969,0.0016,0.0016,1.4667
en_cds_dt,Elastic Net w/ Cond. Deseasonalize & Detrending,0.4668,0.3612,0.0971,0.0971,0.0016,0.0016,0.2
lasso_cds_dt,Lasso w/ Cond. Deseasonalize & Detrending,0.4668,0.3612,0.0971,0.0971,0.0016,0.0016,0.0533
llar_cds_dt,Lasso Least Angular Regressor w/ Cond. Deseasonalize & Detrending,0.4668,0.3612,0.0971,0.0971,0.0016,0.0016,0.0533
br_cds_dt,Bayesian Ridge w/ Cond. Deseasonalize & Detrending,0.4682,0.3623,0.0974,0.0974,0.0016,0.0016,0.0567
