In [1]:
%pip install "pycaret[full]"

Note: you may need to restart the kernel to use updated packages.


In [15]:
# read csv file
import pandas as pd
data = pd.read_csv('AirPassengers.csv')
data['Date'] = pd.to_datetime(data['Date'])
data.head()

Unnamed: 0,Date,Passengers
0,1949-01-01,112
1,1949-02-01,118
2,1949-03-01,132
3,1949-04-01,129
4,1949-05-01,121


In [16]:
data['MA12'] = data['Passengers'].rolling(12).mean()

import plotly.express as px
fig = px.line(data, x="Date", y=["Passengers", "MA12"], template= 'plotly_dark')
fig.show()

In [17]:
import numpy as np
data['Month'] = [i.month for i in data['Date']]
data['Year'] = [i.year for i in data['Date']]

data['Series'] = np.arange(1,len(data)+1)

data.drop(['Date', 'MA12'], axis=1, inplace=True)
data = data[['Series', 'Year', 'Month', 'Passengers']]

data.head()


Unnamed: 0,Series,Year,Month,Passengers
0,1,1949,1,112
1,2,1949,2,118
2,3,1949,3,132
3,4,1949,4,129
4,5,1949,5,121


In [18]:
train = data[data['Year'] < 1960]
test = data[data['Year'] >= 1960]

train.shape, test.shape


((132, 4), (12, 4))

In [20]:
# import the regression module**
from pycaret.regression import *

# initialize setup**
s = setup(data = train, test_data = test, target = 'Passengers', fold_strategy = 'timeseries', numeric_features = ['Year', 'Series'], fold = 3 , transform_target = True, session_id = 123, data_split_shuffle = False)

Unnamed: 0,Description,Value
0,Session id,123
1,Target,Passengers
2,Target type,Regression
3,Original data shape,"(144, 4)"
4,Transformed data shape,"(144, 4)"
5,Transformed train set shape,"(132, 4)"
6,Transformed test set shape,"(12, 4)"
7,Numeric features,2
8,Preprocess,True
9,Imputation type,simple


In [21]:
best = compare_models(sort = 'MAE')

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
omp,Orthogonal Matching Pursuit,44.3192,3050.6952,53.8852,-0.4522,0.1714,0.1521,0.0167
br,Bayesian Ridge,45.5643,3233.7133,55.6766,-0.6553,0.1765,0.1576,0.0167
ridge,Ridge Regression,45.7835,3257.2206,55.8542,-0.6687,0.177,0.1585,0.7067
lar,Least Angle Regression,45.8053,3260.6009,55.8841,-0.6721,0.1771,0.1586,0.7067
lr,Linear Regression,45.8199,3263.7413,55.9068,-0.6728,0.1772,0.1586,0.7933
huber,Huber Regressor,46.042,3308.9717,56.46,-0.8186,0.1802,0.1618,0.02
lasso,Lasso Regression,51.7106,3939.279,61.4955,-1.4896,0.2374,0.183,0.7
llar,Lasso Least Angle Regression,51.7106,3939.279,61.4955,-1.4896,0.2374,0.183,0.5233
gbr,Gradient Boosting Regressor,52.4896,4355.4048,63.7434,-0.722,0.2142,0.1618,0.03
et,Extra Trees Regressor,52.5137,4610.3152,65.417,-0.7951,0.2162,0.1613,0.06


In [22]:
prediction_holdout = predict_model(best)

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Orthogonal Matching Pursuit,64.0721,5685.5261,75.4024,-0.0264,0.1544,0.1373


In [24]:
# generate predictions on the original dataset**
predictions = predict_model(best, data=data)

# # add a date column in the dataset**
predictions['Date'] = pd.date_range(start='1949-01-01', end = '1960-12-01', freq = 'MS')


Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Orthogonal Matching Pursuit,33.281,2048.6856,45.2624,0.8567,0.1356,0.1126


In [27]:
predictions

Unnamed: 0,Series,Year,Month,Passengers,prediction_label,Date
0,1,1949,1,112,120.962681,1949-01-01
1,2,1949,2,118,122.327867,1949-02-01
2,3,1949,3,132,123.706393,1949-03-01
3,4,1949,4,129,125.098368,1949-04-01
4,5,1949,5,121,126.503906,1949-05-01
...,...,...,...,...,...,...
139,140,1960,8,606,500.844927,1960-08-01
140,141,1960,9,508,505.542352,1960-09-01
141,142,1960,10,461,510.278166,1960-10-01
142,143,1960,11,390,515.052638,1960-11-01


In [23]:
# generate predictions on the original dataset**
predictions = predict_model(best, data=data)

# # add a date column in the dataset**
predictions['Date'] = pd.date_range(start='1949-01-01', end = '1960-12-01', freq = 'MS')

# line plot**
fig = px.line(predictions, x='Date', y=["Passengers", "prediction_label"], template = 'plotly_dark')

# add a vertical rectange for test-set separation**
fig.add_vrect(x0="1960-01-01", x1="1960-12-01", fillcolor="grey", opacity=0.25, line_width=0)
fig.show()

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Orthogonal Matching Pursuit,33.281,2048.6856,45.2624,0.8567,0.1356,0.1126


ValueError: All arguments should have the same length. The length of argument `y` is 2, whereas the length of  previously-processed arguments ['Date'] is 144

In [14]:
fig = px.line(predictions, x='Date', y=["Passengers"], template = 'plotly_dark')


ValueError: All arguments should have the same length. The length of argument `y` is 1, whereas the length of  previously-processed arguments ['Date'] is 144

In [13]:
fig.add_vrect(x0="1960-01-01", x1="1960-12-01", fillcolor="grey", opacity=0.25, line_width=0)
fig.show()