In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pandas.plotting import lag_plot
from statsmodels.graphics.tsaplots import plot_acf
from pandas import DataFrame
from pandas import Grouper
import seaborn as sns
from statsmodels.tsa.seasonal import seasonal_decompose

In [None]:
# Load the raw series from Book3.csv: the first row supplies the column names,
# the first column becomes the index, and pandas is asked to parse that index as dates.
data=pd.read_csv('Book3.csv',header=0, index_col=0, parse_dates=True)

# EDA & Preprocessing

In [None]:
# (number of rows, number of columns) of the raw frame
data.shape

In [None]:
# Preview the first five rows of the raw frame
data.head()

# Upsampling the data from yearly to monthly

In [None]:
# Resample onto a monthly ('M') grid, taking the mean of the observations in each bin.
# NOTE(review): per the section heading the input is yearly, so most monthly bins are
# presumably empty (NaN) at this point — confirm against the printed preview.
upsampled = data.resample('M').mean()
print(upsampled.head(24))

In [None]:
# Interpolate the null values linearly between the known observations
interpolated = upsampled.interpolate(method='linear')
print(interpolated.head(24))


In [None]:
# Export the new monthly data file; index=True writes the index as the first column.
# The file name (including its spelling and double space) is read back later in this
# notebook, so the two literals must stay in sync.
interpolated.to_csv('Inflation Rate  (montly).csv',index=True)

In [None]:
# Summary statistics of the interpolated monthly frame
interpolated.describe()

In [None]:
# Count the remaining null values per column after interpolation
interpolated.isnull().sum()

In [None]:
# Index type, column dtypes and non-null counts of the monthly frame
interpolated.info()

In [None]:
# Line plot of the monthly inflation rate on a wide canvas
fig, ax = plt.subplots(figsize=(18, 5))
ax.plot(interpolated['inflation_rate'], color='blue', linewidth=3)
ax.set_xlabel('Year')
ax.set_ylabel('inflation rate')
ax.set_title("Line Plot of Inflation Rate")
plt.show()

In [None]:
# Histogram of the inflation rate values
sns.histplot(data=interpolated, x="inflation_rate", color="lime")
plt.show()

In [None]:
# Kernel density estimate (KDE) of the inflation rate
interpolated.inflation_rate.plot(kind='kde')
plt.show()

In [None]:
# Box plot of the inflation rate
sns.boxplot(data=interpolated['inflation_rate'],color='yellow')
plt.show()

In [None]:
# Seasonal decomposition of the monthly series with a 12-observation period
decompose_ts_add = seasonal_decompose(interpolated.inflation_rate,period=12)
# DecomposeResult.plot() builds and returns its own Figure, so calling
# plt.figure(figsize=...) beforehand only leaves an empty canvas behind and the
# requested size is never applied. Resize the returned figure instead.
decomposition_fig = decompose_ts_add.plot()
decomposition_fig.set_size_inches(18, 5)
plt.show()

In [None]:
# Lag plot with pandas' default lag
lag_plot(interpolated.inflation_rate)
plt.show()

In [None]:
# Autocorrelation plot for the first 30 lags
plot_acf(interpolated.inflation_rate,lags=30)
plt.show()

In [None]:
# Reload the exported monthly file. No index_col is given, so the frame gets a default
# integer index and the dates stay an ordinary column.
# Note: this rebinds `data`, which held the raw frame loaded from Book3.csv until now.
data=pd.read_csv('Inflation Rate  (montly).csv',header=0)
data

In [None]:
# Feature engineering for the model-based approach:
#   t        - 1-based time index
#   t_square - squared time index
#   log_rate - natural log of the inflation rate
#   year_ / month - calendar year and month name parsed from the 'year' column
data['t'] = np.arange(1, len(data) + 1)
data['t_square'] = data['t'] ** 2
data['log_rate'] = np.log(data['inflation_rate'])
observation_dates = pd.to_datetime(data['year'])
data['year_'] = observation_dates.dt.year
data['month'] = observation_dates.dt.month_name()


In [None]:
# Preview the frame with the engineered features
data.head()

In [None]:
# Heat map of the mean inflation rate by year (rows) and month (columns)
MONTH_ORDER = ['January', 'February', 'March', 'April', 'May', 'June',
               'July', 'August', 'September', 'October', 'November', 'December']
plt.figure(figsize=(17,22))
heatmap_y_month = pd.pivot_table(data=data,values="inflation_rate",index="year_",columns="month",aggfunc="mean",fill_value=0)
# pivot_table sorts the month-name columns alphabetically (April, August, ...);
# put them back into calendar order so the heat map reads left to right through the year.
heatmap_y_month = heatmap_y_month[[name for name in MONTH_ORDER if name in heatmap_y_month.columns]]
sns.heatmap(heatmap_y_month,annot=True,fmt="g")
# Match the other plotting cells and suppress the Axes repr
plt.show()

In [None]:
# One indicator (dummy) column per month name
df=pd.get_dummies(data.month)

In [None]:
# Attach the month indicator columns to the feature frame (joined on the index)
data_1=data.join(df)

In [None]:
# Display the modelling frame (features plus month indicators)
data_1

# MODEL BASED METHODS

# LINEAR MODEL

In [None]:
# Chronological hold-out split: the first TRAIN_ROWS rows train the models and
# every remaining row is held out for evaluation. Both halves are cut at the same
# boundary so they can never overlap or skip rows; head(345)/tail(148) only
# partitioned the frame when it had exactly 493 rows.
TRAIN_ROWS = 345
Train = data_1.iloc[:TRAIN_ROWS]
Test = data_1.iloc[TRAIN_ROWS:]

In [None]:
def RMSE(org, pred):
    """Return the root-mean-square error between observed and predicted values.

    Both arguments are converted to NumPy arrays first, so the comparison is
    positional (any pandas index on the inputs is ignored).

    Parameters
    ----------
    org : array-like
        Observed values.
    pred : array-like
        Predicted values, in the same order as ``org``.

    Returns
    -------
    numpy floating scalar
        Square root of the mean squared difference.
    """
    observed = np.array(org)
    predicted = np.array(pred)
    squared_errors = np.square(observed - predicted)
    return np.sqrt(squared_errors.mean())

In [None]:
import statsmodels.formula.api as smf

# Linear trend: regress the inflation rate on the time index t (fit on Train only)
linear_model = smf.ols('inflation_rate~t', data=Train).fit()
# Predict the hold-out period from its time index and score it
pred_linear = pd.Series(linear_model.predict(Test[['t']]))
rmse_linear_model = RMSE(Test['inflation_rate'], pred_linear)
rmse_linear_model

# Exponential Model

In [None]:
# Exponential trend: fit a straight line to log(inflation rate) over t
Exp = smf.ols('log_rate~t', data=Train).fit()
pred_Exp = pd.Series(Exp.predict(Test[['t']]))
# Predictions are on the log scale; exponentiate before comparing with the raw rate
rmse_exp = RMSE(Test['inflation_rate'], np.exp(pred_Exp))
rmse_exp

# Quadratic Model

In [None]:
# Quadratic trend: inflation rate as a function of t and t squared
Quad = smf.ols('inflation_rate~t+t_square', data=Train).fit()
pred_Quad = pd.Series(Quad.predict(Test.loc[:, ['t', 't_square']]))
rmse_quad_model = RMSE(Test['inflation_rate'], pred_Quad)
rmse_quad_model

# Additive Seasonality model

In [None]:
# Additive seasonality: regress the inflation rate on the month indicators.
# Only January-November appear in the formula; December is left out and so acts
# as the reference month.
add_sea = smf.ols('inflation_rate~January+February+March+April+May+June+July+August+September+October+November',data=Train).fit()
pred_add_sea = pd.Series(add_sea.predict(Test[['January','February','March','April','May','June','July','August','September','October','November']]))
rmse_add_sea = RMSE(Test['inflation_rate'], pred_add_sea)
rmse_add_sea

# Additive Seasonality Quadratic model

In [None]:
# Quadratic trend plus additive seasonality: t, t_square and the January-November
# month indicators (December is left out as the reference month).
add_sea_Quad = smf.ols('inflation_rate~t+t_square+January+February+March+April+May+June+July+August+September+October+November',data=Train).fit()
pred_add_sea_quad = pd.Series(add_sea_Quad.predict(Test[['January','February','March','April','May','June','July','August','September','October','November','t','t_square']]))
rmse_add_sea_quad = RMSE(Test['inflation_rate'], pred_add_sea_quad)
rmse_add_sea_quad    

# Multiplicative Seasonality model

In [None]:
# Multiplicative seasonality: regress log(inflation rate) on the month indicators.
Mul_sea = smf.ols('log_rate~January+February+March+April+May+June+July+August+September+October+November',data=Train).fit()
pred_Mult_sea = pd.Series(Mul_sea.predict(Test))
# Predictions are on the log scale; exponentiate before comparing with the raw rate
rmse_Mul_sea = RMSE(Test['inflation_rate'], np.exp(pred_Mult_sea))
rmse_Mul_sea

# Multiplicative Additive Seasonality Model

In [None]:
# Linear trend plus seasonality on the log scale: regress log(inflation rate)
# on t and the January-November month indicators.
Mul_Add_sea = smf.ols('log_rate~t+January+February+March+April+May+June+July+August+September+October+November',data = Train).fit()
pred_Mul_add_sea = pd.Series(Mul_Add_sea.predict(Test))
# Predictions are on the log scale; exponentiate before comparing with the raw rate
rmse_Mul_add_sea = RMSE(Test['inflation_rate'], np.exp(pred_Mul_add_sea))
rmse_Mul_add_sea

# DATA DRIVEN METHODS

In [None]:
from statsmodels.tsa.holtwinters import SimpleExpSmoothing # SES
from statsmodels.tsa.holtwinters import Holt # Holts Exponential Smoothing
from statsmodels.tsa.holtwinters import ExponentialSmoothing 

# Simple Exponential Smoothing

In [None]:
# Simple exponential smoothing with the smoothing level fixed at 0.2
ses_model = SimpleExpSmoothing(Train['inflation_rate']).fit(smoothing_level=0.2)
# Predict over the index range spanned by the hold-out rows
pred_ses = ses_model.predict(start=Test.index[0], end=Test.index[-1])
rmse_ses = RMSE(Test['inflation_rate'], pred_ses)
rmse_ses

# Holt Method

In [None]:
# Holt's exponential smoothing fitted with statsmodels' default settings
hw_model = Holt(Train['inflation_rate']).fit()
# Predict over the index range spanned by the hold-out rows
pred_hw = hw_model.predict(start=Test.index[0], end=Test.index[-1])
rmse_hw = RMSE(Test['inflation_rate'], pred_hw)
rmse_hw

# Holt-Winters exponential smoothing with additive seasonality and additive trend

In [None]:
# Exponential smoothing with additive trend and additive 12-period seasonality
hwe_model_add_add = ExponentialSmoothing(
    Train['inflation_rate'],
    seasonal="add",
    trend="add",
    seasonal_periods=12,
).fit()
pred_hwe_add_add = hwe_model_add_add.predict(start=Test.index[0], end=Test.index[-1])
rmse_hwe_add_add = RMSE(Test['inflation_rate'], pred_hwe_add_add)
rmse_hwe_add_add

# Holt-Winters exponential smoothing with multiplicative seasonality and additive trend

In [None]:
# Exponential smoothing with additive trend and multiplicative 12-period seasonality
hwe_model_mul_add = ExponentialSmoothing(
    Train['inflation_rate'],
    seasonal="mul",
    trend="add",
    seasonal_periods=12,
).fit()
pred_hwe_mul_add = hwe_model_mul_add.predict(start=Test.index[0], end=Test.index[-1])
rmse_hwe_mul_add = RMSE(Test['inflation_rate'], pred_hwe_mul_add)
rmse_hwe_mul_add

# P D Q ORDER FOR ARIMA MODEL

In [151]:
# grid search ARIMA parameters for time series
import warnings
from math import sqrt
from pandas import read_csv
from pandas import datetime
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error
 
# evaluate an ARIMA model for a given order (p,d,q)
def evaluate_arima_model(X, arima_order, train_fraction=0.66):
    """Walk-forward RMSE of an ARIMA model with the given order.

    The leading ``train_fraction`` of ``X`` seeds the history. For every
    remaining observation the model is refit on the history so far, a
    one-step-ahead forecast is recorded, and the true observation is then
    appended to the history.

    Parameters
    ----------
    X : array-like
        Univariate series in time order.
    arima_order : tuple
        ``(p, d, q)`` order passed to ``ARIMA(order=...)``.
    train_fraction : float, optional
        Share of ``X`` used as the initial history (default 0.66).

    Returns
    -------
    float
        Root-mean-square error of the one-step-ahead forecasts.
    """
    # Work on a plain ndarray so slicing and element access are positional even
    # when a pandas Series with a non-default index is passed in.
    series = np.asarray(X)
    train_size = int(len(series) * train_fraction)
    train, test = series[:train_size], series[train_size:]
    # Use literals rather than list(): a later cell in this notebook rebinds the
    # name `list`, after which list() would raise on every call.
    history = [*train]
    predictions = []
    for actual in test:
        model_fit = ARIMA(history, order=arima_order).fit()
        predictions.append(model_fit.forecast()[0])
        # Reveal the true value only after forecasting it
        history.append(actual)
    # Out-of-sample error over the walk-forward forecasts
    rmse = sqrt(mean_squared_error(test, predictions))
    return rmse
 
# evaluate combinations of p, d and q values for an ARIMA model
def evaluate_models(dataset, p_values, d_values, q_values):
    """Grid-search ARIMA orders and print the RMSE of each one.

    Every ``(p, d, q)`` combination is scored with ``evaluate_arima_model``.
    An order whose evaluation raises an ordinary exception is reported and
    skipped; KeyboardInterrupt and SystemExit are not intercepted, so the
    search can still be stopped from the notebook.

    Parameters
    ----------
    dataset : numpy.ndarray
        Series to evaluate; cast to float32 before fitting.
    p_values, d_values, q_values : iterable of int
        Candidate values for each component of the ARIMA order.

    Returns
    -------
    None
        Results are printed, ending with the best order and its RMSE.
    """
    dataset = dataset.astype('float32')
    best_score, best_cfg = float("inf"), None
    for p in p_values:
        for d in d_values:
            for q in q_values:
                order = (p,d,q)
                try:
                    rmse = evaluate_arima_model(dataset, order)
                except Exception as exc:
                    # Some orders cannot be fitted; say so instead of hiding it
                    print('ARIMA%s skipped: %s' % (order, exc))
                    continue
                if rmse < best_score:
                    best_score, best_cfg = rmse, order
                print('ARIMA%s RMSE=%.3f' % (order,rmse))
    print('Best ARIMA%s RMSE=%.3f' % (best_cfg, best_score))
 
# Load the series to search over. read_csv(squeeze=True) was removed in pandas 2.0;
# .squeeze("columns") is the replacement and turns a one-column frame into a Series.
# NOTE(review): 'dataset.csv' is not written anywhere in the cells shown here —
# confirm it holds the same monthly inflation series as the rest of the notebook.
train = read_csv('dataset.csv', header=0, index_col=0, parse_dates=True).squeeze("columns")
# evaluate parameters
p_values = [0, 1, 2, 4, 6, 8, 10]
d_values = range(0, 3)
q_values = range(0, 3)
# Silences all warnings for the rest of the kernel session, including ARIMA fit warnings
warnings.filterwarnings("ignore")
evaluate_models(train.values, p_values, d_values, q_values)

ARIMA(0, 0, 0) RMSE=3.694
ARIMA(0, 0, 1) RMSE=1.853
ARIMA(0, 0, 2) RMSE=0.993
ARIMA(0, 1, 0) RMSE=0.145
ARIMA(0, 1, 1) RMSE=0.091
ARIMA(0, 1, 2) RMSE=0.075
ARIMA(0, 2, 0) RMSE=0.060
ARIMA(0, 2, 1) RMSE=0.060
ARIMA(0, 2, 2) RMSE=0.060
ARIMA(1, 0, 0) RMSE=0.147
ARIMA(1, 0, 1) RMSE=0.093
ARIMA(1, 0, 2) RMSE=0.078
ARIMA(1, 1, 0) RMSE=0.056
ARIMA(1, 1, 1) RMSE=0.056
ARIMA(1, 1, 2) RMSE=0.056
ARIMA(1, 2, 0) RMSE=0.060
ARIMA(1, 2, 1) RMSE=0.060
ARIMA(1, 2, 2) RMSE=0.057
ARIMA(2, 0, 0) RMSE=0.073
ARIMA(2, 0, 1) RMSE=0.073
ARIMA(2, 0, 2) RMSE=0.074
ARIMA(2, 1, 0) RMSE=0.056
ARIMA(2, 1, 1) RMSE=0.065
ARIMA(2, 1, 2) RMSE=0.066
ARIMA(2, 2, 0) RMSE=0.060
ARIMA(2, 2, 1) RMSE=0.060
ARIMA(2, 2, 2) RMSE=0.059
ARIMA(4, 0, 0) RMSE=0.073
ARIMA(4, 0, 1) RMSE=0.074
ARIMA(4, 0, 2) RMSE=0.074
ARIMA(4, 1, 0) RMSE=0.057
ARIMA(4, 1, 1) RMSE=0.065
ARIMA(4, 1, 2) RMSE=0.066
ARIMA(4, 2, 0) RMSE=0.060
ARIMA(4, 2, 1) RMSE=0.060
ARIMA(4, 2, 2) RMSE=0.060
ARIMA(6, 0, 0) RMSE=0.073
ARIMA(6, 0, 1) RMSE=0.073
ARIMA(6, 0, 

# RMSE SCORES FOR ALL MODELS

In [None]:
# Hard-coded scores that feed the comparison table; neither value is computed in
# the cells shown here.
# NOTE(review): presumably copied from separate ARIMA / Prophet runs — confirm the
# source and that they were scored on the same hold-out rows as the other models.
Arima_rmse=3.053524040082059
rmse_prophet_model=2.8906310978564043

In [47]:
list = [['Simple Exponential Method',rmse_ses], ['Holt method',rmse_hw],
          ['HW exp smoothing add',rmse_hwe_add_add],['HW exp smoothing mult',rmse_hwe_mul_add],
          ['Linear ModeL',rmse_linear_model],['Exp model',rmse_exp],['Quad model',rmse_quad_model],
          ['add seasonality',rmse_add_sea],['Quad add seasonality',rmse_add_sea_quad],
          ['Mult Seasonality',rmse_Mul_sea],['Mult add seasonality',rmse_Mul_add_sea],['ARIMA_MODEL',Arima_rmse],['FB_PROPHET',rmse_prophet_model]]

In [48]:
df = pd.DataFrame(list, columns =['Model', 'RMSE_Value']) 
df.sort_values(['RMSE_Value'])

Unnamed: 0,Model,RMSE_Value
9,Mult Seasonality,2.804364
12,FB_PROPHET,2.890631
7,add seasonality,2.935313
11,ARIMA_MODEL,3.053524
6,Quad model,3.268556
8,Quad add seasonality,3.279261
4,Linear ModeL,3.293238
5,Exp model,3.368274
10,Mult add seasonality,3.373431
0,Simple Exponential Method,4.288232
