In [None]:

#Importing necessary packages

import numpy               # linear algebra, vectors and matrices
import pandas              # tables and data manipulations-data processing-CSV file I/O (e.g. pd.read_csv)
import warnings            # off warning mode
warnings.filterwarnings('ignore')

In [None]:

#Loading the Chrysler-Canada dataset and splitting it into training and validation data

from pandas import read_csv
# Load the full series; the first CSV column is used as the index
Chry = read_csv("CHRYSLER_SALES.csv", header=0, index_col=0)

# Hold out the final 12 rows for validation; everything before them is the working dataset
split_point = len(Chry) - 12
dataset, validation = Chry[0:split_point], Chry[split_point:]
print('Dataset %d, Validation %d' % (len(dataset), len(validation)))

# Write each split exactly once, without the index column.
# The earlier version first wrote both files with the index and then immediately
# overwrote them with index=False, so only the index-free files ever survived.
dataset.to_csv('dataset.csv', index=False)
validation.to_csv('validation.csv', index=False)
Chry.head()

In [12]:
#Persistence (naive) baseline forecast evaluated with walk-forward validation (reference RMSE for the ARIMA models below)


from sklearn.metrics import mean_squared_error
from math import sqrt

# Reload the training portion that was saved to disk
Chry = read_csv('dataset.csv')

# Cast the raw values to float32 and cut them in half: the first half seeds
# the history, the second half is stepped through one observation at a time
X = Chry.values.astype('float32')
train_size = int(len(X) * 0.50)
train = X[:train_size]
test = X[train_size:]

# Walk-forward validation of a persistence forecast: each prediction is simply
# the most recent value in the history, after which the true value is appended
history = list(train)
predictions = []
for i, obs in enumerate(test):
    yhat = history[-1]
    predictions.append(yhat)
    history.append(obs)
    # NOTE(review): '%3.f' prints the expected value with minimum width 3 and no
    # decimals; '%.3f' was probably intended. Left as-is to match recorded output.
    print('>Predicted=%.3f, Expected=%3.f' % (yhat, obs))

# Root mean squared error of the predictions against the held-back half
mse = mean_squared_error(test, predictions)
rmse = sqrt(mse)
print('RMSE: %.3f' % rmse)

>Predicted=16529.000, Expected=15427
>Predicted=15427.000, Expected=22497
>Predicted=22497.000, Expected=23559
>Predicted=23559.000, Expected=21244
>Predicted=21244.000, Expected=18577
>Predicted=18577.000, Expected=23974
>Predicted=23974.000, Expected=17604
>Predicted=17604.000, Expected=54016
>Predicted=54016.000, Expected=34726
>Predicted=34726.000, Expected=31879
>Predicted=31879.000, Expected=29674
>Predicted=29674.000, Expected=26863
>Predicted=26863.000, Expected=20792
>Predicted=20792.000, Expected=28070
>Predicted=28070.000, Expected=24850
>Predicted=24850.000, Expected=22222
>Predicted=22222.000, Expected=18766
>Predicted=18766.000, Expected=25513
>Predicted=25513.000, Expected=20696
>Predicted=20696.000, Expected=25083
>Predicted=25083.000, Expected=33905
>Predicted=33905.000, Expected=27836
>Predicted=27836.000, Expected=29070
>Predicted=29070.000, Expected=27249
>Predicted=27249.000, Expected=19978
>Predicted=19978.000, Expected=28678
>Predicted=28678.000, Expected=25251
>

In [13]:
#Summary statistics

# Reload the saved training split and print its descriptive statistics
# (count, mean, std, min, quartiles and max for each column)
Chry = read_csv('dataset.csv')
summary = Chry.describe()
print(summary)

             SALES
count    156.00000
mean   28329.75000
std    13787.61649
min     9046.00000
25%    18191.00000
50%    24151.00000
75%    34436.75000
max    65594.00000


In [17]:
# invert differenced value
def inverse_difference(history, yhat, interval=1):
    """Undo a lag-`interval` difference.

    Adds the value found `interval` positions from the end of `history`
    back onto the differenced value `yhat` and returns the sum.
    """
    reference = history[-interval]
    return yhat + reference


In [None]:
#Evaluating Performance of ARIMA (Grid Search hyper parameters)

from statsmodels.tsa.arima_model import ARIMA
from sklearn.metrics import mean_squared_error
from math import sqrt

# create a differenced series
def difference(dataset, interval=1):
    """Return the lag-`interval` differences of `dataset` as a numpy array.

    Element k of the result is dataset[k + interval] - dataset[k], so the
    result has len(dataset) - interval entries (none when the input is too short).
    """
    deltas = [
        dataset[pos] - dataset[pos - interval]
        for pos in range(interval, len(dataset))
    ]
    return numpy.array(deltas)

# invert differenced value
def inverse_difference(history, yhat, interval=1):
    """Map a differenced forecast back to the original scale.

    The observation `interval` steps from the end of `history` is added
    to `yhat`; the sum is returned.
    """
    baseline = history[-interval]
    return yhat + baseline

# evaluate an ARIMA model for a given order (p,d,q) and return RMSE
def evaluate_arima_model(X, arima_order):
    """Return the walk-forward RMSE of an ARIMA model with the given order.

    The values are cast to float32 and split in half. For every observation in
    the second half, the history so far is differenced at lag 12, an ARIMA of
    order `arima_order` is fitted to that differenced series, a one-step
    forecast is taken and shifted back with the value 12 steps earlier, and the
    true observation is then appended to the history.

    Parameters:
        X: array-like supporting ``.astype`` (e.g. the ``.values`` of a DataFrame).
        arima_order: the (p, d, q) tuple passed to ARIMA as ``order``.

    Returns:
        The square root of the mean squared error between the second half of
        the data and the forecasts.

    NOTE(review): ``fit(trend='nc', disp=0)`` matches the legacy
    ``statsmodels.tsa.arima_model`` interface - confirm the installed
    statsmodels version still provides it.
    """
    months_in_year = 12  # seasonal lag used for differencing and for inverting it
    values = X.astype('float32')
    cut = int(len(values) * 0.50)
    train, test = values[:cut], values[cut:]
    history = list(train)
    predictions = []
    for actual in test:
        # refit on the seasonally differenced history at every step
        diff = difference(history, months_in_year)
        model_fit = ARIMA(diff, order=arima_order).fit(trend='nc', disp=0)
        forecast = model_fit.forecast()[0]
        predictions.append(inverse_difference(history, forecast, months_in_year))
        history.append(actual)
    # out-of-sample error over the whole second half
    return sqrt(mean_squared_error(test, predictions))

# evaluate combinations of p, d and q values for an ARIMA model
def evaluate_models(dataset, p_values, d_values, q_values):
    """Grid-search ARIMA orders, printing each RMSE and the best configuration.

    Every (p, d, q) combination from the three iterables is scored with
    ``evaluate_arima_model``. A combination whose evaluation raises an ordinary
    exception is reported and skipped, so one failing order does not abort the
    search. KeyboardInterrupt and SystemExit are deliberately NOT caught, so a
    long-running search can still be stopped from the notebook.

    Parameters:
        dataset: array-like supporting ``.astype``; cast to float32 before use.
        p_values, d_values, q_values: iterables of candidate orders.

    Returns:
        None. Results are printed; if no combination succeeds the final line
        reports ``None`` with an RMSE of ``inf``.
    """
    dataset = dataset.astype('float32')
    best_score, best_cfg = float("inf"), None
    for p in p_values:
        for d in d_values:
            for q in q_values:
                order = (p, d, q)
                try:
                    rmse = evaluate_arima_model(dataset, order)
                except Exception as exc:
                    # make the failure visible instead of silently dropping the order
                    print('ARIMA%s skipped: %s' % (order, exc))
                    continue
                if rmse < best_score:
                    best_score, best_cfg = rmse, order
                print('ARIMA%s RMSE=%.3f' % (order, rmse))
    print('Best ARIMA%s RMSE=%.3f' % (best_cfg, best_score))

# Reload the training split that the grid search will score against
Chry = read_csv('dataset.csv')

# Candidate orders: p and q run over 0..6, d over 0..2
p_values, d_values, q_values = range(0, 7), range(0, 3), range(0, 7)

# Silence library warnings while the models are being fitted, then run the search
warnings.filterwarnings("ignore")
evaluate_models(Chry.values, p_values, d_values, q_values)

In [None]:

#ARIMA (Residual error review)

# create a differenced series
def difference(dataset, interval=1):
    """Return the lag-`interval` differences of `dataset` as a plain list.

    Element k of the result is dataset[k + interval] - dataset[k]; the list is
    empty when `dataset` has no more than `interval` items.
    """
    return [
        dataset[pos] - dataset[pos - interval]
        for pos in range(interval, len(dataset))
    ]

# invert differenced value
def inverse_difference(history, yhat, interval=1):
    """Reverse a lag-`interval` difference.

    Returns `yhat` plus the entry located `interval` positions from the end
    of `history`.
    """
    anchor = history[-interval]
    return yhat + anchor

# DataFrame and pyplot are used below but are not imported by any earlier cell,
# so import them here to keep this cell runnable on a fresh kernel
from pandas import DataFrame
from matplotlib import pyplot

# load data
series = read_csv('dataset.csv')
# prepare data: float32 values, first half for the initial history, second half to forecast
X = series.values
X = X.astype('float32')
train_size = int(len(X) * 0.50)
train, test = X[0:train_size], X[train_size:]
# walk-forward validation
months_in_year = 12  # seasonal lag; constant, so defined once outside the loop
history = [x for x in train]
predictions = list()
for i in range(len(test)):
    # difference the history at the seasonal lag
    diff = difference(history, months_in_year)
    # fit an ARIMA(0,0,1) on the differenced series and take a one-step forecast
    model = ARIMA(diff, order=(0,0,1))
    model_fit = model.fit(trend='nc', disp=0)
    yhat = model_fit.forecast()[0]
    # shift the forecast back using the value from months_in_year steps earlier
    yhat = inverse_difference(history, yhat, months_in_year)
    predictions.append(yhat)
    # the true observation becomes part of the history for the next step
    obs = test[i]
    history.append(obs)
# residual errors: observed minus predicted for every forecast step
residuals = [test[i]-predictions[i] for i in range(len(test))]
residuals = DataFrame(residuals)
print(residuals.describe())
# plot the residual distribution: histogram on top, kernel density estimate below
pyplot.figure()
pyplot.subplot(211)
residuals.hist(ax=pyplot.gca())
pyplot.subplot(212)
residuals.plot(kind='kde', ax=pyplot.gca())
pyplot.show()