## Setup

In [None]:
import os
import datetime
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_absolute_error , mean_squared_error , mean_absolute_percentage_error

## The weather dataset

In [None]:
# Fetch the Jena climate archive and ask Keras to extract it.
zip_path = tf.keras.utils.get_file(
    fname='jena_climate_2009_2016.csv.zip',
    origin='https://storage.googleapis.com/tensorflow/tf-keras-datasets/jena_climate_2009_2016.csv.zip',
    extract=True,
)
# Drop the trailing ".zip"; the remainder is used below as the CSV path.
csv_path, _ = os.path.splitext(zip_path)

# Only hourly predictions are needed, so the 10-minute records are sub-sampled
# to one-hour intervals: starting at positional row 5, keep every 6th row.
df = pd.read_csv(csv_path).iloc[5::6]

# Remove the timestamp column from the frame and parse it as datetimes.
date_time = pd.to_datetime(
    df.pop('Date Time'),
    format='%d.%m.%Y %H:%M:%S',
)

# Univariate target used by the rest of the notebook.
temperature_series = df['T (degC)']

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/jena_climate_2009_2016.csv.zip


In [None]:
# Preview the first five hourly records (the timestamp column was popped above).
df.head(n=5)

Unnamed: 0,p (mbar),T (degC),Tpot (K),Tdew (degC),rh (%),VPmax (mbar),VPact (mbar),VPdef (mbar),sh (g/kg),H2OC (mmol/mol),rho (g/m**3),wv (m/s),max. wv (m/s),wd (deg)
5,996.5,-8.05,265.38,-8.78,94.4,3.33,3.14,0.19,1.96,3.15,1307.86,0.21,0.63,192.7
11,996.62,-8.88,264.54,-9.77,93.2,3.12,2.9,0.21,1.81,2.91,1312.25,0.25,0.63,190.3
17,996.84,-8.81,264.59,-9.66,93.5,3.13,2.93,0.2,1.83,2.94,1312.18,0.18,0.63,167.2
23,996.99,-9.05,264.34,-10.02,92.6,3.07,2.85,0.23,1.78,2.85,1313.61,0.1,0.38,240.0
29,997.46,-9.63,263.72,-10.65,92.2,2.94,2.71,0.23,1.69,2.71,1317.19,0.4,0.88,157.0


## Splitting the data 70:20:10 into training, validation, and test sets

In [None]:
# Chronological split: first 70% for training, next 20% for validation,
# final 10% for testing. No shuffling, so temporal order is preserved.
n = len(temperature_series)
train_end = int(n * (70 / 100))
val_end = int(n * (90 / 100))

train = temperature_series[:train_end]
val = temperature_series[train_end:val_end]
test = temperature_series[val_end:]

## Normalize the data

In [None]:
# Standardize every split with statistics taken from the training split only,
# so nothing from the validation/test windows leaks into the scaling.
train_mean = train.mean()
train_std = train.std()

train, val, test = [
    (split - train_mean) / train_std
    for split in (train, val, test)
]

# Display the number of training samples.
train.size

49063

In [None]:
# Fit an ARIMA(p, d, q) model on the standardized training series.
# (5, 1, 0) = 5 autoregressive lags, first-order differencing, no moving-average
# terms; other (p, d, q) combinations can be tried here.
order = (5, 1, 0)
arima = ARIMA(endog=train, order=order)
model_fit = arima.fit()

In [None]:
# Make predictions for the test window.
#
# The model was fitted on `train` only, so its forecast origin is the end of the
# training split. The test split does not start there: the validation split sits
# in between. Forecasting just len(test) steps would therefore produce values for
# the start of the validation window and compare them against unrelated test
# targets. Forecast across validation + test and keep only the steps that line
# up with `test`.
horizon = len(val) + len(test)
full_forecast = model_fit.forecast(steps=horizon)

# Positional slice: skip the len(val) steps that cover the validation window.
predictions = full_forecast[len(val):]

In [None]:
# Score the forecast against the held-out test targets.
# NOTE(review): `test` was standardized earlier in the notebook, so targets close
# to zero can inflate MAPE -- confirm this metric is meaningful on that scale.
mse = mean_squared_error(y_true=test, y_pred=predictions)
mape = mean_absolute_percentage_error(y_true=test, y_pred=predictions)
mae = mean_absolute_error(y_true=test, y_pred=predictions)

for metric_label, metric_value in (
    ("Mean Squared Error (MSE):", mse),
    ("Mean Absolute Percentage Error (MAPE):", mape),
    ("Mean Absolute Error (MAE):", mae),
):
    print(metric_label, metric_value)

Mean Squared Error (MSE): 0.9091186799835864
Mean Absolute Percentage Error (MAPE): 5.035816260770443
Mean Absolute Error (MAE): 0.7873714073601915
