In [None]:
# Importieren der wichtigen packages
import csv
import itertools
import os
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.api as sm
from matplotlib.pylab import rcParams
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import acf, pacf
from statsmodels.tsa.stattools import adfuller
# Notebook-wide plotting defaults: default figure size (10, 6) and the
# 'fivethirtyeight' style sheet.
rcParams.update({'figure.figsize': (10, 6)})
plt.style.use('fivethirtyeight')

In [None]:
# Load the passenger data from a CSV file.
# The location can be overridden through the PASSENGERS_CSV environment
# variable; the original hard-coded path stays as the fallback so existing
# setups keep working unchanged.
path = os.environ.get(
    "PASSENGERS_CSV",
    "C:\\Users\\JulianKoller\\Desktop\\Passengers.csv",
)
dataset = pd.read_csv(path)

# Convert the 'Month' column from strings to datetimes.
# `infer_datetime_format` is deprecated since pandas 2.0 (consistent format
# inference is the default there), so it is no longer passed.
dataset['Month'] = pd.to_datetime(dataset['Month'])

# Month-indexed copy of the data; `dataset` itself keeps the default integer
# index it got from `read_csv`.
indexedDataset = dataset.set_index(['Month'])
indexedDataset.head(5)

In [None]:
# Split the Month-indexed data into training and test sets.
# Label slices with .loc include BOTH end points, so using '1956-01' as the
# end of the training slice and as the start of the test slice placed
# January 1956 in both sets. The training set now ends with December 1955
# and the test set starts with January 1956, so the two are disjoint.
train_end = '1955-12'
test_start = '1956-01'
dataset_train = indexedDataset.loc[:train_end]
dataset_test = indexedDataset.loc[test_start:]

# Create one shared axis
fig, ax = plt.subplots()

# Draw the training and the test data on that axis
dataset_train.plot(ax=ax)
dataset_test.plot(ax=ax)
plt.show()

In [None]:
# Augmented Dickey-Fuller test on the untransformed passenger series.
# `adfuller` comes from the notebook's import cell, so it is not imported
# again here.
result = adfuller(dataset['#Passengers'])

# result[0] is the test statistic and result[1] the p-value. They are printed
# with the same labels the differencing cells use, so the outputs of all
# stationarity checks can be compared directly.
print('ADF Statistic:', result[0])
print('p-value:', result[1])

In [None]:
# First difference of the series (x[t] - x[t-1]); the leading NaN produced
# by the differencing is dropped.
first_diff = dataset['#Passengers'].diff()
passengers_stationary = first_diff.dropna()
print(passengers_stationary)

# Plot the differenced series
fig, ax = plt.subplots()
passengers_stationary.plot(ax=ax)
plt.show()

# Augmented Dickey-Fuller test on the differenced series:
# element 0 is the test statistic, element 1 the p-value.
result = adfuller(passengers_stationary)
adf_stat, p_value = result[0], result[1]
print('ADF Statistic:', adf_stat)
print('p-value:', p_value)

In [None]:
# Difference the series twice (the difference of the first difference) and
# drop the NaN values this leaves at the start.
twice_differenced = dataset['#Passengers'].diff().diff()
passengers_stationary = twice_differenced.dropna()

# Plot the twice-differenced series
fig, ax = plt.subplots()
passengers_stationary.plot(ax=ax)
plt.show()

# Augmented Dickey-Fuller test: element 0 is the test statistic,
# element 1 the p-value.
result = adfuller(passengers_stationary)
adf_stat, p_value = result[0], result[1]
print('ADF Statistic:', adf_stat)
print('p-value:', p_value)
In [None]:
# Log returns: log(x[t] / x[t-1]). The first element has no predecessor and
# is therefore NaN; it is dropped before testing.
passenger_counts = dataset['#Passengers']
growth_ratio = passenger_counts / passenger_counts.shift(1)
passengers_stationary = np.log(growth_ratio).dropna()

# Plot the log-return series
fig, ax = plt.subplots()
passengers_stationary.plot(ax=ax)
plt.show()

# Augmented Dickey-Fuller test: element 0 is the test statistic,
# element 1 the p-value.
result = adfuller(passengers_stationary)
adf_stat, p_value = result[0], result[1]
print('ADF Statistic:', adf_stat)
print('p-value:', p_value)


In [None]:
# Series to model. It is taken from the Month-indexed frame rather than from
# `dataset` (whose index is the default integer range from `read_csv`), so
# that the fitted model, its forecasts and the confidence intervals carry
# real dates on their index.
y = indexedDataset['#Passengers'].copy()
# Attach the inferred frequency so statsmodels can extend the date index when
# forecasting; the frequency stays None if the dates are not regularly spaced.
y.index = pd.DatetimeIndex(y.index, freq='infer')
# Candidate values for the p, d and q parameters: range(0, 2), i.e. 0 and 1.
p = d = q = range(0, 2)

# All (p, d, q) combinations
pdq = list(itertools.product(p, d, q))

# The same combinations for the seasonal part, each extended by the
# season length 12
seasonal_pdq = [(*combo, 12) for combo in itertools.product(p, d, q)]

print('Examples of parameter combinations for Seasonal ARIMA...')
# (index into pdq, index into seasonal_pdq) for the four example lines
for order_idx, seasonal_idx in ((1, 1), (1, 2), (2, 3), (2, 4)):
    print('SARIMAX: {} x {}'.format(pdq[order_idx], seasonal_pdq[seasonal_idx]))

In [None]:
# Grid search: fit a SARIMAX model for every (order, seasonal_order)
# combination, print its AIC and remember the combination with the lowest one.
best_aic = float('inf')
best_order = None
best_seasonal_order = None

# Warnings are silenced only while the search runs; the previous global
# filter also hid every warning raised by later cells.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")

    for param in pdq:
        for param_seasonal in seasonal_pdq:
            try:
                mod = sm.tsa.statespace.SARIMAX(y,
                                                order=param,
                                                seasonal_order=param_seasonal,
                                                enforce_stationarity=False,
                                                enforce_invertibility=False)

                # disp=False keeps the optimizer's progress output from
                # flooding the cell for every one of the fits.
                results = mod.fit(disp=False)
            except Exception as exc:
                # A failed fit is still skipped, but it is reported instead of
                # being swallowed silently. `Exception` (rather than a bare
                # `except`) lets KeyboardInterrupt stop the search.
                print('ARIMA{}x{} - skipped ({}: {})'.format(
                    param, param_seasonal, type(exc).__name__, exc))
                continue

            # The seasonal tuple already ends with the period 12, so the label
            # no longer appends a second "12".
            print('ARIMA{}x{} - AIC:{}'.format(param, param_seasonal, results.aic))

            # A NaN AIC compares False here and is therefore never selected.
            if results.aic < best_aic:
                best_aic = results.aic
                best_order = param
                best_seasonal_order = param_seasonal

if best_order is not None:
    print('Lowest AIC: ARIMA{}x{} - AIC:{}'.format(best_order, best_seasonal_order, best_aic))

In [None]:
# Fit the selected specification: order (1, 1, 1) with seasonal order
# (1, 1, 1, 12), without enforcing stationarity or invertibility.
final_spec = dict(
    order=(1, 1, 1),
    seasonal_order=(1, 1, 1, 12),
    enforce_stationarity=False,
    enforce_invertibility=False,
)
mod = sm.tsa.statespace.SARIMAX(y, **final_spec)
results = mod.fit()

# Show only the second table of the fit summary.
coefficient_table = results.summary().tables[1]
print(coefficient_table)

In [None]:
# Forecast 48 steps past the end of the fitted series
forecast_horizon = 48
pred_uc = results.get_forecast(steps=forecast_horizon)

# Confidence interval of the forecast
pred_ci = pred_uc.conf_int()

In [None]:
# Plot the observed series together with the forecast and its confidence band.
ax = y.plot(label='observed', figsize=(20, 15))
pred_uc.predicted_mean.plot(ax=ax, label='Forecast')

# Shade the area between the first and second column of the confidence
# interval frame.
ax.fill_between(pred_ci.index,
                pred_ci.iloc[:, 0],
                pred_ci.iloc[:, 1], color='k', alpha=.25)
ax.set_xlabel('Date')
# `y` is the '#Passengers' column; the previous label 'CO2 Levels' did not
# describe the plotted data.
ax.set_ylabel('Passengers')

ax.legend()
plt.show()

In [None]:
# Print the forecast values (predicted mean) of the fitted model
forecast_mean = pred_uc.predicted_mean
print(forecast_mean)