In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from scipy.stats import boxcox
from statsmodels.tsa.stattools import adfuller
import statsmodels.api as sm
import statsmodels.tsa.api as smt

In [None]:
# Load the six example time series, one CSV file per series, from the
# relative "Series" directory (paths resolve against the notebook's working directory).
sales_of_company_x = pd.read_csv("Series/monthly-sales-of-company-x-jan-6.csv")
robberies_in_boston = pd.read_csv("Series/monthly-boston-armed-robberies-j.csv")
airlines_passengers = pd.read_csv("Series/international-airline-passengers.csv")
mean_monthly_temp = pd.read_csv("Series/mean-monthly-air-temperature-deg.csv")
dowjones_closing = pd.read_csv("Series/weekly-closings-of-the-dowjones-.csv")
female_births = pd.read_csv("Series/daily-total-female-births-in-cal.csv")

In [None]:
# Display one of the loaded frames to inspect its columns and values.
sales_of_company_x

In [None]:
# Map a human-readable title to the value column of each loaded frame.
# The keys are reused verbatim as plot titles and as lookup keys in later cells.
all_series = {
    "Monthly sales of company X": sales_of_company_x["Count"],
    "Monthly Boston armed robberies": robberies_in_boston["Count"],
    "International airline passengers: monthly totals in thousands": airlines_passengers["Count"],
    "Mean monthly air temperature (Deg. F) Nottingham Castle": mean_monthly_temp["Deg"],
    "Weekly closings of the Dow-Jones industrial average": dowjones_closing["Close"],
    "Daily total female births in California": female_births["Count"]
}

In [None]:
# Display the title -> series mapping built above.
all_series

In [None]:
# Overview figure: every raw series in its own panel of a 3x2 grid.
with plt.style.context('bmh'):
    plt.figure(figsize=(16, 12))
    grid_shape = (3, 2)
    for position, (title, series) in enumerate(all_series.items()):
        # Fill the grid row by row, two panels per row.
        row, col = divmod(position, 2)

        panel = plt.subplot2grid(grid_shape, (row, col))
        series.plot(ax=panel, color='blue')
        panel.set_title(title)

    plt.tight_layout()

Проверяем ряды на стационарность

Ищем сезонность

In [None]:
def plot_ts_and_points(ts, start_point, step):
    """Plot a series and highlight every ``step``-th observation.

    The series is drawn as a blue line and the observations at positions
    ``start_point``, ``start_point + step``, ``start_point + 2 * step``, ...
    are overlaid as red dots, which makes a repeating (seasonal) pattern
    with period ``step`` easy to spot by eye.

    Parameters
    ----------
    ts : pandas.Series
        Series to plot. Any index is supported; observations are selected
        by position, not by label.
    start_point : int
        Zero-based position of the first highlighted observation.
    step : int
        Distance, in observations, between highlighted points.

    Returns
    -------
    None
        The figure is created as a side effect.
    """
    n_obs = len(ts)

    # Candidate positions are start_point + step * i for i = 0 .. n_obs - 1;
    # keep only those that fall inside the series.
    candidates = start_point + step * np.arange(n_obs)
    in_range = (candidates >= 0) & (candidates < n_obs)
    positions = np.unique(candidates[in_range])

    # NaN everywhere except at the highlighted positions. Sharing ts's index
    # keeps the markers on the line for non-default indexes, and the float
    # dtype keeps the series plottable even when nothing is highlighted.
    markers = pd.Series(np.nan, index=ts.index, dtype=float)
    # .iloc selects by position; plain ts[pos] would be a label lookup.
    markers.iloc[positions] = ts.iloc[positions].to_numpy(dtype=float)

    with plt.style.context('bmh'):
        plt.figure(figsize=(16, 8))
        ts_ax = plt.axes()
        ts.plot(ax=ts_ax, color='blue')
        markers.plot(ax=ts_ax, style='ro')

In [None]:
# Highlight every 4th observation (starting at position 2) of each raw series.
for frame, value_column in (
    (sales_of_company_x, 'Count'),
    (robberies_in_boston, 'Count'),
    (airlines_passengers, 'Count'),
    (mean_monthly_temp, 'Deg'),
    (dowjones_closing, 'Close'),
    (female_births, 'Count'),
):
    plot_ts_and_points(frame[value_column], 2, 4)

Нормализуем дисперсию

In [None]:
# Box-Cox transform with lambda fixed at 0 (i.e. the natural logarithm) to
# stabilise the variance of each series; results are NumPy arrays.
series1, series2, series3, series4, series5, series6 = (
    boxcox(all_series[series_title], 0)
    for series_title in (
        "Monthly sales of company X",
        "Monthly Boston armed robberies",
        "International airline passengers: monthly totals in thousands",
        "Mean monthly air temperature (Deg. F) Nottingham Castle",
        "Weekly closings of the Dow-Jones industrial average",
        "Daily total female births in California",
    )
)

In [None]:
# One panel per transformed series, laid out on a 3x2 grid.
plt.figure(figsize=(16, 12))
for panel_number, (values, panel_title) in enumerate(
    (
        (series1, "Monthly sales of company X"),
        (series2, "Monthly Boston armed robberies"),
        (series3, "International airline passengers: monthly totals in thousands"),
        (series4, "Mean monthly air temperature (Deg. F) Nottingham Castle"),
        (series5, "Weekly closings of the Dow-Jones industrial average"),
        (series6, "Daily total female births in California"),
    ),
    start=1,  # matplotlib subplot numbering is 1-based
):
    plt.subplot(3, 2, panel_number)
    plt.plot(values, color='blue')
    plt.title(panel_title)
plt.tight_layout()

Дифференцируем

In [None]:
# First-order differencing of the Box-Cox (lambda = 0) transformed series.
#
# The result is recomputed from the raw data in `all_series` rather than from
# the current value of series1..series6: overwriting a name with a function of
# itself made this cell non-idempotent, so running it twice silently produced
# second differences. Keep the lambda here in sync with the Box-Cox cell.
series1, series2, series3, series4, series5, series6 = (
    np.diff(boxcox(all_series[series_title], 0), 1)
    for series_title in (
        "Monthly sales of company X",
        "Monthly Boston armed robberies",
        "International airline passengers: monthly totals in thousands",
        "Mean monthly air temperature (Deg. F) Nottingham Castle",
        "Weekly closings of the Dow-Jones industrial average",
        "Daily total female births in California",
    )
)

In [None]:
# One panel per differenced series on a 3x2 grid, drawn with the 'bmh' style.
with plt.style.context('bmh'):
    plt.figure(figsize=(18, 10))
    for panel_number, (values, panel_title) in enumerate(
        (
            (series1, "Monthly sales of company X"),
            (series2, "Monthly Boston armed robberies"),
            (series3, "International airline passengers: monthly totals in thousands"),
            (series4, "Mean monthly air temperature (Deg. F) Nottingham Castle"),
            (series5, "Weekly closings of the Dow-Jones industrial average"),
            (series6, "Daily total female births in California"),
        ),
        start=1,  # matplotlib subplot numbering is 1-based
    ):
        plt.subplot(3, 2, panel_number)
        plt.plot(values, color='blue')
        plt.title(panel_title)

Тест Дики-Фуллера

In [None]:
def test_stationarity(timeseries):
    print('Results of Dickey-Fuller Test:')
    dftest = adfuller(timeseries, autolag='AIC')
    dfoutput = pd.Series(dftest[0:4], index=['Test Statistic', 'p-value', '#Lags Used', 'Number of Observations Used'])
    for [key, value] in dftest[4].items():
        dfoutput['Critical Value (%s)' % key] = value
    print(dfoutput)

1. Согласно тесту Дики-Фуллера, первый ряд "Monthly sales of company X" и второй ряд "Monthly Boston armed robberies" стационарны: у первого ряда p-value < 2.5%, у второго p-value близко к нулю.
2. Ряд "International airline passengers: monthly totals in thousands" не является стационарным: его p-value превышает 7%.
3. Ряды "Mean monthly air temperature (Deg. F) Nottingham Castle", "Weekly closings of the Dow-Jones industrial average", "Daily total female births in California" являются стационарными: их p-value практически равно 0.

Коррелограммы

In [None]:
def tsplot(y, lags=None, figsize=(14, 8), style='bmh'):
    """Draw a series together with its ACF and PACF correlograms.

    The figure is a single column of four rows: the series itself spans the
    top two rows, the autocorrelation plot takes the third and the partial
    autocorrelation plot the fourth.

    Parameters
    ----------
    y : pandas.Series or array-like
        Observations to plot; anything that is not a Series is wrapped in one.
    lags : optional
        Forwarded unchanged to ``plot_acf`` and ``plot_pacf``.
    figsize : tuple
        Figure size passed to ``plt.figure``.
    style : str
        Matplotlib style applied while the figure is drawn.

    Returns
    -------
    None
        The figure is created as a side effect.
    """
    series = y if isinstance(y, pd.Series) else pd.Series(y)

    with plt.style.context(style):
        plt.figure(figsize=figsize)
        grid_shape = (4, 1)
        series_ax = plt.subplot2grid(grid_shape, (0, 0), rowspan=2)
        acf_ax, pacf_ax = (plt.subplot2grid(grid_shape, (row, 0)) for row in (2, 3))

        series.plot(ax=series_ax, color='blue', label='Or')
        series_ax.set_title('Original')

        # Same lags and alpha=0.05 for both correlograms.
        for draw_correlogram, target_ax in (
            (smt.graphics.plot_acf, acf_ax),
            (smt.graphics.plot_pacf, pacf_ax),
        ):
            draw_correlogram(series, lags=lags, ax=target_ax, alpha=0.05)

        plt.tight_layout()

In [None]:
print("Monthly sales of company X")
# Transformed (Box-Cox + differenced) series first, then the raw series of the
# SAME dataset for comparison. The raw plot previously used the Boston
# robberies series by copy-paste mistake.
tsplot(series1)
tsplot(all_series["Monthly sales of company X"])

In [None]:
series_name = "Monthly Boston armed robberies"
print(series_name)
tsplot(series2)                  # transformed series
tsplot(all_series[series_name])  # raw series, for comparison

In [None]:
series_name = "International airline passengers: monthly totals in thousands"
print(series_name)
tsplot(series3)                  # transformed series
tsplot(all_series[series_name])  # raw series, for comparison

In [None]:
series_name = "Mean monthly air temperature (Deg. F) Nottingham Castle"
print(series_name)
tsplot(series4)                  # transformed series
tsplot(all_series[series_name])  # raw series, for comparison

In [None]:
series_name = "Weekly closings of the Dow-Jones industrial average"
print(series_name)
tsplot(series5)                  # transformed series
tsplot(all_series[series_name])  # raw series, for comparison

In [None]:
series_name = "Daily total female births in California"
print(series_name)
tsplot(series6)                  # transformed series
tsplot(all_series[series_name])  # raw series, for comparison