## Imports

In [None]:
import numpy as np
import pandas as pd

from datetime import datetime

import matplotlib.pyplot as plt

plt.style.use('ggplot')

from sklearn.linear_model import LinearRegression

# Generate Series

In [None]:
# Toy frame: three float columns observed daily from 2017-08-01 to 2017-08-06.
df = pd.DataFrame(
    {
        'x': [3.2, 2.1, 7.4, 5.2, 3.9, 0.7],
        'y': [5.3, 3.7, 11.2, 8.1, 9.2, 3.2],
        'z': [4.7, 3.0, 8.1, 7.2, 5.1, 1.8],
    },
    index=[datetime(2017, 8, day) for day in range(1, 7)],
)

In [None]:
# Inspect the frame; in a notebook the bare expression is rendered as the cell output.
df

In [None]:
# Label-based lookup: select the row whose index label is 2017-08-04.
df.loc['2017-08-04']

In [None]:
# Position-based lookup: the fourth row (zero-based position 3).
df.iloc[3]

In [None]:
# Slice rows by date-string labels; label slices include both endpoints.
df['2017-08-02':'2017-08-03']

In [None]:
# Open-ended label slice: every row from 2017-08-05 onward.
df['2017-08-05':]

In [None]:
# Positional slice: rows from position 4 to the end.
df.iloc[4:]

In [None]:
# Select a subset of columns by passing a list of column names.
df[["x", "z"]]

In [None]:
# An integer slice on a DataFrame selects rows by position (1 and 2 here; the end is exclusive).
df[1:3]

In [None]:
# Plot every column against the datetime index (pandas' default plot kind is a line plot).
df.plot()

In [None]:
# Same toy frame, but with one missing observation in 'x' (2017-08-03)
# and one in 'y' (2017-08-02) to demonstrate NaN handling.
df = pd.DataFrame(
    {
        'x': [3.2, 2.1, None, 5.2, 3.9, 0.7],
        'y': [5.3, None, 11.2, 8.1, 9.2, 3.2],
        'z': [4.7, 3.0, 8.1, 7.2, 5.1, 1.8],
    },
    index=[datetime(2017, 8, day) for day in range(1, 7)],
)

In [None]:
# Inspect the frame, which now holds one missing value in 'x' and one in 'y'.
df

In [None]:
# Drop every row that contains at least one NaN (returns a new frame; df is unchanged).
df.dropna()

In [None]:
# axis=0 is the default spelled out: rows containing NaN are dropped.
df.dropna(axis=0)

In [None]:
# axis=1 drops the columns that contain NaN instead of the rows.
df.dropna(axis=1)

In [None]:
# Replace every missing value with the constant 0.0.
df.fillna(0.0)

In [None]:
# Forward fill: propagate the last valid observation to the following NaNs.
# `fillna(method='pad')` is deprecated in recent pandas; `ffill()` is the
# supported spelling of the same operation.
df.ffill()

In [None]:
# Backward fill: fill each NaN with the next valid observation.
# `fillna(method='bfill')` is deprecated in recent pandas; use `bfill()`.
df.bfill()

In [None]:
# Forward fill ('ffill' and 'pad' were synonyms for the same method).
# `fillna(method='ffill')` is deprecated in recent pandas; use `ffill()`.
df.ffill()

In [None]:
# Upsample the daily index to a 12-hour grid; the newly created timestamps
# have no data and therefore hold NaN.
# The uppercase hour alias 'H' is deprecated in pandas 2.2+; 'h' is the
# supported spelling.
df.asfreq('12h')

In [None]:
# Downsample to a 2-day grid, keeping only the rows that fall on that grid (no aggregation).
df.asfreq('2D')

In [None]:
# resample() on its own returns a Resampler object; nothing is computed
# until an aggregation or fill method is applied to it.
df.resample('2D')

In [None]:
# Minimum of each column within every 2-day bin.
# Passing the NumPy callable (`agg(np.min)`) emits a FutureWarning in recent
# pandas; the built-in resampler method gives the same result without it.
df.resample('2D').min()

In [None]:
# Take the value at the start of each 2-day bin without aggregating.
df.resample('2D').asfreq()

In [None]:
# Upsample to a 12-hour grid and forward-fill the new timestamps.
# 'h' replaces the uppercase hour alias 'H', deprecated in pandas 2.2+.
df.resample('12h').ffill()

In [None]:
# Upsample to a 12-hour grid and forward-fill the new timestamps.
# 'h' replaces the uppercase hour alias 'H', deprecated in pandas 2.2+.
# NOTE(review): this cell repeats the previous one — confirm whether a
# different fill (e.g. bfill) was intended here.
df.resample('12h').ffill()

In [None]:
# Lag-1 feature: shift 'x' down by one row so each row carries the previous row's x.
# The first row has no predecessor and becomes NaN.
df['x(t-1)'] = df['x'].shift(1)

In [None]:
# Inspect the frame with the new lag-1 column.
df

In [None]:
# Lag-2 and lag-3 features, built the same way with larger shifts.
df['x(t-2)'] = df['x'].shift(2)
df['x(t-3)'] = df['x'].shift(3)

In [None]:
# Inspect the frame with all three lag columns.
df

In [None]:
# f-string demo: the expression inside {} is evaluated and interpolated into the string.
i = 9
f"this is {i}"

In [None]:
# The same interpolation used to build a lag-column name such as 'x(t-7)'.
i = 7
f'x(t-{i})'

In [None]:
# Build the lag-1..lag-3 columns in a loop, naming each one 'x(t-<lag>)'.
for i in (1, 2, 3):
    print(f't-{i}', i)
    df[f'x(t-{i})'] = df['x'].shift(i)


In [None]:
# Inspect the frame after the loop has (re)written the lag columns.
df

In [None]:
# (rows, columns) of the frame, lag columns included.
df.shape

In [None]:
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """
    Frame a time series as a supervised learning dataset.

    Each output row holds the ``n_in`` previous observations followed by the
    current observation and the next ``n_out - 1`` observations.

    Arguments:
        data: Observations as a list, a 1-D or 2-D NumPy array, a pandas
            Series or a pandas DataFrame (rows = time steps, columns =
            variables). Anything ``pd.DataFrame`` accepts works.
        n_in: Number of lag observations as input (X).
        n_out: Number of observations as output (y).
        dropnan: Boolean whether or not to drop rows with NaN values.
    Returns:
        Pandas DataFrame of series framed for supervised learning, with
        columns named ``var<j>(t-<i>)``, ``var<j>(t)`` and ``var<j>(t+<i>)``.
    """
    # Let pandas normalise the input first and read the variable count from
    # the resulting frame. Inspecting `data` directly breaks on 1-D arrays and
    # Series (no second axis) and miscounts a list of rows.
    df = pd.DataFrame(data)
    n_vars = df.shape[1]
    cols, names = [], []
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += [f'var{j + 1}(t-{i})' for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for i in range(n_out):
        cols.append(df.shift(-i))
        step = 't' if i == 0 else f't+{i}'
        names += [f'var{j + 1}({step})' for j in range(n_vars)]
    # put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    # drop the leading/trailing rows whose shifted values are undefined
    if dropnan:
        agg = agg.dropna()
    return agg

In [None]:
# The two raw columns that will be framed as a supervised problem below.
df[['x', 'z']]

In [None]:
# Frame x (var1) and z (var2) with 3 lagged steps as inputs and the current step as output.
# dropnan=False keeps the rows whose lags are undefined (NaN).
agg_df = series_to_supervised(df[['x', 'z']], n_in=3, n_out=1, dropnan=False)

In [None]:
# Inspect the supervised framing.
agg_df

In [None]:
# Target: the current value of the first variable; features: every lagged column.
y = agg_df['var1(t)']
X = agg_df.drop(columns=['var1(t)', 'var2(t)'])

In [None]:
# Inspect the feature matrix (lagged columns only).
X

In [None]:
# Inspect the target series.
y

# Airline Passengers

In [None]:
# Download the airline-passengers CSV, lowercase the column names and use
# the parsed 'month' column as a DatetimeIndex.
file_url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/airline-passengers.csv"
airline_df = (
    pd.read_csv(file_url, header=0, parse_dates=['Month'])
    .rename(columns=str.lower)
    .assign(month=lambda frame: pd.to_datetime(frame['month']))
    .set_index('month')
)
airline_df.head()

In [None]:
# Last five rows of the series.
airline_df.tail()

In [None]:
# (rows, columns) of the loaded series.
airline_df.shape

In [None]:
# Create an 8x6 inch figure at 100 dpi and draw the series on its axes.
fig, ax = plt.subplots(figsize=(8,6), dpi=100)

# Passing ax makes pandas draw on the figure created above instead of a new one.
airline_df.plot(ax=ax)

plt.show()

In [None]:
# Supervised framing of the series: the 12 previous observations as inputs,
# the current observation ('var1(t)') as output.
# dropnan=True removes the leading rows whose lags are undefined.
shifted_airline_df = series_to_supervised(airline_df, n_in=12, n_out=1, dropnan=True)
shifted_airline_df.head()

In [None]:
# (rows, columns) after framing and dropping the incomplete rows.
shifted_airline_df.shape

In [None]:
# Target: the current observation; features: the 12 lagged observations.
y = shifted_airline_df["var1(t)"]
X = shifted_airline_df.drop(columns="var1(t)")


In [None]:
# First rows of the feature matrix.
X.head()

In [None]:
# First rows of the target series.
y.head()

In [None]:
# Chronological hold-out split: the first 80% of the rows train the model,
# the remaining rows are kept for testing (no shuffling).
train_test_split_ratio = 0.8

split_index = round(train_test_split_ratio * len(X))
print(split_index)

X_train = X.iloc[:split_index]
X_test = X.iloc[split_index:]
y_train = y.iloc[:split_index]
y_test = y.iloc[split_index:]

# Number of held-out rows, reused later to align the predictions.
test_len = len(y_test)
test_len

In [None]:
# Fit a linear regression on the training rows; fit() returns the estimator,
# so lin_reg is the fitted model.
lin_reg = LinearRegression().fit(X_train, y_train)

In [None]:
# Last rows of the supervised frame, for comparison with the raw series below.
shifted_airline_df.tail()

In [None]:
# Last rows of the raw series.
airline_df.tail()

In [None]:
# Predict the held-out rows from their 12 lagged values.
y_pred = lin_reg.predict(X_test)

In [None]:
# Timestamps of the last `test_len` observations, i.e. the held-out period.
test_idx = airline_df.index[-test_len:]

# Put the predictions on those timestamps so they overlay the actual series.
predict_df = pd.Series(y_pred, index=test_idx, name='prediction').to_frame()

fig, ax = plt.subplots(figsize=(8, 6), dpi=100)

# Actual series and predictions share the same axes.
airline_df.plot(ax=ax)
predict_df.plot(ax=ax)

plt.show()

In [None]:
# First rows of the prediction frame.
predict_df.head()

## Autocorrelation

### with [pandas](https://pandas.pydata.org/)

In [None]:
# Autocorrelation of the passenger series at lags 0 through 30.
lag = range(31)
airline_acf = [airline_df['passengers'].autocorr(k) for k in lag]

In [None]:
# Plot the autocorrelation values computed above; the x position of each
# point is its list index, which equals the lag.
plt.figure(figsize=(5.5, 5.5))
plt.plot(airline_acf, marker='.', color='b')
plt.title('Autocorrelation function for Airline Passengers')
plt.xlabel('Lag in terms of number of months')
plt.ylabel('Autocorrelation function')
plt.show()

### with [statsmodels](https://www.statsmodels.org/)

In [None]:
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

In [None]:
# plot_acf draws on a brand-new figure unless it is given an Axes, so a bare
# plt.figure(figsize=...) would leave an empty figure and the size would be
# ignored. Create the sized axes explicitly and pass them in.
fig, ax = plt.subplots(figsize=(5.5, 5.5))
plot_acf(airline_df['passengers'], lags=25, ax=ax)
plt.show()

In [None]:
# plot_pacf draws on a brand-new figure unless it is given an Axes, so a bare
# plt.figure(figsize=...) would leave an empty figure and the size would be
# ignored. Create the sized axes explicitly and pass them in.
fig, ax = plt.subplots(figsize=(5.5, 5.5))
plot_pacf(airline_df['passengers'], lags=25, ax=ax)
plt.show()

## Further Resources


* Books
    * [Forecasting: Principles and Practice (3rd)](https://otexts.com/fpp3/)
    * [Time Series Forecasting in Python](https://www.manning.com/books/time-series-forecasting-in-python-book)

* Articles

    * [11 Classical Time Series Forecasting Methods in Python (Cheat Sheet)](https://machinelearningmastery.com/time-series-forecasting-methods-in-python-cheat-sheet/)

    * [How to Convert a Time Series to a Supervised Learning Problem in Python](https://machinelearningmastery.com/convert-time-series-supervised-learning-problem-python/)
    * [Time Series Forecasting as Supervised Learning](https://machinelearningmastery.com/time-series-forecasting-supervised-learning/)

* Other
    * [Time Series Forecasting with LSTM (Deep Learning)](https://github.com/lperto/datafest2020)
    * [Time Series - Kaggle Courses](https://www.kaggle.com/learn/time-series)
    * [Time Series Cross-Validation Example](https://otexts.com/fpp3/tscv.html)
