# Time Series as Supervised Machine Learning
- See [Notion Notes](https://detraviousjbrinkley.notion.site/Introduction-to-Time-Series-Forecasting-with-Python-by-Jason-Brownlee-ba37f3ef52784171ab51f1c0d9ab68c5) for short-hand notation, explanations, my thoughts, etc.
- BOOKS:
    1. Deep Learning for Time Series Forecasting - Predict the Future with MLPs, CNNs and LSTMs in Python by Jason Brownlee
    2. Introduction to TSF with Python - How to Prepare Data and Develop Models to Predict the Future by Jason Brownlee

# Imports

In [1]:
import numpy as np
import pandas as pd

# Book Solution

In [2]:
def split_sequence(sequence, n_steps):
    """Split a univariate sequence into sliding-window samples.

    Each sample pairs ``n_steps`` consecutive observations (the inputs) with
    the single observation that immediately follows them (the target).

    Parameters:
    sequence -- 1D sequence supporting ``len`` and slicing (py list or np array)
    n_steps -- int (number of consecutive observations in each input window)

    Return:
    X -- np array (one row per sample, ``n_steps`` values per row)
    y -- np array (the observation that follows each row of X)

    Both arrays are empty when the sequence holds ``n_steps`` or fewer
    observations, because no window would have a following target.
    """
    # A window starting at `start` needs sequence[start + n_steps] to exist,
    # so only the first len(sequence) - n_steps positions are valid starts.
    n_samples = max(len(sequence) - n_steps, 0)

    X = [sequence[start:start + n_steps] for start in range(n_samples)]
    y = [sequence[start + n_steps] for start in range(n_samples)]

    return np.array(X), np.array(y)

In [3]:
# Demo input: the integers 1 through 10 as a univariate series.
series = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
# print(np.shape(series))
# Use 3 observations per input window; the cell displays the returned (X, y) pair.
split_sequence(series, 3)

(array([[1, 2, 3],
        [2, 3, 4],
        [3, 4, 5],
        [4, 5, 6],
        [5, 6, 7],
        [6, 7, 8],
        [7, 8, 9]]),
 array([ 4,  5,  6,  7,  8,  9, 10]))

# Convert Univariate Time Series Sequence to Supervised Machine Learning

In [4]:
def convert_uts_sequence_to_sml(uts_observations, n_dimensions):
    """Split a UTS into input rows of consecutive observations, each paired with the next observation as its output.

    The input series, every (input row, output) pair, and the resulting
    shapes are printed as the conversion runs.

    Parameters:
    uts_observations -- 1D np array (of UTS data to transform to SML data with size b rows/length x 1 dimension)
    n_dimensions -- int (of the number of columns to use for SML 1 row x j dimensions)

    Return:
    X -- 2D np array (of the input rows, one row per sample x n_dimensions columns)
    y -- 1D np array (of the observation that follows each row of X)

    Both arrays are empty when the series has n_dimensions or fewer
    observations, since no input row would have a following output.
    """
    print("Input Univariate Time Series:")
    print(uts_observations, "\nX of size", np.shape(uts_observations))
    print()

    X = []
    y = []
    n_observations = len(uts_observations)

    for start_idx in range(n_observations):
        target_idx = start_idx + n_dimensions

        # `>=` rather than `==`: when n_dimensions exceeds the series length
        # the target index starts out already past the end, and an equality
        # test would never stop the loop before the out-of-range lookup.
        if target_idx >= n_observations:
            break

        X.append(uts_observations[start_idx:target_idx])
        y.append(uts_observations[target_idx])

    print("Univariate Time Series as Supervised Machine Learning:")
    for input_row, output_value in zip(X, y):
        print(input_row, output_value)

    print("X of size", np.shape(X))
    print("y of size", np.shape(y))

    return np.array(X), np.array(y)

In [5]:
# Demo series 1..10, run through the hand-written converter.
time_series_data = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
# Number of past observations per input row; later cells reuse this name.
window_size = 3
X, y = convert_uts_sequence_to_sml(time_series_data, window_size)

Input Univariate Time Series:
[ 1  2  3  4  5  6  7  8  9 10] 
X of size (10,)

Univariate Time Series as Supervised Machine Learning:
[1 2 3] 4
[2 3 4] 5
[3 4 5] 6
[4 5 6] 7
[5 6 7] 8
[6 7 8] 9
[7 8 9] 10
X of size (7, 3)
y of size (7,)


In [6]:
# X

In [7]:
# y

# Convert Univariate Time Series Sequence to Supervised Machine Learning using Pandas `shift()` Function

In [24]:
def convert_uts_sequence_to_sml_with_shift(data, n_in, n_out=2):
    """Frame a univariate time series as supervised learning rows using pandas `shift()`.

    Shifted copies of the series are placed side by side: first the lagged
    copies (shifted down by n_in, ..., 1), then the forward copies (shifted up
    by 0, ..., n_out - 1). The combined frame is printed before rows that
    contain NaN are dropped.

    Parameters:
    data -- py list or np array (of the input univariate time series)
    n_in -- int (number of lagged copies, i.e. past observations per row)
    n_out -- int (number of forward copies, starting with the unshifted series; default 2)

    Return:
    np array (one row per index that has every lagged and forward value available)
    """
    frame = pd.DataFrame(data)

    # Oldest lag first so each row reads left to right in time order.
    lagged = [frame.shift(steps_back) for steps_back in range(n_in, 0, -1)]
    # A shift of -0 leaves the series untouched, giving the current observation.
    leading = [frame.shift(-steps_ahead) for steps_ahead in range(n_out)]

    combined = pd.concat(lagged + leading, axis=1)
    print(combined)

    return combined.dropna().values

In [25]:
# Rebind the demo series as a plain Python list for the pandas-based converter.
time_series_data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

# n_out is left at its default, so each row holds window_size lagged values
# followed by the current and the next observation.
a = convert_uts_sequence_to_sml_with_shift(time_series_data, window_size)
a

     0    0    0   0     0
0  NaN  NaN  NaN   1   2.0
1  NaN  NaN  1.0   2   3.0
2  NaN  1.0  2.0   3   4.0
3  1.0  2.0  3.0   4   5.0
4  2.0  3.0  4.0   5   6.0
5  3.0  4.0  5.0   6   7.0
6  4.0  5.0  6.0   7   8.0
7  5.0  6.0  7.0   8   9.0
8  6.0  7.0  8.0   9  10.0
9  7.0  8.0  9.0  10   NaN


array([[ 1.,  2.,  3.,  4.,  5.],
       [ 2.,  3.,  4.,  5.,  6.],
       [ 3.,  4.,  5.,  6.,  7.],
       [ 4.,  5.,  6.,  7.,  8.],
       [ 5.,  6.,  7.,  8.,  9.],
       [ 6.,  7.,  8.,  9., 10.]])

In [10]:
# Inspect the container type returned by the shift-based converter.
type(a)

numpy.ndarray

In [11]:
def create_lag_values(data, N):
    """Build shifted copies of a series together with a label for each copy.

    For every k in 1 .. N-1 the series is shifted by -k, so row i of the k-th
    copy holds the observation found k positions later in the input, and the
    last k rows are NaN. No rows are dropped.

    Parameters:
    data -- py list or np array (of the input series; wrapped in a pd DataFrame)
    N -- int (exclusive upper bound on k; N - 1 shifted copies are produced)

    Return:
    lag_values -- list (of pd DataFrames, one per k, in increasing k)
    lag_col_names -- list (of labels "zt @ lag k", in the same order)
    """
    frame = pd.DataFrame(data)
    offsets = range(1, N)

    lag_values = [frame.shift(-offset) for offset in offsets]
    lag_col_names = ["zt @ lag " + str(offset) for offset in offsets]

    return lag_values, lag_col_names


In [12]:
# Build the shifted copies of the series and their matching column labels.
lag_ks, lag_cols = create_lag_values(time_series_data, window_size)
# Place the shifted frames side by side, one column per offset.
lag_ks_df = pd.concat(lag_ks, axis=1)
# NOTE(review): lag_cols is wrapped in an extra list here; pandas may treat a
# list of lists as MultiIndex levels rather than flat labels — confirm intended.
lag_ks_df.columns=[lag_cols]
lag_ks_df

Unnamed: 0,zt @ lag 1,zt @ lag 2
0,2.0,3.0
1,3.0,4.0
2,4.0,5.0
3,5.0,6.0
4,6.0,7.0
5,7.0,8.0
6,8.0,9.0
7,9.0,10.0
8,10.0,
9,,


In [118]:
def convert_uts_sequence_to_sml_with_pd(uts_observations, prior_observations, forecasting_step):
    """Frame a UTS as supervised learning data with pandas, using lagged columns as inputs and forecast columns as outputs.

    Shifted copies of the series are placed side by side and labelled
    "t-n", ..., "t-1" for the inputs and "t", "t+1", ... for the outputs.
    Rows containing NaN (the edges, where a shifted value does not exist) are
    dropped. The input series and the resulting frames are printed.

    Parameters:
    uts_observations -- 1D np array or py list (of UTS data to transform to SML data)
    prior_observations -- py int (of how many past observations form the inputs of each row)
    forecasting_step -- py int (of how far out to forecast, 1 only the next timestamp, 2 the next two timestamps, ... n the next n timestamps)

    Return:
    X_train -- pd DataFrame (the first prior_observations columns, "t-n" ... "t-1")
    y_train -- pd DataFrame (the remaining forecasting_step columns, "t", "t+1", ...)
    uts_sml_df -- pd DataFrame (all columns together, NaN rows removed)

    Raises:
    ValueError -- from pd.concat when both prior_observations and
                  forecasting_step are 0, because there is nothing to combine
    """
    df = pd.DataFrame(uts_observations)

    print("Input Univariate Time Series:")
    print(uts_observations, "\nX of size", np.shape(uts_observations))
    print()

    cols = []
    col_names = []

    # input sequence (t-n, ... t-1)
    for lag in range(prior_observations, 0, -1):
        cols.append(df.shift(lag))
        col_names.append("t-" + str(lag))

    # forecast sequence (t, t+1, ... t+n)
    for lead in range(forecasting_step):
        cols.append(df.shift(-lead))
        col_names.append("t" if lead == 0 else "t+" + str(lead))

    # Assemble once, after every column has been collected. Doing this inside
    # the forecast loop rebuilt the frame on each pass and left it undefined
    # when forecasting_step was 0.
    uts_sml_df = pd.concat(cols, axis=1)
    # The names are wrapped in a list exactly as before so the returned
    # frames keep the same column index type callers already receive.
    uts_sml_df.columns = [col_names]
    # drop rows with NaN values
    uts_sml_df.dropna(inplace=True)

    print("Univariate Time Series as Supervised Machine Learning:")

    # columns used to make the prediction
    X_train = uts_sml_df.iloc[:, :prior_observations]
    print("X_train: \n", X_train)

    # Everything after the inputs is a forecast column. Slicing from
    # prior_observations (rather than with a negative offset) selects the same
    # columns and stays empty instead of wrapping around when
    # forecasting_step is 0.
    y_train = uts_sml_df.iloc[:, prior_observations:]
    print("y_train: \n", y_train)
    return X_train, y_train, uts_sml_df

In [121]:
# Demo series for the pandas-based converter.
train_data = [10, 20, 30, 40, 50, 60, 70, 80, 90]
# 3 past observations as inputs, 2 forecast columns (t and t+1) as outputs.
prior_observations, forecasting_step = [3, 2]
X_train_df, y_train_df, X_y_df = convert_uts_sequence_to_sml_with_pd(train_data, prior_observations, forecasting_step) 

Input Univariate Time Series:
[10, 20, 30, 40, 50, 60, 70, 80, 90] 
X of size (9,)

Univariate Time Series as Supervised Machine Learning:
X_train: 
     t-3   t-2   t-1
3  10.0  20.0  30.0
4  20.0  30.0  40.0
5  30.0  40.0  50.0
6  40.0  50.0  60.0
7  50.0  60.0  70.0
y_train: 
     t   t+1
3  40  50.0
4  50  60.0
5  60  70.0
6  70  80.0
7  80  90.0
