# In [1]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@author: omayr
@description: POC: Converting Time Series to Supervised Learning Problem
"""

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from os import listdir
from os import path
from IPython.display import display
from pandas import DataFrame
from pandas import concat

def read_file(multivariate=False):
    """
    Load the sample sensor data stored under ``data/sample/``.

    The directory is resolved relative to the current working directory.
    Every CSV file is expected to provide a ``utc_org_rec_time`` column; the
    multivariate path additionally reads a ``value`` column from each file.

    Arguments:
        multivariate: If falsy (default), only
            ``room_temperature_1710876.csv`` is read. If truthy, every file
            in the sample directory is read and joined column-wise.
    Returns:
        Univariate: the 'RT' column as a pandas Series indexed by the parsed
        'time' timestamps.
        Multivariate: a DataFrame with a parsed 'time' column followed by one
        'value' column per sample file, labelled
        'ACS', 'AQ', 'ART', 'CS', 'CV', 'EAT', 'HV', 'PI', 'RT', 'STATUS'.
    Raises:
        ValueError: In multivariate mode when the sample directory holds no
            files (pandas also raises ValueError when the number of files
            does not match the ten fixed labels).
    """
    sample_dir = path.join("data", "sample")

    if not multivariate:
        data = pd.read_csv(path.join(sample_dir, "room_temperature_1710876.csv"))
        data['utc_org_rec_time'] = pd.to_datetime(data['utc_org_rec_time'])
        # The file must contain exactly two columns: timestamp and reading.
        data.columns = ['time', 'RT']
        return data.set_index(['time'])['RT']

    # listdir() returns entries in arbitrary order, while the labels below are
    # assigned by position; sorting keeps the file -> label mapping stable.
    # NOTE(review): the labels are assumed to follow sorted filename order --
    # confirm against the actual sample files.
    files = sorted(
        f for f in listdir(sample_dir) if path.isfile(path.join(sample_dir, f))
    )
    if not files:
        raise ValueError("no sample files found in %r" % sample_dir)

    frames = [pd.read_csv(path.join(sample_dir, f)) for f in files]

    # Timestamps are taken from the last file read; the 'value' column of
    # every file is placed next to them, aligned on the row index.
    pieces = [frames[-1][['utc_org_rec_time']]]
    pieces.extend(frame[['value']] for frame in frames)
    data = pd.concat(pieces, axis=1)

    data.columns = ['time', 'ACS', 'AQ', 'ART', 'CS', 'CV', 'EAT', 'HV', 'PI', 'RT', 'STATUS']
    data['time'] = pd.to_datetime(data['time'])

    return data

def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """
    Frame a time series as a supervised learning dataset.

    Each output row holds the ``n_in`` lagged observations followed by the
    current observation and the next ``n_out - 1`` observations.

    Arguments:
        data: Observations as anything ``DataFrame(data)`` accepts: a list,
            a 1-D or 2-D NumPy array, a pandas Series or a DataFrame.
            One column is treated as one variable.
        n_in: Number of lag observations as input (X).
        n_out: Number of observations as output (y).
        dropnan: Boolean whether or not to drop rows with NaN values. This
            removes the edge rows made incomplete by shifting, and also any
            row where the input data itself contains NaN.
    Returns:
        Pandas DataFrame of series framed for supervised learning, with
        columns named 'varK(t-i)', 'varK(t)' and 'varK(t+i)'.
    """
    df = DataFrame(data)
    # Take the variable count from the frame itself rather than from the raw
    # input, so 1-D inputs (Series, 1-D arrays) and lists of rows are counted
    # correctly instead of failing on ``data.shape[1]``.
    n_vars = df.shape[1]
    cols, names = [], []
    # input sequence (t-n, ... t-1)
    for lag in range(n_in, 0, -1):
        cols.append(df.shift(lag))
        names += ['var%d(t-%d)' % (j + 1, lag) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for step in range(n_out):
        cols.append(df.shift(-step))
        suffix = 't' if step == 0 else 't+%d' % step
        names += ['var%d(%s)' % (j + 1, suffix) for j in range(n_vars)]
    # put it all together
    agg = concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values
    if dropnan:
        agg.dropna(inplace=True)
    return agg