https://machinelearningmastery.com/multivariate-time-series-forecasting-lstms-keras/

In [1]:
# prepare data for lstm
from pandas import read_csv
from pandas import DataFrame
from pandas import concat
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.preprocessing import MinMaxScaler

In [2]:
# https://machinelearningmastery.com/convert-time-series-supervised-learning-problem-python/
# convert series to supervised learning
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    n_vars = 1 if type(data) is list else data.shape[1]
    df =  DataFrame(data)
    cols, names = list(), list()
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += [('var%d(t-%d)' % (j+1, i)) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for i in range(0, n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += [('var%d(t)' % (j+1)) for j in range(n_vars)]
        else:
            names += [('var%d(t+%d)' % (j+1, i)) for j in range(n_vars)]
    # put it all together
    agg = concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values
    if dropnan:
        agg.dropna(inplace=True)
    return agg

In [3]:
# load dataset
dataset = read_csv('SeoulBikeData2.csv', encoding= 'unicode_escape', header=0, index_col=0)
values = dataset.values

In [4]:
# encode the categorical variables into integers
'''label_encoder = LabelEncoder()
one_hot = OneHotEncoder()
values[:,10] = label_encoder.fit_transform(values[:,10])
values[:,11] = label_encoder.fit_transform(values[:,11])
values[:,12] = label_encoder.fit_transform(values[:,12])'''

'label_encoder = LabelEncoder()\none_hot = OneHotEncoder()\nvalues[:,10] = label_encoder.fit_transform(values[:,10])\nvalues[:,11] = label_encoder.fit_transform(values[:,11])\nvalues[:,12] = label_encoder.fit_transform(values[:,12])'

In [5]:
# ensure all data is float
values = values.astype('float32')
# normalize features
scaler = MinMaxScaler(feature_range=(0, 1))
scaled = scaler.fit_transform(values)

In [6]:
# frame as supervised learning
'''reframed = series_to_supervised(scaled_data, past_days_to_consider, number_of_predictions_into_the_future)
# drop columns we don't want to predict
reframed.drop(reframed.columns[[number,of,columns]], axis=1, inplace=True)
print(reframed.head())'''

"reframed = series_to_supervised(scaled_data, past_days_to_consider, number_of_predictions_into_the_future)\n# drop columns we don't want to predict\nreframed.drop(reframed.columns[[number,of,columns]], axis=1, inplace=True)\nprint(reframed.head())"

# Test 3:
<h1>24 hours
    hour1: all parameters,
    <p>hour23 to hour24: bike_count
</h1>

In [10]:
# frame as supervised learning
hours_to_consider = 23
reframed = series_to_supervised(scaled, hours_to_consider, 1)
# drop columns we don't want to predict
columns_to_keep = list()
for i in range(10,reframed.shape[1],9):
    columns_to_keep.append(i)
x = 11
n_cols = [i for i in range (10, 216)]
for i in columns_to_keep:
    n_cols.remove(i)
reframed.drop(reframed.columns[[n_cols]], axis=1, inplace=True)
print(reframed.columns)
print(reframed.head())

Index(['var1(t-23)', 'var2(t-23)', 'var3(t-23)', 'var4(t-23)', 'var5(t-23)',
       'var6(t-23)', 'var7(t-23)', 'var8(t-23)', 'var9(t-23)', 'var1(t-22)',
       'var2(t-22)', 'var2(t-21)', 'var2(t-20)', 'var2(t-19)', 'var2(t-18)',
       'var2(t-17)', 'var2(t-16)', 'var2(t-15)', 'var2(t-14)', 'var2(t-13)',
       'var2(t-12)', 'var2(t-11)', 'var2(t-10)', 'var2(t-9)', 'var2(t-8)',
       'var2(t-7)', 'var2(t-6)', 'var2(t-5)', 'var2(t-4)', 'var2(t-3)',
       'var2(t-2)', 'var2(t-1)', 'var2(t)'],
      dtype='object')
    var1(t-23)  var2(t-23)  var3(t-23)  var4(t-23)  var5(t-23)  var6(t-23)  \
23    0.000000    0.220280    0.377551    0.297297         1.0    0.224914   
24    0.043478    0.215035    0.387755    0.108108         1.0    0.224914   
25    0.086957    0.206294    0.397959    0.135135         1.0    0.223183   
26    0.130435    0.202797    0.408163    0.121622         1.0    0.224914   
27    0.173913    0.206294    0.367347    0.310811         1.0    0.207612   

    var7(