In [1]:
import numpy as np
import pandas as pd
import math
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM

Using TensorFlow backend.


In [2]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

In [3]:
# for reproducability of the results, lets fix a seed function
np.random.seed(1234)

In [4]:
import warnings
warnings.filterwarnings('ignore')

In [5]:
dataset = pd.read_csv('international-airline-passengers.csv', usecols=[1], engine='python',skipfooter=3)

In [6]:
dataset

Unnamed: 0,International airline passengers: monthly totals in thousands. Jan 49 ? Dec 60
0,112
1,118
2,132
3,129
4,121
...,...
139,606
140,508
141,461
142,390


In [7]:
dataset = dataset.values
dataset = dataset.astype('float32')

In [8]:
#Normalize the data
scaler = MinMaxScaler(feature_range=(0,1))
dataset = scaler.fit_transform(dataset)

In [9]:
dataset

array([[0.01544401],
       [0.02702703],
       [0.05405405],
       [0.04826255],
       [0.03281853],
       [0.05984557],
       [0.08494207],
       [0.08494207],
       [0.06177607],
       [0.02895753],
       [0.        ],
       [0.02702703],
       [0.02123553],
       [0.04247104],
       [0.07142857],
       [0.05984557],
       [0.04054055],
       [0.08687258],
       [0.12741312],
       [0.12741312],
       [0.10424709],
       [0.05598456],
       [0.01930502],
       [0.06949806],
       [0.07915059],
       [0.08880308],
       [0.14285713],
       [0.11389962],
       [0.13127413],
       [0.14285713],
       [0.18339768],
       [0.18339768],
       [0.15444016],
       [0.11196911],
       [0.08108109],
       [0.1196911 ],
       [0.12934363],
       [0.14671814],
       [0.17181468],
       [0.14864865],
       [0.15250966],
       [0.22007722],
       [0.24324325],
       [0.26640925],
       [0.2027027 ],
       [0.16795367],
       [0.13127413],
       [0.173

In [10]:
#spliting the train and test set
train_size = int(len(dataset)*0.67)

In [11]:
print(train_size)

96


In [12]:
test_size = len(dataset)-train_size

In [13]:
print(test_size)

48


In [14]:
train, test = dataset[0:train_size, :], dataset[train_size: len(dataset), :]

In [15]:
print(len(train), len(test))

96 48


In [16]:
# create dataset matrix
def create_dataset(dataset, look_back=1):
    dataX, dataY = [],[]
    for i in range(len(dataset)-look_back-1):
        a = dataset[i:(i+look_back),0]
        dataX.append(a)
        dataY.append(dataset[i+look_back, 0])
    return np.array(dataX), np.array(dataY)

In [17]:
#Reshape dataset X= current time, Y= future time 
look_back=1
trainX, trainY = create_dataset(train, look_back)
testX, testY = create_dataset(test, look_back)

In [18]:
trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
testX = np.reshape(testX, (testX.shape[0], 1, testX.shape[1]))

In [19]:
print(trainX)

[[[0.01544401]]

 [[0.02702703]]

 [[0.05405405]]

 [[0.04826255]]

 [[0.03281853]]

 [[0.05984557]]

 [[0.08494207]]

 [[0.08494207]]

 [[0.06177607]]

 [[0.02895753]]

 [[0.        ]]

 [[0.02702703]]

 [[0.02123553]]

 [[0.04247104]]

 [[0.07142857]]

 [[0.05984557]]

 [[0.04054055]]

 [[0.08687258]]

 [[0.12741312]]

 [[0.12741312]]

 [[0.10424709]]

 [[0.05598456]]

 [[0.01930502]]

 [[0.06949806]]

 [[0.07915059]]

 [[0.08880308]]

 [[0.14285713]]

 [[0.11389962]]

 [[0.13127413]]

 [[0.14285713]]

 [[0.18339768]]

 [[0.18339768]]

 [[0.15444016]]

 [[0.11196911]]

 [[0.08108109]]

 [[0.1196911 ]]

 [[0.12934363]]

 [[0.14671814]]

 [[0.17181468]]

 [[0.14864865]]

 [[0.15250966]]

 [[0.22007722]]

 [[0.24324325]]

 [[0.26640925]]

 [[0.2027027 ]]

 [[0.16795367]]

 [[0.13127413]]

 [[0.17374519]]

 [[0.17760617]]

 [[0.17760617]]

 [[0.25482625]]

 [[0.25289574]]

 [[0.24131274]]

 [[0.26833975]]

 [[0.3088803 ]]

 [[0.32432434]]

 [[0.25675675]]

 [[0.20656371]]

 [[0.14671814]

In [20]:
print(testX)

[[[0.4073359 ]]

 [[0.3803089 ]]

 [[0.48648646]]

 [[0.47104248]]

 [[0.484556  ]]

 [[0.6138996 ]]

 [[0.6969112 ]]

 [[0.70077217]]

 [[0.57915056]]

 [[0.46911195]]

 [[0.38803086]]

 [[0.44787642]]

 [[0.45559844]]

 [[0.4131274 ]]

 [[0.4980695 ]]

 [[0.47104248]]

 [[0.49999997]]

 [[0.6389961 ]]

 [[0.7471043 ]]

 [[0.7741313 ]]

 [[0.57915056]]

 [[0.492278  ]]

 [[0.3976834 ]]

 [[0.44980696]]

 [[0.49420848]]

 [[0.45945945]]

 [[0.5830116 ]]

 [[0.5637065 ]]

 [[0.61003864]]

 [[0.71042466]]

 [[0.8571429 ]]

 [[0.8783784 ]]

 [[0.69305015]]

 [[0.5849421 ]]

 [[0.4980695 ]]

 [[0.58108103]]

 [[0.6042471 ]]

 [[0.554054  ]]

 [[0.60810804]]

 [[0.6891892 ]]

 [[0.71042466]]

 [[0.8320464 ]]

 [[1.        ]]

 [[0.96911204]]

 [[0.7799227 ]]

 [[0.6891892 ]]]


In [21]:
# Lets create a LSTM(RNN) model
model = Sequential()
model.add(LSTM(4, input_shape = (1, look_back)))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')


In [22]:
# fit the model
model.fit(trainX, trainY, batch_size =1, verbose = 2)

Epoch 1/1
 - 1s - loss: 0.0329


<keras.callbacks.callbacks.History at 0x19f1459bc48>

In [23]:
#make predictions
trainPredict = model.predict(trainX)
testPredict = model.predict(testX)

In [24]:
print(trainPredict)

[[0.10235997]
 [0.10424852]
 [0.10865176]
 [0.10770871]
 [0.10519251]
 [0.1095945 ]
 [0.11367524]
 [0.11367524]
 [0.10990867]
 [0.10456321]
 [0.09984111]
 [0.10424852]
 [0.10330433]
 [0.10676535]
 [0.11147889]
 [0.1095945 ]
 [0.10645084]
 [0.1139888 ]
 [0.12055959]
 [0.12055959]
 [0.11680833]
 [0.10896605]
 [0.10298955]
 [0.11116492]
 [0.11273424]
 [0.11430231]
 [0.1230545 ]
 [0.11837256]
 [0.12118379]
 [0.1230545 ]
 [0.12957689]
 [0.12957689]
 [0.12492222]
 [0.11805985]
 [0.11304796]
 [0.11931028]
 [0.12087172]
 [0.12367742]
 [0.12771764]
 [0.12398876]
 [0.12461115]
 [0.1354387 ]
 [0.13911848]
 [0.14277896]
 [0.13266717]
 [0.12709711]
 [0.12118379]
 [0.12802778]
 [0.12864774]
 [0.12864774]
 [0.14095125]
 [0.14064613]
 [0.13881254]
 [0.1430831 ]
 [0.14943455]
 [0.15183553]
 [0.14125623]
 [0.1332839 ]
 [0.12367742]
 [0.13019583]
 [0.1311234 ]
 [0.12616557]
 [0.14064613]
 [0.13820028]
 [0.14034086]
 [0.14943455]
 [0.16073816]
 [0.15808488]
 [0.14792857]
 [0.13881254]
 [0.13081431]
 [0.13

In [25]:
print(testPredict)

[[0.16454259]
 [0.16044413]
 [0.17630622]
 [0.17404033]
 [0.17602378]
 [0.19439912]
 [0.20556024]
 [0.20606652]
 [0.18957569]
 [0.17375606]
 [0.1616191 ]
 [0.17061414]
 [0.1717598 ]
 [0.16541563]
 [0.17799582]
 [0.17404033]
 [0.17827657]
 [0.19782811]
 [0.21205024]
 [0.21546122]
 [0.18957569]
 [0.17715208]
 [0.16308337]
 [0.17090088]
 [0.17743358]
 [0.17233129]
 [0.1901159 ]
 [0.18740445]
 [0.19386749]
 [0.20732714]
 [0.22556156]
 [0.22805245]
 [0.20505278]
 [0.1903856 ]
 [0.17799582]
 [0.18984592]
 [0.193068  ]
 [0.186039  ]
 [0.19360127]
 [0.20454417]
 [0.20732714]
 [0.22256857]
 [0.24157254]
 [0.2382602 ]
 [0.21618441]
 [0.20454417]]


In [26]:
# Reverse the predicted value to actual values

trainPredict = scaler.inverse_transform(trainPredict)
testPredict = scaler.inverse_transform(testPredict)

trainY = scaler.inverse_transform([trainY])
testY = scaler.inverse_transform([testY])

In [27]:
print(trainPredict, trainY)

[[157.02248]
 [158.00073]
 [160.28162]
 [159.79312]
 [158.48972]
 [160.76994]
 [162.88377]
 [162.88377]
 [160.9327 ]
 [158.16374]
 [155.7177 ]
 [158.00073]
 [157.51164]
 [159.30446]
 [161.74606]
 [160.76994]
 [159.14153]
 [163.04619]
 [166.44987]
 [166.44987]
 [164.50671]
 [160.44441]
 [157.34859]
 [161.58344]
 [162.39633]
 [163.2086 ]
 [167.74223]
 [165.31699]
 [166.7732 ]
 [167.74223]
 [171.12083]
 [171.12083]
 [168.7097 ]
 [165.155  ]
 [162.55884]
 [165.80273]
 [166.61156]
 [168.0649 ]
 [170.15775]
 [168.22617]
 [168.54857]
 [174.15726]
 [176.06337]
 [177.9595 ]
 [172.72159]
 [169.8363 ]
 [166.7732 ]
 [170.31839]
 [170.63953]
 [170.63953]
 [177.01274]
 [176.85469]
 [175.90489]
 [178.11705]
 [181.40709]
 [182.6508 ]
 [177.17073]
 [173.04105]
 [168.0649 ]
 [171.44144]
 [171.92192]
 [169.35376]
 [176.85469]
 [175.58775]
 [176.69656]
 [181.40709]
 [187.26236]
 [185.88799]
 [180.627  ]
 [175.90489]
 [171.76181]
 [175.90489]
 [177.9595 ]
 [176.53838]
 [181.87415]
 [182.18507]
 [182.34041]

In [28]:
print(testPredict, testY)

[[189.23306]
 [187.11005]
 [195.32663]
 [194.15291]
 [195.18031]
 [204.69875]
 [210.48021]
 [210.74246]
 [202.20021]
 [194.00565]
 [187.71869]
 [192.37813]
 [192.97159]
 [189.68529]
 [196.20184]
 [194.15291]
 [196.34726]
 [206.47496]
 [213.84203]
 [215.60892]
 [202.20021]
 [195.76477]
 [188.47719]
 [192.52666]
 [195.9106 ]
 [193.26762]
 [202.48004]
 [201.07552]
 [204.42336]
 [211.39546]
 [220.8409 ]
 [222.13118]
 [210.21733]
 [202.61975]
 [196.20184]
 [202.3402 ]
 [204.00923]
 [200.36821]
 [204.28546]
 [209.95387]
 [211.39546]
 [219.29053]
 [229.13457]
 [227.41878]
 [215.98352]
 [209.95387]] [[301.00001152 355.999993   348.00001085 355.00000681 421.9999924
  465.0000161  466.99998848 403.99999397 346.99999379 304.99998716
  335.99999132 339.99999783 317.99999288 362.00000277 348.00001085
  362.99998896 434.99999813 491.00002756 505.00001948 403.99999397
  359.00001333 310.00001073 337.00000838 359.99999952 342.00000109
  406.0000281  395.99998094 420.00002002 471.99998119 548.00004318


In [29]:
# Calculate RMSE
trainScore = math.sqrt(mean_squared_error(trainY[0], trainPredict[:, 0]))
print('Train : %.2f RMSE' % (trainScore))

Train : 72.85 RMSE


In [30]:
testScore= math.sqrt(mean_squared_error(testY[0], testPredict[:, 0]))
print('Test : %.2f RMSE' % (testScore))

Test : 223.26 RMSE
