In [1]:
# Import our dependencies
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import math
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras.callbacks import EarlyStopping, ModelCheckpoint

# Fix NumPy's global random seed so NumPy-driven randomness repeats between runs.
# NOTE(review): this seeds NumPy only. The Keras backend presumably keeps its own
# random state, so model training may still differ run to run - confirm, and seed
# the backend as well if exact reproducibility is required.
np.random.seed(24)

# Read the AIS csv file (path is relative to the working directory) and use its
# first column as the DataFrame index.
ais_df = pd.read_csv("full_ais_data.csv", index_col=0)

In [2]:
# Look at our data: preview the first rows to confirm the CSV loaded with the
# expected index and columns
ais_df.head()

Unnamed: 0_level_0,Fishing,TugTow,Recreational,Passenger,Cargo,Tanker,Other,Unavailable
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2018-01-01,9.0,37.0,145.0,44.0,27.0,13.0,12.0,7.0
2018-01-02,13.0,45.0,119.0,41.0,27.0,14.0,19.0,10.0
2018-01-03,13.0,44.0,106.0,40.0,36.0,14.0,20.0,11.0
2018-01-04,15.0,44.0,103.0,45.0,30.0,10.0,15.0,12.0
2018-01-05,10.0,45.0,107.0,41.0,26.0,13.0,21.0,10.0


In [3]:
# Creating a total column for our data.
# Any existing 'Total' column is left out of the row sum so this cell is
# idempotent: re-running it (without restarting the kernel) no longer folds the
# previous total into the new one. errors='ignore' makes the first run, where
# 'Total' does not exist yet, behave exactly as before.
ais_df['Total'] = ais_df.drop(columns='Total', errors='ignore').sum(axis=1)
ais_df.head()

Unnamed: 0_level_0,Fishing,TugTow,Recreational,Passenger,Cargo,Tanker,Other,Unavailable,Total
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2018-01-01,9.0,37.0,145.0,44.0,27.0,13.0,12.0,7.0,294.0
2018-01-02,13.0,45.0,119.0,41.0,27.0,14.0,19.0,10.0,288.0
2018-01-03,13.0,44.0,106.0,40.0,36.0,14.0,20.0,11.0,284.0
2018-01-04,15.0,44.0,103.0,45.0,30.0,10.0,15.0,12.0,274.0
2018-01-05,10.0,45.0,107.0,41.0,26.0,13.0,21.0,10.0,273.0


In [None]:
# And then check the DataFrame's column dtypes and non-null counts to spot any NaNs
ais_df.info()

In [None]:
# Split ais_df into nine single-column DataFrames: one per boat type plus the total.
# The column names on the right are listed in the same order as the variables on
# the left; selecting with a one-element list keeps each result a DataFrame
# rather than a Series.
(
    ais_fishing_df,
    ais_tugtow_df,
    ais_recreational_df,
    ais_passenger_df,
    ais_cargo_df,
    ais_tanker_df,
    ais_other_df,
    ais_unavailable_df,
    ais_total_df,
) = (
    ais_df.loc[:, [column]]
    for column in (
        'Fishing',
        'TugTow',
        'Recreational',
        'Passenger',
        'Cargo',
        'Tanker',
        'Other',
        'Unavailable',
        'Total',
    )
)

In [None]:
# From here, the code is heavily inspired by the code found on the following sites:
# https://machinelearningmastery.com/time-series-prediction-lstm-recurrent-neural-networks-python-keras/
# https://towardsdatascience.com/time-series-forecasting-with-recurrent-neural-networks-74674e289816

In [None]:
# Helpers and entry point that automate building a model for all nine DataFrames

def create_dataset(dataset, look_back=1):
    """Build supervised-learning pairs from a 2-D, single-column series.

    Sample ``X[i]`` holds the ``look_back`` consecutive values starting at row
    ``i`` and ``y[i]`` is the value in the row right after that window.

    Args:
        dataset: 2-D array whose column 0 holds the series values.
        look_back: Number of past values used to predict the next one.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays with ``len(dataset) - look_back``
        samples each; ``X`` has one row of ``look_back`` values per sample.
    """
    dataX, dataY = [], []
    # Stop at len - look_back (not len - look_back - 1) so the final
    # window/target pair is kept instead of being silently dropped.
    for i in range(len(dataset) - look_back):
        dataX.append(dataset[i:i + look_back, 0])
        dataY.append(dataset[i + look_back, 0])
    return np.array(dataX), np.array(dataY)


def forecast_future(model, history, look_back, num_prediction):
    """Roll a fitted model forward, feeding each prediction back in as input.

    Args:
        model: Object with a Keras-style ``predict`` accepting an array of
            shape ``(1, 1, look_back)`` and returning a 2-D array.
        history: Scaled observed series; only its last ``look_back`` values
            are used to seed the first prediction.
        look_back: Window length the model was trained with.
        num_prediction: Number of future steps to generate.

    Returns:
        Array of shape ``(num_prediction, 1)`` holding only the newly predicted
        (still scaled) values; no observed value is included.
    """
    window = np.asarray(history, dtype='float32').reshape(-1)[-look_back:]
    predictions = []
    for _ in range(num_prediction):
        # Use the same (samples, time steps, features) layout as the training
        # data so this also works when look_back > 1.
        model_input = window.reshape((1, 1, look_back))
        next_value = float(model.predict(model_input)[0][0])
        predictions.append(next_value)
        # Slide the window: drop the oldest value, append the new prediction.
        window = np.append(window[1:], next_value).astype('float32')
    return np.array(predictions, dtype='float32').reshape((-1, 1))


def BoatModel(x, look_back=1, forecast_days=30):
    """Train an LSTM on one single-column series, score it, and plot a forecast.

    The series is split 75% / 25% into train and test (in order, no shuffling),
    scaled to [0, 1], and used to fit a small LSTM that predicts the next value
    from the previous ``look_back`` values. The observed data, the train and
    test predictions, and a ``forecast_days``-step forecast are plotted.

    Args:
        x: Single-column DataFrame holding the series to model.
        look_back: Number of past values used to predict the next one.
        forecast_days: Number of future steps to forecast and plot.

    Returns:
        Tuple ``(train_score, test_score, graph)`` where the scores are the
        formatted RMSE strings and ``graph`` is the return value of
        ``plt.show()`` (``None``); the figure itself is rendered as a side
        effect.

    Raises:
        ValueError: If ``look_back`` or ``forecast_days`` is below 1, or if the
            series is too short to give both the train and the test split at
            least one ``look_back`` window plus a target.
    """
    if look_back < 1:
        raise ValueError("look_back must be at least 1")
    if forecast_days < 1:
        raise ValueError("forecast_days must be at least 1")

    # Convert the DataFrame into an array, and change the type to floats for the Neural Network
    data = x.values
    data = data.astype('float32')

    # Determine the length of what our split will be
    data_split = int(len(data) * 0.75)
    if data_split <= look_back or len(data) - data_split <= look_back:
        raise ValueError(
            "Series of length %d is too short for look_back=%d: both the train "
            "and the test split need more than look_back rows" % (len(data), look_back)
        )

    # Normalize the data by using a scaler. The scaler is fitted on the training
    # rows only, so the test rows do not leak their min/max into training.
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaler.fit(data[:data_split])
    data = scaler.transform(data)

    # Split our data into training and testing using slicing
    train, test = data[:data_split], data[data_split:]

    # Split our values into X=t and Y=t+1, where t spans the look_back window
    trainX, trainY = create_dataset(train, look_back)
    testX, testY = create_dataset(test, look_back)

    # Reshape the data to (samples, time steps, features) to incorporate into the LSTM
    trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
    testX = np.reshape(testX, (testX.shape[0], 1, testX.shape[1]))

    # Create and fit the LSTM network
    model = Sequential()
    model.add(LSTM(4, activation='relu', input_shape=(1, look_back)))
    model.add(Dense(2))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mse', 'mae', 'mape'])
    # NOTE(review): early stopping watches the training loss even though a
    # validation split is held out - confirm whether 'val_loss' was intended.
    es = EarlyStopping(monitor='loss', patience=15)
    # callbacks takes a flat list of callback objects (previously a nested list).
    model.fit(trainX, trainY, epochs=100, validation_split=0.3, batch_size=1, verbose=2, callbacks=[es])

    # Make predictions
    trainPredict = model.predict(trainX)
    testPredict = model.predict(testX)

    # Invert the scaling so predictions and targets are back in original units
    trainPredict = scaler.inverse_transform(trainPredict)
    trainActual = scaler.inverse_transform(trainY.reshape(-1, 1))[:, 0]
    testPredict = scaler.inverse_transform(testPredict)
    testActual = scaler.inverse_transform(testY.reshape(-1, 1))[:, 0]

    # Calculate root mean squared error
    trainScore = math.sqrt(mean_squared_error(trainActual, trainPredict[:, 0]))
    testScore = math.sqrt(mean_squared_error(testActual, testPredict[:, 0]))

    # And catch the results to return later
    show_trainScore = 'Train Score: %.2f RMSE' % (trainScore)
    show_testScore = 'Test Score: %.2f RMSE' % (testScore)

    # Shift the train predictions for plotting: the first prediction belongs to
    # row look_back, because it is predicted from rows 0..look_back-1.
    trainPredictPlot = np.full_like(data, np.nan)
    trainPredictPlot[look_back:look_back + len(trainPredict), :] = trainPredict

    # Shift the test predictions for plotting: they start look_back rows into
    # the test split and run to the end of the observed data.
    testPredictPlot = np.full_like(data, np.nan)
    test_start = data_split + look_back
    testPredictPlot[test_start:test_start + len(testPredict), :] = testPredict

    # Predict the next forecast_days steps and bring them back to original units
    forecast = forecast_future(model, data, look_back, forecast_days)
    forecast = scaler.inverse_transform(forecast)

    # Place the forecast directly after the last observed row
    futurePlot = np.full((len(data) + len(forecast), 1), np.nan)
    futurePlot[len(data):] = forecast

    # Plot the root data, train, test, and future outcomes
    series_name = str(x.columns[0])
    fig, ax = plt.subplots()
    ax.plot(scaler.inverse_transform(data), label='Observed')
    ax.plot(trainPredictPlot, label='Train prediction')
    ax.plot(testPredictPlot, label='Test prediction')
    ax.plot(futurePlot, label='Forecast')
    ax.set_title('%s: observed, predicted and forecast' % series_name)
    ax.set_xlabel('Observation index')
    ax.set_ylabel(series_name)
    ax.legend()
    graph = plt.show()

    return show_trainScore, show_testScore, graph

### Fishing

In [None]:
# Fit, score and plot the model for the Fishing series; the cell output is the tuple BoatModel returns
BoatModel(ais_fishing_df)

### TugTow

In [None]:
# Fit, score and plot the model for the TugTow series; the cell output is the tuple BoatModel returns
BoatModel(ais_tugtow_df)

### Recreational

In [None]:
# Fit, score and plot the model for the Recreational series; the cell output is the tuple BoatModel returns
BoatModel(ais_recreational_df)

### Passenger

In [None]:
# Fit, score and plot the model for the Passenger series; the cell output is the tuple BoatModel returns
BoatModel(ais_passenger_df)

### Cargo

In [None]:
# Fit, score and plot the model for the Cargo series; the cell output is the tuple BoatModel returns
BoatModel(ais_cargo_df)

### Tanker

In [None]:
# Fit, score and plot the model for the Tanker series; the cell output is the tuple BoatModel returns
BoatModel(ais_tanker_df)

### Other

In [None]:
# Fit, score and plot the model for the Other series; the cell output is the tuple BoatModel returns
BoatModel(ais_other_df)

### Unavailable 

In [None]:
# Fit, score and plot the model for the Unavailable series; the cell output is the tuple BoatModel returns
BoatModel(ais_unavailable_df)

### Total

In [None]:
# Fit, score and plot the model for the Total series; the cell output is the tuple BoatModel returns
BoatModel(ais_total_df)