In [1]:
# Import our dependencies
import math

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.utils import Sequence
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

import numpy as np
import pandas as pd
import time

import os

# Upload and read the csv files
ais_df = pd.read_csv("ml_data_daily_2018.csv", index_col=0)

In [2]:
# Fix a random seed so the data can be reproduced
np.random.seed(24)

In [3]:
# Look at our data
ais_df.head()

Unnamed: 0,Fishing,TugTow,Recreational,Passenger,Cargo,Tanker,Other,Unavailable
2018/01/01,9,37,145,44,27,13,12,7
2018/01/02,13,45,119,41,27,14,19,10
2018/01/03,13,44,106,40,36,14,20,11
2018/01/04,15,44,103,45,30,10,15,12
2018/01/05,10,45,107,41,26,13,21,10


In [None]:
# # Renaming the "Unnamed:0" column to "Date"
# ais_df.rename(columns={'Unnamed: 0': 'Date'}, inplace=True)
# ais_df.head()

In [None]:
# Check the info of the DataFrame to see the type of data in each column and to make sure we have clean data
# ais_df.info()

In [None]:
# # Convert "Date" to datetime
# ais_df['Date']=pd.to_datetime(ais_df['Date'])

# # And recheck info
# ais_df.info()

In [4]:
# Creating a total column for our data
ais_df['Total']= ais_df.sum(numeric_only=True, axis=1)
ais_df.head()

Unnamed: 0,Fishing,TugTow,Recreational,Passenger,Cargo,Tanker,Other,Unavailable,Total
2018/01/01,9,37,145,44,27,13,12,7,294
2018/01/02,13,45,119,41,27,14,19,10,288
2018/01/03,13,44,106,40,36,14,20,11,284
2018/01/04,15,44,103,45,30,10,15,12,274
2018/01/05,10,45,107,41,26,13,21,10,273


In [5]:
# Check the info one last time now that we have our new column
ais_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 365 entries, 2018/01/01 to 2018/12/31
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype
---  ------        --------------  -----
 0   Fishing       365 non-null    int64
 1   TugTow        365 non-null    int64
 2   Recreational  365 non-null    int64
 3   Passenger     365 non-null    int64
 4   Cargo         365 non-null    int64
 5   Tanker        365 non-null    int64
 6   Other         365 non-null    int64
 7   Unavailable   365 non-null    int64
 8   Total         365 non-null    int64
dtypes: int64(9)
memory usage: 28.5+ KB


In [6]:
# Convert the values in the DataFrame to floating point values, which will help with the Neural Network,
ais_df = ais_df.astype('float32')

# And then check the type
ais_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 365 entries, 2018/01/01 to 2018/12/31
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Fishing       365 non-null    float32
 1   TugTow        365 non-null    float32
 2   Recreational  365 non-null    float32
 3   Passenger     365 non-null    float32
 4   Cargo         365 non-null    float32
 5   Tanker        365 non-null    float32
 6   Other         365 non-null    float32
 7   Unavailable   365 non-null    float32
 8   Total         365 non-null    float32
dtypes: float32(9)
memory usage: 15.7+ KB


In [7]:
# Create nine DataFrames, each with the Date and one unique column:

# Fishing
#ais_fishing_df = ais_df.loc[:, ['Date', 'Fishing']]
ais_fishing_df = ais_df.loc[:, ['Fishing']]
# TugTow
#ais_tugtow_df = ais_df.loc[:, ['Date', 'TugTow']]
ais_tugtow_df = ais_df.loc[:, ['TugTow']]
# Recreational
#ais_recreational_df = ais_df.loc[:, ['Date', 'Recreational']]
ais_recreational_df = ais_df.loc[:, ['Recreational']]
# Passenger
#ais_passenger_df = ais_df.loc[:, ['Date', 'Passenger']]
ais_passenger_df = ais_df.loc[:, ['Passenger']]
# Cargo
#ais_cargo_df = ais_df.loc[:, ['Date', 'Cargo']]
ais_cargo_df = ais_df.loc[:, ['Cargo']]
# Tanker
#ais_tanker_df = ais_df.loc[:, ['Date', 'Tanker']]
ais_tanker_df = ais_df.loc[:, ['Tanker']]
# Other
#ais_other_df = ais_df.loc[:, ['Date', 'Other']]
ais_other_df = ais_df.loc[:, ['Other']]
# Unavailable
#ais_unavailable_df = ais_df.loc[:, ['Date', 'Unavailable']]
ais_unavailable_df = ais_df.loc[:, ['Unavailable']]
# Total
#ais_total_df = ais_df.loc[:, ['Date', 'Total']]
ais_total_df = ais_df.loc[:, ['Total']]

In [8]:
# From here we will be using 'Total' as an example, and repeating the code for the rest of the data

# Check the new Dataframe
ais_total_df

Unnamed: 0,Total
2018/01/01,294.0
2018/01/02,288.0
2018/01/03,284.0
2018/01/04,274.0
2018/01/05,273.0
...,...
2018/12/27,298.0
2018/12/28,298.0
2018/12/29,306.0
2018/12/30,308.0


In [9]:
# Normalize the data by using a scaler
scaler = MinMaxScaler(feature_range=(0, 1))
ais_total_df = scaler.fit_transform(ais_total_df)

In [10]:
# Split our data into training and testing, using slicing

#total_val_ind = int(len(ais_total_df)*0.8)
total_train_size = int(len(ais_total_df)*0.75)

total_train_df = ais_total_df[:total_train_size]
total_test_df = ais_total_df[total_train_size:]

#total_val_df = ais_total_df[total_val_ind:total_train_ind]

In [11]:
#Check out the datasets
print(len(total_train_df), len(total_test_df))
#print(total_val_df)


273 92


In [12]:
# Convert an array of values into a dataset matrix
def create_dataset(dataset, look_back=1):
    dataX, dataY = [], []
    for i in range(len(dataset)-look_back-1):
        a = dataset[i:(i+look_back), 0]
        dataX.append(a)
        dataY.append(dataset[i + look_back, 0])
    return np.array(dataX), np.array(dataY)


In [13]:
# Reshape the values into X=t and Y=t+1
look_back = 1
total_train_X, total_train_Y = create_dataset(total_train_df, look_back)
total_test_X, total_test_Y = create_dataset(total_test_df, look_back)

In [14]:
# Reshape the data to incorperate into the LSTM
total_train_X = np.reshape(total_train_X, (total_train_X.shape[0], 1, total_train_X.shape[1]))
total_test_X = np.reshape(total_test_X, (total_test_X.shape[0], 1, total_test_X.shape[1]))

In [16]:
total_train_X.shape

(271, 1, 1)

In [17]:
# Create the LSTM Model

model = Sequential()

# Recurrent layer
model.add(LSTM(4, input_shape=(1, look_back)))

# Fully connected layer
model.add(Dense(1))

# Dropout for regularization
model.add(Dropout(0.2))

# Output layer
model.add(Dense(units=1, activation='softmax'))

# Recurrent layer
#model.add(LSTM(292, activation='relu', input_shape=(total_train_df.shape[0], total_train_df.shape[1]), return_sequences=False, dropout=0.1, recurrent_dropout=0.1))

# Fully connected layer
#model.add(Dense(units=64, activation='relu'))

In [18]:
# Check the structure of the model
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 4)                 96        
                                                                 
 dense (Dense)               (None, 1)                 5         
                                                                 
 dropout (Dropout)           (None, 1)                 0         
                                                                 
 dense_1 (Dense)             (None, 1)                 2         
                                                                 
Total params: 103
Trainable params: 103
Non-trainable params: 0
_________________________________________________________________


In [19]:
#Compile the Model
model.compile(optimizer='adam', loss='mse', metrics=['accuracy'])

In [20]:
# Train the model
fit_model = model.fit(total_train_X, total_train_Y, epochs=20, batch_size=1, verbose=2)

Epoch 1/20
271/271 - 1s - loss: 0.3499 - accuracy: 0.0000e+00 - 1s/epoch - 5ms/step
Epoch 2/20
271/271 - 0s - loss: 0.3499 - accuracy: 0.0000e+00 - 255ms/epoch - 943us/step
Epoch 3/20
271/271 - 0s - loss: 0.3499 - accuracy: 0.0000e+00 - 245ms/epoch - 905us/step
Epoch 4/20
271/271 - 0s - loss: 0.3499 - accuracy: 0.0000e+00 - 277ms/epoch - 1ms/step
Epoch 5/20
271/271 - 0s - loss: 0.3499 - accuracy: 0.0000e+00 - 316ms/epoch - 1ms/step
Epoch 6/20
271/271 - 0s - loss: 0.3499 - accuracy: 0.0000e+00 - 285ms/epoch - 1ms/step
Epoch 7/20
271/271 - 0s - loss: 0.3499 - accuracy: 0.0000e+00 - 253ms/epoch - 933us/step
Epoch 8/20
271/271 - 0s - loss: 0.3499 - accuracy: 0.0000e+00 - 258ms/epoch - 951us/step
Epoch 9/20
271/271 - 0s - loss: 0.3499 - accuracy: 0.0000e+00 - 345ms/epoch - 1ms/step
Epoch 10/20
271/271 - 0s - loss: 0.3499 - accuracy: 0.0000e+00 - 284ms/epoch - 1ms/step
Epoch 11/20
271/271 - 0s - loss: 0.3499 - accuracy: 0.0000e+00 - 313ms/epoch - 1ms/step
Epoch 12/20
271/271 - 0s - loss: 0.3