In [1]:
# Import our dependencies
import math

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.utils import Sequence
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

import numpy as np
import pandas as pd
import time

import os

# Load the daily 2018 AIS vessel counts.
# index_col=0 makes the first CSV column (the date strings) the DataFrame index.
DATA_FILE = "ml_data_daily_2018.csv"
ais_df = pd.read_csv(DATA_FILE, index_col=0)

In [2]:
# Preview the first five rows to sanity-check the load
ais_df.head(n=5)

Unnamed: 0,Fishing,TugTow,Recreational,Passenger,Cargo,Tanker,Other,Unavailable
2018/01/01,9,37,145,44,27,13,12,7
2018/01/02,13,45,119,41,27,14,19,10
2018/01/03,13,44,106,40,36,14,20,11
2018/01/04,15,44,103,45,30,10,15,12
2018/01/05,10,45,107,41,26,13,21,10


In [3]:
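# NOTE: Intentionally left disabled. The CSV is read with index_col=0, so the dates are
# the DataFrame index and there is no "Unnamed: 0" column to rename.
# # Renaming the "Unnamed: 0" column to "Date"
# ais_df.rename(columns={'Unnamed: 0': 'Date'}, inplace=True)
# ais_df.head()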

In [4]:
# Inspect the DataFrame's index, column dtypes and non-null counts to confirm that every
# column is numeric and that there are no missing values.
ais_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 365 entries, 2018/01/01 to 2018/12/31
Data columns (total 8 columns):
 #   Column        Non-Null Count  Dtype
---  ------        --------------  -----
 0   Fishing       365 non-null    int64
 1   TugTow        365 non-null    int64
 2   Recreational  365 non-null    int64
 3   Passenger     365 non-null    int64
 4   Cargo         365 non-null    int64
 5   Tanker        365 non-null    int64
 6   Other         365 non-null    int64
 7   Unavailable   365 non-null    int64
dtypes: int64(8)
memory usage: 25.7+ KB


In [5]:
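# NOTE: Intentionally left disabled. There is no "Date" column (the dates are the index,
# stored as strings such as 2018/01/01), so the lines below would raise a KeyError.
# To work with real datetimes, convert the index instead:
#     ais_df.index = pd.to_datetime(ais_df.index)
# # Convert "Date" to datetime
# ais_df['Date']=pd.to_datetime(ais_df['Date'])

# # And recheck info
# ais_df.info()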

In [6]:
# Creating a total column for our data.
# Only the per-vessel-type columns are summed: dropping any existing 'Total' first keeps
# this cell idempotent, so re-running it does not fold the previous total into the new one.
vessel_counts = ais_df.drop(columns=['Total'], errors='ignore')
ais_df['Total'] = vessel_counts.sum(numeric_only=True, axis=1)
ais_df.head()

Unnamed: 0,Fishing,TugTow,Recreational,Passenger,Cargo,Tanker,Other,Unavailable,Total
2018/01/01,9,37,145,44,27,13,12,7,294
2018/01/02,13,45,119,41,27,14,19,10,288
2018/01/03,13,44,106,40,36,14,20,11,284
2018/01/04,15,44,103,45,30,10,15,12,274
2018/01/05,10,45,107,41,26,13,21,10,273


In [7]:
# Re-check the DataFrame info now that the 'Total' column has been added
# (the frame should show one extra column and still no missing values).
ais_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 365 entries, 2018/01/01 to 2018/12/31
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype
---  ------        --------------  -----
 0   Fishing       365 non-null    int64
 1   TugTow        365 non-null    int64
 2   Recreational  365 non-null    int64
 3   Passenger     365 non-null    int64
 4   Cargo         365 non-null    int64
 5   Tanker        365 non-null    int64
 6   Other         365 non-null    int64
 7   Unavailable   365 non-null    int64
 8   Total         365 non-null    int64
dtypes: int64(9)
memory usage: 28.5+ KB


In [8]:
# Create nine single-column DataFrames, one per vessel category plus 'Total'.
# The dates are the index of ais_df, so each frame keeps them without selecting a
# separate 'Date' column.

def _single_column_frame(column_name):
    """Return a new one-column DataFrame holding `column_name` from ais_df."""
    return ais_df.loc[:, [column_name]]

# Fishing
ais_fishing_df = _single_column_frame('Fishing')
# TugTow
ais_tugtow_df = _single_column_frame('TugTow')
# Recreational
ais_recreational_df = _single_column_frame('Recreational')
# Passenger
ais_passenger_df = _single_column_frame('Passenger')
# Cargo
ais_cargo_df = _single_column_frame('Cargo')
# Tanker
ais_tanker_df = _single_column_frame('Tanker')
# Other
ais_other_df = _single_column_frame('Other')
# Unavailable
ais_unavailable_df = _single_column_frame('Unavailable')
# Total
ais_total_df = _single_column_frame('Total')

In [9]:
# From here on, 'Total' is used as the worked example; the same steps are repeated
# for the other vessel categories.

# Display the new single-column DataFrame
ais_total_df

Unnamed: 0,Total
2018/01/01,294
2018/01/02,288
2018/01/03,284
2018/01/04,274
2018/01/05,273
...,...
2018/12/27,298
2018/12/28,298
2018/12/29,306
2018/12/30,308


In [10]:
# Chronological 80% / 10% / 10% split into training, validation and test sets.
# No shuffling: the rows are a time series, so each later set follows the earlier one.

n_rows = len(ais_total_df)

# Positional cut points. Note the naming: total_val_ind is where the validation set
# STARTS (i.e. where training ends) and total_train_ind is where the test set starts.
total_val_ind = int(n_rows * 0.8)
total_train_ind = int(n_rows * 0.9)

total_train_df = ais_total_df.iloc[:total_val_ind]
total_val_df = ais_total_df.iloc[total_val_ind:total_train_ind]
total_test_df = ais_total_df.iloc[total_train_ind:]

In [11]:
# Print the training, validation and test sets in that order
for subset in (total_train_df, total_val_df, total_test_df):
    print(subset)

            Total
2018/01/01    294
2018/01/02    288
2018/01/03    284
2018/01/04    274
2018/01/05    273
...           ...
2018/10/15    397
2018/10/16    395
2018/10/17    378
2018/10/18    378
2018/10/19    392

[292 rows x 1 columns]
            Total
2018/10/20    437
2018/10/21    410
2018/10/22    399
2018/10/23    375
2018/10/24    377
2018/10/25    373
2018/10/26    387
2018/10/27    403
2018/10/28    387
2018/10/29    369
2018/10/30    348
2018/10/31    353
2018/11/01    361
2018/11/02    363
2018/11/03    392
2018/11/04    379
2018/11/05    361
2018/11/06    335
2018/11/07    337
2018/11/08    335
2018/11/09    345
2018/11/10    357
2018/11/11    360
2018/11/12    353
2018/11/13    342
2018/11/14    333
2018/11/15    334
2018/11/16    334
2018/11/17    318
2018/11/18    303
2018/11/19    320
2018/11/20    308
2018/11/21    302
2018/11/22    279
2018/11/23    274
2018/11/24    307
            Total
2018/11/25    349
2018/11/26    335
2018/11/27    317
2018/11/28    321
2018

In [12]:
# Create the LSTM Model
# Single-output regression network: LSTM -> Dense(64) -> Dropout -> Dense(1).

model = Sequential()

# Recurrent layer.
# input_shape is (timesteps, features), taken here from the (rows, columns) of the
# training frame, so every sample must be one sequence as long as the whole training set.
# NOTE(review): confirm this matches how the training sequences are built before fitting.
model.add(LSTM(292, activation='relu', input_shape=(total_train_df.shape[0], total_train_df.shape[1]), return_sequences=False, dropout=0.1, recurrent_dropout=0.1))

# Fully connected layer
model.add(Dense(units=64, activation='relu'))

# Dropout for regularization
model.add(Dropout(0.2))

# Output layer.
# Linear activation for regression: softmax over a single unit always evaluates to 1.0,
# which would make the network output a constant regardless of its input.
model.add(Dense(units=1, activation='linear'))

In [13]:
# Print the layer-by-layer structure of the model (output shapes and parameter counts)
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 292)               343392    
                                                                 
 dense (Dense)               (None, 64)                18752     
                                                                 
 dropout (Dropout)           (None, 64)                0         
                                                                 
 dense_1 (Dense)             (None, 1)                 65        
                                                                 
Total params: 362,209
Trainable params: 362,209
Non-trainable params: 0
_________________________________________________________________


In [14]:
# Compile the Model
# Mean squared error is the training loss. Mean absolute error is tracked as the metric
# because 'accuracy' (an exact-match classification rate) is not meaningful when the
# model predicts a continuous value.
model.compile(optimizer='adam', loss='mse', metrics=['mae'])