In [1]:
# Import our dependencies
import math

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.utils import Sequence
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

import numpy as np
import pandas as pd
import time

import os

# Load the 2018 daily CSV into a DataFrame
ais_df = pd.read_csv(filepath_or_buffer="ml_data_daily_2018.csv")

In [2]:
# Preview the first five rows of the freshly loaded data
ais_df.head(n=5)

Unnamed: 0.1,Unnamed: 0,Fishing,TugTow,Recreational,Passenger,Cargo,Tanker,Other,Unavailable
0,2018/01/01,9,37,145,44,27,13,12,7
1,2018/01/02,13,45,119,41,27,14,19,10
2,2018/01/03,13,44,106,40,36,14,20,11
3,2018/01/04,15,44,103,45,30,10,15,12
4,2018/01/05,10,45,107,41,26,13,21,10


In [3]:
# Rename the 'Unnamed: 0' column (it holds the date strings) to 'Date'.
# The result is rebound rather than mutated with inplace=True so the step
# reads as an explicit transformation and stays chainable.
ais_df = ais_df.rename(columns={'Unnamed: 0': 'Date'})
ais_df.head()

Unnamed: 0,Date,Fishing,TugTow,Recreational,Passenger,Cargo,Tanker,Other,Unavailable
0,2018/01/01,9,37,145,44,27,13,12,7
1,2018/01/02,13,45,119,41,27,14,19,10
2,2018/01/03,13,44,106,40,36,14,20,11
3,2018/01/04,15,44,103,45,30,10,15,12
4,2018/01/05,10,45,107,41,26,13,21,10


In [4]:
# Inspect each column's dtype and non-null count to confirm the data is clean
# before transforming it.
ais_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 365 entries, 0 to 364
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Date          365 non-null    object
 1   Fishing       365 non-null    int64 
 2   TugTow        365 non-null    int64 
 3   Recreational  365 non-null    int64 
 4   Passenger     365 non-null    int64 
 5   Cargo         365 non-null    int64 
 6   Tanker        365 non-null    int64 
 7   Other         365 non-null    int64 
 8   Unavailable   365 non-null    int64 
dtypes: int64(8), object(1)
memory usage: 25.8+ KB


In [5]:
# Parse the 'Date' strings into pandas datetime values
ais_df['Date'] = ais_df['Date'].pipe(pd.to_datetime)

# Confirm the dtype change took effect
ais_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 365 entries, 0 to 364
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   Date          365 non-null    datetime64[ns]
 1   Fishing       365 non-null    int64         
 2   TugTow        365 non-null    int64         
 3   Recreational  365 non-null    int64         
 4   Passenger     365 non-null    int64         
 5   Cargo         365 non-null    int64         
 6   Tanker        365 non-null    int64         
 7   Other         365 non-null    int64         
 8   Unavailable   365 non-null    int64         
dtypes: datetime64[ns](1), int64(8)
memory usage: 25.8 KB


In [6]:
# Add a 'Total' column: the per-day sum over the eight vessel-type counts.
# The columns are listed explicitly (instead of summing every numeric column)
# so that re-running this cell does not fold the existing 'Total' back into
# the sum, and so any numeric column added later cannot silently inflate it.
vessel_type_columns = [
    'Fishing', 'TugTow', 'Recreational', 'Passenger',
    'Cargo', 'Tanker', 'Other', 'Unavailable',
]
ais_df['Total'] = ais_df[vessel_type_columns].sum(axis=1)
ais_df.head()

Unnamed: 0,Date,Fishing,TugTow,Recreational,Passenger,Cargo,Tanker,Other,Unavailable,Total
0,2018-01-01,9,37,145,44,27,13,12,7,294
1,2018-01-02,13,45,119,41,27,14,19,10,288
2,2018-01-03,13,44,106,40,36,14,20,11,284
3,2018-01-04,15,44,103,45,30,10,15,12,274
4,2018-01-05,10,45,107,41,26,13,21,10,273


In [7]:
# Re-inspect dtypes and non-null counts now that the 'Total' column exists
ais_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 365 entries, 0 to 364
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   Date          365 non-null    datetime64[ns]
 1   Fishing       365 non-null    int64         
 2   TugTow        365 non-null    int64         
 3   Recreational  365 non-null    int64         
 4   Passenger     365 non-null    int64         
 5   Cargo         365 non-null    int64         
 6   Tanker        365 non-null    int64         
 7   Other         365 non-null    int64         
 8   Unavailable   365 non-null    int64         
 9   Total         365 non-null    int64         
dtypes: datetime64[ns](1), int64(9)
memory usage: 28.6 KB


In [8]:
# Create nine DataFrames, each holding the 'Date' column plus exactly one
# count column. Every frame uses the same ['Date', <category>] layout so the
# per-category steps that follow can treat them interchangeably.

# Fishing
ais_fishing_df = ais_df.loc[:, ['Date', 'Fishing']]
# TugTow
ais_tugtow_df = ais_df.loc[:, ['Date', 'TugTow']]
# Recreational
ais_recreational_df = ais_df.loc[:, ['Date', 'Recreational']]
# Passenger
ais_passenger_df = ais_df.loc[:, ['Date', 'Passenger']]
# Cargo
ais_cargo_df = ais_df.loc[:, ['Date', 'Cargo']]
# Tanker
ais_tanker_df = ais_df.loc[:, ['Date', 'Tanker']]
# Other
ais_other_df = ais_df.loc[:, ['Date', 'Other']]
# Unavailable
ais_unavailable_df = ais_df.loc[:, ['Date', 'Unavailable']]
# Total
ais_total_df = ais_df.loc[:, ['Date', 'Total']]

In [9]:
# From here on, 'Total' is used as the worked example; the same code is repeated for the rest of the data

# Display the new DataFrame (the 'Date' and 'Total' columns)
ais_total_df

Unnamed: 0,Date,Total
0,2018-01-01,294
1,2018-01-02,288
2,2018-01-03,284
3,2018-01-04,274
4,2018-01-05,273
...,...,...
360,2018-12-27,298
361,2018-12-28,298
362,2018-12-29,306
363,2018-12-30,308


In [10]:
# Chronological 80% / 10% / 10% split into train, validation and test sets.
# Note on naming: total_val_ind is the row position where the validation set
# begins (i.e. the end of training), and total_train_ind is the position where
# the test set begins (i.e. the end of validation).

total_val_ind = int(0.8 * len(ais_total_df))
total_train_ind = int(0.9 * len(ais_total_df))

# Positional slices: [0, val_ind) -> train, [val_ind, train_ind) -> val, rest -> test
total_train_df = ais_total_df.iloc[:total_val_ind]
total_val_df = ais_total_df.iloc[total_val_ind:total_train_ind]
total_test_df = ais_total_df.iloc[total_train_ind:]

In [11]:
# Print the train, validation and test splits one after another
print(total_train_df, total_val_df, total_test_df, sep="\n")

          Date  Total
0   2018-01-01    294
1   2018-01-02    288
2   2018-01-03    284
3   2018-01-04    274
4   2018-01-05    273
..         ...    ...
287 2018-10-15    397
288 2018-10-16    395
289 2018-10-17    378
290 2018-10-18    378
291 2018-10-19    392

[292 rows x 2 columns]
          Date  Total
292 2018-10-20    437
293 2018-10-21    410
294 2018-10-22    399
295 2018-10-23    375
296 2018-10-24    377
297 2018-10-25    373
298 2018-10-26    387
299 2018-10-27    403
300 2018-10-28    387
301 2018-10-29    369
302 2018-10-30    348
303 2018-10-31    353
304 2018-11-01    361
305 2018-11-02    363
306 2018-11-03    392
307 2018-11-04    379
308 2018-11-05    361
309 2018-11-06    335
310 2018-11-07    337
311 2018-11-08    335
312 2018-11-09    345
313 2018-11-10    357
314 2018-11-11    360
315 2018-11-12    353
316 2018-11-13    342
317 2018-11-14    333
318 2018-11-15    334
319 2018-11-16    334
320 2018-11-17    318
321 2018-11-18    303
322 2018-11-19    320
323 2018