In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder
import pandas as pd
import tensorflow as tf
import h5py
import os
from tensorflow.keras.callbacks import ModelCheckpoint

# Load the 2018 daily vessel counts; the first CSV column (the date) becomes the index
ais_2018 = pd.read_csv(
    "ml_data_daily_2018.csv",
    index_col=0,
)

In [2]:
# Preview the first five daily rows of the 2018 data
ais_2018.head(n=5)

Unnamed: 0,Fishing,TugTow,Recreational,Passenger,Cargo,Tanker,Other,Unavailable
1/1/2018,9,37,145,44,27,13,12,7
1/2/2018,13,45,119,41,27,14,19,10
1/3/2018,13,44,106,40,36,14,20,11
1/4/2018,15,44,103,45,30,10,15,12
1/5/2018,10,45,107,41,26,13,21,10


In [3]:
# Check the info of the 2018 dataframe: index range, per-column non-null counts,
# dtypes and memory usage
ais_2018.info()

<class 'pandas.core.frame.DataFrame'>
Index: 365 entries, 1/1/2018 to 12/31/2018
Data columns (total 8 columns):
 #   Column        Non-Null Count  Dtype
---  ------        --------------  -----
 0   Fishing       365 non-null    int64
 1   TugTow        365 non-null    int64
 2   Recreational  365 non-null    int64
 3   Passenger     365 non-null    int64
 4   Cargo         365 non-null    int64
 5   Tanker        365 non-null    int64
 6   Other         365 non-null    int64
 7   Unavailable   365 non-null    int64
dtypes: int64(8)
memory usage: 25.7+ KB


In [4]:
# Creating a total column for our 2018 data: the row-wise sum of the vessel counts.
# Any existing 'Total' column is excluded from the sum first, so re-running this
# cell does not fold the previous total back into the new one.
ais_2018['Total'] = ais_2018.drop(columns='Total', errors='ignore').sum(axis=1)
ais_2018.head()

Unnamed: 0,Fishing,TugTow,Recreational,Passenger,Cargo,Tanker,Other,Unavailable,Total
1/1/2018,9,37,145,44,27,13,12,7,294
1/2/2018,13,45,119,41,27,14,19,10,288
1/3/2018,13,44,106,40,36,14,20,11,284
1/4/2018,15,44,103,45,30,10,15,12,274
1/5/2018,10,45,107,41,26,13,21,10,273


In [5]:
# Build the target frame: drop every per-vessel-type count so that only the
# index and the Total column remain
vessel_type_columns = [
    "Fishing", "TugTow", "Recreational", "Passenger",
    "Cargo", "Tanker", "Other", "Unavailable",
]
ais_2018_y = ais_2018.drop(columns=vessel_type_columns)
ais_2018_y.head()

Unnamed: 0,Total
1/1/2018,294
1/2/2018,288
1/3/2018,284
1/4/2018,274
1/5/2018,273


In [6]:
#ais_2018_y["5days"] = ais_2018_y["Total"].rolling(5).mean()
#ais_2018_y["50days"] = ais_2018_y["Total"].rolling(50).mean()
# 

In [7]:
#ais_2018_y.dropna().plot()

In [8]:
# technical indicator
# Facebook Prophet
# finta
# RNN
# LSTM


In [9]:
# Build the feature frame: keep the index and every column except Total
ais_2018_X = ais_2018.drop(columns="Total")
ais_2018_X.head()

Unnamed: 0,Fishing,TugTow,Recreational,Passenger,Cargo,Tanker,Other,Unavailable
1/1/2018,9,37,145,44,27,13,12,7
1/2/2018,13,45,119,41,27,14,19,10
1/3/2018,13,44,106,40,36,14,20,11
1/4/2018,15,44,103,45,30,10,15,12
1/5/2018,10,45,107,41,26,13,21,10


In [10]:
# Name the feature (X) and target (y) frames
X, y = ais_2018_X, ais_2018_y

# Partition features and target into training and testing sets;
# the fixed random_state makes the split repeatable
split_parts = train_test_split(X, y, random_state=24)
X_train, X_test, y_train, y_test = split_parts

In [11]:
# Create a StandardScaler instance
scaler = StandardScaler()

# Fit it on the training features only (fit returns the fitted scaler itself)
X_scaler = scaler.fit(X_train)

# Apply the fitted scaling to both the training and the testing features
X_train_scaled, X_test_scaled = (
    X_scaler.transform(features) for features in (X_train, X_test)
)

In [12]:
# Filename template for the saved weights; the {epoch:02d} placeholder is kept
# literal in this string
checkpoint_path = "checkpoints/weights.{epoch:02d}.hdf5"

# Make sure the checkpoint directory exists (no error if it is already there)
os.makedirs("checkpoints/", exist_ok=True)

In [13]:
# Create a callback that saves the model's weights every fifth epoch.
# An integer save_freq is counted in batches, not epochs, so five epochs are
# converted into a batch count here. The Keras default batch size of 32 is
# assumed; if fit() is given a different batch_size, update the 32 below to match.
batches_per_epoch = -(-len(X_train_scaled) // 32)  # ceiling division
cp_callback = ModelCheckpoint(
    filepath=checkpoint_path,
    verbose=1,
    save_weights_only=True,
    save_freq=5 * batches_per_epoch,
)

In [29]:
# NOTE(review): hidden_nodes_layer1 is assigned in the model-definition cell further down;
# on a fresh kernel this cell raises NameError unless that cell has been run first.
hidden_nodes_layer1

8

In [42]:
# Number of feature columns in the training frame
len(X_train.columns)

8

In [57]:
# Define the model - a deep neural net that regresses the daily Total from the
# per-vessel-type counts. The input width comes from the training features and
# the hidden layers narrow from 8 down to 2 nodes.
number_input_features = X_train.shape[1]
hidden_nodes_layer1 = 8
hidden_nodes_layer2 = 6
hidden_nodes_layer3 = 4
hidden_nodes_layer4 = 2

nn = tf.keras.models.Sequential()

# First hidden layer (also declares the number of input features)
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu"))

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))

# Third hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer3, activation="relu"))

# Fourth hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer4, activation="relu"))

# Output layer: one linear unit, because the target is a continuous count.
# Softmax over a single unit normalises to exactly 1.0 for every input, so the
# network could never predict anything else. Pair this output with a regression
# loss (e.g. mean squared error) when compiling.
nn.add(tf.keras.layers.Dense(units=1, activation="linear"))

# Check the structure of the model
nn.summary()

Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_35 (Dense)            (None, 8)                 72        
                                                                 
 dense_36 (Dense)            (None, 6)                 54        
                                                                 
 dense_37 (Dense)            (None, 4)                 28        
                                                                 
 dense_38 (Dense)            (None, 2)                 10        
                                                                 
 dense_39 (Dense)            (None, 1)                 3         
                                                                 
Total params: 167
Trainable params: 167
Non-trainable params: 0
_________________________________________________________________


In [58]:
# Compile the model.
# The target (Total) is a continuous daily count, so train with a regression
# loss and report mean absolute error; binary cross-entropy and accuracy are
# only meaningful for 0/1 class labels.
nn.compile(loss="mean_squared_error", optimizer="adam", metrics=["mae"])

In [59]:
# Scratch inspection: display the tf.keras.losses.Loss class object
tf.keras.losses.Loss

keras.losses.Loss

In [60]:
# Report how many rows the scaled training set has
print(X_train_scaled.shape[0])

273


In [61]:
# Train the model for 20 epochs, handing the checkpoint callback to fit() so
# that weights are actually written during training (a callback that is never
# passed to fit() does nothing).
# batch_size is the Keras default, spelled out because an integer
# ModelCheckpoint save_freq is counted in batches.
fit_model = nn.fit(
    X_train_scaled,
    y_train,
    epochs=20,
    batch_size=32,
    callbacks=[cp_callback],
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [36]:
# Sanity check: (rows, feature columns) of the scaled training matrix
X_train_scaled.shape

(273, 8)

In [37]:
# Sanity check: (rows, columns) of the training target frame
y_train.shape

(273, 1)

In [None]:
# # Saving this for later
# # Generate our categorical variable lists
# ais_2018_cat = ais_2018.dtypes[ais_2018.dtypes == "int64"].index.tolist()

In [None]:
# ## Saving this for later

# # Create a OneHotEncoder instance
# enc = OneHotEncoder(sparse=False)

# # Fit and transform the OneHotEncoder using the categorical variable list
# encode_df = pd.DataFrame(enc.fit_transform(ais_2018[ais_2018_cat]))

# # Add the encoded variable names to the dataframe
# encode_df.columns = enc.get_feature_names(ais_2018_cat)
# encode_df.head()