## GRU - With the conventional data representation approach (lat, lon)

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import GRU, Dense
from keras.optimizers import Adam
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from keras.callbacks import ModelCheckpoint

import tensorflow as tf
from tensorflow.keras.models import load_model


In [None]:
# Destination CSV for the combined predictions / ground-truth table that the
# final cell of this notebook writes with DataFrame.to_csv.
to_store_path = r'D:\2 Thesis\2 models\2023.07.02 all models\GRU_02\prediction csv\v1_gru_pred_output.csv'

In [None]:
def _load_float32(npy_path):
    """Load the .npy array stored at *npy_path* and cast it to float32."""
    return np.load(npy_path).astype('float32')


# Training split: inputs plus separate latitude / longitude targets
X_train = _load_float32(r'D:\2 Thesis\1 train datasets\1 train geocoordinate\1_10_train\data_2019_train_X_v8_out.npy')
Y_train_lat = _load_float32(r'D:\2 Thesis\1 train datasets\1 train geocoordinate\1_10_train\data_2019_train_Y_lat_v8_out.npy')
Y_train_lon = _load_float32(r'D:\2 Thesis\1 train datasets\1 train geocoordinate\1_10_train\data_2019_train_Y_lon_v8_out.npy')

# Validation split
X_val = _load_float32(r'D:\2 Thesis\1 train datasets\1 train geocoordinate\11_val\data_2019_11_X.npy')
Y_val_lat = _load_float32(r'D:\2 Thesis\1 train datasets\1 train geocoordinate\11_val\data_2019_11_Y_lat.npy')
Y_val_lon = _load_float32(r'D:\2 Thesis\1 train datasets\1 train geocoordinate\11_val\data_2019_11_Y_lon.npy')

# Test split
X_test = _load_float32(r'D:\2 Thesis\1 train datasets\1 train geocoordinate\12_test\data_2019_12_X.npy')
Y_test_lat = _load_float32(r'D:\2 Thesis\1 train datasets\1 train geocoordinate\12_test\data_2019_12_Y_lat.npy')
Y_test_lon = _load_float32(r'D:\2 Thesis\1 train datasets\1 train geocoordinate\12_test\data_2019_12_Y_lon.npy')

In [None]:
## Checkpoint save - lat
# File the latitude model's ModelCheckpoint callback writes to during training;
# the same path is later passed to load_model to restore that model.
checkpoint_filepath_lat = r'D:\2 Thesis\2 models\2023.07.02 all models\GRU_02\best model normal ds\gru_lat_best_model_v1.h5'

## Checkpoint save - lon
# Same role as above, for the longitude model.
checkpoint_filepath_lon = r'D:\2 Thesis\2 models\2023.07.02 all models\GRU_02\best model normal ds\gru_lon_best_model_v1.h5'

In [None]:
from tensorflow.keras.utils import Sequence
import numpy as np   

class DataGenerator(Sequence):
    """Serve aligned ``(x, y)`` mini-batches from two in-memory collections.

    Batches are taken in order (no shuffling); the last batch may be smaller
    than ``batch_size``.

    Args:
        x_set: Sized, sliceable collection of model inputs.
        y_set: Sized, sliceable collection of targets, same length as ``x_set``.
        batch_size: Maximum number of samples per batch (positive integer).
        **kwargs: Forwarded to the Keras ``Sequence`` base-class constructor.

    Raises:
        ValueError: If ``batch_size`` is not positive, or ``x_set`` and
            ``y_set`` have different lengths.
    """

    def __init__(self, x_set, y_set, batch_size, **kwargs):
        # Newer Keras versions expect subclasses to initialise the base class.
        super().__init__(**kwargs)
        if batch_size <= 0:
            raise ValueError(f"batch_size must be positive, got {batch_size}")
        if len(x_set) != len(y_set):
            # Mismatched lengths would silently pair inputs with wrong targets.
            raise ValueError(
                f"x_set and y_set must have the same length, "
                f"got {len(x_set)} and {len(y_set)}"
            )
        self.x, self.y = x_set, y_set
        self.batch_size = batch_size

    def __len__(self):
        """Return the number of batches per epoch (rounded up)."""
        return int(np.ceil(len(self.x) / float(self.batch_size)))

    def __getitem__(self, idx):
        """Return the ``idx``-th ``(inputs, targets)`` batch."""
        start = idx * self.batch_size
        stop = start + self.batch_size
        return self.x[start:stop], self.y[start:stop]


In [None]:
# Every generator uses the same mini-batch size.
GENERATOR_BATCH_SIZE = 200

# Latitude targets
train_gen_lat = DataGenerator(X_train, Y_train_lat, GENERATOR_BATCH_SIZE)
val_gen_lat = DataGenerator(X_val, Y_val_lat, GENERATOR_BATCH_SIZE)

# Longitude targets (same inputs, different target arrays)
train_gen_lon = DataGenerator(X_train, Y_train_lon, GENERATOR_BATCH_SIZE)
val_gen_lon = DataGenerator(X_val, Y_val_lon, GENERATOR_BATCH_SIZE)

In [None]:
## the GRU model
def gru_model(n_steps_input, n_steps_output, n_features):
    """Build an uncompiled two-layer stacked GRU regressor.

    Args:
        n_steps_input: Number of time steps in each input window.
        n_steps_output: Number of values the final Dense layer emits.
        n_features: Number of features per input time step.

    Returns:
        A Keras ``Sequential`` model: GRU(20, full sequence) -> GRU(40) ->
        Dense(n_steps_output).
    """
    # First recurrent layer returns the whole sequence so it can feed the second GRU.
    first_gru = GRU(
        20,
        activation="tanh",
        input_shape=(n_steps_input, n_features),
        return_sequences=True,
    )
    # Second recurrent layer keeps only its final state.
    second_gru = GRU(40, activation="tanh")
    # Linear read-out, one unit per predicted step.
    output_layer = Dense(n_steps_output)
    return Sequential([first_gru, second_gru, output_layer])



In [None]:
# Print the layer / parameter table of a throwaway model built with
# 10 input steps, 10 output steps and 8 features.
# NOTE(review): Keras `Model.summary()` prints the table and returns None, so
# `model_summary` holds None rather than the summary text.
model_summary = gru_model(10,10,8).summary()

In [None]:
# Report how many GPUs TensorFlow can see on this machine.
visible_gpus = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(visible_gpus))

In [None]:
# Latitude model: 10 input steps of 8 features -> 10 predicted values.
model_lat = gru_model(n_steps_input=10, n_steps_output=10, n_features=8)
model_lat.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss='mean_squared_error',
    metrics=['mae'],
)

# Save to disk only when the validation loss improves.
checkpoint_lat = ModelCheckpoint(
    checkpoint_filepath_lat,
    monitor='val_loss',
    save_best_only=True,
    mode='min',
)

# Fit on the latitude generators; the history object feeds the curve plots.
history_lat = model_lat.fit(
    train_gen_lat,
    validation_data=val_gen_lat,
    epochs=10,
    verbose=1,
    callbacks=[checkpoint_lat],
)


In [None]:
# One figure per tracked quantity for the latitude model:
# (training key, validation key, training label, validation label, y-axis label)
lat_curve_specs = (
    ('loss', 'val_loss', 'Training loss', 'Validation loss', 'Loss'),
    ('mae', 'val_mae', 'Training error', 'Validation error', 'Error'),
)
for train_key, val_key, train_label, val_label, y_label in lat_curve_specs:
    plt.plot(history_lat.history[train_key], label=train_label)
    plt.plot(history_lat.history[val_key], label=val_label)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.legend()
    plt.show()

In [None]:
## Loading best latitude model
# Restore the model that ModelCheckpoint (save_best_only=True, monitor='val_loss')
# wrote to checkpoint_filepath_lat during training.
best_model_lat = load_model(checkpoint_filepath_lat)
# Run inference on the test inputs; the error metric is computed in a later cell.
Y_test_pred_lat = best_model_lat.predict(X_test)

In [None]:
# Drop all size-1 axes from the latitude targets.
# NOTE(review): presumably this turns (samples, steps, 1) into (samples, steps) to
# match the prediction array — confirm the stored shape.
Y_test_lat = Y_test_lat.squeeze()

In [None]:
# Mean squared error between latitude targets and predictions.
# NOTE(review): computed before the later conversion back to degrees, so the value
# is presumably in the scaled target units rather than degrees — confirm.
mse_lat = mean_squared_error(Y_test_lat, Y_test_pred_lat)
print("Mean Squared Error for lat predictions:", mse_lat)

### LONGITUDE PREDICTIONS - GRU

In [None]:
# Longitude model: same architecture and hyper-parameters as the latitude model.
model_lon = gru_model(n_steps_input=10, n_steps_output=10, n_features=8)
model_lon.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss='mean_squared_error',
    metrics=['mae'],
)

# Save to disk only when the validation loss improves.
checkpoint_lon = ModelCheckpoint(
    checkpoint_filepath_lon,
    monitor='val_loss',
    save_best_only=True,
    mode='min',
)

# Fit on the longitude generators; the history object feeds the curve plots.
history_lon = model_lon.fit(
    train_gen_lon,
    validation_data=val_gen_lon,
    epochs=10,
    verbose=1,
    callbacks=[checkpoint_lon],
)

In [None]:
# One figure per tracked quantity for the longitude model:
# (training key, validation key, training label, validation label, y-axis label)
lon_curve_specs = (
    ('loss', 'val_loss', 'Training loss', 'Validation loss', 'Loss'),
    ('mae', 'val_mae', 'Training error', 'Validation error', 'Error'),
)
for train_key, val_key, train_label, val_label, y_label in lon_curve_specs:
    plt.plot(history_lon.history[train_key], label=train_label)
    plt.plot(history_lon.history[val_key], label=val_label)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.legend()
    plt.show()

In [None]:
## Loading best longitude model
# Restore the model that ModelCheckpoint (save_best_only=True, monitor='val_loss')
# wrote to checkpoint_filepath_lon during training.
best_model_lon = load_model(checkpoint_filepath_lon)
# Run inference on the test inputs; the error metric is computed in a later cell.
Y_test_pred_lon = best_model_lon.predict(X_test)

In [None]:
# Equalize shapes: drop all size-1 axes from the longitude targets.
# NOTE(review): presumably this turns (samples, steps, 1) into (samples, steps) to
# match the prediction array — confirm the stored shape.
Y_test_lon = Y_test_lon.squeeze()

In [None]:
# Mean squared error between longitude targets and predictions.
# NOTE(review): computed before the later conversion back to degrees, so the value
# is presumably in the scaled target units rather than degrees — confirm.
mse_lon = mean_squared_error(Y_test_lon, Y_test_pred_lon)
print("Mean Squared Error Y_test_pred_lon:", mse_lon)

In [None]:
# Flatten each prediction array into one long 1-D vector (row-major order).
Y_pred_lat_flat = Y_test_pred_lat.flatten()
Y_pred_lon_flat = Y_test_pred_lon.flatten()

# Wrap each vector in a single-column frame ...
Y_pred_lat_df = pd.DataFrame({'lat_s_pred': Y_pred_lat_flat})
Y_pred_lon_df = pd.DataFrame({'lon_s_pred': Y_pred_lon_flat})

# ... and place the two columns side by side.
Y_pred_df = pd.concat([Y_pred_lat_df, Y_pred_lon_df], axis=1)

In [None]:
# Flatten each ground-truth array into one long 1-D vector (row-major order).
Y_test_lat_flat = Y_test_lat.flatten()
Y_test_lon_flat = Y_test_lon.flatten()

# Wrap each vector in a single-column frame ...
Y_test_lat_df = pd.DataFrame({'lat_s_test': Y_test_lat_flat})
Y_test_lon_df = pd.DataFrame({'lon_s_test': Y_test_lon_flat})

# ... and place the two columns side by side.
Y_test_df = pd.concat([Y_test_lat_df, Y_test_lon_df], axis=1)

In [None]:
# Map the scaled values back to latitude / longitude degrees:
# degrees = scaled * (max - min) + min

min_lat, max_lat = 58.6, 59.93
min_lon, max_lon = 9.4, 11.45

# (frame, scaled lat column, output lat column, scaled lon column, output lon column)
rescale_specs = (
    (Y_pred_df, 'lat_s_pred', 'lat_pred', 'lon_s_pred', 'lon_pred'),
    (Y_test_df, 'lat_s_test', 'lat_test', 'lon_s_test', 'lon_test'),
)
for frame, lat_scaled, lat_out, lon_scaled, lon_out in rescale_specs:
    frame[lat_out] = frame[lat_scaled] * (max_lat - min_lat) + min_lat
    frame[lon_out] = frame[lon_scaled] * (max_lon - min_lon) + min_lon


In [None]:
# Put predictions and ground truth side by side (column-wise concat aligned on
# the row index), so each row holds one predicted point and its target.
Y_test_and_pred_df = pd.concat([Y_pred_df, Y_test_df], axis=1)

In [None]:
from haversine import haversine, Unit

def _row_deviation_m(row):
    """Return the haversine distance in metres between a row's predicted and true point."""
    predicted_point = (row['lat_pred'], row['lon_pred'])
    actual_point = (row['lat_test'], row['lon_test'])
    return haversine(predicted_point, actual_point, unit=Unit.METERS)


# Row-wise great-circle error between prediction and ground truth.
Y_test_and_pred_df['deviation_dist_m'] = Y_test_and_pred_df.apply(_row_deviation_m, axis=1)

In [None]:
# Aggregate displacement error over every predicted point (all samples, all steps).
deviation_m = Y_test_and_pred_df['deviation_dist_m']
mean_deviation, median_deviation = deviation_m.mean(), deviation_m.median()

print('GRU_model mean_displacement_error:', mean_deviation)
print('GRU_model median_displacement_error:', median_deviation)

In [None]:
# Mean displacement error per prediction step.
# The 10-step model outputs were flattened sample by sample, so step k of every
# sample sits at row positions k, k + 10, k + 20, ...  The stride therefore has
# to be 10; the previous stride of 9 mixed different steps into each average.
steps_per_sample = 10
step_means = []
for step in range(steps_per_sample):
    step_mean = Y_test_and_pred_df['deviation_dist_m'].iloc[step::steps_per_sample].mean()
    step_means.append(step_mean)


In [None]:
import matplotlib.pyplot as plt

# Mean displacement error per prediction step, then plot it.
# Rows are laid out sample by sample with 10 consecutive steps each, so step k
# lives at row positions k, k + 10, ...  A stride of 9 (as used before) would
# average over a mixture of different steps.
steps_per_sample = 10
deviation_m = Y_test_and_pred_df['deviation_dist_m']
step_means = [
    deviation_m.iloc[step::steps_per_sample].mean()
    for step in range(steps_per_sample)
]

# x axis is the 1-based step number.
plt.plot(range(1, steps_per_sample + 1), step_means)
plt.xlabel('Step')
plt.ylabel('Mean Error (m)')
plt.title('Mean Error for each prediction step')
# plt.ylim(0, 200)
plt.show()


In [None]:
import matplotlib.pyplot as plt

# Median displacement error per prediction step, then plot it.
# Rows are laid out sample by sample with 10 consecutive steps each, so step k
# lives at row positions k, k + 10, ...  A stride of 9 (as used before) would
# take the median over a mixture of different steps.
steps_per_sample = 10
deviation_m = Y_test_and_pred_df['deviation_dist_m']
step_medians = [
    deviation_m.iloc[step::steps_per_sample].median()
    for step in range(steps_per_sample)
]

# x axis is the 1-based step number.
plt.plot(range(1, steps_per_sample + 1), step_medians)
plt.xlabel('Step')
plt.ylabel('Median Error (m)')
plt.title('Median Error for each prediction step')
# plt.ylim(0,125)
plt.show()


In [None]:

# Final displacement error: mean error of the last (10th) predicted step of each
# sample. With 10 rows per sample the last step sits at positions 9, 19, 29, ...,
# i.e. start 9 with stride 10; the former stride of 9 picked rows from other steps.
steps_per_sample = 10
step_FDE = Y_test_and_pred_df['deviation_dist_m'].iloc[steps_per_sample - 1::steps_per_sample].mean()
print('Final displacement error (meters) is:', step_FDE)

In [None]:
# Label every row with the block of 10 consecutive rows it belongs to and its
# 0-based position inside that block.
# NOTE(review): 'voyage_id' is derived from the row index, so it relies on the
# frame still having its default 0..n-1 index; it presumably identifies one
# 10-step prediction window rather than a real voyage — confirm.
Y_test_and_pred_df['voyage_id'] = Y_test_and_pred_df.index // 10
Y_test_and_pred_df['step'] = np.arange(len(Y_test_and_pred_df)) % 10


In [None]:
# Write the full predictions / ground-truth / error table to the CSV path set at
# the top of the notebook (pandas also writes the row index by default).
Y_test_and_pred_df.to_csv(to_store_path)
