In [42]:
df = pd.read_csv('/content/segmented_trajectories.csv')
df.head()

Unnamed: 0,BaseDateTime,LAT,LON,MMSI,PatternDescriptor,Pattern_High Speed,Pattern_Slow Movement,Pattern_Stationary,SOG,SegmentID
0,2022-03-31 00:00:17,26.11809,-80.14815,1056261,Stationary,,,1.0,0.1,0
1,2022-03-31 00:00:27,26.11809,-80.148148,1056261,Stationary,,,1.0,0.083607,0
2,2022-03-31 00:00:37,26.11809,-80.148147,1056261,Stationary,,,1.0,0.067213,0
3,2022-03-31 00:00:47,26.11809,-80.148145,1056261,Stationary,,,1.0,0.05082,0
4,2022-03-31 00:00:57,26.11809,-80.148143,1056261,Stationary,,,1.0,0.034426,0


In [45]:
df['Pattern_High Speed'].unique()

array([nan,  1.])

In [46]:
df[['Pattern_High Speed',	'Pattern_Slow Movement']] = df[['Pattern_High Speed',	'Pattern_Slow Movement']].fillna(0)

In [None]:
import numpy as np
import pandas as pd
from tqdm import tqdm  # Correct import of tqdm

from sklearn.model_selection import KFold

def k_fold_sequence_to_sequence(df, window_size, prediction_horizon, k=5, resample_interval=10):
    """
    Apply K-Fold Cross-Validation for sequence-to-sequence data preparation.

    Parameters:
    - df: Pandas DataFrame containing AIS data with columns 'BaseDateTime', 'LAT', 'LON', 'SOG', and one-hot encoded pattern descriptors.
    - window_size: The number of data points in each input sequence (length of observed states X_k,).
    - prediction_horizon: The number of time steps to predict (length of target sequence Y_k,h).
    - k: Number of folds for cross-validation.
    - resample_interval: Resampling interval for the AIS data, in seconds (assumed to be already applied in previous steps).

    Returns:
    - folds_train: List of training data for each fold (X_train, Y_train).
    - folds_val: List of validation data for each fold (X_val, Y_val).
    - journey_descriptors: List of journey descriptors for each sequence.
    """
    folds_train = []
    folds_val = []
    journey_descriptors = []

    # Step 1: Prepare the K-Fold split
    kf = KFold(n_splits=k, shuffle=True, random_state=42)

    # Group by MMSI
    grouped = df.groupby('MMSI')

    # Store all sequences in a list (we will split this into k-folds)
    all_sequences_X = []
    all_sequences_Y = []
    all_journey_descriptors = []

    # Process each MMSI group
    for mmsi, group in tqdm(grouped, desc="Processing MMSI", unit="MMSI"):  # Corrected tqdm usage
        group = group.sort_values(by='BaseDateTime')
        group['BaseDateTime'] = pd.to_datetime(group['BaseDateTime'])

        # Check if there's enough data for the sequence-to-sequence model
        if len(group) < window_size + prediction_horizon:
            continue  # Skip this MMSI if it doesn't have enough data

        # Step 2: One-hot encode the pattern descriptors
        one_hot_columns = ['Pattern_Stationary', 'Pattern_Slow Movement', 'Pattern_High Speed']

        # Step 3: Create sliding windows for sequences
        for start in range(0, len(group) - window_size - prediction_horizon + 1):
            # X_k, input sequence: past `window_size` time steps
            X_seq = group.iloc[start:start + window_size][['LAT', 'LON', 'SOG'] + one_hot_columns].values

            # Y_k,h output sequence: next `prediction_horizon` time steps
            Y_seq = group.iloc[start + window_size:start + window_size + prediction_horizon][['LAT', 'LON', 'SOG']].values

            # Append the sequences and the journey descriptor
            all_sequences_X.append(X_seq)
            all_sequences_Y.append(Y_seq)
            all_journey_descriptors.append(group['PatternDescriptor'].iloc[start + window_size])

    # Convert sequences into numpy arrays for model training
    all_sequences_X = np.array(all_sequences_X)
    all_sequences_Y = np.array(all_sequences_Y)
    all_journey_descriptors = np.array(all_journey_descriptors)

    # Step 4: Perform K-Fold Cross Validation
    for train_index, val_index in kf.split(all_sequences_X):
        # Split into training and validation sets for each fold
        X_train, X_val = all_sequences_X[train_index], all_sequences_X[val_index]
        Y_train, Y_val = all_sequences_Y[train_index], all_sequences_Y[val_index]

        # Store training and validation sets for each fold
        folds_train.append((X_train, Y_train))
        folds_val.append((X_val, Y_val))

        # Store journey descriptors for each fold
        journey_descriptors.append(all_journey_descriptors[val_index])

    return folds_train, folds_val, journey_descriptors


# Define the window size, prediction horizon, and the number of folds
window_size = 10  # Number of observed states
prediction_horizon = 5  # Number of time steps to predict
k = 5  # Number of folds

# Load the data from the CSV file (already preprocessed)
file_path = '/content/segmented_trajectories.csv'
ais_data = pd.read_csv(file_path)

# Apply K-Fold Cross-Validation to prepare data
folds_train, folds_val, journey_descriptors = k_fold_sequence_to_sequence(
    ais_data, window_size, prediction_horizon, k=k
)

# Print out the shapes of the sequences for the first fold
X_train, Y_train = folds_train[0]
X_val, Y_val = folds_val[0]

print(f"Training Input Shape: {X_train.shape}")
print(f"Training Output Shape: {Y_train.shape}")
print(f"Validation Input Shape: {X_val.shape}")
print(f"Validation Output Shape: {Y_val.shape}")


Processing MMSI: 100%|██████████| 1482/1482 [00:06<00:00, 216.54MMSI/s]


Training Input Shape: (4537, 10, 6)
Training Output Shape: (4537, 5, 3)
Validation Input Shape: (1135, 10, 6)
Validation Output Shape: (1135, 5, 3)


In [47]:
import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler

def k_fold_sequence_to_sequence(df, window_size, prediction_horizon, k=5):
    """
    Apply K-Fold Cross-Validation for sequence-to-sequence data preparation with scaling.

    Parameters:
    - df: Pandas DataFrame containing AIS data.
    - window_size: Number of time steps in each input sequence.
    - prediction_horizon: Number of time steps to predict.
    - k: Number of folds for cross-validation.

    Returns:
    - folds_train: List of training data for each fold (X_train, Y_train).
    - folds_val: List of validation data for each fold (X_val, Y_val).
    - journey_descriptors: List of journey descriptors for validation sequences.
    """
    folds_train = []
    folds_val = []
    journey_descriptors = []

    # Group by MMSI
    grouped = df.groupby('MMSI')

    # Store all sequences
    all_sequences_X = []
    all_sequences_Y = []
    all_journey_descriptors = []

    # Define features for scaling
    continuous_features = ['LAT', 'LON', 'SOG']

    # Initialize scaler
    scaler = StandardScaler()

    # Fit the scaler on the entire dataset
    df_continuous = df[continuous_features]
    scaler.fit(df_continuous)

    # Process each MMSI group
    for mmsi, group in tqdm(grouped, desc="Processing MMSI", unit="MMSI"):
        group = group.sort_values(by='BaseDateTime')
        group['BaseDateTime'] = pd.to_datetime(group['BaseDateTime'])

        # Scale continuous features
        group[continuous_features] = scaler.transform(group[continuous_features])

        # Check if the group has enough data for the sequence-to-sequence model
        if len(group) < window_size + prediction_horizon:
            continue

        # Generate sliding windows
        for start in range(0, len(group) - window_size - prediction_horizon + 1):
            # Input sequence
            X_seq = group.iloc[start:start + window_size][continuous_features].values

            # Output sequence
            Y_seq = group.iloc[start + window_size:start + window_size + prediction_horizon][continuous_features].values

            # Append to sequences
            all_sequences_X.append(X_seq)
            all_sequences_Y.append(Y_seq)
            all_journey_descriptors.append(group['PatternDescriptor'].iloc[start + window_size])

    # Convert sequences into numpy arrays
    all_sequences_X = np.array(all_sequences_X)
    all_sequences_Y = np.array(all_sequences_Y)
    all_journey_descriptors = np.array(all_journey_descriptors)

    # Apply K-Fold Cross Validation
    kf = KFold(n_splits=k, shuffle=True, random_state=42)
    for train_index, val_index in kf.split(all_sequences_X):
        X_train, X_val = all_sequences_X[train_index], all_sequences_X[val_index]
        Y_train, Y_val = all_sequences_Y[train_index], all_sequences_Y[val_index]
        folds_train.append((X_train, Y_train))
        folds_val.append((X_val, Y_val))
        journey_descriptors.append(all_journey_descriptors[val_index])

    return folds_train, folds_val, journey_descriptors


# Define parameters
window_size = 10  # Number of observed states
prediction_horizon = 5  # Number of time steps to predict
k = 5  # Number of folds

# Load the data
file_path = '/content/segmented_trajectories.csv'
ais_data = pd.read_csv(file_path)
ais_data[['Pattern_High Speed',	'Pattern_Slow Movement']] = ais_data[['Pattern_High Speed',	'Pattern_Slow Movement']].fillna(0)

# Ensure proper datetime formatting
ais_data['BaseDateTime'] = pd.to_datetime(ais_data['BaseDateTime'])

# Check for and handle null values
if ais_data.isnull().any().any():
    print("Null values detected. Dropping rows with null values...")
    ais_data = ais_data.dropna()
    print(f"Remaining rows after dropping nulls: {len(ais_data)}")

# Apply K-Fold Cross-Validation with scaling
folds_train, folds_val, journey_descriptors = k_fold_sequence_to_sequence(
    ais_data, window_size, prediction_horizon, k=k
)

# Output the shapes of the sequences for the first fold
X_train, Y_train = folds_train[0]
X_val, Y_val = folds_val[0]

print(f"Training Input Shape: {X_train.shape}")
print(f"Training Output Shape: {Y_train.shape}")
print(f"Validation Input Shape: {X_val.shape}")
print(f"Validation Output Shape: {Y_val.shape}")


# Check for NaN or infinite values in training and validation data
print(f"NaN in X_train: {np.isnan(X_train).any()}")
print(f"NaN in Y_train: {np.isnan(Y_train).any()}")
print(f"NaN in X_val: {np.isnan(X_val).any()}")
print(f"NaN in Y_val: {np.isnan(Y_val).any()}")

print(f"Infinite in X_train: {np.isinf(X_train).any()}")
print(f"Infinite in Y_train: {np.isinf(Y_train).any()}")
print(f"Infinite in X_val: {np.isinf(X_val).any()}")
print(f"Infinite in Y_val: {np.isinf(Y_val).any()}")


Null values detected. Dropping rows with null values...
Remaining rows after dropping nulls: 16035


Processing MMSI: 100%|██████████| 982/982 [00:06<00:00, 151.56MMSI/s]

Training Input Shape: (3118, 10, 3)
Training Output Shape: (3118, 5, 3)
Validation Input Shape: (780, 10, 3)
Validation Output Shape: (780, 5, 3)
NaN in X_train: False
NaN in Y_train: False
NaN in X_val: False
NaN in Y_val: False
Infinite in X_train: False
Infinite in Y_train: False
Infinite in X_val: False
Infinite in Y_val: False





In [52]:
import tensorflow as tf
import numpy as np
# Define the Seq2Seq model with Attention
class Seq2SeqWithAttention(tf.keras.Model):
    def __init__(self, input_dim, output_dim, latent_dim, num_layers, timesteps_input, timesteps_output):
        super(Seq2SeqWithAttention, self).__init__()
        self.timesteps_input = timesteps_input
        self.timesteps_output = timesteps_output

        self.encoder_lstm = tf.keras.layers.LSTM(latent_dim, return_state=True, return_sequences=True)
        self.decoder_lstm = tf.keras.layers.LSTM(latent_dim, return_state=True, return_sequences=True)

        self.attention_dense = tf.keras.layers.Dense(1)
        self.attention_softmax = tf.keras.layers.Softmax(axis=1)

        self.output_dense = tf.keras.layers.Dense(output_dim)

    def call(self, inputs, training=False):
        encoder_inputs, decoder_inputs = inputs

        # Encoder
        encoder_outputs, state_h, state_c = self.encoder_lstm(encoder_inputs)  # (batch_size, timesteps_input, latent_dim)

        # Decoder
        decoder_outputs, _, _ = self.decoder_lstm(decoder_inputs, initial_state=[state_h, state_c])  # (batch_size, timesteps_decoder, latent_dim)

        # Attention mechanism
        attention_weights = self.attention_score(encoder_outputs, decoder_outputs)  # (batch_size, timesteps_decoder, timesteps_input)

        # Compute context vector
        attention_weights_expanded = tf.expand_dims(attention_weights, axis=-1)  # (batch_size, timesteps_decoder, timesteps_input, 1)
        encoder_outputs_expanded = tf.expand_dims(encoder_outputs, axis=1)  # (batch_size, 1, timesteps_input, latent_dim)
        context_vector = tf.reduce_sum(encoder_outputs_expanded * attention_weights_expanded, axis=2)  # (batch_size, timesteps_decoder, latent_dim)

        # Combine context vector and decoder outputs
        decoder_combined_context = tf.concat([context_vector, decoder_outputs], axis=-1)  # (batch_size, timesteps_decoder, 2 * latent_dim)

        # Output layer
        outputs = self.output_dense(decoder_combined_context)  # (batch_size, timesteps_decoder, output_dim)

        return outputs


    def attention_score(self, encoder_outputs, decoder_outputs):
        timesteps_decoder = tf.shape(decoder_outputs)[1]

        # Tile decoder outputs to match the encoder sequence length
        decoder_expanded = tf.expand_dims(decoder_outputs, axis=2)  # (batch_size, timesteps_decoder, 1, latent_dim)
        decoder_tiled = tf.tile(decoder_expanded, [1, 1, self.timesteps_input, 1])  # (batch_size, timesteps_decoder, timesteps_input, latent_dim)

        # Expand encoder outputs to match decoder timesteps
        encoder_expanded = tf.expand_dims(encoder_outputs, axis=1)  # (batch_size, 1, timesteps_input, latent_dim)
        encoder_tiled = tf.tile(encoder_expanded, [1, timesteps_decoder, 1, 1])  # (batch_size, timesteps_decoder, timesteps_input, latent_dim)

        # Concatenate encoder and decoder outputs
        concat = tf.concat([encoder_tiled, decoder_tiled], axis=-1)  # (batch_size, timesteps_decoder, timesteps_input, 2 * latent_dim)

        # Compute attention scores
        attention_scores = self.attention_dense(concat)  # (batch_size, timesteps_decoder, timesteps_input, 1)
        attention_scores = tf.squeeze(attention_scores, axis=-1)  # (batch_size, timesteps_decoder, timesteps_input)

        # Apply softmax to calculate attention weights
        attention_weights = self.attention_softmax(attention_scores)  # (batch_size, timesteps_decoder, timesteps_input)

        return attention_weights



# Parameters
input_dim = X_train.shape[2]  # Number of features in the input sequence (e.g., 6)
output_dim = Y_train.shape[2]  # Number of features in the output sequence (e.g., 3)
latent_dim = 64  # Latent dimension for LSTM layers
num_layers = 2  # Number of LSTM layers
timesteps_input = X_train.shape[1]  # Window size
timesteps_output = Y_train.shape[1]  # Prediction horizon

# Initialize the model
model = Seq2SeqWithAttention(
    input_dim=input_dim,
    output_dim=output_dim,
    latent_dim=latent_dim,
    num_layers=num_layers,
    timesteps_input=timesteps_input,
    timesteps_output=timesteps_output
)

# Define the custom R² metric
def r2_score(y_true, y_pred):
    # Calculate the residual sum of squares
    ss_res = tf.reduce_sum(tf.square(y_true - y_pred), axis=None)
    # Calculate the total sum of squares
    y_true_mean = tf.reduce_mean(y_true, axis=None)
    ss_tot = tf.reduce_sum(tf.square(y_true - y_true_mean), axis=None)
    # Calculate R² score
    r2 = 1 - (ss_res / (ss_tot + tf.keras.backend.epsilon()))
    return r2


# Compile the model with the R² metric
model.compile(optimizer='adam', loss='mse', metrics=[r2_score])

# Train the model
history = model.fit(
    [X_train, Y_train[:, :-1, :]],  # Encoder input: X_train, Decoder input: Y_train[:, :-1, :]
    Y_train[:, 1:, :],  # Actual target sequence for training
    epochs=15,
    batch_size=32,
    validation_data=([X_val, Y_val[:, :-1, :]], Y_val[:, 1:, :]),  # For validation, use the same format
    verbose=1
)


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [53]:
# Evaluate the model and capture loss and metrics
test_results = model.evaluate([X_val, Y_val[:, :-1, :]], Y_val[:, 1:, :], verbose=1)
test_loss, test_r2 = test_results

# Print test loss and R² score
print(f"Test Loss: {test_loss}")
print(f"Test R² Score (Accuracy): {test_r2}")

Test Loss: 0.0009903283789753914
Test R² Score (Accuracy): 0.9987214803695679


In [54]:
import tensorflow as tf
import numpy as np

# Define the Seq2Seq model with Attention
class Seq2SeqWithAttention(tf.keras.Model):
    def __init__(self, input_dim, output_dim, latent_dim, num_layers, timesteps_input, timesteps_output):
        super(Seq2SeqWithAttention, self).__init__()
        self.timesteps_input = timesteps_input
        self.timesteps_output = timesteps_output

        self.encoder_lstm = tf.keras.layers.LSTM(latent_dim, return_state=True, return_sequences=True)
        self.decoder_lstm = tf.keras.layers.LSTM(latent_dim, return_state=True, return_sequences=True)

        self.attention_dense = tf.keras.layers.Dense(1)
        self.attention_softmax = tf.keras.layers.Softmax(axis=1)

        self.output_dense = tf.keras.layers.Dense(output_dim)

    def call(self, inputs, training=False):
        encoder_inputs, decoder_inputs = inputs

        # Encoder
        encoder_outputs, state_h, state_c = self.encoder_lstm(encoder_inputs)

        # Decoder
        decoder_outputs, _, _ = self.decoder_lstm(decoder_inputs, initial_state=[state_h, state_c])

        # Attention mechanism
        attention_weights = self.attention_score(encoder_outputs, decoder_outputs)

        # Compute context vector
        attention_weights_expanded = tf.expand_dims(attention_weights, axis=-1)
        encoder_outputs_expanded = tf.expand_dims(encoder_outputs, axis=1)
        context_vector = tf.reduce_sum(encoder_outputs_expanded * attention_weights_expanded, axis=2)

        # Combine context vector and decoder outputs
        decoder_combined_context = tf.concat([context_vector, decoder_outputs], axis=-1)

        # Output layer
        outputs = self.output_dense(decoder_combined_context)

        return outputs

    def attention_score(self, encoder_outputs, decoder_outputs):
        timesteps_decoder = tf.shape(decoder_outputs)[1]

        # Tile decoder outputs to match the encoder sequence length
        decoder_expanded = tf.expand_dims(decoder_outputs, axis=2)
        decoder_tiled = tf.tile(decoder_expanded, [1, 1, self.timesteps_input, 1])

        # Expand encoder outputs to match decoder timesteps
        encoder_expanded = tf.expand_dims(encoder_outputs, axis=1)
        encoder_tiled = tf.tile(encoder_expanded, [1, timesteps_decoder, 1, 1])

        # Concatenate encoder and decoder outputs
        concat = tf.concat([encoder_tiled, decoder_tiled], axis=-1)

        # Compute attention scores
        attention_scores = self.attention_dense(concat)
        attention_scores = tf.squeeze(attention_scores, axis=-1)

        # Apply softmax to calculate attention weights
        attention_weights = self.attention_softmax(attention_scores)

        return attention_weights

    def get_config(self):
        return {
            "input_dim": self.output_dense.units,
            "output_dim": self.timesteps_output,
            "latent_dim": self.encoder_lstm.units,
            "num_layers": 1,  # Modify as needed
            "timesteps_input": self.timesteps_input,
            "timesteps_output": self.timesteps_output,
        }

    @classmethod
    def from_config(cls, config):
        return cls(**config)


# Define the custom R² metric
def r2_score(y_true, y_pred):
    ss_res = tf.reduce_sum(tf.square(y_true - y_pred), axis=None)
    y_true_mean = tf.reduce_mean(y_true, axis=None)
    ss_tot = tf.reduce_sum(tf.square(y_true - y_true_mean), axis=None)
    r2 = 1 - (ss_res / (ss_tot + tf.keras.backend.epsilon()))
    return r2


In [58]:

# Parameters
input_dim = X_train.shape[2]

# Ensure output_dim matches the target data
output_dim = Y_train.shape[2]

# Initialize the model
model = Seq2SeqWithAttention(
    input_dim=X_train.shape[2],
    output_dim=output_dim,
    latent_dim=latent_dim,
    num_layers=num_layers,
    timesteps_input=X_train.shape[1],
    timesteps_output=Y_train.shape[1]
)

# Compile and train the model
model.compile(optimizer='adam', loss='mse', metrics=[r2_score])
model.fit([X_train, Y_train[:, :-1, :]], Y_train[:, 1:, :], epochs=15, batch_size=32,
          validation_data=([X_val, Y_val[:, :-1, :]], Y_val[:, 1:, :]), verbose=1)

# Save the model
model.save("./seq2seq_with_attention_model", save_format="tf")



Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [59]:
# Ensure to use consistent output_dim during reinitialization
loaded_model = tf.keras.models.load_model(
    "./seq2seq_with_attention_model",
    custom_objects={"r2_score": r2_score}
)

# Evaluate the model
test_results = loaded_model.evaluate([X_val, Y_val[:, :-1, :]], Y_val[:, 1:, :], verbose=1)

# Print results
print(f"Test Loss: {test_results[0]}")
print(f"Test R² Score: {test_results[1]}")


Test Loss: 0.0012528256047517061
Test R² Score: 0.9983580112457275
