In [None]:
import pandas as pd
df = pd.read_csv('/content/segmented_trajectories.csv')
df.head()

Unnamed: 0,BaseDateTime,LAT,LON,MMSI,PatternDescriptor,Pattern_High Speed,Pattern_Slow Movement,Pattern_Stationary,SOG,SegmentID
0,2022-03-31 00:00:17,26.11809,-80.14815,1056261,Stationary,,,1.0,0.1,0
1,2022-03-31 00:00:27,26.11809,-80.148148,1056261,Stationary,,,1.0,0.083607,0
2,2022-03-31 00:00:37,26.11809,-80.148147,1056261,Stationary,,,1.0,0.067213,0
3,2022-03-31 00:00:47,26.11809,-80.148145,1056261,Stationary,,,1.0,0.05082,0
4,2022-03-31 00:00:57,26.11809,-80.148143,1056261,Stationary,,,1.0,0.034426,0


In [None]:
df['Pattern_High Speed'].unique()

array([nan,  1.])

In [None]:
df[['Pattern_High Speed',	'Pattern_Slow Movement']] = df[['Pattern_High Speed',	'Pattern_Slow Movement']].fillna(0)

In [None]:
import numpy as np
import pandas as pd
from tqdm import tqdm  # Correct import of tqdm

from sklearn.model_selection import KFold

def k_fold_sequence_to_sequence(df, window_size, prediction_horizon, k=5, resample_interval=10):
    """
    Apply K-Fold Cross-Validation for sequence-to-sequence data preparation.

    Parameters:
    - df: Pandas DataFrame containing AIS data with columns 'BaseDateTime', 'LAT', 'LON', 'SOG', and one-hot encoded pattern descriptors.
    - window_size: The number of data points in each input sequence (length of observed states X_k,).
    - prediction_horizon: The number of time steps to predict (length of target sequence Y_k,h).
    - k: Number of folds for cross-validation.
    - resample_interval: Resampling interval for the AIS data, in seconds (assumed to be already applied in previous steps).

    Returns:
    - folds_train: List of training data for each fold (X_train, Y_train).
    - folds_val: List of validation data for each fold (X_val, Y_val).
    - journey_descriptors: List of journey descriptors for each sequence.
    """
    folds_train = []
    folds_val = []
    journey_descriptors = []

    # Step 1: Prepare the K-Fold split
    kf = KFold(n_splits=k, shuffle=True, random_state=42)

    # Group by MMSI
    grouped = df.groupby('MMSI')

    # Store all sequences in a list (we will split this into k-folds)
    all_sequences_X = []
    all_sequences_Y = []
    all_journey_descriptors = []

    # Process each MMSI group
    for mmsi, group in tqdm(grouped, desc="Processing MMSI", unit="MMSI"):  # Corrected tqdm usage
        group = group.sort_values(by='BaseDateTime')
        group['BaseDateTime'] = pd.to_datetime(group['BaseDateTime'])

        # Check if there's enough data for the sequence-to-sequence model
        if len(group) < window_size + prediction_horizon:
            continue  # Skip this MMSI if it doesn't have enough data

        # Step 2: One-hot encode the pattern descriptors
        one_hot_columns = ['Pattern_Stationary', 'Pattern_Slow Movement', 'Pattern_High Speed']

        # Step 3: Create sliding windows for sequences
        for start in range(0, len(group) - window_size - prediction_horizon + 1):
            # X_k, input sequence: past `window_size` time steps
            X_seq = group.iloc[start:start + window_size][['LAT', 'LON', 'SOG'] + one_hot_columns].values

            # Y_k,h output sequence: next `prediction_horizon` time steps
            Y_seq = group.iloc[start + window_size:start + window_size + prediction_horizon][['LAT', 'LON', 'SOG']].values

            # Append the sequences and the journey descriptor
            all_sequences_X.append(X_seq)
            all_sequences_Y.append(Y_seq)
            all_journey_descriptors.append(group['PatternDescriptor'].iloc[start + window_size])

    # Convert sequences into numpy arrays for model training
    all_sequences_X = np.array(all_sequences_X)
    all_sequences_Y = np.array(all_sequences_Y)
    all_journey_descriptors = np.array(all_journey_descriptors)

    # Step 4: Perform K-Fold Cross Validation
    for train_index, val_index in kf.split(all_sequences_X):
        # Split into training and validation sets for each fold
        X_train, X_val = all_sequences_X[train_index], all_sequences_X[val_index]
        Y_train, Y_val = all_sequences_Y[train_index], all_sequences_Y[val_index]

        # Store training and validation sets for each fold
        folds_train.append((X_train, Y_train))
        folds_val.append((X_val, Y_val))

        # Store journey descriptors for each fold
        journey_descriptors.append(all_journey_descriptors[val_index])

    return folds_train, folds_val, journey_descriptors


# Define the window size, prediction horizon, and the number of folds
window_size = 10  # Number of observed states
prediction_horizon = 5  # Number of time steps to predict
k = 5  # Number of folds

# Load the data from the CSV file (already preprocessed)
file_path = '/content/segmented_trajectories.csv'
ais_data = pd.read_csv(file_path)

# Apply K-Fold Cross-Validation to prepare data
folds_train, folds_val, journey_descriptors = k_fold_sequence_to_sequence(
    ais_data, window_size, prediction_horizon, k=k
)

# Print out the shapes of the sequences for the first fold
X_train, Y_train = folds_train[0]
X_val, Y_val = folds_val[0]

print(f"Training Input Shape: {X_train.shape}")
print(f"Training Output Shape: {Y_train.shape}")
print(f"Validation Input Shape: {X_val.shape}")
print(f"Validation Output Shape: {Y_val.shape}")


Processing MMSI: 100%|██████████| 1482/1482 [00:09<00:00, 151.72MMSI/s]


Training Input Shape: (4537, 10, 6)
Training Output Shape: (4537, 5, 3)
Validation Input Shape: (1135, 10, 6)
Validation Output Shape: (1135, 5, 3)


In [None]:
import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler

def k_fold_sequence_to_sequence(df, window_size, prediction_horizon, k=5):
    """
    Apply K-Fold Cross-Validation for sequence-to-sequence data preparation with scaling.

    Parameters:
    - df: Pandas DataFrame containing AIS data.
    - window_size: Number of time steps in each input sequence.
    - prediction_horizon: Number of time steps to predict.
    - k: Number of folds for cross-validation.

    Returns:
    - folds_train: List of training data for each fold (X_train, Y_train).
    - folds_val: List of validation data for each fold (X_val, Y_val).
    - journey_descriptors: List of journey descriptors for validation sequences.
    """
    folds_train = []
    folds_val = []
    journey_descriptors = []

    # Group by MMSI
    grouped = df.groupby('MMSI')

    # Store all sequences
    all_sequences_X = []
    all_sequences_Y = []
    all_journey_descriptors = []

    # Define features for scaling
    continuous_features = ['LAT', 'LON', 'SOG']

    # Initialize scaler
    scaler = StandardScaler()

    # Fit the scaler on the entire dataset
    df_continuous = df[continuous_features]
    scaler.fit(df_continuous)

    # Process each MMSI group
    for mmsi, group in tqdm(grouped, desc="Processing MMSI", unit="MMSI"):
        group = group.sort_values(by='BaseDateTime')
        group['BaseDateTime'] = pd.to_datetime(group['BaseDateTime'])

        # Scale continuous features
        group[continuous_features] = scaler.transform(group[continuous_features])

        # Check if the group has enough data for the sequence-to-sequence model
        if len(group) < window_size + prediction_horizon:
            continue

        # Generate sliding windows
        for start in range(0, len(group) - window_size - prediction_horizon + 1):
            # Input sequence
            X_seq = group.iloc[start:start + window_size][continuous_features].values

            # Output sequence
            Y_seq = group.iloc[start + window_size:start + window_size + prediction_horizon][continuous_features].values

            # Append to sequences
            all_sequences_X.append(X_seq)
            all_sequences_Y.append(Y_seq)
            all_journey_descriptors.append(group['PatternDescriptor'].iloc[start + window_size])

    # Convert sequences into numpy arrays
    all_sequences_X = np.array(all_sequences_X)
    all_sequences_Y = np.array(all_sequences_Y)
    all_journey_descriptors = np.array(all_journey_descriptors)

    # Apply K-Fold Cross Validation
    kf = KFold(n_splits=k, shuffle=True, random_state=42)
    for train_index, val_index in kf.split(all_sequences_X):
        X_train, X_val = all_sequences_X[train_index], all_sequences_X[val_index]
        Y_train, Y_val = all_sequences_Y[train_index], all_sequences_Y[val_index]
        folds_train.append((X_train, Y_train))
        folds_val.append((X_val, Y_val))
        journey_descriptors.append(all_journey_descriptors[val_index])

    return folds_train, folds_val, journey_descriptors


# Define parameters
window_size = 10  # Number of observed states
prediction_horizon = 5  # Number of time steps to predict
k = 5  # Number of folds

# Load the data
file_path = '/content/segmented_trajectories.csv'
ais_data = pd.read_csv(file_path)
ais_data[['Pattern_High Speed',	'Pattern_Slow Movement']] = ais_data[['Pattern_High Speed',	'Pattern_Slow Movement']].fillna(0)

# Ensure proper datetime formatting
ais_data['BaseDateTime'] = pd.to_datetime(ais_data['BaseDateTime'])

# Check for and handle null values
if ais_data.isnull().any().any():
    print("Null values detected. Dropping rows with null values...")
    ais_data = ais_data.dropna()
    print(f"Remaining rows after dropping nulls: {len(ais_data)}")

# Apply K-Fold Cross-Validation with scaling
folds_train, folds_val, journey_descriptors = k_fold_sequence_to_sequence(
    ais_data, window_size, prediction_horizon, k=k
)

# Output the shapes of the sequences for the first fold
X_train, Y_train = folds_train[0]
X_val, Y_val = folds_val[0]

print(f"Training Input Shape: {X_train.shape}")
print(f"Training Output Shape: {Y_train.shape}")
print(f"Validation Input Shape: {X_val.shape}")
print(f"Validation Output Shape: {Y_val.shape}")


# Check for NaN or infinite values in training and validation data
print(f"NaN in X_train: {np.isnan(X_train).any()}")
print(f"NaN in Y_train: {np.isnan(Y_train).any()}")
print(f"NaN in X_val: {np.isnan(X_val).any()}")
print(f"NaN in Y_val: {np.isnan(Y_val).any()}")

print(f"Infinite in X_train: {np.isinf(X_train).any()}")
print(f"Infinite in Y_train: {np.isinf(Y_train).any()}")
print(f"Infinite in X_val: {np.isinf(X_val).any()}")
print(f"Infinite in Y_val: {np.isinf(Y_val).any()}")


Null values detected. Dropping rows with null values...
Remaining rows after dropping nulls: 16035


Processing MMSI: 100%|██████████| 982/982 [00:09<00:00, 101.55MMSI/s]

Training Input Shape: (3118, 10, 3)
Training Output Shape: (3118, 5, 3)
Validation Input Shape: (780, 10, 3)
Validation Output Shape: (780, 5, 3)
NaN in X_train: False
NaN in Y_train: False
NaN in X_val: False
NaN in Y_val: False
Infinite in X_train: False
Infinite in Y_train: False
Infinite in X_val: False
Infinite in Y_val: False





In [None]:
import tensorflow as tf
import numpy as np
# Define the Seq2Seq model with Attention
class Seq2SeqWithAttention(tf.keras.Model):
    def __init__(self, input_dim, output_dim, latent_dim, num_layers, timesteps_input, timesteps_output):
        super(Seq2SeqWithAttention, self).__init__()
        self.timesteps_input = timesteps_input
        self.timesteps_output = timesteps_output

        self.encoder_lstm = tf.keras.layers.LSTM(latent_dim, return_state=True, return_sequences=True)
        self.decoder_lstm = tf.keras.layers.LSTM(latent_dim, return_state=True, return_sequences=True)

        self.attention_dense = tf.keras.layers.Dense(1)
        self.attention_softmax = tf.keras.layers.Softmax(axis=1)

        self.output_dense = tf.keras.layers.Dense(output_dim)

    def call(self, inputs, training=False):
        encoder_inputs, decoder_inputs = inputs

        # Encoder
        encoder_outputs, state_h, state_c = self.encoder_lstm(encoder_inputs)  # (batch_size, timesteps_input, latent_dim)

        # Decoder
        decoder_outputs, _, _ = self.decoder_lstm(decoder_inputs, initial_state=[state_h, state_c])  # (batch_size, timesteps_decoder, latent_dim)

        # Attention mechanism
        attention_weights = self.attention_score(encoder_outputs, decoder_outputs)  # (batch_size, timesteps_decoder, timesteps_input)

        # Compute context vector
        attention_weights_expanded = tf.expand_dims(attention_weights, axis=-1)  # (batch_size, timesteps_decoder, timesteps_input, 1)
        encoder_outputs_expanded = tf.expand_dims(encoder_outputs, axis=1)  # (batch_size, 1, timesteps_input, latent_dim)
        context_vector = tf.reduce_sum(encoder_outputs_expanded * attention_weights_expanded, axis=2)  # (batch_size, timesteps_decoder, latent_dim)

        # Combine context vector and decoder outputs
        decoder_combined_context = tf.concat([context_vector, decoder_outputs], axis=-1)  # (batch_size, timesteps_decoder, 2 * latent_dim)

        # Output layer
        outputs = self.output_dense(decoder_combined_context)  # (batch_size, timesteps_decoder, output_dim)

        return outputs


    def attention_score(self, encoder_outputs, decoder_outputs):
        timesteps_decoder = tf.shape(decoder_outputs)[1]

        # Tile decoder outputs to match the encoder sequence length
        decoder_expanded = tf.expand_dims(decoder_outputs, axis=2)  # (batch_size, timesteps_decoder, 1, latent_dim)
        decoder_tiled = tf.tile(decoder_expanded, [1, 1, self.timesteps_input, 1])  # (batch_size, timesteps_decoder, timesteps_input, latent_dim)

        # Expand encoder outputs to match decoder timesteps
        encoder_expanded = tf.expand_dims(encoder_outputs, axis=1)  # (batch_size, 1, timesteps_input, latent_dim)
        encoder_tiled = tf.tile(encoder_expanded, [1, timesteps_decoder, 1, 1])  # (batch_size, timesteps_decoder, timesteps_input, latent_dim)

        # Concatenate encoder and decoder outputs
        concat = tf.concat([encoder_tiled, decoder_tiled], axis=-1)  # (batch_size, timesteps_decoder, timesteps_input, 2 * latent_dim)

        # Compute attention scores
        attention_scores = self.attention_dense(concat)  # (batch_size, timesteps_decoder, timesteps_input, 1)
        attention_scores = tf.squeeze(attention_scores, axis=-1)  # (batch_size, timesteps_decoder, timesteps_input)

        # Apply softmax to calculate attention weights
        attention_weights = self.attention_softmax(attention_scores)  # (batch_size, timesteps_decoder, timesteps_input)

        return attention_weights



# Parameters
input_dim = X_train.shape[2]  # Number of features in the input sequence (e.g., 6)
output_dim = Y_train.shape[2]  # Number of features in the output sequence (e.g., 3)
latent_dim = 64  # Latent dimension for LSTM layers
num_layers = 2  # Number of LSTM layers
timesteps_input = X_train.shape[1]  # Window size
timesteps_output = Y_train.shape[1]  # Prediction horizon

# Initialize the model
model = Seq2SeqWithAttention(
    input_dim=input_dim,
    output_dim=output_dim,
    latent_dim=latent_dim,
    num_layers=num_layers,
    timesteps_input=timesteps_input,
    timesteps_output=timesteps_output
)

# Define the custom R² metric
def r2_score(y_true, y_pred):
    # Calculate the residual sum of squares
    ss_res = tf.reduce_sum(tf.square(y_true - y_pred), axis=None)
    # Calculate the total sum of squares
    y_true_mean = tf.reduce_mean(y_true, axis=None)
    ss_tot = tf.reduce_sum(tf.square(y_true - y_true_mean), axis=None)
    # Calculate R² score
    r2 = 1 - (ss_res / (ss_tot + tf.keras.backend.epsilon()))
    return r2


# Compile the model with the R² metric
model.compile(optimizer='adam', loss='mse', metrics=[r2_score])

# Train the model
history = model.fit(
    [X_train, Y_train[:, :-1, :]],  # Encoder input: X_train, Decoder input: Y_train[:, :-1, :]
    Y_train[:, 1:, :],  # Actual target sequence for training
    epochs=15,
    batch_size=32,
    validation_data=([X_val, Y_val[:, :-1, :]], Y_val[:, 1:, :]),  # For validation, use the same format
    verbose=1
)


Epoch 1/15
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 37ms/step - loss: 0.2407 - r2_score: 0.6801 - val_loss: 0.0319 - val_r2_score: 0.9643
Epoch 2/15
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - loss: 0.0239 - r2_score: 0.9733 - val_loss: 0.0229 - val_r2_score: 0.9753
Epoch 3/15
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - loss: 0.0181 - r2_score: 0.9812 - val_loss: 0.0144 - val_r2_score: 0.9853
Epoch 4/15
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 18ms/step - loss: 0.0071 - r2_score: 0.9917 - val_loss: 0.0083 - val_r2_score: 0.9915
Epoch 5/15
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 17ms/step - loss: 0.0050 - r2_score: 0.9944 - val_loss: 0.0050 - val_r2_score: 0.9941
Epoch 6/15
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - loss: 0.0046 - r2_score: 0.9953 - val_loss: 0.0031 - val_r2_score: 0.9961
Epoch 7/15
[1m98/98[0m [32m━━━

In [None]:
# Evaluate the model and capture loss and metrics
test_results = model.evaluate([X_val, Y_val[:, :-1, :]], Y_val[:, 1:, :], verbose=1)
test_loss, test_r2 = test_results

# Print test loss and R² score
print(f"Test Loss: {test_loss}")
print(f"Test R² Score (Accuracy): {test_r2}")

[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 8.5378e-04 - r2_score: 0.9989
Test Loss: 0.0009665137040428817
Test R² Score (Accuracy): 0.9986791014671326


In [None]:
import tensorflow as tf
import numpy as np

# Define the Seq2Seq model with Attention
class Seq2SeqWithAttention(tf.keras.Model):
    def __init__(self, input_dim, output_dim, latent_dim, num_layers, timesteps_input, timesteps_output):
        super(Seq2SeqWithAttention, self).__init__()
        self.timesteps_input = timesteps_input
        self.timesteps_output = timesteps_output

        self.encoder_lstm = tf.keras.layers.LSTM(latent_dim, return_state=True, return_sequences=True)
        self.decoder_lstm = tf.keras.layers.LSTM(latent_dim, return_state=True, return_sequences=True)

        self.attention_dense = tf.keras.layers.Dense(1)
        self.attention_softmax = tf.keras.layers.Softmax(axis=1)

        self.output_dense = tf.keras.layers.Dense(output_dim)

    def call(self, inputs, training=False):
        encoder_inputs, decoder_inputs = inputs

        # Encoder
        encoder_outputs, state_h, state_c = self.encoder_lstm(encoder_inputs)

        # Decoder
        decoder_outputs, _, _ = self.decoder_lstm(decoder_inputs, initial_state=[state_h, state_c])

        # Attention mechanism
        attention_weights = self.attention_score(encoder_outputs, decoder_outputs)

        # Compute context vector
        attention_weights_expanded = tf.expand_dims(attention_weights, axis=-1)
        encoder_outputs_expanded = tf.expand_dims(encoder_outputs, axis=1)
        context_vector = tf.reduce_sum(encoder_outputs_expanded * attention_weights_expanded, axis=2)

        # Combine context vector and decoder outputs
        decoder_combined_context = tf.concat([context_vector, decoder_outputs], axis=-1)

        # Output layer
        outputs = self.output_dense(decoder_combined_context)

        return outputs

    def attention_score(self, encoder_outputs, decoder_outputs):
        timesteps_decoder = tf.shape(decoder_outputs)[1]

        # Tile decoder outputs to match the encoder sequence length
        decoder_expanded = tf.expand_dims(decoder_outputs, axis=2)
        decoder_tiled = tf.tile(decoder_expanded, [1, 1, self.timesteps_input, 1])

        # Expand encoder outputs to match decoder timesteps
        encoder_expanded = tf.expand_dims(encoder_outputs, axis=1)
        encoder_tiled = tf.tile(encoder_expanded, [1, timesteps_decoder, 1, 1])

        # Concatenate encoder and decoder outputs
        concat = tf.concat([encoder_tiled, decoder_tiled], axis=-1)

        # Compute attention scores
        attention_scores = self.attention_dense(concat)
        attention_scores = tf.squeeze(attention_scores, axis=-1)

        # Apply softmax to calculate attention weights
        attention_weights = self.attention_softmax(attention_scores)

        return attention_weights

    def get_config(self):
        return {
            "input_dim": self.output_dense.units,
            "output_dim": self.timesteps_output,
            "latent_dim": self.encoder_lstm.units,
            "num_layers": 1,  # Modify as needed
            "timesteps_input": self.timesteps_input,
            "timesteps_output": self.timesteps_output,
        }

    @classmethod
    def from_config(cls, config):
        return cls(**config)


# Define the custom R² metric
def r2_score(y_true, y_pred):
    ss_res = tf.reduce_sum(tf.square(y_true - y_pred), axis=None)
    y_true_mean = tf.reduce_mean(y_true, axis=None)
    ss_tot = tf.reduce_sum(tf.square(y_true - y_true_mean), axis=None)
    r2 = 1 - (ss_res / (ss_tot + tf.keras.backend.epsilon()))
    return r2


In [23]:

# Parameters
input_dim = X_train.shape[2]

# Ensure output_dim matches the target data
output_dim = Y_train.shape[2]

# Initialize the model
model = Seq2SeqWithAttention(
    input_dim=X_train.shape[2],
    output_dim=output_dim,
    latent_dim=latent_dim,
    num_layers=num_layers,
    timesteps_input=X_train.shape[1],
    timesteps_output=Y_train.shape[1]
)

# Compile and train the model
model.compile(optimizer='adam', loss='mse', metrics=[r2_score])
model.fit([X_train, Y_train[:, :-1, :]], Y_train[:, 1:, :], epochs=5, batch_size=32,
          validation_data=([X_val, Y_val[:, :-1, :]], Y_val[:, 1:, :]), verbose=1)

# Save the model
# model.load_weights("/content/seq2seq_with_attention_model.h5")
# Save the model including both architecture and weights
model.save("./seq2seq_with_attention_model.h5", save_format="tf")



Epoch 1/5
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 25ms/step - loss: 0.3066 - r2_score: 0.6396 - val_loss: 0.0330 - val_r2_score: 0.9630
Epoch 2/5
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 21ms/step - loss: 0.0237 - r2_score: 0.9731 - val_loss: 0.0200 - val_r2_score: 0.9783
Epoch 3/5
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - loss: 0.0132 - r2_score: 0.9839 - val_loss: 0.0122 - val_r2_score: 0.9863
Epoch 4/5
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - loss: 0.0079 - r2_score: 0.9893 - val_loss: 0.0067 - val_r2_score: 0.9924
Epoch 5/5
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - loss: 0.0055 - r2_score: 0.9940 - val_loss: 0.0040 - val_r2_score: 0.9953




In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import StandardScaler

def preprocess_input_data(df, window_size, continuous_features, scaler):
    """
    Preprocess the input data for prediction based on the last `window_size` time steps.
    """
    # Scale the continuous features for the input
    df_scaled = df.copy()
    df_scaled[continuous_features] = scaler.transform(df[continuous_features])

    # Get the last `window_size` steps of data
    X_seq = df_scaled[continuous_features].values[-window_size:]

    # Reshape to (1, window_size, features) for model input
    X_seq = np.expand_dims(X_seq, axis=0)  # Shape becomes (1, window_size, len(continuous_features))

    return X_seq

def predict_sequence(model, df, window_size, continuous_features, scaler):
    """
    Predict the next `prediction_horizon` time steps using the trained Seq2Seq model.
    """
    # Preprocess the input data for prediction
    X_seq = preprocess_input_data(df, window_size, continuous_features, scaler)

    # Generate predictions using the model
    predictions = model.predict([X_seq, X_seq])  # Here, we provide both encoder and decoder inputs

    # The output shape should be (1, prediction_horizon, features)
    predicted_values = predictions[0]  # Get the prediction for the first batch

    return predicted_values

# Example Usage:

# Define parameters
window_size = 10  # Number of observed states
prediction_horizon = 5  # Number of time steps to predict
continuous_features = ['LAT', 'LON', 'SOG']

# Assuming your `ais_data` DataFrame contains the historical data and is already preprocessed
# Fit the scaler on the historical data (this should be done once during training)
scaler = StandardScaler()

# Fit the scaler on the continuous features of the entire dataset or the training dataset
scaler.fit(ais_data[continuous_features])

# Prepare data for the last available timestamp
last_data_point = ais_data.iloc[-1:]

# Make a prediction for the next `prediction_horizon` time steps
predicted_values = predict_sequence(model, ais_data, window_size, continuous_features, scaler)

# Output the predicted values
predicted_df = pd.DataFrame(predicted_values, columns=continuous_features)
print(predicted_df)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 991ms/step
        LAT       LON       SOG
0 -0.314065  0.148184  0.035697
1 -0.323168  0.152857  0.177661
2 -0.325256  0.155183  0.350927
3 -0.325709  0.156682  0.410154
4 -0.326120  0.157343  0.363400
5 -0.326072  0.157043  0.243034
6 -0.325038  0.155837  0.088761
7 -0.323136  0.154071 -0.068715
8 -0.320919  0.152154 -0.213066
9 -0.318966  0.150375 -0.339225


In [27]:
import tensorflow as tf
import numpy as np

# Define the Seq2Seq model with Attention
class Seq2SeqWithAttention(tf.keras.Model):
    def __init__(self, input_dim, output_dim, latent_dim, num_layers, timesteps_input, timesteps_output):
        super(Seq2SeqWithAttention, self).__init__()
        self.timesteps_input = timesteps_input
        self.timesteps_output = timesteps_output

        self.encoder_lstm = tf.keras.layers.LSTM(latent_dim, return_state=True, return_sequences=True)
        self.decoder_lstm = tf.keras.layers.LSTM(latent_dim, return_state=True, return_sequences=True)

        self.attention_dense = tf.keras.layers.Dense(1)
        self.attention_softmax = tf.keras.layers.Softmax(axis=1)

        self.output_dense = tf.keras.layers.Dense(output_dim)

    def call(self, inputs, training=False):
        encoder_inputs, decoder_inputs = inputs

        # Encoder
        encoder_outputs, state_h, state_c = self.encoder_lstm(encoder_inputs)

        # Decoder
        decoder_outputs, _, _ = self.decoder_lstm(decoder_inputs, initial_state=[state_h, state_c])

        # Attention mechanism
        attention_weights = self.attention_score(encoder_outputs, decoder_outputs)

        # Compute context vector
        attention_weights_expanded = tf.expand_dims(attention_weights, axis=-1)
        encoder_outputs_expanded = tf.expand_dims(encoder_outputs, axis=1)
        context_vector = tf.reduce_sum(encoder_outputs_expanded * attention_weights_expanded, axis=2)

        # Combine context vector and decoder outputs
        decoder_combined_context = tf.concat([context_vector, decoder_outputs], axis=-1)

        # Output layer
        outputs = self.output_dense(decoder_combined_context)

        return outputs

    def attention_score(self, encoder_outputs, decoder_outputs):
        timesteps_decoder = tf.shape(decoder_outputs)[1]

        # Tile decoder outputs to match the encoder sequence length
        decoder_expanded = tf.expand_dims(decoder_outputs, axis=2)
        decoder_tiled = tf.tile(decoder_expanded, [1, 1, self.timesteps_input, 1])

        # Expand encoder outputs to match decoder timesteps
        encoder_expanded = tf.expand_dims(encoder_outputs, axis=1)
        encoder_tiled = tf.tile(encoder_expanded, [1, timesteps_decoder, 1, 1])

        # Concatenate encoder and decoder outputs
        concat = tf.concat([encoder_tiled, decoder_tiled], axis=-1)

        # Compute attention scores
        attention_scores = self.attention_dense(concat)
        attention_scores = tf.squeeze(attention_scores, axis=-1)

        # Apply softmax to calculate attention weights
        attention_weights = self.attention_softmax(attention_scores)

        return attention_weights

    def get_config(self):
        return {
            "input_dim": self.output_dense.units,
            "output_dim": self.timesteps_output,
            "latent_dim": self.encoder_lstm.units,
            "num_layers": 1,  # Modify as needed
            "timesteps_input": self.timesteps_input,
            "timesteps_output": self.timesteps_output,
        }

    @classmethod
    def from_config(cls, config):
        return cls(**config)


# Define the custom R² metric
def r2_score(y_true, y_pred):
    ss_res = tf.reduce_sum(tf.square(y_true - y_pred), axis=None)
    y_true_mean = tf.reduce_mean(y_true, axis=None)
    ss_tot = tf.reduce_sum(tf.square(y_true - y_true_mean), axis=None)
    r2 = 1 - (ss_res / (ss_tot + tf.keras.backend.epsilon()))
    return r2


In [40]:

# Parameters
input_dim = X_train.shape[2]

# Ensure output_dim matches the target data
output_dim = Y_train.shape[2]

# Initialize the model
model = Seq2SeqWithAttention(
    input_dim=X_train.shape[2],
    output_dim=output_dim,
    latent_dim=latent_dim,
    num_layers=num_layers,
    timesteps_input=X_train.shape[1],
    timesteps_output=Y_train.shape[1]
)

# Compile and train the model
model.compile(optimizer='adam', loss='mse', metrics=[r2_score])
model.fit([X_train, Y_train[:, :-1, :]], Y_train[:, 1:, :], epochs=2, batch_size=32,
          validation_data=([X_val, Y_val[:, :-1, :]], Y_val[:, 1:, :]), verbose=1)

model.save_weights("seq2seq_with_attention.weights.h5")


Epoch 1/2
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 18ms/step - loss: 0.3729 - r2_score: 0.6282 - val_loss: 0.0383 - val_r2_score: 0.9607
Epoch 2/2
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 13ms/step - loss: 0.0202 - r2_score: 0.9759 - val_loss: 0.0207 - val_r2_score: 0.9780


In [41]:
# Reinitialize the model with the same architecture
model = Seq2SeqWithAttention(
    input_dim=X_train.shape[2],
    output_dim=output_dim,
    latent_dim=latent_dim,
    num_layers=num_layers,
    timesteps_input=X_train.shape[1],
    timesteps_output=Y_train.shape[1]
)

# Compile the model (same optimizer and loss function used during training)
model.compile(optimizer='adam', loss='mse', metrics=[r2_score])

# Load the saved weights
model.load_weights("seq2seq_with_attention.weights.h5")



  saveable.load_own_variables(weights_store.get(inner_path))


In [43]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import StandardScaler

def preprocess_input_data(df, window_size, continuous_features, scaler):
    """
    Preprocess the input data for prediction based on the last `window_size` time steps.
    """
    # Scale the continuous features for the input
    df_scaled = df.copy()
    df_scaled[continuous_features] = scaler.transform(df[continuous_features])

    # Get the last `window_size` steps of data
    X_seq = df_scaled[continuous_features].values[-window_size:]

    # Reshape to (1, window_size, features) for model input
    X_seq = np.expand_dims(X_seq, axis=0)  # Shape becomes (1, window_size, len(continuous_features))

    return X_seq

def predict_sequence(model, df, window_size, continuous_features, scaler, weights_path):
    """
    Predict the next `prediction_horizon` time steps using the trained Seq2Seq model.
    The model weights are loaded from the specified weights file.
    """
    # Reinitialize the model (this should match the architecture used during training)
    model = Seq2SeqWithAttention(
        input_dim=df[continuous_features].shape[1],  # Number of features in your input
        output_dim=3,
        latent_dim=128,  # Modify based on  model's latent dimension
        num_layers=2,  # Modify based on  model's layers
        timesteps_input=window_size,
        timesteps_output=prediction_horizon
    )

    # Compile the model before loading weights (same as during training)
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])

    # Load the saved model weights
    model.load_weights(weights_path)

    # Preprocess the input data for prediction
    X_seq = preprocess_input_data(df, window_size, continuous_features, scaler)

    # Generate predictions using the model
    predictions = model.predict([X_seq, X_seq])  # provide both encoder and decoder inputs

    # The output shape should be (1, prediction_horizon, features), e.g. (1, 5, 3) if predicting 3 features
    predicted_values = predictions[0]  # Get the prediction for the first batch

    return predicted_values


window_size = 10  # Number of observed states
prediction_horizon = 5  # Number of time steps to predict
continuous_features = ['LAT', 'LON', 'SOG']

scaler = StandardScaler()
scaler.fit(ais_data[continuous_features])
weights_path = '/content/seq2seq_with_attention.weights.h5'
last_data_point = ais_data.iloc[-1:]
predicted_values = predict_sequence(model=None, df=ais_data, window_size=window_size,
                                    continuous_features=continuous_features, scaler=scaler,
                                    weights_path=weights_path)
predicted_df = pd.DataFrame(predicted_values, columns=continuous_features)
print(predicted_df)


  saveable.load_own_variables(weights_store.get(inner_path))


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
        LAT       LON       SOG
0  0.062287 -0.026731  0.039613
1  0.062649 -0.019276  0.029238
2  0.063329 -0.009782  0.021983
3  0.063997 -0.001607  0.014890
4  0.064392  0.004644  0.008200
5  0.064430  0.008967  0.001949
6  0.064109  0.011505 -0.003882
7  0.063461  0.012467 -0.009339
8  0.062537  0.012075 -0.014474
9  0.061387  0.010544 -0.019344


In [55]:
def preprocess_input_data_with_time(df, window_size, continuous_features, scaler):
    """
    Preprocess the input data for prediction based on the last `window_size` time steps,
    including time as a feature.
    """
    # Scale the continuous features
    df_scaled = df.copy()
    df_scaled[continuous_features] = scaler.transform(df[continuous_features])

    # Convert timestamp to numeric values (for example, to the number of seconds)
    df_scaled['BaseDateTime'] = (df_scaled['BaseDateTime'] - df_scaled['BaseDateTime'].min()).dt.total_seconds()

    # Get the last `window_size` steps of data, including time
    X_seq = df_scaled[continuous_features + ['BaseDateTime']].values[-window_size:]

    # Reshape to (1, window_size, features) for model input
    X_seq = np.expand_dims(X_seq, axis=0)  # Shape becomes (1, window_size, len(continuous_features) + 1)

    return X_seq

def predict_sequence_with_time(model, df, window_size, continuous_features, scaler, weights_path, prediction_horizon):
    """
    Predict the next `prediction_horizon` time steps and their corresponding timestamps using the trained Seq2Seq model.
    """
    # Reinitialize and compile the model (ensure the architecture matches)
    model = Seq2SeqWithAttention(
        input_dim=df[continuous_features].shape[1] + 1,  # Number of features + 1 for time
        output_dim=3 + 1,  # 3 features (LAT, LON, SOG) + 1 for predicted time
        latent_dim=128,  # Latent dimension size for the LSTM layers
        num_layers=2,  # Number of LSTM layers
        timesteps_input=window_size,
        timesteps_output=prediction_horizon
    )

    # Compile the model
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])

    # Load the saved weights (the pre-trained model)
    model.load_weights(weights_path)

    # Preprocess the input data (get last `window_size` steps)
    X_seq = preprocess_input_data_with_time(df, window_size, continuous_features, scaler)

    # Make predictions (use both encoder and decoder inputs)
    predictions = model.predict([X_seq, X_seq])  # Here, we provide both encoder and decoder inputs

    # Extract the predicted values for the batch
    predicted_values = predictions[0]  # Get predictions for the first batch

    # Extract only the predicted values for the next `prediction_horizon` steps
    predicted_values = predicted_values[-prediction_horizon:]  # Slice only the predicted part

    # Convert time predictions back to actual timestamps
    last_timestamp = df['BaseDateTime'].iloc[-1]  # Get the last known timestamp

    # Convert predicted time delta (in seconds) to a Timedelta object
    predicted_timestamps = pd.to_timedelta(predicted_values[:, -1], unit='s') + pd.to_datetime(last_timestamp)

    # Extract predicted features (LAT, LON, SOG)
    predicted_features = predicted_values[:, :-1]

    # Convert the predictions into a DataFrame
    predicted_df = pd.DataFrame(predicted_features, columns=continuous_features)
    predicted_df['BaseDateTime'] = predicted_timestamps

    # Get the observations (the last `window_size` steps from the original data)
    observed_df = df.iloc[-window_size:].copy()

    # Prepare observed DataFrame (same features, scaled values)
    observed_df[continuous_features] = scaler.transform(observed_df[continuous_features])

    # Convert the observed DataFrame into a format that includes time
    observed_df['BaseDateTime'] = pd.to_datetime(observed_df['BaseDateTime'])

    # Return both observed and predicted DataFrames
    return observed_df, predicted_df

# Example Usage:

window_size = 10  # Number of observed states
prediction_horizon = 5  # Number of time steps to predict
continuous_features = ['LAT', 'LON', 'SOG']

# Assume the `ais_data` DataFrame is already loaded with data
scaler = StandardScaler()
scaler.fit(ais_data[continuous_features])

weights_path = '/content/seq2seq_with_attention.weights.h5'

# Predict the next `prediction_horizon` time steps
observed_df, predicted_df = predict_sequence_with_time(
    model=None,
    df=ais_data,
    window_size=window_size,
    continuous_features=continuous_features,
    scaler=scaler,
    weights_path=weights_path,
    prediction_horizon=prediction_horizon  # Pass prediction_horizon here
)

# Print the observed data and predicted data separately
print("Observed Data:")
print(observed_df)

print("\nPredicted Data:")
print(predicted_df)



  saveable.load_own_variables(weights_store.get(inner_path))


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
Observed Data:
             BaseDateTime       LAT       LON       MMSI PatternDescriptor  \
23770 2022-03-31 00:00:52 -0.495197  0.256379  985346207        Stationary   
23771 2022-03-31 00:01:02 -0.495197  0.256378  985346207        Stationary   
23772 2022-03-31 00:01:12 -0.495196  0.256378  985346207        Stationary   
23773 2022-03-31 00:01:22 -0.495196  0.256378  985346207        Stationary   
23774 2022-03-31 00:01:32 -0.495196  0.256378  985346207        Stationary   
23775 2022-03-31 00:01:42 -0.495196  0.256378  985346207        Stationary   
23776 2022-03-31 00:01:52 -0.495196  0.256378  985346207        Stationary   
23777 2022-03-31 00:02:02 -0.495195  0.256378  985346207        Stationary   
23778 2022-03-31 00:02:12 -0.495195  0.256378  985346207        Stationary   
23779 2022-03-31 00:02:22 -0.495195  0.256378  985346207        Stationary   

       Pattern_High Speed  Pattern_Slow Movement  Patter

### Version -2

Key Enhancements:

    Time Handling: Time should be treated more explicitly as a separate feature. You may want to include the BaseDateTime as a feature during training and use it properly during prediction to handle time deltas.

    Separate Time Prediction Output: As we already have a separate layer for predicting the time delta (self.time_dense), we need to ensure the output from this layer is used and treated properly for time-based predictions.

    Investigating Time Prediction: We should output the predicted time deltas (in seconds) and then investigate how accurately the model is predicting these time deltas. The model will predict the change in time (time delta) between the last timestamp in the input sequence and the predicted timestamps.

Step-by-Step Implementation
1. Time Handling in the Model

To handle time effectively, the model will use time as an input feature (possibly encoded as a number or time-related feature). Additionally, we will focus on making time predictions explicit by having the model predict the time deltas separately. These time deltas will be added to the last timestamp to generate future timestamps.
2. Separate Time Prediction Output

We already have the time_dense layer for time prediction, which predicts the time delta. We'll make sure that this output is handled properly.
3. Investigating Time Prediction

We will add a way to calculate the difference between predicted timestamps and the actual time deltas. We can visualize this or calculate metrics like RMSE or MAE to evaluate the time predictions.

In [58]:
import tensorflow as tf
import numpy as np
import pandas as pd

# Define the Seq2Seq model with Attention and separate time prediction
class Seq2SeqWithAttentionAndTimePrediction(tf.keras.Model):
    def __init__(self, input_dim, output_dim, latent_dim, num_layers, timesteps_input, timesteps_output):
        super(Seq2SeqWithAttentionAndTimePrediction, self).__init__()
        self.timesteps_input = timesteps_input
        self.timesteps_output = timesteps_output

        self.encoder_lstm = tf.keras.layers.LSTM(latent_dim, return_state=True, return_sequences=True)
        self.decoder_lstm = tf.keras.layers.LSTM(latent_dim, return_state=True, return_sequences=True)

        self.attention_dense = tf.keras.layers.Dense(1)
        self.attention_softmax = tf.keras.layers.Softmax(axis=1)

        # Output layers for feature predictions (LAT, LON, SOG)
        self.output_dense = tf.keras.layers.Dense(output_dim)

        # Separate layer for time delta prediction (time in seconds)
        self.time_dense = tf.keras.layers.Dense(1)

    def call(self, inputs, training=False):
        encoder_inputs, decoder_inputs = inputs

        # Encoder
        encoder_outputs, state_h, state_c = self.encoder_lstm(encoder_inputs)

        # Decoder
        decoder_outputs, _, _ = self.decoder_lstm(decoder_inputs, initial_state=[state_h, state_c])

        # Attention mechanism
        attention_weights = self.attention_score(encoder_outputs, decoder_outputs)

        # Compute context vector
        attention_weights_expanded = tf.expand_dims(attention_weights, axis=-1)
        encoder_outputs_expanded = tf.expand_dims(encoder_outputs, axis=1)
        context_vector = tf.reduce_sum(encoder_outputs_expanded * attention_weights_expanded, axis=2)

        # Combine context vector and decoder outputs
        decoder_combined_context = tf.concat([context_vector, decoder_outputs], axis=-1)

        # Output layer for features (LAT, LON, SOG)
        outputs = self.output_dense(decoder_combined_context)

        # Separate time prediction (time delta in seconds)
        time_delta = self.time_dense(decoder_combined_context)  # Predict time delta (in seconds)

        return outputs, time_delta

    def attention_score(self, encoder_outputs, decoder_outputs):
        timesteps_decoder = tf.shape(decoder_outputs)[1]

        # Tile decoder outputs to match the encoder sequence length
        decoder_expanded = tf.expand_dims(decoder_outputs, axis=2)
        decoder_tiled = tf.tile(decoder_expanded, [1, 1, self.timesteps_input, 1])

        # Expand encoder outputs to match decoder timesteps
        encoder_expanded = tf.expand_dims(encoder_outputs, axis=1)
        encoder_tiled = tf.tile(encoder_expanded, [1, timesteps_decoder, 1, 1])

        # Concatenate encoder and decoder outputs
        concat = tf.concat([encoder_tiled, decoder_tiled], axis=-1)

        # Compute attention scores
        attention_scores = self.attention_dense(concat)
        attention_scores = tf.squeeze(attention_scores, axis=-1)

        # Apply softmax to calculate attention weights
        attention_weights = self.attention_softmax(attention_scores)

        return attention_weights

    def get_config(self):
        return {
            "input_dim": self.output_dense.units,
            "output_dim": self.timesteps_output,
            "latent_dim": self.encoder_lstm.units,
            "num_layers": 1,  # Modify as needed
            "timesteps_input": self.timesteps_input,
            "timesteps_output": self.timesteps_output,
        }

    @classmethod
    def from_config(cls, config):
        return cls(**config)

# Define the custom R² metric
def r2_score(y_true, y_pred):
    ss_res = tf.reduce_sum(tf.square(y_true - y_pred), axis=None)
    y_true_mean = tf.reduce_mean(y_true, axis=None)
    ss_tot = tf.reduce_sum(tf.square(y_true - y_true_mean), axis=None)
    r2 = 1 - (ss_res / (ss_tot + tf.keras.backend.epsilon()))
    return r2

# Compile the model with separate metrics for both outputs
model.compile(
    optimizer='adam',
    loss='mse',  # Mean Squared Error for both outputs
    metrics=[
        r2_score,        # For the feature prediction output (LAT, LON, SOG)
        'mae'            # For the time delta prediction output (time delta in seconds)
    ]
)

# Train the model
model.fit(
    [X_train, Y_train[:, :-1, :]],  # Decoder input
    [Y_train[:, 1:, :], Y_train[:, 1:, -1:]],  # The two outputs: features and time delta
    epochs=2,
    batch_size=32,
    validation_data=([X_val, Y_val[:, :-1, :]], [Y_val[:, 1:, :], Y_val[:, 1:, -1:]]),  # Validation data
    verbose=1
)

# Save the model weights
model.save_weights("seq2seq_with_attention_v2.weights.h5")


Epoch 1/2
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 44ms/step - loss: 0.3976 - mae: 0.1819 - mse_loss: 0.1553 - r2_score: 0.6704 - val_loss: 0.1128 - val_mae: 0.1052 - val_mse_loss: 0.0717 - val_r2_score: 0.9555
Epoch 2/2
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 30ms/step - loss: 0.0658 - mae: 0.0812 - mse_loss: 0.0416 - r2_score: 0.9708 - val_loss: 0.0565 - val_mae: 0.0693 - val_mse_loss: 0.0329 - val_r2_score: 0.9731


In [70]:
# Preprocess input data by extracting a window of time series and scaling the continuous features
def preprocess_input_data_with_time(df, window_size, continuous_features, scaler):
    """
    Preprocess the input data for prediction based on the last `window_size` time steps,
    including time as a feature.
    """
    # Scale the continuous features
    df_scaled = df.copy()
    df_scaled[continuous_features] = scaler.transform(df[continuous_features])

    # Convert timestamp to numeric values (for example, to the number of seconds)
    df_scaled['BaseDateTime'] = (df_scaled['BaseDateTime'] - df_scaled['BaseDateTime'].min()).dt.total_seconds()

    # Get the last `window_size` steps of data, including time
    X_seq = df_scaled[continuous_features + ['BaseDateTime']].values[-window_size:]

    # Reshape to (1, window_size, features) for model input
    X_seq = np.expand_dims(X_seq, axis=0)  # Shape becomes (1, window_size, len(continuous_features) + 1)

    return X_seq


# Prediction function
def predict_sequence_with_time(model, df, window_size, continuous_features, scaler, weights_path, prediction_horizon):
    """
    Predict the next `prediction_horizon` time steps and their corresponding timestamps using the trained Seq2Seq model.
    """
    # Reinitialize and compile the model (ensure the architecture matches)
    model = Seq2SeqWithAttentionAndTimePrediction(
        input_dim=df[continuous_features].shape[1] + 1,  # Number of features + 1 for time
        output_dim=3 + 1,  # 3 features (LAT, LON, SOG) + 1 for predicted time
        latent_dim=128,  # Latent dimension size for the LSTM layers
        num_layers=2,  # Number of LSTM layers
        timesteps_input=window_size,
        timesteps_output=prediction_horizon
    )

    # Compile the model
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])

    # Load the saved weights (the pre-trained model)
    model.load_weights(weights_path)

    # Preprocess the input data (get last `window_size` steps)
    X_seq = preprocess_input_data_with_time(df, window_size, continuous_features, scaler)

    # Make predictions (use both encoder and decoder inputs)
    predictions = model.predict([X_seq, X_seq])  # Here, we provide both encoder and decoder inputs

    # Extract the predicted values for the batch
    predicted_values = predictions[0]  # Get predictions for the first batch

    # Slice only the predicted part based on prediction_horizon
    predicted_values = predicted_values[0, :prediction_horizon, :]  # Take first `prediction_horizon` steps
    print("Predicted Values Shape:", predicted_values.shape)

    # Extract time delta (last column) and reshape
    time_deltas = predicted_values[:, -1]  # Time delta is in the last column
    print("Extracted Time Deltas:", time_deltas)
    print("Shape of Time Deltas:", time_deltas.shape)

    # Ensure it's a 1D array of numeric values
    time_deltas = np.squeeze(time_deltas)  # Remove any extra dimensions
    print("Squeezed Time Deltas:", time_deltas)
    print("Shape of Squeezed Time Deltas:", time_deltas.shape)

    # Convert time deltas to timedelta and add them to the last timestamp
    last_timestamp = pd.to_datetime(df['BaseDateTime'].iloc[-1])  # Last known timestamp
    predicted_timestamps = pd.to_timedelta(time_deltas, unit='s') + last_timestamp  # Convert to actual timestamps

    # Extract predicted features (LAT, LON, SOG) - Exclude the last column (time delta)
    predicted_features = predicted_values[:, :-1]  # Exclude the last column (time delta)
    print("Shape of Predicted Features (LAT, LON, SOG):", predicted_features.shape)

    # Ensure predicted_features is 2D (shape: (prediction_horizon, 3))
    # Reshape the predicted features if necessary
    assert predicted_features.shape[0] == len(predicted_timestamps), "Mismatch between predicted features and timestamps length."

    # Convert the predictions into a DataFrame
    predicted_df = pd.DataFrame(predicted_features, columns=continuous_features)
    predicted_df['BaseDateTime'] = predicted_timestamps

    # Print the final prediction DataFrame
    print("\nPredicted Data:")
    print(predicted_df)

    # Get the observations (the last `window_size` steps from the original data)
    observed_df = df.iloc[-window_size:].copy()

    # Prepare observed DataFrame (same features, scaled values)
    observed_df[continuous_features] = scaler.transform(observed_df[continuous_features])

    # Convert the observed DataFrame into a format that includes time
    observed_df['BaseDateTime'] = pd.to_datetime(observed_df['BaseDateTime'])

    # Return both observed and predicted DataFrames
    return observed_df, predicted_df



window_size = 10  # Number of observed states
prediction_horizon = 5  # Number of time steps to predict
continuous_features = ['LAT', 'LON', 'SOG']

# Assuming the `ais_data` DataFrame is already loaded with data
scaler = StandardScaler()
scaler.fit(ais_data[continuous_features])

weights_path = '/content/seq2seq_with_attention.weights.h5'

# Predict the next `prediction_horizon` time steps
observed_df, predicted_df = predict_sequence_with_time(
    model=None,
    df=ais_data,
    window_size=window_size,
    continuous_features=continuous_features,
    scaler=scaler,
    weights_path=weights_path,
    prediction_horizon=prediction_horizon  # Pass prediction_horizon here
)

# Print the observed data and predicted data separately
print("Observed Data:")
print(observed_df)

print("\nPredicted Data:")
print(predicted_df)

  saveable.load_own_variables(weights_store.get(inner_path))


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
Predicted Values Shape: (5, 4)
Extracted Time Deltas: [0.37339684 0.6036053  0.5912262  0.6592242  0.7362376 ]
Shape of Time Deltas: (5,)
Squeezed Time Deltas: [0.37339684 0.6036053  0.5912262  0.6592242  0.7362376 ]
Shape of Squeezed Time Deltas: (5,)
Shape of Predicted Features (LAT, LON, SOG): (5, 3)

Predicted Data:
        LAT       LON       SOG                  BaseDateTime
0 -0.401255  0.783593 -0.287319 2022-03-31 00:02:22.373396844
1 -0.672990  0.809928 -0.268888 2022-03-31 00:02:22.603605270
2 -0.595241  0.914851 -0.289522 2022-03-31 00:02:22.591226220
3 -0.445280  1.017585 -0.249716 2022-03-31 00:02:22.659224212
4 -0.327354  1.089873 -0.201365 2022-03-31 00:02:22.736237586
Observed Data:
             BaseDateTime       LAT       LON       MMSI PatternDescriptor  \
23770 2022-03-31 00:00:52 -0.495197  0.256379  985346207        Stationary   
23771 2022-03-31 00:01:02 -0.495197  0.256378  985346207        S