Data Preprocessing
You'll first need to preprocess the CSV data to create the appropriate format for training the DBN. This involves separating the data into features and targets for training and testing.

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np

# Load the data
csv_file = r'C:\Users\suvar\General\Work\Varsity\Honours\Research\Lab\PHASE_1\data\synthetic_trajectory_data.csv'
data = pd.read_csv(csv_file)

# Extract features and targets
features = ['Timestep', 'Projectile_Position_X', 'Projectile_Position_Y',
            'Projectile_Velocity', 'Projectile_Angle',
            'Interceptor_Position_X', 'Interceptor_Position_Y',
            'Interceptor_Velocity', 'Interceptor_Angle']

# Features for current timestep
X = data[features].copy()

# Add future timestep data to create the target variables
future_features = ['Projectile_Position_X', 'Projectile_Position_Y',
                    'Interceptor_Position_X', 'Interceptor_Position_Y']
y = data[future_features].shift(-1).copy()  # Shift by one timestep to get future positions

# Drop NaN values
X = X[:-1]  # Remove last row from X
y = y.dropna().reset_index(drop=True)  # Remove rows with NaN values from y and reset index
print(X)
# Ensure X and y have the same length
assert X.shape[0] == y.shape[0], "Feature and target matrices have different lengths"

# Add time slice information to columns
def add_time_slices(df, time_slice):
    df.columns = [f'{col}_{time_slice}' for col in df.columns]
    return df

# Group by 'Trajectory_ID' and split within each group
grouped = data.groupby('Trajectory_ID')
X_train_combined = []
X_test_combined = []
y_train_combined = []
y_test_combined = []

for name, group in grouped:
    # Extract features and targets with time slices
    time_slices = group['Timestep'].unique()
    for t in time_slices:
        group_t = group[group['Timestep'] == t]
        X_group_t = group_t[features].copy()
        y_group_t = group_t[future_features].shift(-1).copy().dropna()
        
        # Add time slices to the columns
        X_group_t = add_time_slices(X_group_t, t)
        y_group_t = add_time_slices(y_group_t, t)

        # Drop NaN values
        X_group_t = X_group_t[:-1]
        y_group_t = y_group_t.dropna().reset_index(drop=True)

        # Ensure X and y have the same length within the group
        assert X_group_t.shape[0] == y_group_t.shape[0], "Feature and target matrices have different lengths within group"

        # Split within the group
        X_train, X_test, y_train, y_test = train_test_split(X_group_t, y_group_t, test_size=0.2, random_state=42)

        # Append to combined lists
        X_train_combined.append(X_train)
        X_test_combined.append(X_test)
        y_train_combined.append(y_train)
        y_test_combined.append(y_test)

# Concatenate all the splits
X_train_combined = pd.concat(X_train_combined, axis=0).reset_index(drop=True)
X_test_combined = pd.concat(X_test_combined, axis=0).reset_index(drop=True)
y_train_combined = pd.concat(y_train_combined, axis=0).reset_index(drop=True)
y_test_combined = pd.concat(y_test_combined, axis=0).reset_index(drop=True)

print(f"Shapes - X_train_combined: {X_train_combined.shape}, y_train_combined: {y_train_combined.shape}")
print(f"Shapes - X_test_combined: {X_test_combined.shape}, y_test_combined: {y_test_combined.shape}")

# Save or use the data for DBN fitting



         Timestep  Projectile_Position_X  Projectile_Position_Y  \
0        0.000000               0.000000               0.000000   
1        0.311655             690.425786             237.256245   
2        0.623311            1380.851572             473.559653   
3        0.934966            2071.277357             708.910225   
4        1.246622            2761.703143             943.307961   
...           ...                    ...                    ...   
74994  110.702338          348760.080941            5381.196688   
74995  110.926432          349466.073007            4313.671839   
74996  111.150525          350172.065074            3241.789745   
74997  111.374619          350878.057141            2165.550408   
74998  111.598713          351584.049207            1084.953826   

       Projectile_Velocity  Projectile_Angle  Interceptor_Position_X  \
0              2342.502760         18.964647           400000.000000   
1              2342.502760         18.964647       

ValueError: With n_samples=0, test_size=0.2 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.

Define and Train the DBN
Update the DBN structure to reflect the temporal dependencies and relationships for trajectory prediction.

In [23]:
from pgmpy.models import DynamicBayesianNetwork as DBN
from pgmpy.estimators import MaximumLikelihoodEstimator

def define_dbn_structure():
    dbn = DBN()
    
    # Temporal edges
    edges = [
        (('Projectile_Position_X', 0), ('Projectile_Position_X', 1)),
        (('Projectile_Position_Y', 0), ('Projectile_Position_Y', 1)),
        (('Interceptor_Position_X', 0), ('Interceptor_Position_X', 1)),
        (('Interceptor_Position_Y', 0), ('Interceptor_Position_Y', 1))
    ]
    dbn.add_edges_from(edges)
    
    # Dependencies between current state variables
    dependencies = [
        (('Projectile_Position_X', 0), ('Interceptor_Position_X', 0)),
        (('Projectile_Position_Y', 0), ('Interceptor_Position_Y', 0)),
        (('Interceptor_Position_X', 0), ('Interceptor_Position_X', 1)),
        (('Interceptor_Position_Y', 0), ('Interceptor_Position_Y', 1))
    ]
    dbn.add_edges_from(dependencies)
    
    return dbn

# Initialize the DBN structure
dbn = define_dbn_structure()

# Fit the DBN using the MaximumLikelihoodEstimator
dbn.fit(X_train, estimator=MaximumLikelihoodEstimator)


ValueError: data column names must start from time slice 0.

Predict and Evaluate
Use the trained DBN to predict the future positions and compare them with the actual future positions.

In [None]:
from pgmpy.inference import DBNInference

# Initialize the DBN Inference
dbn_infer = DBNInference(dbn)

# Predict and evaluate
predictions = []
for i in range(len(X_test)):
    evidence = X_test.iloc[i].to_dict()
    pred = dbn_infer.forward_inference(variables=[('Projectile_Position_X', 1), ('Projectile_Position_Y', 1),
                                                  ('Interceptor_Position_X', 1), ('Interceptor_Position_Y', 1)],
                                        evidence=evidence)
    predictions.append(pred)

# Convert predictions to DataFrame for comparison
predictions_df = pd.DataFrame(predictions)

# Evaluate the performance (you may need to adapt this based on your exact evaluation criteria)
accuracy = np.mean(np.abs(predictions_df.values - y_test.values) < tolerance)
print(f"Prediction Accuracy: {accuracy:.2f}")
