In [None]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import mean_squared_error
import copy
import xgboost as xgb
import matplotlib.pyplot as plt
import random

# Pin every random number generator this notebook touches so that a fresh
# "Restart & Run All" produces the same numbers each time.
seed = 0
for _seed_rng in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
    _seed_rng(seed)
del _seed_rng  # keep the notebook namespace free of the loop helper

# Ask cuDNN for deterministic kernels and switch off its benchmark-based
# algorithm selection.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [None]:
# Run id -> cross-validation fold number (1..7). Every fold is made of two
# runs; the pairs below are listed in fold order, so the first pair is fold 1.
fold_map = {
    run_id: fold_id
    for fold_id, run_pair in enumerate(
        [(4, 13), (5, 12), (0, 10), (3, 11), (2, 9), (6, 7), (1, 8)],
        start=1,
    )
    for run_id in run_pair
}

# Run ids grouped by scene: runs 0-5 form scene_a, runs 6-13 form scene_b.
scene_a = list(range(6))
scene_b = list(range(6, 14))

In [None]:
# Feature columns fed to the regressors: for each of the suffixes _t_0, _t_1
# and _t_2, five (distance, missing-flag) column pairs, in this fixed order.
distance_columns = [
    f'{column_stem}_t_{step}'
    for step in range(3)
    for stem_pair in (
        ('distance_wheelchair_FT', 'missing_w_FT'),
        ('distance_wheelchair_v5', 'missing_w_v5'),
        ('distance_drone_FT', 'missing_d_FT'),
        ('distance_drone_v5', 'missing_d_v5'),
        ('distance_range', 'missing_range'),
    )
    for column_stem in stem_pair
]

In [None]:
# Train all XGBoost models for each filter and fold and store them in a list of tuples
# (model, filter_name, held_out_fold). The model stored for fold k is fitted on
# every other fold of the SAME filter, so fold k stays unseen for evaluation.
#
# NOTE(review): the previous version kept appending to one frame across
# filters, so each filter's models were also trained on the data of every
# filter listed before it (and the result depended on list order). Training
# now uses only the current filter's runs.
models = []
frames_by_filter = []
for filter_name in ['original',
                    'rain_1', 'rain_2', 'rain_3',
                    'fog_1', 'fog_2', 'fog_3',
                    'bright_1', 'bright_2', 'bright_3',
                    'dark_1', 'dark_2', 'dark_3']:
    print(f"Processing filter: {filter_name}")

    # Read the 14 runs of this filter, tag each row with its fold and run id,
    # and concatenate once (growing a frame inside the loop is quadratic).
    run_frames = []
    for item in range(14):
        current = pd.read_csv(f'dataset/imputation_2/Preprocessed_{filter_name}_run_{item}.csv', index_col=0)
        current['fold'] = fold_map.get(item, None)
        current['source_run'] = item
        run_frames.append(current)
    df_filter = pd.concat(run_frames, ignore_index=True)
    frames_by_filter.append(df_filter)

    # Keep only rows that belong to one of the seven defined folds.
    df = df_filter[df_filter['fold'].isin([1, 2, 3, 4, 5, 6, 7])].reset_index(drop=True)
    X_train = df[distance_columns + ['fold']]
    y_train = df['distance_tracker_t_0']

    existing_folds = X_train['fold'].unique()

    for fold in existing_folds:
        # Leave-one-fold-out: fit on every row whose fold differs from `fold`.
        train_mask = X_train['fold'] != fold

        X_train_fold = X_train.loc[train_mask].drop(columns=['fold'])
        y_train_fold = y_train.loc[train_mask].values

        model = xgb.XGBRegressor(n_estimators=100, random_state=0)
        model.fit(X_train_fold, y_train_fold)

        models.append((model, filter_name, fold))

# All rows of all filters in one frame, kept for any later cell that reads it.
df_all = pd.concat(frames_by_filter, ignore_index=True)

In [None]:
# Sanity check: one model per (filter, fold) -> 13 filters x 7 folds = 91 expected.
len(models)

In [None]:
# Recall the model for each filter and fold and make predictions
#
# For every filter and every fold (a pair of runs) this cell:
#   1. predicts each run with the model that held that run's fold out,
#   2. plots and reports RMSE for the raw predictions,
#   3. overwrites the leading stretch where every t_0 missing flag is set with
#      a per-scene constant, then reports RMSE and plots again,
#   4. writes the final predictions of each run to a CSV file.
import os

# Run ids evaluated together; each pair is one fold of fold_map.
fold_runs = [(4,13), (5,12), (0,10), (3,11), (2,9), (6,7), (1,8)]

# Constant written over the leading all-missing stretch of a run, per scene.
# NOTE(review): presumably the known starting distance of each scene — confirm.
scene_a_fill = 2.5762
scene_b_fill = 3.1571

output_dir = 'data_for_plots_case2'
os.makedirs(output_dir, exist_ok=True)  # to_csv does not create missing directories


def _plot_prediction(item, fold_id, y_true_run, y_pred_run, all_missing):
    """Plot ground truth, prediction and the all-missing indicator for one run."""
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(y_true_run, label='True', color='blue', linewidth=2)
    ax.plot(y_pred_run, label='Predicted', color='red', linestyle='--')
    # The indicator is 1 where every t_0 missing flag is set, 0 otherwise.
    ax.plot(all_missing, label='Missing Line (0 = present)', color='black', linestyle=':')
    ax.set_title(f'Experiment {item} — Fold {fold_id}')
    ax.set_xlabel('Time Index')
    ax.set_ylabel('Distance')
    ax.legend()
    ax.grid(True)
    fig.tight_layout()
    plt.show()
    plt.close(fig)  # many figures are created in this loop; release each one


def _report_rmse(run_predictions, item_label, overall_label):
    """Print the RMSE of every run and of all runs pooled; return the pooled RMSE."""
    for item, y_true_run, y_pred_run, _ in run_predictions:
        print(item)
        print(y_true_run.shape, y_pred_run.shape)
        run_rmse = np.sqrt(mean_squared_error(y_true_run, y_pred_run))
        print(f'{item_label} for item {item}: {run_rmse:.7f}')

    pooled_true = np.concatenate([p[1] for p in run_predictions])
    pooled_pred = np.concatenate([p[2] for p in run_predictions])
    pooled_rmse = np.sqrt(mean_squared_error(pooled_true, pooled_pred))
    print(f'{overall_label}: {pooled_rmse:.7f}')
    return pooled_rmse


for filter_name in ['original',
                    'rain_1', 'rain_2', 'rain_3',
                    'fog_1', 'fog_2', 'fog_3',
                    'dark_1', 'dark_2', 'dark_3',
                    'bright_1', 'bright_2', 'bright_3',
                    ]:
    for run_pair in fold_runs:
        print(f"Processing fold: {run_pair}")
        # One (run id, y_true, y_pred, all_missing) tuple per run of this fold.
        predictions = []
        for item in run_pair:
            fold_id = fold_map.get(item, None)
            print(fold_id)
            if fold_id is None:
                print(f"Skipping item {item} — no fold assigned.")
                continue

            # Load the experiment data
            current = pd.read_csv(f'dataset/imputation_2/Preprocessed_{filter_name}_run_{item}.csv', index_col=0)

            # ground truth
            y_all = current['distance_tracker_t_0'].values

            # features
            X_all = current[distance_columns]

            # 1 where every missing_*_t_0 flag is set, 0 where at least one is not.
            # NOTE(review): assumes a flag value of 1 means "no reading" — confirm.
            missing_cols = [col for col in current.columns if col.startswith("missing_") and col.endswith("_t_0")]
            binary_missing = (current[missing_cols].sum(axis=1) == len(missing_cols)).astype(int).values

            # Pick the model trained for this filter with this run's fold held out.
            print(f'Selecting model {fold_id-1}')
            model = next(
                (m for m, trained_filter, held_out in models
                 if trained_filter == filter_name and held_out == fold_id),
                None,
            )
            if model is None:
                raise LookupError(
                    f"No trained model for filter {filter_name!r} and fold {fold_id}; "
                    "run the training cell first."
                )

            # Predict
            y_pred = model.predict(X_all)
            predictions.append((item, y_all, y_pred, binary_missing))

            # Plot the raw (uncorrected) prediction
            _plot_prediction(item, fold_id, y_all, y_pred, binary_missing)

        if not predictions:
            # Every run of this pair was skipped; nothing to score or save.
            continue

        rmse = _report_rmse(predictions, 'RMSE', 'Overall fold RMSE')

        # Overwrite, in place, the predictions of the leading stretch in which
        # all t_0 flags are set; the stretch ends at the first 0 in the indicator.
        for item, _, y_pred_run, all_missing in predictions:
            if item in scene_a:
                fill_value = scene_a_fill
            elif item in scene_b:
                fill_value = scene_b_fill
            else:
                continue
            present_idx = np.flatnonzero(all_missing != 1)
            lead_len = present_idx[0] if present_idx.size else len(all_missing)
            y_pred_run[:lead_len] = fill_value

        rmse = _report_rmse(predictions, 'New RMSE', 'New overall fold RMSE')

        for item, y_true_run, y_pred_run, all_missing in predictions:
            _plot_prediction(item, fold_map.get(item, None), y_true_run, y_pred_run, all_missing)

        # Persist the corrected predictions, one CSV per (filter, run).
        for item, _, y_pred_run, _ in predictions:
            pred_df = pd.DataFrame(y_pred_run, columns=['XGBoost_Predicted'])
            pred_df.to_csv(f'{output_dir}/xgb_predictions_{filter_name}_exp{item}.csv', index=False)