# Feature Drift Visualization - Bridge Failure Prediction

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from src.models.train import drift_detection

## Load Reference and New Data

In [2]:
# Reference dataset: the baseline that the new data is compared against.
ref_df = pd.read_csv(filepath_or_buffer='../data/processed/infra_training.csv')
# New dataset: the feature data being checked for drift.
new_df = pd.read_csv(filepath_or_buffer='../data/processed/features.csv')

## Run Drift Detection

In [3]:
# Check every reference column except these two (presumably the row
# identifier and the prediction target -- confirm against the dataset schema).
features = [
    col for col in ref_df.columns
    if col not in {'structure_id', 'failure_within_1yr'}
]

# NOTE(review): arguments are passed as (new, reference, features); confirm
# this order against the drift_detection signature in src.models.train.
# NOTE(review): `features` comes from ref_df only; this assumes new_df has
# the same columns -- verify.
drift = drift_detection(new_df, ref_df, features)

# One row per key of the returned mapping, ordered by ascending p_value so
# the smallest p-values come first. (pandas is already imported in the first
# cell, so it is not re-imported here.)
drift_df = pd.DataFrame.from_dict(drift, orient='index').sort_values('p_value')

## Visualize Distributions of Most Drifted Features

In [4]:
# Overlay reference vs. new distributions for the first three rows of
# drift_df (the lowest p-values, given the ascending sort on 'p_value').
top_drifted = drift_df.head(3).index.tolist()
for feat in top_drifted:
    ref_vals = ref_df[feat].dropna()
    new_vals = new_df[feat].dropna()

    # Bin both samples over one shared range so their 30 bin edges coincide.
    # With independently derived ranges the overlaid bars have different
    # widths and positions and cannot be compared bar-for-bar.
    combined = pd.concat([ref_vals, new_vals])
    # Fall back to matplotlib's automatic range when there is no data at all.
    shared_range = (combined.min(), combined.max()) if len(combined) else None

    plt.figure(figsize=(8,4))
    plt.hist(ref_vals, bins=30, range=shared_range, alpha=0.5, label='Reference', density=True)
    plt.hist(new_vals, bins=30, range=shared_range, alpha=0.5, label='New', density=True)
    plt.title(f'Distribution Drift: {feat} (KS={drift_df.loc[feat, "ks_stat"]:.2f})')
    plt.xlabel(feat)
    # density=True normalises each histogram, so the y-axis is a density.
    plt.ylabel('Density')
    plt.legend()
    plt.show()