# Collision Risk AI - Time Series LSTM Tutorial

This notebook demonstrates how to interact with the new Time-Series LSTM pipeline programmatically. We will load the configuration and raw data, preprocess the data into sequences, load a previously trained model, and inspect its predictions and certainty scores.

In [None]:
import yaml
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from app.pipeline.datasource import JsonFileDataSource
from app.pipeline.preprocessor import TimeSeriesPreprocessor
from app.model.lstm_model import CollisionRiskLSTM, CertaintyEstimator

## 1. Load Configuration and Data

In [None]:
# Read the YAML configuration that drives the rest of the notebook
with open("config.yaml", "r") as config_file:
    config = yaml.safe_load(config_file)

# Fetch the raw records from the JSON file named in the configuration
json_path = config['data']['json_path']
print(f"Loading data from: {json_path}")
source = JsonFileDataSource(json_path)
raw_data = source.fetch_data()
print(f"Loaded {len(raw_data)} records.")

## 2. Preprocess into Sequences
The preprocessor groups updates by (SAT1, SAT2, TCA) and builds time-series sequences from them, returning the sequences together with their targets.

In [None]:
# Sequence length is read from the configuration rather than hard-coded here
seq_len = config['model']['sequence_length']
preprocessor = TimeSeriesPreprocessor(sequence_length=seq_len)
sequences, targets = preprocessor.process(raw_data)

print(f"Generated {len(sequences)} sequences.")

# Fail with an actionable message instead of a bare IndexError on sequences[0]
if len(sequences) == 0:
    raise ValueError(
        "Preprocessing produced no sequences; check the input data and "
        f"sequence_length={seq_len}."
    )

print(f"Sequence Shape: {sequences[0].shape}")

# Print one sample sequence for inspection
sample_idx = 0
print("Sample Sequence (PC, MinRng, TimeToTCA):")
print(sequences[sample_idx])

## 3. Load Trained Model

In [None]:
# Build the network with the architecture settings from the configuration
model = CollisionRiskLSTM(
    input_size=3,  # presumably the three per-step features (PC, MinRng, TimeToTCA) — confirm
    hidden_size=config['model']['hidden_size'],
    num_layers=config['model']['num_layers']
)

# Load weights (ensure you ran 'python main.py --mode train' first)
try:
    # map_location="cpu" lets a checkpoint saved on a GPU load on a CPU-only machine.
    # NOTE(review): torch.load unpickles the file; only load checkpoints you trust.
    state_dict = torch.load(config['output']['model_path'], map_location="cpu")
except FileNotFoundError:
    # Unbind the untrained model so cells guarded by `'model' in locals()` are
    # skipped instead of silently analysing randomly initialised weights.
    del model
    print("Model file not found! Please run training first.")
else:
    model.load_state_dict(state_dict)
    model.eval()
    print("Model loaded successfully.")

## 4. Analyze Predictions & Certainty

In [None]:
# Run the analysis only when a model is bound and there is data to score
if 'model' in locals() and len(sequences) > 0:
    X = torch.tensor(np.array(sequences), dtype=torch.float32)

    # Get Prediction
    # eval() is idempotent; calling it here keeps a re-run of this cell in
    # inference mode. NOTE(review): whether calculate_uncertainty restores the
    # model's mode afterwards is not visible from this notebook — confirm.
    model.eval()
    with torch.no_grad():
        preds = model(X).numpy()

    # Get Certainty (MC Dropout)
    # Demo only: score at most the first 10 sequences, never more than exist
    n_certainty = min(10, len(X))
    uncertainties = [
        CertaintyEstimator.calculate_uncertainty(model, X[i:i + 1])
        for i in range(n_certainty)
    ]

    # Plot predictions against targets for the first n_plot sequences
    n_plot = 50
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.plot(preds[:n_plot], label='Predicted Risk')
    ax.plot(targets[:n_plot], label='Actual Risk', alpha=0.5)
    ax.set_title(f"Risk Predictions vs Actuals (First {n_plot})")
    ax.set_xlabel("Sequence index")
    ax.set_ylabel("Risk")
    ax.legend()
    plt.show()

    # These are the first sequences in order, not a ranked "top" list
    print(f"First {n_certainty} Certainty Scores:", uncertainties)
else:
    print("Skipping analysis: no model or no sequences available.")