In [21]:
import pandas as pd
import numpy as np
import joblib
import time
import random
import warnings
import os

warnings.filterwarnings("ignore")

# --- Configuration ---
# Input artifacts, resolved relative to the current working directory.
# The CSV is read with pandas; the other four are loaded with joblib.
DATASET_CSV = 'smartphone_unified_dataset_v2.csv'
SCALER_FILE = 'multi_task_scaler.joblib'
SCREENER_MODEL_FILE = 'prediction_model_lgbm_final.joblib'
WHY_MODEL_FILE = 'diagnostician_why_model.joblib'
WHEN_MODEL_FILE = 'diagnostician_when_model.joblib'

# Length of the rolling window handed to the models. The simulation advances
# 4 readings per simulated day, so 56 timesteps correspond to 14 days.
SEQUENCE_TIMESTEPS = 56 # 14 days of data
# Screener risk scores strictly above this value trigger the diagnostician stage.
TUNED_THRESHOLD = 0.0149 # The optimal threshold we found for the screener

def run_final_simulation():
    """
    Run a day-by-day replay of one failing device through the two-stage agent.

    Stages:
    1. A "screener" model scores the risk of the current window.
    2. If the score exceeds TUNED_THRESHOLD, "diagnostician" models predict
       the failure cause and the number of days until failure.

    The function loads the scaler, the three models and the dataset from the
    paths in the module-level configuration, picks a random device whose
    'failure_type' is non-zero, replays up to 30 days of readings before its
    failure point, prints one report line per simulated day and finally
    compares the last predicted cause with the ground truth.

    Returns:
        None. All results are printed. The function returns early (after
        printing a message) when the files cannot be loaded or when no
        usable failure case can be built from the dataset.
    """
    # Function-scope import so the rolling window does not depend on the
    # file's import block.
    from collections import deque

    readings_per_day = 4   # the story is advanced 4 rows per simulated day
    story_days = 30        # how far before the failure the replay starts
    failure_map = {0: "Healthy", 1: "Battery Failure", 2: "CPU Overheating", 3: "Memory Failure"}
    # Loop-invariant: the columns fed to the scaler, in the order used below.
    feature_columns = [
        'battery_level', 'cpu_usage_percent', 'memory_usage_percent',
        'storage_usage_percent', 'app_crashes', 'network_signal_strength_dbm',
        'screen_on_time_minutes', 'fast_charging_active',
        'speaker_volume_percent', 'ambient_temp_c',
    ]

    # --- 1. Load All Models and Data ---
    print("--- Loading all models and data for the final simulation ---")
    try:
        # NOTE: joblib.load unpickles the file; only load artifacts you trust.
        scaler = joblib.load(SCALER_FILE)
        screener_model = joblib.load(SCREENER_MODEL_FILE)
        why_model = joblib.load(WHY_MODEL_FILE)
        when_model = joblib.load(WHEN_MODEL_FILE)
        df = pd.read_csv(DATASET_CSV, parse_dates=['timestamp'])
        print("✅ All files loaded successfully.")
    except Exception as e:
        # Top-level boundary of the script: report and stop instead of crashing.
        print(f"❌ Error loading files: {e}")
        return

    # --- 2. Isolate a Real Failure Case for the Test ---
    print("\n--- Randomly selecting a real failure case for the test ---")
    # Convert to a plain list: random.choice is only specified for sequences,
    # and an empty selection must be reported rather than raise IndexError.
    failing_device_ids = df.loc[df['failure_type'] != 0, 'device_id'].unique().tolist()
    if not failing_device_ids:
        print("❌ No device with a non-zero 'failure_type' found; nothing to simulate.")
        return
    device_to_test = random.choice(failing_device_ids)
    device_df = (
        df[df['device_id'] == device_to_test]
        .sort_values(by='timestamp')
        .reset_index(drop=True)
    )

    # First row at which the device is considered failed.
    failure_rows = device_df.index[device_df['days_until_failure'] < 1]
    if len(failure_rows) == 0:
        print(f"❌ Device '{device_to_test}' has no row with days_until_failure < 1; cannot build a test story.")
        return
    failure_point_idx = int(failure_rows.min())
    # Start the story 30 days before the failure, clamped to the first row.
    test_window_start_idx = max(0, failure_point_idx - story_days * readings_per_day)

    test_story_df = device_df.iloc[test_window_start_idx:failure_point_idx]
    if test_story_df.empty:
        print(f"❌ Device '{device_to_test}' has no readings before its failure point; cannot build a test story.")
        return

    ground_truth_failure_type_code = int(test_story_df['failure_type'].max())
    ground_truth_failure_type = failure_map.get(
        ground_truth_failure_type_code, f"Unknown ({ground_truth_failure_type_code})"
    )

    print(f"Test case selected: '{device_to_test}'. The ground truth is a '{ground_truth_failure_type}'.")

    # --- 3. Run the Day-by-Day Simulation ---
    print("\n--- Starting Final Live Simulation ---")
    print("-" * 100)

    # Pre-fill the window with the readings that precede the story. The start
    # is clamped to 0: a negative iloc start would wrap around from the end
    # of the frame and drop the warm-up rows that are actually available.
    warmup_start_idx = max(0, test_window_start_idx - SEQUENCE_TIMESTEPS)
    initial_healthy_data = device_df.iloc[warmup_start_idx:test_window_start_idx]
    # deque(maxlen=...) discards the oldest reading automatically in O(1).
    live_buffer = deque(initial_healthy_data.to_dict('records'), maxlen=SEQUENCE_TIMESTEPS)

    last_prediction = "None"

    # Header for our dashboard
    print(f"{'Date':<12} | {'True Days to Fail':<20} | {'AI Agent Report'}")
    print("-" * 100)

    for i in range(0, len(test_story_df), readings_per_day):  # Step by 1 day
        day_data = test_story_df.iloc[i:i + readings_per_day]
        live_buffer.extend(day_data.to_dict('records'))

        # The models need a full window; skip days until enough history exists.
        if len(live_buffer) != SEQUENCE_TIMESTEPS:
            continue

        current_date = day_data['timestamp'].iloc[-1]
        true_days_until_failure = day_data['days_until_failure'].iloc[-1]

        # Prepare data for models
        sequence_df = pd.DataFrame(list(live_buffer))
        scaled_features = scaler.transform(sequence_df[feature_columns])
        # Flatten the (timesteps, features) window into a single model row.
        flattened_sequence = scaled_features.reshape(1, -1)

        # --- Run the Two-Stage AI Agent ---
        status_report = "Prediction: Normal Operation"

        # Stage 1: The Screener
        risk_score = screener_model.predict_proba(flattened_sequence)[:, 1][0]

        if risk_score > TUNED_THRESHOLD:
            # Stage 2: The Diagnostician (only run if screener finds a risk)
            predicted_reason_code = why_model.predict(flattened_sequence)[0]
            predicted_days = when_model.predict(flattened_sequence)[0]

            predicted_reason = failure_map.get(predicted_reason_code, "Unknown")
            status_report = f"Prediction: Failure imminent in ~{predicted_days:.1f} days. Predicted Cause: '{predicted_reason}'"
            last_prediction = predicted_reason

        print(f"{current_date.strftime('%Y-%m-%d'):<12} | {true_days_until_failure:<20.1f} | {status_report}")
        time.sleep(0.1)  # pacing only, so the output reads like a live feed

    # --- 4. Final Verdict ---
    print("-" * 100)
    print("\n💥 DEVICE FAILURE OCCURRED 💥")
    print("\n--- FINAL TEST VERDICT ---")
    print(f"Ground Truth Failure Cause: '{ground_truth_failure_type}'")
    print(f"Model's Final Prediction:   '{last_prediction}'")

    # The verdict is based on the most recent alert only; "None" means the
    # screener never crossed the threshold during the replay.
    if last_prediction == ground_truth_failure_type:
        print("\n✅ SUCCESS: The complete AI agent correctly identified the failure type.")
    else:
        print("\n❌ FAILURE: The AI agent misdiagnosed the failure type.")

# Run the simulation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    run_final_simulation()

--- Loading all models and data for the final simulation ---
✅ All files loaded successfully.

--- Randomly selecting a real failure case for the test ---
Test case selected: 'device_047'. The ground truth is a 'Memory Failure'.

--- Starting Final Live Simulation ---
----------------------------------------------------------------------------------------------------
Date         | True Days to Fail    | AI Agent Report
----------------------------------------------------------------------------------------------------
2023-11-08   | 30.0                 | Prediction: Normal Operation
2023-11-09   | 29.0                 | Prediction: Normal Operation
2023-11-10   | 28.0                 | Prediction: Failure imminent in ~18.8 days. Predicted Cause: 'Memory Failure'
2023-11-11   | 27.0                 | Prediction: Normal Operation
2023-11-12   | 26.0                 | Prediction: Normal Operation
2023-11-13   | 25.0                 | Prediction: Normal Operation
2023-11-14   | 24.0     