# 🏭 Smart Industrial Maintenance System — GPU Training Notebook

**FSE 570 Capstone** | Arizona State University

This notebook runs the complete training pipeline on Google Colab with GPU acceleration.

---

## 1. Setup Environment

In [1]:
# Check GPU availability
# Prints the installed PyTorch version, whether CUDA is usable, and (when it is)
# the name and total memory of device 0.
import torch
print(f"PyTorch: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    # The device-properties field is `total_memory` (bytes). `total_mem` does not
    # exist and raises AttributeError as soon as this branch runs on a GPU.
    print(f"VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")

PyTorch: 2.8.0+cpu
CUDA available: False


In [2]:
# Install dependencies
!pip install -q xgboost lifelines shap pulp kaggle

  You can safely remove it manually.
  You can safely remove it manually.
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
mediapipe 0.10.21 requires numpy<2, but you have numpy 2.4.2 which is incompatible.

[notice] A new release of pip is available: 25.3 -> 26.0.1
[notice] To update, run: C:\Users\devas\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


In [None]:
# Clone your project repo (UPDATE THE URL)
# Replace YOUR_USERNAME with the account that hosts the repository before running.
!git clone https://github.com/YOUR_USERNAME/Capstone-Project.git
# Switch the working directory into the clone; later cells import `config` and
# `src.*` and use relative paths (presumably resolved from the repo root — confirm).
%cd Capstone-Project

In [None]:
# Setup Kaggle credentials
# Upload your kaggle.json or set environment variables
import os
from getpass import getpass

# Never paste real credentials into the notebook source: they are saved with the
# .ipynb and leak as soon as the file is committed or shared.
# Resolution order:
#   1. an uploaded ~/.kaggle/kaggle.json (nothing to do here),
#   2. KAGGLE_USERNAME / KAGGLE_KEY already present in the environment,
#   3. an interactive prompt whose input is not echoed or stored in the notebook.
_kaggle_json = os.path.join(os.path.expanduser('~'), '.kaggle', 'kaggle.json')
if not os.path.isfile(_kaggle_json):
    for _kaggle_var in ('KAGGLE_USERNAME', 'KAGGLE_KEY'):
        if not os.environ.get(_kaggle_var):
            os.environ[_kaggle_var] = getpass(f'{_kaggle_var}: ')

## 2. Download & Preprocess Data

In [None]:
# Project configuration module plus the data-layer helpers used in this section.
import config
from src.data.download import download_cmapss, load_cmapss_train
from src.data.preprocess import DataPreprocessor
from src.data.feature_engineering import FeatureEngineer
from src.data.synthetic_generator import SyntheticDataGenerator

# Download C-MAPSS dataset
# NOTE(review): presumably relies on the Kaggle credentials set up earlier — confirm.
download_cmapss()
df_train = load_cmapss_train()
print(f"Training data: {df_train.shape}")
# Last expression of the cell, so the notebook renders the first rows of df_train.
df_train.head()

In [None]:
# Generate synthetic data
# generate_all() is given the training frame and returns three objects, bound here
# to `logs`, `context` and `schedule`.
# NOTE(review): none of the three is referenced again in the visible cells; the call
# may matter only for side effects (e.g. files written by the generator) — confirm.
gen = SyntheticDataGenerator()
logs, context, schedule = gen.generate_all(df_train)

In [None]:
# Feature engineering (for XGBoost)
fe = FeatureEngineer()
# A copy is passed so that df_train itself is left untouched for the later
# preprocessing and survival cells, whatever engineer_features() does to its input.
df_engineered = fe.engineer_features(df_train.copy())
print(f"Engineered features: {df_engineered.shape}")

In [None]:
# Preprocessing pipeline (for LSTM models)
# `os` is imported here as well so this cell does not silently depend on the
# (optional) Kaggle-credentials cell having been executed first.
import os

import numpy as np

# Fit the preprocessor on the training frame; `data` maps a split name to a dict
# of arrays (the 'train' and 'val' splits and their 'X' / 'y_rul' / 'y_binary'
# keys are read below).
preprocessor = DataPreprocessor()
data = preprocessor.fit_transform(df_train)
preprocessor.save()

# Persist every split as <split>_data.npz. Create the target directory first:
# np.savez_compressed does not create missing parent directories.
os.makedirs(config.PROCESSED_DATA_DIR, exist_ok=True)
for split_name, split_data in data.items():
    np.savez_compressed(
        os.path.join(config.PROCESSED_DATA_DIR, f"{split_name}_data.npz"),
        **split_data
    )

# Convenience handles used by the model-training cells.
X_train = data['train']['X']
y_train_rul = data['train']['y_rul']
y_train_binary = data['train']['y_binary']
X_val = data['val']['X']
y_val_binary = data['val']['y_binary']

# The feature count is read from axis 2 of X_train and passed to the LSTM models
# as input_dim.
n_features = X_train.shape[2]
print(f"Sequences: {X_train.shape}, Features: {n_features}")

## 3. Train LSTM Autoencoder (Anomaly Detection)

In [None]:
from src.models.autoencoder import LSTMAutoencoder, AutoencoderTrainer

# Train on healthy data only: a sequence counts as healthy when its RUL label is
# above half of config.MAX_RUL. The same cutoff selects the validation subset.
healthy_rul_cutoff = config.MAX_RUL * 0.5
healthy_mask = y_train_rul > healthy_rul_cutoff
val_healthy_mask = data['val']['y_rul'] > healthy_rul_cutoff
X_healthy = X_train[healthy_mask]
X_val_ae = X_val[val_healthy_mask]

print(f"Training autoencoder on {len(X_healthy)} healthy samples")
print(f"Device: {config.DEVICE}")

# Build, fit and persist the autoencoder.
autoencoder = LSTMAutoencoder(input_dim=n_features)
ae_trainer = AutoencoderTrainer(autoencoder, epochs=50)
ae_trainer.train(X_healthy, X_val_ae)
ae_trainer.save_model()

In [None]:
# Visualize training loss
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(10, 4))

# Always draw the training curve; add the validation curve only when the trainer
# recorded one.
loss_curves = [('Train Loss', ae_trainer.train_history, '#3a7bd5')]
if ae_trainer.val_history:
    loss_curves.append(('Val Loss', ae_trainer.val_history, '#FF6B6B'))
for curve_label, curve_values, curve_color in loss_curves:
    ax.plot(curve_values, label=curve_label, color=curve_color)

ax.set(xlabel='Epoch', ylabel='MSE Loss', title='Autoencoder Training')
ax.legend()
ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

## 4. Train LSTM Failure Predictor

In [None]:
from src.models.lstm_predictor import LSTMPredictor, PredictorTrainer

# Build the failure predictor with the same feature count as the sequence data,
# train it on the binary labels with the validation split supplied alongside,
# then persist it via save_model().
predictor = LSTMPredictor(input_dim=n_features)
pred_trainer = PredictorTrainer(predictor, epochs=50)
pred_trainer.train(X_train, y_train_binary, X_val, y_val_binary)
pred_trainer.save_model()

In [None]:
# Visualize predictor training
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 4))

# Left panel: training loss per epoch.
ax1.plot(pred_trainer.train_history, label='Train Loss', color='#3a7bd5')
ax1.set(xlabel='Epoch', ylabel='Loss', title='Predictor Training Loss')
ax1.legend()
ax1.grid(True, alpha=0.3)

# Right panel: validation metrics, drawn only when some were recorded.
val_records = pred_trainer.val_history
if val_records:
    # One x position per validation record: 5, 10, 15, ...
    epochs = [5 * k for k in range(1, len(val_records) + 1)]
    f1s = [record['f1'] for record in val_records]
    aucs = [record['auc'] for record in val_records]
    ax2.plot(epochs, f1s, label='F1 Score', color='#44BB44', marker='o')
    ax2.plot(epochs, aucs, label='AUC', color='#FF6B6B', marker='s')
    ax2.set(xlabel='Epoch', ylabel='Score', title='Validation Metrics')
    ax2.legend()
    ax2.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 5. Train XGBoost RUL Model

In [None]:
from src.models.xgboost_rul import XGBoostRUL

# Every column except the identifiers and the target is a model feature.
exclude_cols = ['unit_id', 'cycle', 'RUL']
feature_cols = [col for col in df_engineered.columns if col not in exclude_cols]

# Split by engine unit, not by row, after a seeded shuffle of the unit ids:
# the first 70% of units train the model, the next 15% validate it.
unit_ids = df_engineered['unit_id'].unique()
np.random.seed(config.RANDOM_SEED)
np.random.shuffle(unit_ids)
n = len(unit_ids)
train_end = int(n * 0.7)
val_end = int(n * 0.85)
train_units = unit_ids[:train_end]
val_units = unit_ids[train_end:val_end]

# Build each row mask once and reuse it for both features and target.
in_train_units = df_engineered['unit_id'].isin(train_units)
in_val_units = df_engineered['unit_id'].isin(val_units)

X_train_xgb = df_engineered.loc[in_train_units, feature_cols]
y_train_xgb = df_engineered.loc[in_train_units, 'RUL'].values
X_val_xgb = df_engineered.loc[in_val_units, feature_cols]
y_val_xgb = df_engineered.loc[in_val_units, 'RUL'].values

# Fit, score on the validation units, and persist.
xgb_model = XGBoostRUL()
xgb_model.train(X_train_xgb, y_train_xgb, X_val_xgb, y_val_xgb)
xgb_model.evaluate(X_val_xgb, y_val_xgb)
xgb_model.save()

## 6. Bayesian Survival Analysis

In [None]:
from src.models.bayesian_survival import BayesianSurvival

# Candidate covariates: the entries of config.ACTIVE_SENSORS plus the 'cycle' column.
survival_features = config.ACTIVE_SENSORS + ['cycle']
# Keep only the candidates actually present in df_train, then append 'RUL'.
# NOTE(review): 'RUL' is appended without a presence check — assumes df_train
# carries that column; confirm.
survival_cols = [c for c in survival_features if c in df_train.columns] + ['RUL']

# train_units / val_units come from the XGBoost cell, so that cell must run first;
# reusing them keeps the unit-level split identical across both models.
df_survival_train = df_train[df_train['unit_id'].isin(train_units)][['unit_id'] + survival_cols]

survival_model = BayesianSurvival()
survival_model.fit(df_survival_train)

# Evaluate on the validation units, then persist the fitted model.
df_survival_val = df_train[df_train['unit_id'].isin(val_units)][['unit_id'] + survival_cols]
survival_model.evaluate(df_survival_val)
survival_model.save()

## 7. Explainability (SHAP & Attention)

In [None]:
from src.explainability.shap_analysis import SHAPExplainer
from src.explainability.attention_viz import AttentionVisualizer

# SHAP for XGBoost
# SHAP values are computed on the XGBoost validation features (X_val_xgb).
shap_explainer = SHAPExplainer(xgb_model, model_type='xgboost')
shap_explainer.compute_shap_values(X_val_xgb)
# Both plots are given a save_path with a bare file name.
# NOTE(review): presumably written relative to the working directory — confirm.
shap_explainer.plot_global_importance(save_path='shap_importance.png')
shap_explainer.plot_beeswarm(save_path='shap_beeswarm.png')
# The ranking is stored but not displayed, because this is an assignment rather
# than a bare last expression.
ranking = shap_explainer.get_sensor_ranking()

In [None]:
# Attention visualization
from src.models.lstm_predictor import load_predictor
# NOTE(review): load_predictor() takes no arguments here; presumably it restores the
# model written by pred_trainer.save_model() from a default location — confirm.
loaded_predictor = load_predictor()

# Both plots use the 'test' split of `data`; the second also receives the binary labels.
attn_viz = AttentionVisualizer(loaded_predictor)
attn_viz.plot_attention_heatmap(data['test']['X'], save_path='attention_heatmap.png')
attn_viz.plot_average_attention(data['test']['X'], data['test']['y_binary'],
                                 save_path='attention_comparison.png')

## 8. MILP Optimization & Simulation

In [None]:
from src.optimization.milp_scheduler import MaintenanceScheduler

# Get predictions for test data
test_split = data['test']
failure_proba, _ = loaded_predictor.predict_proba(torch.FloatTensor(test_split['X']))

# Aggregate per unit: each unit's risk is the prediction at the last position
# among the test sequences belonging to that unit.
test_unit_ids = test_split['unit_ids']
unit_risks = {}
for uid in np.unique(test_unit_ids):
    mask = test_unit_ids == uid
    unit_risks[int(uid)] = float(failure_proba[mask][-1])

# Run MILP optimization
scheduler = MaintenanceScheduler()
engine_names = {machine_id: f'Engine-{machine_id:03d}' for machine_id in unit_risks}
result = scheduler.create_schedule(
    machine_risks=unit_risks,
    machine_names=engine_names,
)
result['schedule']

In [None]:
# Monte Carlo simulation
from src.evaluation.simulation import MaintenanceSimulator

# Simulator configured for 50 machines over 100 periods; run_comparison() is asked
# for 100 simulations and returns a pair, bound to sim_df and sim_summary.
# NOTE(review): sim_summary is never displayed or used in the visible cells.
sim = MaintenanceSimulator(n_machines=50, n_periods=100)
sim_df, sim_summary = sim.run_comparison(n_simulations=100)
sim.plot_comparison(sim_df, save_path='simulation_comparison.png')

## 9. Download Results

Download the trained models and results back to your local machine.

In [None]:
# Save all results to a zip for download
import os
import shutil
import zipfile

# Archive the models/saved directory (relative to the working directory) as
# capstone_results.zip.
shutil.make_archive('capstone_results', 'zip', '.', 'models/saved')

# The figures produced by the explainability and simulation cells are saved under
# bare file names, not under models/saved, so the archive above leaves them out.
# Append whichever of them exist so the download really contains the results.
# NOTE(review): assumes the save_path values resolve against the working directory;
# figures written elsewhere are skipped by the isfile() check.
result_figures = [
    'shap_importance.png',
    'shap_beeswarm.png',
    'attention_heatmap.png',
    'attention_comparison.png',
    'simulation_comparison.png',
]
with zipfile.ZipFile('capstone_results.zip', 'a', zipfile.ZIP_DEFLATED) as results_zip:
    for figure_path in result_figures:
        if os.path.isfile(figure_path):
            results_zip.write(figure_path)

# In Colab, download the zip:
try:
    from google.colab import files
    files.download('capstone_results.zip')
except ImportError:
    # Outside Colab there is nothing to trigger; the archive stays on disk.
    print('Not in Colab. Files saved locally.')

---
**Project**: FSE 570 Data Science Capstone | Arizona State University