In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
import pickle
import json




In [2]:
def generate_synthetic_data(num_samples=10000):
    """Generate synthetic vehicle telematics with stronger feature correlations"""
    
    print(f"\n[INFO] Generating {num_samples} synthetic samples with enhanced correlations...")
    
    components = ['engine', 'transmission', 'brakes', 'battery', 'cooling_system']
    severities = ['low', 'medium', 'high', 'critical']
    
    data = []
    
    for i in range(num_samples):
        # Select component that will fail
        failing_component = np.random.choice(components)
        component_index = components.index(failing_component)
        
        # Generate time to failure with more variation
        time_to_failure = np.random.beta(2, 5) * 365  # More samples near failure
        
        # Stronger health factor correlation
        health_factor = max(0, 1 - (time_to_failure / 365))
        health_squared = health_factor ** 2  # Non-linear degradation
        
        # Base noise
        noise = lambda scale=1.0: np.random.normal(0, 0.05 * scale)
        
        # Component-specific feature generation with stronger signals
        if failing_component == 'engine':
            engine_degradation = health_squared * 50 + noise(10)
            rpm_variance = health_squared * 1500 + noise(200)
            oil_drop = health_squared * 30 + noise(5)
            vibration_increase = health_squared * 12 + noise(2)
            emission_increase = health_squared * 120 + noise(20)
        else:
            engine_degradation = noise(5)
            rpm_variance = noise(100)
            oil_drop = noise(2)
            vibration_increase = noise(1)
            emission_increase = noise(10)
        
        if failing_component == 'transmission':
            trans_vibration = health_squared * 15 + noise(2)
            acceleration_issue = health_squared * 8 + noise(1)
        else:
            trans_vibration = noise(1)
            acceleration_issue = noise(0.5)
        
        if failing_component == 'brakes':
            brake_degradation = health_squared * 10 + noise(1)
            noise_increase = health_squared * 30 + noise(5)
        else:
            brake_degradation = noise(0.5)
            noise_increase = noise(3)
        
        if failing_component == 'battery':
            voltage_drop = health_squared * 3 + noise(0.2)
        else:
            voltage_drop = noise(0.1)
        
        if failing_component == 'cooling_system':
            coolant_drop = health_squared * 50 + noise(5)
            temp_increase = health_squared * 40 + noise(5)
        else:
            coolant_drop = noise(2)
            temp_increase = noise(3)
        
        # Generate features with stronger correlations
        sample = {
            # Telematics data
            'engine_temp': 75 + np.random.uniform(-5, 15) + engine_degradation + temp_increase,
            'rpm': 2000 + np.random.uniform(-500, 2000) + rpm_variance,
            'fuel_consumption': 7 + np.random.uniform(-1, 3) + (engine_degradation * 0.1),
            'battery_voltage': 13.5 + np.random.uniform(-0.5, 0.5) - voltage_drop,
            'oil_pressure': 45 + np.random.uniform(-10, 15) - oil_drop,
            'coolant_level': 90 + np.random.uniform(-5, 10) - coolant_drop,
            
            # Usage patterns (correlated with failure)
            'mileage': np.random.uniform(50000, 250000) + (health_factor * 100000),
            'avg_speed': 50 + np.random.uniform(-20, 30),
            'acceleration': np.random.uniform(2, 8) + acceleration_issue,
            'braking': np.random.uniform(2, 8) + brake_degradation,
            'trip_duration': np.random.uniform(20, 180),
            
            # Environmental
            'ambient_temp': np.random.uniform(-5, 40),
            'humidity': np.random.uniform(20, 90),
            'road_condition': np.random.beta(2, 2),  # More centered distribution
            
            # Historical (correlated with failure)
            'vehicle_age': np.random.uniform(1, 15) + (health_factor * 8),
            'last_maintenance_days': np.random.uniform(30, 300) + (health_factor * 200),
            'previous_failures': np.random.poisson(health_factor * 3),
            
            # Sensor readings
            'vibration': np.random.uniform(1, 5) + vibration_increase + trans_vibration,
            'noise_level': 50 + np.random.uniform(-10, 20) + noise_increase,
            'emissions': 80 + np.random.uniform(-20, 40) + emission_increase,
            
            # Outputs - with clearer severity mapping
            'component_failure': component_index,
            'time_to_failure': time_to_failure,
            'severity': int(health_factor * 3.99)  # 0-3 mapping to ensure coverage
        }
        
        data.append(sample)
    
    df = pd.DataFrame(data)
    
    # Add interaction features (feature engineering)
    df['temp_rpm_interaction'] = df['engine_temp'] * df['rpm'] / 10000
    df['age_mileage_ratio'] = df['vehicle_age'] / (df['mileage'] / 10000 + 1)
    df['maintenance_mileage'] = df['last_maintenance_days'] * df['mileage'] / 100000
    df['vibration_noise'] = df['vibration'] * df['noise_level'] / 100
    
    print(f"[SUCCESS] Generated {len(df)} samples with {len(df.columns)} features")
    
    return df

# Generate more data
df = generate_synthetic_data(10000)

# Save raw data
df.to_csv('data/vehicle_failure_data.csv', index=False)
print(f"[SUCCESS] Saved raw data to 'data/vehicle_failure_data.csv'")


[INFO] Generating 10000 synthetic samples with enhanced correlations...
[SUCCESS] Generated 10000 samples with 27 features
[SUCCESS] Saved raw data to 'data/vehicle_failure_data.csv'


In [3]:
print("\n[INFO] Preprocessing data...")

# Separate features and targets
feature_columns = [
    'engine_temp', 'rpm', 'fuel_consumption', 'battery_voltage', 'oil_pressure', 'coolant_level',
    'mileage', 'avg_speed', 'acceleration', 'braking', 'trip_duration',
    'ambient_temp', 'humidity', 'road_condition',
    'vehicle_age', 'last_maintenance_days', 'previous_failures',
    'vibration', 'noise_level', 'emissions',
    'temp_rpm_interaction', 'age_mileage_ratio', 'maintenance_mileage', 'vibration_noise'
]

X = df[feature_columns].values
y_component = df['component_failure'].values
y_time = df['time_to_failure'].values / 365.0  # Normalize to 0-1
y_severity = df['severity'].values

# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Save scaler
with open('scaler.pkl', 'wb') as f:
    pickle.dump(scaler, f)
print("[SUCCESS] Saved scaler to 'scaler.pkl'")


[INFO] Preprocessing data...
[SUCCESS] Saved scaler to 'scaler.pkl'


In [4]:
def create_sequences(X, y_comp, y_time, y_sev, seq_length=15):
    """Create sequences for LSTM input"""
    
    print(f"[INFO] Creating sequences with length {seq_length}...")
    
    X_seq, y_comp_seq, y_time_seq, y_sev_seq = [], [], [], []
    
    for i in range(len(X) - seq_length):
        X_seq.append(X[i:i+seq_length])
        y_comp_seq.append(y_comp[i+seq_length-1])
        y_time_seq.append(y_time[i+seq_length-1])
        y_sev_seq.append(y_sev[i+seq_length-1])
    
    return (
        np.array(X_seq),
        np.array(y_comp_seq),
        np.array(y_time_seq),
        np.array(y_sev_seq)
    )

sequence_length = 15  # Longer sequences for better pattern recognition
X_sequences, y_comp_seq, y_time_seq, y_sev_seq = create_sequences(
    X_scaled, y_component, y_time, y_severity, sequence_length
)

print(f"[SUCCESS] Created {len(X_sequences)} sequences")
print(f"[INFO] Sequence shape: {X_sequences.shape}")

[INFO] Creating sequences with length 15...
[SUCCESS] Created 9985 sequences
[INFO] Sequence shape: (9985, 15, 24)


In [5]:
X_train, X_test, y_comp_train, y_comp_test, y_time_train, y_time_test, y_sev_train, y_sev_test = train_test_split(
    X_sequences, y_comp_seq, y_time_seq, y_sev_seq,
    test_size=0.15,  # Slightly smaller test set for more training data
    random_state=42,
    stratify=y_comp_seq  # Stratify by component
)

print(f"\n[INFO] Train set: {len(X_train)} samples")
print(f"[INFO] Test set: {len(X_test)} samples")


[INFO] Train set: 8487 samples
[INFO] Test set: 1498 samples


In [6]:
print("\n[INFO] Building optimized LSTM model architecture...")

def build_optimized_lstm(input_shape, num_components=5, num_severities=4):
    """Build deeper, more powerful LSTM model"""
    
    # Input layer
    inputs = layers.Input(shape=input_shape, name='input')
    
    # Bidirectional LSTM layers for better context
    x = layers.Bidirectional(layers.LSTM(256, return_sequences=True))(inputs)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.4)(x)
    
    x = layers.Bidirectional(layers.LSTM(128, return_sequences=True))(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.4)(x)
    
    x = layers.LSTM(64, return_sequences=False)(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.3)(x)
    
    # Shared dense layers
    x = layers.Dense(128, activation='relu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.3)(x)
    
    x = layers.Dense(64, activation='relu')(x)
    x = layers.Dropout(0.2)(x)
    
    # Task-specific heads with additional layers
    # Component failure head
    comp_x = layers.Dense(32, activation='relu', name='comp_dense')(x)
    component_output = layers.Dense(num_components, activation='softmax', name='component_failure')(comp_x)
    
    # Time to failure head
    time_x = layers.Dense(32, activation='relu', name='time_dense')(x)
    time_output = layers.Dense(1, activation='sigmoid', name='time_to_failure')(time_x)
    
    # Severity head
    sev_x = layers.Dense(32, activation='relu', name='sev_dense')(x)
    severity_output = layers.Dense(num_severities, activation='softmax', name='severity')(sev_x)
    
    # Create model
    model = models.Model(
        inputs=inputs,
        outputs=[component_output, time_output, severity_output]
    )
    
    return model


[INFO] Building optimized LSTM model architecture...


In [7]:
input_shape = (sequence_length, len(feature_columns))
model = build_optimized_lstm(input_shape)




In [8]:
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.0005),  # Lower learning rate
    loss={
        'component_failure': 'sparse_categorical_crossentropy',
        'time_to_failure': 'mse',
        'severity': 'sparse_categorical_crossentropy'
    },
    loss_weights={
        'component_failure': 1.2,  # Increased weight
        'time_to_failure': 0.3,    # Decreased weight (regression is easier)
        'severity': 1.0
    },
    metrics={
        'component_failure': ['accuracy'],
        'time_to_failure': ['mae'],
        'severity': ['accuracy']
    }
)

print("[SUCCESS] Model architecture built")
print(f"[INFO] Total parameters: {model.count_params():,}")
model.summary()

[SUCCESS] Model architecture built
[INFO] Total parameters: 1,341,034
Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input (InputLayer)          [(None, 15, 24)]             0         []                            
                                                                                                  
 bidirectional (Bidirection  (None, 15, 512)              575488    ['input[0][0]']               
 al)                                                                                              
                                                                                                  
 batch_normalization (Batch  (None, 15, 512)              2048      ['bidirectional[0][0]']       
 Normalization)                                                                                   
                        

In [9]:
print("\n[INFO] Starting model training...")

callbacks = [
    EarlyStopping(
        monitor='val_loss',
        patience=20,  # More patience
        restore_best_weights=True,
        verbose=1
    ),
    ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.5,
        patience=8,
        min_lr=1e-7,
        verbose=1
    ),
    ModelCheckpoint(
        'best_model.keras',
        monitor='val_loss',
        save_best_only=True,
        verbose=1
    )
]

# Train model with more epochs
history = model.fit(
    X_train,
    {
        'component_failure': y_comp_train,
        'time_to_failure': y_time_train,
        'severity': y_sev_train
    },
    validation_data=(
        X_test,
        {
            'component_failure': y_comp_test,
            'time_to_failure': y_time_test,
            'severity': y_sev_test
        }
    ),
    epochs=100,  # More epochs
    batch_size=64,  # Larger batch size
    callbacks=callbacks,
    verbose=1
)

print("\n[SUCCESS] Training completed!")

print("\n[SUCCESS] Training completed!")


[INFO] Starting model training...
Epoch 1/100


Epoch 1: val_loss improved from inf to 2.83283, saving model to best_model.keras
Epoch 2/100
Epoch 2: val_loss improved from 2.83283 to 1.82122, saving model to best_model.keras
Epoch 3/100
Epoch 3: val_loss improved from 1.82122 to 1.00932, saving model to best_model.keras
Epoch 4/100
Epoch 4: val_loss improved from 1.00932 to 0.77425, saving model to best_model.keras
Epoch 5/100
Epoch 5: val_loss improved from 0.77425 to 0.69737, saving model to best_model.keras
Epoch 6/100
Epoch 6: val_loss improved from 0.69737 to 0.65695, saving model to best_model.keras
Epoch 7/100
Epoch 7: val_loss improved from 0.65695 to 0.61609, saving model to best_model.keras
Epoch 8/100
Epoch 8: val_loss improved from 0.61609 to 0.59291, saving model to best_model.keras
Epoch 9/100
Epoch 9: val_loss improved from 0.59291 to 0.57757, saving model to best_model.keras
Epoch 10/100
Epoch 10: val_loss did not improve from 0.57757
Epoch 11/100
Epoch 11: val_loss d

In [10]:
print("\n[INFO] Evaluating model on test set...")

# Evaluate
test_results = model.evaluate(
    X_test,
    {
        'component_failure': y_comp_test,
        'time_to_failure': y_time_test,
        'severity': y_sev_test
    },
    verbose=0
)

print("\n" + "=" * 80)
print("TEST SET RESULTS")
print("=" * 80)
print(f"Total Loss: {test_results[0]:.4f}")
print(f"Component Failure Loss: {test_results[1]:.4f}")
print(f"Time to Failure Loss: {test_results[2]:.4f}")
print(f"Severity Loss: {test_results[3]:.4f}")
print(f"\n>>> Component Failure Accuracy: {test_results[4]*100:.2f}%")
print(f">>> Time to Failure MAE: {test_results[5]:.4f} (normalized)")



[INFO] Evaluating model on test set...

TEST SET RESULTS
Total Loss: 0.5513
Component Failure Loss: 0.1056
Time to Failure Loss: 0.0052
Severity Loss: 0.4230

>>> Component Failure Accuracy: 96.66%
>>> Time to Failure MAE: 0.0560 (normalized)


In [11]:
print("\n[INFO] Performing detailed analysis...")

# Get predictions
predictions = model.predict(X_test)
pred_components = np.argmax(predictions[0], axis=1)
pred_severity = np.argmax(predictions[2], axis=1)

# Per-class accuracy
from sklearn.metrics import classification_report, confusion_matrix

components = ['engine', 'transmission', 'brakes', 'battery', 'cooling_system']
severities = ['low', 'medium', 'high', 'critical']

print("\n" + "=" * 80)
print("COMPONENT FAILURE - DETAILED METRICS")
print("=" * 80)
print(classification_report(y_comp_test, pred_components, target_names=components))

print("\n" + "=" * 80)
print("SEVERITY - DETAILED METRICS")
print("=" * 80)
print(classification_report(y_sev_test, pred_severity, target_names=severities))


[INFO] Performing detailed analysis...

COMPONENT FAILURE - DETAILED METRICS
                precision    recall  f1-score   support

        engine       0.98      0.95      0.97       300
  transmission       0.98      0.96      0.97       312
        brakes       0.93      0.97      0.95       295
       battery       0.98      0.98      0.98       296
cooling_system       0.97      0.98      0.97       295

      accuracy                           0.97      1498
     macro avg       0.97      0.97      0.97      1498
  weighted avg       0.97      0.97      0.97      1498


SEVERITY - DETAILED METRICS
              precision    recall  f1-score   support

         low       0.00      0.00      0.00         6
      medium       0.64      0.61      0.62       147
        high       0.79      0.78      0.78       635
    critical       0.87      0.89      0.88       710

    accuracy                           0.81      1498
   macro avg       0.57      0.57      0.57      1498
weight

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


In [12]:
print("\n[INFO] Saving model and metadata...")

# Save final model
model.save('weights_and_metadata/vehicle_failure_lstm_optimized.keras')
print("[SUCCESS] Saved model to 'weights_and_metadata/vehicle_failure_lstm_optimized.keras'")

# Save model weights
model.save_weights('vehicle_failure_model.h5')
print("[SUCCESS] Saved weights to 'weights_and_metadata/vehicle_failure_model.h5'")



# Save metadata
metadata = {
    'num_samples': len(df),
    'num_sequences': len(X_sequences),
    'sequence_length': sequence_length,
    'num_features': len(feature_columns),
    'feature_columns': feature_columns,
    'components': components,
    'severities': severities,
    'train_samples': len(X_train),
    'test_samples': len(X_test),
    'model_params': int(model.count_params()),
    'final_metrics': {
        'test_loss': float(test_results[0]),
        'component_accuracy': float(test_results[4]),
        'time_mae': float(test_results[5]),
        'severity_accuracy': float(test_results[6])
    }
}

with open('weights_and_metadata/model_metadata_optimized.json', 'w') as f:
    json.dump(metadata, f, indent=2)
print("[SUCCESS] Saved metadata to 'weights_and_metadata/model_metadata_optimized.json'")


[INFO] Saving model and metadata...
[SUCCESS] Saved model to 'weights_and_metadata/vehicle_failure_lstm_optimized.keras'
[SUCCESS] Saved weights to 'weights_and_metadata/vehicle_failure_model.h5'
[SUCCESS] Saved metadata to 'weights_and_metadata/model_metadata_optimized.json'
