# Manual Failure Prediction Using ML Models

This notebook shows how to manually calculate and understand failure predictions for iPhone production.

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
import joblib

# Locations of the persisted artifacts, relative to the notebook's working directory
MODEL_PATH = 'manufacturing_quality_model.joblib'
SCALER_PATH = 'feature_scaler.joblib'

# Restore the trained model and the feature scaler from disk.
# NOTE(review): joblib files are pickle-based — only load artifacts from a trusted source.
model = joblib.load(MODEL_PATH)
scaler = joblib.load(SCALER_PATH)

## 1. Manual Prediction Example

Let's predict the failure probability for a specific iPhone unit at Stage 4 (stage code `M4`, Screen Assembly).

In [None]:
# Read the assembly records; in this cell they are only used to fit the label encoders
main_unit_df = pd.read_csv('main_unit_assembly_data.csv')

# One fitted LabelEncoder per categorical column, keyed by the column name.
# NOTE(review): the encoders are refit here rather than loaded from disk — presumably the
# model was trained with encoders fit on this same CSV; confirm the integer codes match.
le_dict = {
    column: LabelEncoder().fit(main_unit_df[column])
    for column in ['LINE', 'WORKSTATION', 'STAGE', 'VENDOR']
}

# The single iPhone unit we score by hand
test_case = {
    # Where the unit is being processed
    'LINE': 'LINE001',
    'WORKSTATION': 'WORKSTATION001',
    'STAGE': 'M4',            # Screen Assembly
    'VENDOR': 'VENDOR2',
    # When it is being processed
    'hour': 14,               # 2 PM
    'day': 15,                # Day of month
    'month': 9,               # September
    # Labelled "Tuesday" by the notebook author.
    # NOTE(review): with pandas' dt.dayofweek (Monday=0) the value 2 would be Wednesday —
    # confirm which convention the training features used.
    'day_of_week': 2,
    # Process measurements
    'time_diff': 120,         # 2 minutes between operations
    'has_error': 0            # No errors yet
}

# Assemble the one-row feature frame. Column order: raw numeric fields, then the
# label-encoded categoricals, then the error flag.
numeric_part = {
    field: [test_case[field]]
    for field in ('hour', 'day', 'month', 'day_of_week', 'time_diff')
}
encoded_part = {
    encoded_name: [le_dict[source_name].transform([test_case[source_name]])[0]]
    for source_name, encoded_name in (
        ('LINE', 'LINE_encoded'),
        ('WORKSTATION', 'WORKSTATION_encoded'),
        ('STAGE', 'STAGE_encoded'),
        ('VENDOR', 'VENDOR_encoded'),
    )
}
features = pd.DataFrame({
    **numeric_part,
    **encoded_part,
    'has_error': [test_case['has_error']],
})

# Apply the scaler loaded earlier before handing the row to the model
features_scaled = scaler.transform(features)

# Row 0 is our only unit; column 1 of predict_proba is what the notebook reports
# as the failure probability.
class_probabilities = model.predict_proba(features_scaled)[0]
failure_prob = class_probabilities[1]
print(f"Failure Probability: {failure_prob:.2%}")

## 2. Understanding What Affects the Prediction

Let's see how different factors change the failure probability:

In [None]:
def predict_with_changes(base_case, changes):
    """Score a unit after overriding some of its fields.

    Parameters
    ----------
    base_case : dict
        Unit description with the keys 'LINE', 'WORKSTATION', 'STAGE', 'VENDOR',
        'hour', 'day', 'month', 'day_of_week', 'time_diff' and 'has_error'.
        It is not modified.
    changes : dict
        Field overrides applied on top of ``base_case``; may be empty.

    Returns
    -------
    Column 1 of ``model.predict_proba`` for the single resulting row, which this
    notebook reports as the failure probability.

    Raises
    ------
    KeyError
        If a required field is missing after the overrides are applied.
    ValueError
        Propagated from ``LabelEncoder.transform`` when a categorical value was
        not present in the data the encoder was fitted on.
    """
    # Overlay the overrides on a fresh dict so the caller's base_case stays untouched
    scenario = {**base_case, **changes}

    # Build the one-row frame column by column: numeric fields first ...
    row = {}
    for field in ('hour', 'day', 'month', 'day_of_week', 'time_diff'):
        row[field] = [scenario[field]]

    # ... then each categorical field translated to its integer code ...
    encoded_columns = (
        ('LINE', 'LINE_encoded'),
        ('WORKSTATION', 'WORKSTATION_encoded'),
        ('STAGE', 'STAGE_encoded'),
        ('VENDOR', 'VENDOR_encoded'),
    )
    for source_name, encoded_name in encoded_columns:
        code = le_dict[source_name].transform([scenario[source_name]])[0]
        row[encoded_name] = [code]

    # ... and the error flag last.
    row['has_error'] = [scenario['has_error']]

    # Scale the row, then read column 1 of the model's probabilities for it
    scaled_row = scaler.transform(pd.DataFrame(row))
    failure_probability = model.predict_proba(scaled_row)[0][1]
    return failure_probability

# Each scenario overrides one or more fields of test_case; every other field
# keeps its baseline value.
scenarios = [
    {"time_diff": 300},  # Longer assembly time
    {"VENDOR": "VENDOR3"},  # Different vendor
    {"hour": 22},  # Night shift
    {"has_error": 1}  # Previous error detected
]

print("Impact of Different Factors:")
print(f"Baseline Probability: {failure_prob:.2%}")
for changes in scenarios:
    new_prob = predict_with_changes(test_case, changes)
    change = new_prob - failure_prob

    # Choose the arrow from the sign of the difference. An unchanged probability
    # gets its own marker rather than being reported as a decrease.
    if change > 0:
        direction = '↑'
    elif change < 0:
        direction = '↓'
    else:
        direction = '→'

    print(f"\nChanges made: {changes}")
    print(f"New Probability: {new_prob:.2%} (Change: {direction}{abs(change):.2%})")

## 3. Real-Time Monitoring Example

Let's simulate monitoring multiple units in real-time:

In [None]:
def monitor_production_line(num_units=5, seed=None, high_risk=0.7, moderate_risk=0.3):
    """Simulate monitoring several units and print a risk report for each one.

    Every simulated unit starts as a copy of the global ``test_case`` and gets a
    randomly drawn assembly time, hour and vendor. Its failure probability is
    obtained from ``predict_with_changes``.

    Parameters
    ----------
    num_units : int, default 5
        Number of units to simulate.
    seed : int or None, default None
        When given, the random draws come from a private generator seeded with
        this value, so the report is reproducible. When None, NumPy's global
        generator is used, exactly as before.
    high_risk : float, default 0.7
        Probabilities strictly above this value are reported as HIGH RISK.
    moderate_risk : float, default 0.3
        Probabilities strictly above this value (and not high risk) are
        reported as MODERATE RISK; everything else is LOW RISK.

    Returns
    -------
    None
        The report is printed.
    """
    # Without a seed keep using the global generator so an earlier np.random.seed
    # call still takes effect; with a seed use an isolated, repeatable stream.
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Only vendors the encoder was fitted on can be transformed. Building an
    # arbitrary "VENDOR<n>" string could raise ValueError in LabelEncoder.transform
    # when that vendor is absent from the fitted data, so sample from known classes.
    known_vendors = le_dict['VENDOR'].classes_

    # Simulate different units
    for i in range(num_units):
        unit_case = test_case.copy()

        # Randomly vary some parameters
        unit_case['time_diff'] = rng.normal(120, 30)  # Time varies
        unit_case['hour'] = rng.randint(8, 23)  # Different times (upper bound exclusive)
        unit_case['VENDOR'] = rng.choice(known_vendors)

        # Get prediction (no further overrides on top of the randomised unit)
        prob = predict_with_changes(unit_case, {})

        print(f"\nUnit {i+1}:")
        print(f"Time: {unit_case['hour']}:00")
        print(f"Vendor: {unit_case['VENDOR']}")
        print(f"Assembly Time: {unit_case['time_diff']:.0f} seconds")
        print(f"Failure Probability: {prob:.2%}")

        if prob > high_risk:
            print("⚠️ HIGH RISK - Immediate inspection needed!")
        elif prob > moderate_risk:
            print("⚠️ MODERATE RISK - Monitor closely")
        else:
            print("✓ LOW RISK - Normal operation")

# Print the report banner (title line, then a rule), then run the simulation
# with its default number of units.
print(
    "Real-Time Production Monitoring Simulation",
    "----------------------------------------",
    sep="\n",
)
monitor_production_line()

## 4. Analysis Tips

When using these predictions manually:

1. **Watch for Patterns**:
   - Time of day effects
   - Vendor-specific issues
   - Stage-related problems

2. **Risk Levels**:
   - High Risk (>70%): Immediate action
   - Moderate Risk (above 30%, up to 70%): Enhanced monitoring
   - Low Risk (30% or below): Normal operation

3. **Key Factors to Monitor**:
   - Assembly time variations
   - Previous error history
   - Vendor performance
   - Time of day patterns

4. **Response Actions**:
   - High Risk: Stop and inspect
   - Moderate Risk: Additional quality checks
   - Low Risk: Standard procedures