# Detailed Code Explanation: iPhone Production Line Analysis

## 1. Data Collection Process

We use two main datasets:

### a) Main Unit Assembly Data (main_unit_df)
```python
def generate_main_unit_data(num_rows):
    # Schematic outline of the synthetic main-unit record layout, not runnable
    # code. Only the 'USN' entry is a complete expression that yields one value
    # per row; the other entries show the shape of a single value and omit the
    # per-row loop. "'T' or 'F'" is shorthand for "either flag" (evaluated as
    # Python it would always be 'T'), and the 'A_ERRORCODE' entry is plain
    # English. The excerpt ends at the dict literal; no return is shown.
    data = {
        'USN': [f'USN{str(i+1).zfill(6)}' for i in range(num_rows)],  # Unique iPhone ID
        'LINE': [f'LINE{str(random.randint(1,5)).zfill(3)}'],         # Production line number
        'STAGE': [f'M{random.randint(1,5)}'],                         # Assembly stage
        'VENDOR': [f'VENDOR{random.randint(1,5)}'],                   # Component supplier
        'RESULTFLAG': ['T' or 'F'],                                   # Pass/Fail status
        'A_ERRORCODE': [Error codes if failure occurs]                 # Specific error types
    }
```

### b) Component Assembly Data (component_df)
```python
def generate_component_data(num_rows):
    # Schematic outline of the synthetic component record layout, not runnable
    # code. Each entry shows the shape of a single value; the per-row loop over
    # num_rows is omitted (note 'i' is used without one) and 'etc' stands in
    # for further operation names. The excerpt ends at the dict literal; no
    # return is shown.
    data = {
        'USN_PRIMARY': [f'USN{str(i+1).zfill(6)}'],                   # Main iPhone reference
        'SUB_USN': [f'CSN{str(random.randint(1,999999)).zfill(6)}'],  # Component ID
        'STAGE': [f'C{random.randint(1,5)}'],                         # Component stage
        'SFC_STAGE': ['BTL:Double Lock', 'BTN Conn', etc],            # Specific operations
    }
```

### Data Integration Process:
1. Collect main unit data from assembly line sensors
2. Gather component-level data from sub-assembly stations
3. Merge data using USN (Unique Serial Number) as the key
4. Add temporal features (timestamps, shifts, etc.)
5. Calculate derived metrics (cycle times, failure rates, etc.)

## 2. ML Models Implementation

### Data Preprocessing
```python
# Convert datetime columns and extract features
for df in [main_unit_df, component_df]:
    for col in ['TRNDATE', 'INSERTTIME']:
        df[col] = pd.to_datetime(df[col])
        df[f'{col}_hour'] = df[col].dt.hour    # Time of day patterns
        df[f'{col}_day'] = df[col].dt.day      # Daily patterns
        df[f'{col}_month'] = df[col].dt.month  # Monthly patterns

# Encode categorical variables
# Each frame is encoded explicitly: relying on the `df` name left over from the
# loop above would encode only the last frame (component_df).
# A fresh LabelEncoder is fitted per (frame, column) and kept in
# `label_encoders`, so every mapping can be inverted later; refitting a single
# encoder would keep only the mapping of the last column processed.
# Codes are assigned per frame, so the same category can receive different
# codes in the two frames.
categorical_cols = ['LINE', 'WORKSTATION', 'STAGE', 'VENDOR']
label_encoders = {}
for frame_name, df in [('main_unit', main_unit_df), ('component', component_df)]:
    for col in categorical_cols:
        if col not in df.columns:
            # Not every categorical column has to exist in both frames.
            continue
        le = LabelEncoder()
        df[f'{col}_encoded'] = le.fit_transform(df[col])
        label_encoders[(frame_name, col)] = le
```

### Random Forest Model (For Overall Quality Prediction)
```python
# Build a 100-tree random forest and train it on the main-unit training split.
# fit() returns the fitted estimator itself, so both steps can be chained.
rf_model = RandomForestClassifier(n_estimators=100).fit(X_main_train, y_main_train)
```
- Uses 100 decision trees
- Each tree is trained on a different bootstrap sample of the data
- Combines predictions for robust results

### XGBoost Model (For Specific Defect Types)
```python
# Gradient-boosted classifier with library-default hyperparameters, trained on
# the same main-unit training split. fit() returns the fitted estimator.
xgb_model = xgb.XGBClassifier().fit(X_main_train, y_main_train)
```
- Gradient boosting for high accuracy
- Can compensate for imbalanced classes through class weighting (e.g. `scale_pos_weight`)
- Handles missing values well

### LSTM Model (For Time-Series Patterns)
```python
# Assemble the network layer by layer.
lstm_model = Sequential()
# Recurrent layer with 50 units reading inputs of shape (timesteps, features).
lstm_model.add(LSTM(50, input_shape=(timesteps, features)))
# Dropout with rate 0.2 between the recurrent layer and the output.
lstm_model.add(Dropout(0.2))
# Single sigmoid unit: one value in [0, 1] per input sequence.
lstm_model.add(Dense(1, activation='sigmoid'))
```
- Analyzes sequences of events
- Learns long-term patterns
- Predicts future failures

## 3. Dashboard Features

### Real-time Monitoring
```python
# Current Production Status
def update_production_status():
    """Summarize the current contents of the module-level ``main_unit_df``.

    Returns:
        A ``(total_units, current_failure_rate)`` tuple. ``total_units`` is
        the number of distinct ``USN`` values; ``current_failure_rate`` is the
        fraction of rows whose ``RESULTFLAG`` equals ``'F'``. When the frame
        has no rows the failure rate is reported as ``0.0`` instead of raising
        ``ZeroDivisionError``.
    """
    # dropna=False keeps a missing USN counted as one distinct value, matching
    # what len(Series.unique()) reports.
    total_units = main_unit_df['USN'].nunique(dropna=False)

    row_count = len(main_unit_df)
    if row_count == 0:
        # Nothing produced yet: there is no meaningful ratio to compute.
        return total_units, 0.0

    failed_rows = int((main_unit_df['RESULTFLAG'] == 'F').sum())
    current_failure_rate = failed_rows / row_count
    return total_units, current_failure_rate
```

### Quality Analytics
```python
# Aggregations shared by the vendor and stage breakdowns below.
quality_aggregations = {
    'RESULTFLAG': lambda x: (x == 'F').mean(),  # Fraction of rows flagged 'F'
    'USN': 'count',                             # Rows with a non-null USN
}

# Vendor Performance Analysis
# Uses main_unit_df, the name the main unit dataset carries in the other
# script-level snippets of this document.
vendor_metrics = main_unit_df.groupby('VENDOR').agg(quality_aggregations)

# Stage Analysis
stage_metrics = main_unit_df.groupby('STAGE').agg(quality_aggregations)
```

### Cost Impact
```python
def calculate_cost_impact(main_df, cost_per_failure=1000):
    """Estimate the cost of failed units per vendor.

    Args:
        main_df: Frame with ``RESULTFLAG``, ``VENDOR`` and ``USN`` columns.
        cost_per_failure: Cost attributed to each failed row.

    Returns:
        A Series indexed by ``VENDOR`` holding the number of rows flagged
        ``'F'`` (with a non-null ``USN``) multiplied by ``cost_per_failure``.
        Vendors without any failed row do not appear in the result.
    """
    failed_rows = main_df.loc[main_df['RESULTFLAG'] == 'F']
    failures_per_vendor = failed_rows.groupby('VENDOR')['USN'].count()
    return failures_per_vendor * cost_per_failure
```

### Predictive Alerts
```python
def predict_failure_risk(current_unit, threshold=0.7):
    """Return whether the predicted failure probability exceeds ``threshold``.

    Args:
        current_unit: Raw data for one unit, passed to ``prepare_features``.
        threshold: Probability above which an alert is raised (default 0.7,
            i.e. alert if risk > 70%).

    Returns:
        A truthy value when the model's failure probability for the first row
        of the prepared features is strictly greater than ``threshold``.
    """
    features = prepare_features(current_unit)

    # predict_proba columns follow rf_model.classes_, which is sorted. With
    # 'F'/'T' string labels the failure class 'F' sits in column 0, so reading
    # column 1 would return the pass probability. Look the column up by label,
    # and fall back to column 1 for models trained on other label encodings.
    class_labels = list(rf_model.classes_)
    failure_column = class_labels.index('F') if 'F' in class_labels else 1

    risk_score = rf_model.predict_proba(features)[0][failure_column]
    return risk_score > threshold
```

## 4. Implementation Steps for Production Line

### Phase 1: Data Collection Setup
1. Install sensors at key points:
   - Battery assembly station
   - Screen fitting station
   - Final assembly checkpoints

2. Configure data collection:
```python
# Real-time data collection
def collect_production_data(station_id):
    """Take one reading from a station and package it with its metrics.

    The timestamp is captured before the sensor is read. Returns a dict with
    the keys ``'timestamp'``, ``'station'`` and ``'metrics'``.
    """
    captured_at = datetime.now()
    raw_reading = read_sensor(station_id)
    return dict(
        timestamp=captured_at,
        station=station_id,
        metrics=analyze_metrics(raw_reading),
    )
```

### Phase 2: Model Integration
1. Deploy prediction models:
```python
# Load trained models
# joblib.load unpickles the file contents, so only load artifacts that come
# from a trusted source.
rf_model = joblib.load('manufacturing_quality_model.joblib')
# NOTE(review): scaler is not referenced by the snippets shown in this section;
# presumably preprocess_data applies it — confirm.
scaler = joblib.load('feature_scaler.joblib')

# Real-time prediction function
def predict_quality(current_data):
    """Preprocess ``current_data`` and return ``rf_model``'s prediction for it."""
    return rf_model.predict(preprocess_data(current_data))
```

### Phase 3: Dashboard Deployment
1. Set up real-time monitoring:
```python
# Streamlit dashboard update
def update_dashboard():
    """Render the two headline metrics, then the failure-trend plot.

    Reads ``production_rate`` and ``quality_score`` from the enclosing scope;
    both must be defined before this is called.
    """
    st.metric("Current Production Rate", production_rate)
    st.metric("Quality Score", quality_score)
    plot_failure_trends()
```

### Phase 4: Integration with Production Systems
1. Connect with iPhone assembly line:
```python
# Production line integration
def monitor_production_line():
    """Poll the line forever: predict, alert on failure, refresh the dashboard.

    Each cycle fetches the current unit, raises an alert when the predicted
    quality equals ``'F'``, updates the dashboard and then sleeps for
    ``update_interval``. The loop has no exit condition.
    """
    while True:
        unit = get_current_unit()
        if predict_quality(unit) == 'F':
            trigger_alert(unit)
        # Refresh on every cycle, whether or not an alert was raised.
        update_dashboard()
        time.sleep(update_interval)
```

2. Configure alert thresholds:
```python
# Failure-probability cut-offs keyed by risk tier.
# NOTE(review): the Predictive Alerts snippet alerts at a fixed 0.7, which is
# not one of these tiers — confirm which value is meant to drive alerts.
alert_thresholds = {
    'high_risk': 0.8,    # 80% failure probability
    'medium_risk': 0.5,  # 50% failure probability
    'low_risk': 0.2      # 20% failure probability
}
```

## 5. Data Comparison and Integration

### Linking Main Unit and Component Data
```python
def integrate_data(main_unit_df, component_df):
    """Left-join component rows onto main-unit rows and flag failures.

    Rows are matched on ``main_unit_df['USN'] == component_df['USN_PRIMARY']``;
    main units without components are kept. Both inputs must carry a
    ``RESULTFLAG`` column, which the merge renames to ``RESULTFLAG_x`` (main
    unit) and ``RESULTFLAG_y`` (component).

    Returns:
        The merged frame with an extra ``total_failures`` column: 1 when the
        main unit or the component is flagged ``'F'``, otherwise 0. Despite
        its name it is a per-row 0/1 indicator, not a running total.
    """
    combined = main_unit_df.merge(
        component_df,
        how='left',
        left_on='USN',
        right_on='USN_PRIMARY',
    )

    # A missing component flag (no matching component) compares as not failed.
    unit_failed = combined['RESULTFLAG_x'].eq('F')
    component_failed = combined['RESULTFLAG_y'].eq('F')
    combined['total_failures'] = (unit_failed | component_failed).astype(int)

    return combined
```

### Cross-Dataset Analysis
```python
def analyze_failure_patterns(merged_data):
    """Break failures down by component and by stage pairing.

    Args:
        merged_data: Frame with ``SUB_USN``, ``RESULTFLAG_x``, ``USN``,
            ``STAGE_x``, ``STAGE_y`` and ``total_failures`` columns.

    Returns:
        ``(component_impact, stage_correlation)``. ``component_impact`` is
        indexed by ``SUB_USN`` and holds the fraction of rows whose main unit
        is flagged ``'F'`` plus the count of non-null ``USN`` rows.
        ``stage_correlation`` is a ``STAGE_x`` by ``STAGE_y`` table of the
        mean ``total_failures`` for each pairing.
    """
    per_component = merged_data.groupby('SUB_USN')
    component_impact = per_component.agg({
        'RESULTFLAG_x': lambda flags: flags.eq('F').mean(),
        'USN': 'count',
    })

    main_stage = merged_data['STAGE_x']
    component_stage = merged_data['STAGE_y']
    stage_correlation = pd.crosstab(
        main_stage,
        component_stage,
        values=merged_data['total_failures'],
        aggfunc='mean',
    )

    return component_impact, stage_correlation
```

### Predictive Features from Both Datasets
```python
def create_combined_features(merged_data):
    """Collect the model input columns from the merged frame.

    Args:
        merged_data: Frame containing ``STAGE_x_encoded``, ``STAGE_y_encoded``,
            ``VENDOR_x_encoded``, ``VENDOR_y_encoded``, ``TRNDATE_x_hour`` and
            ``TRNDATE_y_hour``.

    Returns:
        A DataFrame sharing ``merged_data``'s index, with one column per
        feature: ``main_unit_stage``, ``component_stage``, ``vendor_main``,
        ``vendor_component``, ``hour_main`` and ``hour_component``.

    Raises:
        KeyError: If any of the source columns is missing.
    """
    # Each feature is its own column. Packing the two hour Series into one
    # list-valued 'time_features' entry makes the DataFrame constructor treat
    # the list as a 2-element column and fail against the other columns.
    source_columns = {
        'main_unit_stage': 'STAGE_x_encoded',
        'component_stage': 'STAGE_y_encoded',
        'vendor_main': 'VENDOR_x_encoded',
        'vendor_component': 'VENDOR_y_encoded',
        'hour_main': 'TRNDATE_x_hour',
        'hour_component': 'TRNDATE_y_hour',
    }
    features = {
        feature_name: merged_data[column]
        for feature_name, column in source_columns.items()
    }

    return pd.DataFrame(features)
```