In [1]:
# create_training_data.py
import pandas as pd
import numpy as np
import os

def create_autonomous_training_data(output_path='training_data_autonomous.csv'):
    """Create comprehensive training data for autonomous manufacturing"""
    
    print("üéØ Creating autonomous manufacturing training data...")
    
    n_samples = 10000
    np.random.seed(42)
    
    # Base sensor readings (normal operation)
    data = {
        # Core maintenance sensors
        'temperature_c': np.random.normal(75, 3, n_samples),
        'pressure_bar': np.random.normal(15, 1.5, n_samples),
        'ph_level': np.random.normal(6.8, 0.2, n_samples),
        'flow_rate_lph': np.random.normal(1200, 100, n_samples),
        
        # Equipment health sensors
        'vibration_x': np.random.exponential(0.5, n_samples),
        'vibration_y': np.random.exponential(0.5, n_samples),
        'vibration_z': np.random.exponential(0.5, n_samples),
        'ultrasound_leak_db': np.random.exponential(5, n_samples),
        'acoustic_emission': np.random.beta(2, 5, n_samples),
        
        # Quality & process sensors
        'orp_redox_mv': np.random.normal(400, 50, n_samples),
        'humidity_rh': np.random.normal(55, 8, n_samples),
        'vision_defect_score': np.random.exponential(0.02, n_samples),
        'vision_contaminant_score': np.random.exponential(0.002, n_samples),
        
        # Safety & environment sensors
        'co2_ppm': np.random.normal(420, 100, n_samples),
        'o2_percent': np.random.normal(20.8, 0.3, n_samples),
        'voc_ppm': np.random.exponential(0.5, n_samples),
        'differential_pressure_bar': np.random.exponential(0.05, n_samples),
        'refrigerant_pressure_bar': np.random.normal(8.5, 0.5, n_samples),
    }
    
    df = pd.DataFrame(data)
    
    # ===== CREATE FAILURE SCENARIOS =====
    failure_mask = np.zeros(n_samples, dtype=bool)
    
    # Scenario 1: Temperature spikes cause failures
    temp_failures = df['temperature_c'] > 82
    failure_mask = failure_mask | temp_failures
    
    # Scenario 2: High vibration causes failures
    vib_magnitude = np.sqrt(df['vibration_x']**2 + df['vibration_y']**2 + df['vibration_z']**2)
    vib_failures = vib_magnitude > 3.5
    failure_mask = failure_mask | vib_failures
    
    # Scenario 3: Pressure spikes
    pressure_failures = df['pressure_bar'] > 18
    failure_mask = failure_mask | pressure_failures
    
    # Scenario 4: pH out of range (HACCP violation)
    ph_failures = (df['ph_level'] < 5.5) | (df['ph_level'] > 7.5)
    failure_mask = failure_mask | ph_failures
    
    # Scenario 5: Contamination detected
    contaminant_failures = df['vision_contaminant_score'] > 0.01
    failure_mask = failure_mask | contaminant_failures
    
    # Scenario 6: Safety hazards
    safety_failures = (df['co2_ppm'] > 1000) | (df['voc_ppm'] > 8)
    failure_mask = failure_mask | safety_failures
    
    # Create target variable
    df['failure_risk'] = failure_mask.astype(int)
    
    # Add some noise (not all anomalies cause immediate failure)
    noise = np.random.random(n_samples) < 0.1
    df.loc[noise & failure_mask, 'failure_risk'] = 0
    df.loc[noise & ~failure_mask, 'failure_risk'] = 1
    
    # Add derived features
    df['vibration_magnitude'] = vib_magnitude
    df['flow_rate_normalized'] = df['flow_rate_lph'] / 1200
    df['temperature_deviation'] = abs(df['temperature_c'] - 75)
    
    # Calculate risk scores for training
    df['safety_risk_score'] = (
        (df['co2_ppm'] > 800).astype(int) * 0.4 +
        (df['voc_ppm'] > 5).astype(int) * 0.3 +
        (df['pressure_bar'] > 17).astype(int) * 0.3
    )
    
    df['quality_risk_score'] = (
        ((df['ph_level'] < 5.5) | (df['ph_level'] > 7.5)).astype(int) * 0.4 +
        (df['vision_defect_score'] > 0.05).astype(int) * 0.3 +
        (df['vision_contaminant_score'] > 0.005).astype(int) * 0.3
    )
    
    print(f"‚úÖ Created {n_samples} samples")
    print(f"üìä Failure rate: {df['failure_risk'].mean():.1%}")
    print(f"üìà Safety risk average: {df['safety_risk_score'].mean():.3f}")
    print(f"üìà Quality risk average: {df['quality_risk_score'].mean():.3f}")
    
    # Save to CSV
    df.to_csv(output_path, index=False)
    print(f"üíæ Saved to {output_path}")
    
    # Create a smaller test dataset
    test_df = df.sample(1000, random_state=42)
    test_df.to_csv('test_data_autonomous.csv', index=False)
    print(f"üíæ Test data saved to test_data_autonomous.csv")
    
    return df

if __name__ == "__main__":
    df = create_autonomous_training_data()

üéØ Creating autonomous manufacturing training data...
‚úÖ Created 10000 samples
üìä Failure rate: 13.6%
üìà Safety risk average: 0.029
üìà Quality risk average: 0.049
üíæ Saved to training_data_autonomous.csv
üíæ Test data saved to test_data_autonomous.csv


In [2]:
# train_autonomous_model.py
import pandas as pd
import numpy as np
import joblib
import json
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import warnings
warnings.filterwarnings('ignore')

def train_autonomous_model():
    """Train the complete autonomous manufacturing model"""
    
    print("="*60)
    print("üöÄ TRAINING AUTONOMOUS MANUFACTURING MODEL")
    print("="*60)
    
    # Load or create training data
    try:
        df = pd.read_csv('training_data_autonomous.csv')
        print(f"üì• Loaded {len(df)} samples from training_data_autonomous.csv")
    except:
        print("üìù Creating new training data...")
        from create_training_data import create_autonomous_training_data
        df = create_autonomous_training_data()
    
    # ===== DEFINE ALL FEATURES =====
    base_features = [
        # Core maintenance
        'temperature_c', 'pressure_bar', 'ph_level', 'flow_rate_lph',
        
        # Equipment health
        'vibration_x', 'vibration_y', 'vibration_z',
        'ultrasound_leak_db', 'acoustic_emission',
        
        # Quality & process
        'orp_redox_mv', 'humidity_rh',
        'vision_defect_score', 'vision_contaminant_score',
        
        # Safety & environment
        'co2_ppm', 'o2_percent', 'voc_ppm',
        'differential_pressure_bar', 'refrigerant_pressure_bar'
    ]
    
    # Add derived features if they exist
    derived_features = ['vibration_magnitude', 'flow_rate_normalized', 'temperature_deviation']
    available_derived = [f for f in derived_features if f in df.columns]
    
    # Combine all features
    all_features = base_features + available_derived
    print(f"üîß Using {len(all_features)} features for training")
    print(f"   Features: {all_features}")
    
    # Prepare training data
    X = df[all_features].fillna(0)
    y = df['failure_risk']
    
    # Split data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )
    
    print(f"\nüìä Data split:")
    print(f"   Training samples: {len(X_train)}")
    print(f"   Test samples: {len(X_test)}")
    print(f"   Failure rate in training: {y_train.mean():.2%}")
    print(f"   Failure rate in test: {y_test.mean():.2%}")
    
    # Scale features
    print("\n‚öôÔ∏è  Scaling features...")
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    
    # Train model
    print("üéØ Training RandomForest model...")
    model = RandomForestClassifier(
        n_estimators=200,
        max_depth=20,
        min_samples_split=5,
        min_samples_leaf=2,
        random_state=42,
        n_jobs=-1,
        class_weight='balanced'
    )
    
    model.fit(X_train_scaled, y_train)
    
    # ===== EVALUATE MODEL =====
    print("\nüìà Evaluating model performance...")
    
    # Predictions
    y_pred = model.predict(X_test_scaled)
    y_pred_proba = model.predict_proba(X_test_scaled)
    
    # Calculate metrics
    accuracy = accuracy_score(y_test, y_pred)
    cv_scores = cross_val_score(model, X_train_scaled, y_train, cv=5, scoring='roc_auc')
    
    print(f"‚úÖ Test Accuracy: {accuracy:.3f}")
    print(f"‚úÖ Cross-validation AUC: {cv_scores.mean():.3f} (¬±{cv_scores.std():.3f})")
    
    # Feature importance
    feature_importance = pd.DataFrame({
        'feature': all_features,
        'importance': model.feature_importances_
    }).sort_values('importance', ascending=False)
    
    print("\nüîù Top 10 most important features:")
    for i, row in feature_importance.head(10).iterrows():
        print(f"   {row['feature']}: {row['importance']:.3f}")
    
    # ===== SAVE MODEL & ARTIFACTS =====
    print("\nüíæ Saving model package...")
    
    # Create model directory
    import os
    model_dir = 'autonomous_model_package'
    os.makedirs(model_dir, exist_ok=True)
    
    # 1. Save the model
    model_path = os.path.join(model_dir, 'autonomous_model.joblib')
    joblib.dump(model, model_path)
    print(f"‚úÖ Model saved to {model_path}")
    
    # 2. Save the scaler
    scaler_path = os.path.join(model_dir, 'scaler.joblib')
    joblib.dump(scaler, scaler_path)
    print(f"‚úÖ Scaler saved to {scaler_path}")
    
    # 3. Save feature metadata
    features_metadata = {
        'features': all_features,
        'feature_importance': feature_importance.to_dict('records'),
        'training_date': pd.Timestamp.now().isoformat(),
        'model_performance': {
            'accuracy': float(accuracy),
            'auc_mean': float(cv_scores.mean()),
            'auc_std': float(cv_scores.std()),
            'n_training_samples': len(X_train),
            'n_features': len(all_features)
        },
        'default_values': {
            'temperature_c': 75.0,
            'pressure_bar': 15.0,
            'ph_level': 6.8,
            'flow_rate_lph': 1200.0,
            'vibration_x': 0.0,
            'vibration_y': 0.0,
            'vibration_z': 0.0,
            'ultrasound_leak_db': 0.0,
            'acoustic_emission': 0.0,
            'orp_redox_mv': 400.0,
            'humidity_rh': 55.0,
            'vision_defect_score': 0.0,
            'vision_contaminant_score': 0.0,
            'co2_ppm': 420.0,
            'o2_percent': 20.8,
            'voc_ppm': 0.0,
            'differential_pressure_bar': 0.1,
            'refrigerant_pressure_bar': 8.5
        }
    }
    
    features_path = os.path.join(model_dir, 'features.json')
    with open(features_path, 'w') as f:
        json.dump(features_metadata, f, indent=2)
    print(f"‚úÖ Features metadata saved to {features_path}")
    
    # 4. Save production_inference.py
    inference_code = '''
# production_inference.py - Autonomous Manufacturing Model
# [PASTE THE ENTIRE production_inference.py CODE HERE]
'''
    
    inference_path = os.path.join(model_dir, 'production_inference.py')
    with open(inference_path, 'w') as f:
        # You'll need to copy the production_inference.py code here
        # For now, we'll create a placeholder
        f.write("# Autonomous Manufacturing Inference Script\n")
        f.write("# Replace with the full production_inference.py code\n")
    print(f"‚úÖ Inference script saved to {inference_path}")
    
    # 5. Create model.tar.gz for SageMaker
    print("\nüì¶ Creating SageMaker deployment package...")
    import tarfile
    
    # Create tar.gz archive
    with tarfile.open('model.tar.gz', 'w:gz') as tar:
        tar.add(model_dir, arcname='.')
    
    print(f"‚úÖ Created model.tar.gz ({os.path.getsize('model.tar.gz') / 1024 / 1024:.1f} MB)")
    
    # 6. Test the model locally
    print("\nüß™ Testing model locally...")
    
    # Create a test sample
    test_sample = {
        'temperature_c': 79.5,
        'pressure_bar': 15.2,
        'ph_level': 6.5,
        'flow_rate_lph': 1250,
        'vibration_x': 2.3,
        'vibration_y': 1.8,
        'vibration_z': 3.1,
        'ultrasound_leak_db': 45.2,
        'acoustic_emission': 0.85,
        'orp_redox_mv': 450,
        'humidity_rh': 65.2,
        'vision_defect_score': 0.02,
        'vision_contaminant_score': 0.001,
        'co2_ppm': 850,
        'o2_percent': 20.8,
        'voc_ppm': 2.1,
        'differential_pressure_bar': 0.15,
        'refrigerant_pressure_bar': 8.2
    }
    
    # Prepare features in correct order
    X_test_sample = np.array([[test_sample.get(f, 0) for f in all_features]])
    X_test_scaled_sample = scaler.transform(X_test_sample)
    
    prediction = model.predict(X_test_scaled_sample)[0]
    probability = model.predict_proba(X_test_scaled_sample)[0]
    
    print(f"\nüìä Test prediction:")
    print(f"   Prediction: {'FAILURE' if prediction == 1 else 'NORMAL'}")
    print(f"   Failure probability: {probability[1]:.3f}")
    print(f"   Confidence: {np.max(probability):.3f}")
    
    # Create example payload
    example_payload = {
        'features': all_features,
        'example_input': test_sample,
        'expected_output': {
            'prediction': int(prediction),
            'probability_failure': float(probability[1]),
            'confidence': float(np.max(probability))
        }
    }
    
    example_path = os.path.join(model_dir, 'example_payload.json')
    with open(example_path, 'w') as f:
        json.dump(example_payload, f, indent=2)
    print(f"‚úÖ Example payload saved to {example_path}")
    
    print("\n" + "="*60)
    print("üéâ AUTONOMOUS MANUFACTURING MODEL TRAINING COMPLETE!")
    print("="*60)
    print(f"\nüìä Model Performance: {accuracy:.1%} accuracy")
    print(f"üìÅ Model package: {model_dir}/")
    print(f"üì¶ Deployment package: model.tar.gz")
    print(f"üîß Features: {len(all_features)}")
    
    print(f"\nüöÄ Next steps:")
    print(f"   1. Copy production_inference.py into {model_dir}/")
    print(f"   2. Upload model.tar.gz to S3")
    print(f"   3. Deploy to SageMaker endpoint")
    print(f"   4. Update Lambda to use new endpoint")
    print("="*60)
    
    return model, scaler, all_features

if __name__ == "__main__":
    model, scaler, features = train_autonomous_model()

üöÄ TRAINING AUTONOMOUS MANUFACTURING MODEL
üì• Loaded 10000 samples from training_data_autonomous.csv
üîß Using 21 features for training
   Features: ['temperature_c', 'pressure_bar', 'ph_level', 'flow_rate_lph', 'vibration_x', 'vibration_y', 'vibration_z', 'ultrasound_leak_db', 'acoustic_emission', 'orp_redox_mv', 'humidity_rh', 'vision_defect_score', 'vision_contaminant_score', 'co2_ppm', 'o2_percent', 'voc_ppm', 'differential_pressure_bar', 'refrigerant_pressure_bar', 'vibration_magnitude', 'flow_rate_normalized', 'temperature_deviation']

üìä Data split:
   Training samples: 8000
   Test samples: 2000
   Failure rate in training: 13.55%
   Failure rate in test: 13.55%

‚öôÔ∏è  Scaling features...
üéØ Training RandomForest model...

üìà Evaluating model performance...
‚úÖ Test Accuracy: 0.900
‚úÖ Cross-validation AUC: 0.641 (¬±0.026)

üîù Top 10 most important features:
   pressure_bar: 0.139
   vision_contaminant_score: 0.076
   temperature_c: 0.069
   temperature_deviation

In [None]:
SAGEMAKER_ENDPOINT_NAME = "autonomous-manufacturing-v1"  # Changed!

In [4]:
# Updated deployment script with correct image
import boto3
import json

def deploy_with_fixed_image():
    """Deployment with correct ECR image"""
    
    print("="*60)
    print("üöÄ DEPLOYING WITH CORRECT ECR IMAGE")
    print("="*60)
    
    # Configuration
    MODEL_S3 = "s3://sagemaker-eu-north-1-976792586723/models/final-model-v2.tar.gz" 
    ENDPOINT_NAME = "autonomous-manufacturing-v1"
    REGION = "eu-north-1"
    ROLE_ARN = "arn:aws:iam::976792586723:role/AmazonSageMaker-ExecutionRole-20260207T095196"
    
    # CORRECT IMAGE FOR eu-north-1 REGION
    ECR_IMAGE = "763104351884.dkr.ecr.eu-north-1.amazonaws.com/sklearn:0.23-1-cpu-py3"
    
    print(f"üì¶ Model: {MODEL_S3}")
    print(f"üè∑Ô∏è  Endpoint: {ENDPOINT_NAME}")
    print(f"üê≥ ECR Image: {ECR_IMAGE}")
    
    sm = boto3.client('sagemaker', region_name=REGION)
    
    try:
        # Create Model
        print("\n1. Creating model...")
        sm.create_model(
            ModelName='autonomous-manufacturing-model',
            ExecutionRoleArn=ROLE_ARN,
            PrimaryContainer={
                'Image': ECR_IMAGE,  # FIXED IMAGE
                'ModelDataUrl': MODEL_S3,
                'Environment': {
                    'SAGEMAKER_PROGRAM': 'production_inference.py'
                }
            }
        )
        print("   ‚úÖ Model created")
        
        # Rest of your code...
        
    except Exception as e:
        print(f"\n‚ùå Error: {e}")
        return False

if __name__ == "__main__":
    deploy_with_fixed_image()

üöÄ DEPLOYING WITH CORRECT ECR IMAGE
üì¶ Model: s3://sagemaker-eu-north-1-976792586723/models/final-model-v2.tar.gz
üè∑Ô∏è  Endpoint: autonomous-manufacturing-v1
üê≥ ECR Image: 763104351884.dkr.ecr.eu-north-1.amazonaws.com/sklearn:0.23-1-cpu-py3

1. Creating model...

‚ùå Error: An error occurred (UnrecognizedClientException) when calling the CreateModel operation: The security token included in the request is invalid.


In [5]:
from sagemaker.sklearn.model import SKLearnModel
from sagemaker import Session
import boto3

print("üöÄ TRYING SAGEMAKER SDK APPROACH")

# Initialize session
session = Session(boto_session=boto3.Session(region_name='eu-north-1'))

# Create model using SDK
sklearn_model = SKLearnModel(
    model_data="s3://sagemaker-eu-north-1-976792586723/models/final-model-v2.tar.gz",
    role="arn:aws:iam::976792586723:role/AmazonSageMaker-ExecutionRole-20260207T095196",
    entry_point="production_inference.py",
    framework_version="0.23-1",
    py_version="py3",
    sagemaker_session=session
)

try:
    print("üì¶ Creating model with SDK...")
    # Just create model first (not deploy)
    sklearn_model.create(
        instance_type='ml.m5.large',
        accelerator_type=None
    )
    print("‚úÖ Model created with SDK!")
    
    # Now deploy
    print("üöÄ Deploying endpoint...")
    predictor = sklearn_model.deploy(
        initial_instance_count=1,
        instance_type='ml.m5.large',
        endpoint_name='autonomous-manufacturing-sdk-test'
    )
    print("üéâ DEPLOYMENT SUCCESSFUL!")
    
except Exception as e:
    print(f"‚ùå SDK failed: {e}")

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml
üöÄ TRYING SAGEMAKER SDK APPROACH
üì¶ Creating model with SDK...
‚ùå SDK failed: An error occurred (InvalidClientTokenId) when calling the GetCallerIdentity operation: The security token included in the request is invalid.
