# Deployment Preparation & Verification

This notebook simulates the deployment environment and verifies the application logic.

## Objectives:
- Verify `app/streamlit_app.py` dependencies
- Test model loading in a simulated production environment
- Generate sample predictions
- Check `submission.json` validity

In [1]:
import os
import sys
import joblib
import json
import pandas as pd
import numpy as np

# Reaching this line only proves that every import above resolved in this kernel;
# it does not check package versions or the Streamlit app's own dependencies.
print("✓ Environment check passed")

✓ Environment check passed


## 1. Verify Deployment Artifacts Exist

In [2]:
# Artifacts that must be on disk (paths relative to this notebook's directory).
required_files = [
    '../models/best_model.pkl',
    '../data/processed/feature_names.json',
    '../models/scaler.pkl',
    '../data/processed/feature_info.json',
    '../submission.json',
]

# Collect the absent paths first, then report on every path in its listed order.
missing = [path for path in required_files if not os.path.exists(path)]
for path in required_files:
    print(f"❌ Missing: {path}" if path in missing else f"✓ Found: {path}")

# One-line overall verdict for the whole artifact set.
print("\nCRITICAL: Missing deployment files!" if missing else "\n✓ All deployment files present")

✓ Found: ../models/best_model.pkl
✓ Found: ../data/processed/feature_names.json
✓ Found: ../models/scaler.pkl
✓ Found: ../data/processed/feature_info.json
✓ Found: ../submission.json

✓ All deployment files present


## 2. Simulate Prediction Request

In [3]:
# Load the model, the scaler and the ordered feature-name list from disk.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load artifacts produced by this project.
model = joblib.load('../models/best_model.pkl')
scaler = joblib.load('../models/scaler.pkl')
with open('../data/processed/feature_names.json', 'r') as f:
    features = json.load(f)['feature_names']

print(f"Model: {type(model).__name__}")
print(f"Expected Features: {len(features)}")

# Build one synthetic request row with values uniform in [0, 1).
# A fixed seed keeps the prediction printed below identical on every
# Restart & Run All instead of changing with each execution.
rng = np.random.default_rng(42)
dummy_input = pd.DataFrame(rng.random((1, len(features))), columns=features)
dummy_scaled = scaler.transform(dummy_input)

# Run the scaled row through the model; a failure is reported rather than
# raised so the remaining verification cells can still execute.
try:
    pred = model.predict(dummy_scaled)
    prob = model.predict_proba(dummy_scaled)

    print(f"\nPrediction: {pred[0]}")
    print(f"Probability: {prob[0]}")
    print("✓ Inference test passed")
except Exception as e:
    print(f"❌ Inference failed: {e}")

Model: RandomForestClassifier
Expected Features: 39

Prediction: 1
Probability: [0.29390859 0.70609141]
✓ Inference test passed




## 3. Validate Submission JSON

In [4]:
# Minimum ROC-AUC the submission has to report to count as meeting the target.
ROC_AUC_TARGET = 0.75

try:
    with open('../submission.json', 'r') as f:
        submission = json.load(f)
except FileNotFoundError:
    print("⚠️  submission.json not found yet")
except json.JSONDecodeError as e:
    # A file that exists but does not parse is a validity failure, not a crash.
    print(f"❌ submission.json is not valid JSON: {e}")
else:
    print(json.dumps(submission, indent=4))

    # Validation checks. A missing or non-numeric score is reported explicitly
    # instead of being treated as 0 and blamed on the threshold.
    roc_auc = submission.get('roc_auc') if isinstance(submission, dict) else None
    if not isinstance(roc_auc, (int, float)):
        print("\n⚠️  submission.json has no numeric 'roc_auc' field")
    elif roc_auc < ROC_AUC_TARGET:
        print(f"\n⚠️  ROC-AUC {roc_auc} is below the required {ROC_AUC_TARGET}")
    else:
        print("\n✓ ROC-AUC meets requirement")

{
    "student_name": "Rushikesh",
    "project_name": "Customer Churn Prediction System",
    "model_name": "Random Forest (SMOTE Balanced)",
    "roc_auc": 0.7517,
    "accuracy": 0.7033,
    "precision": 0.6882,
    "recall": 0.7479,
    "f1_score": 0.7168,
    "final_status": "Completed - Target Met (ROC-AUC > 0.75)",
    "features_engineered": [
        "Recency",
        "Frequency",
        "TotalSpent",
        "FrequencyTrend",
        "SpendTrend",
        "Freq_x_Spend",
        "Active_Freq",
        "Spend_per_Item",
        "ProductDiversityScore",
        "CustomerSegment"
    ],
    "requirements_met": [
        "Data Pipeline (Acquisition, Cleaning, Feature Eng)",
        "Model Training (Baseline, SMOTE, Tuning, Ensemble)",
        "Evaluation (ROC-AUC >= 0.75 Met!)",
        "Deployment Logic (Streamlit App)",
        "Documentation (README, Notebooks)"
    ],
    "limitations": "Model optimized for recall to capture churners. False positive rate is acceptable for re