# 8. Model Deployment: Production-Ready House Price Predictor

**Objectives:**
1. **Save the Champion Model**: Persist the best performing model for production use
2. **Create Prediction Pipeline**: Build a reusable prediction function
3. **Model Validation**: Final validation on unseen data
4. **Deployment Preparation**: Package everything for production deployment

---

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import warnings
warnings.filterwarnings('ignore')

# Plot styling shared by every figure produced in this section
sns.set_theme(palette="viridis", style="whitegrid")
plt.rcParams.update({'figure.figsize': (12, 8)})

# Opening banner for the deployment pipeline
banner_rule = "=" * 50
print("üöÄ Starting Model Deployment Pipeline")
print(banner_rule)

In [None]:
# --- 8.1 Load and Validate Champion Model ---
print("8.1 LOADING CHAMPION MODEL")
print("-"*30)

# Prefer the persisted model so this section can run standalone; otherwise fall
# back to the in-memory `best_model` left behind by the training sections.
try:
    champion_model = joblib.load('champion_model.pkl')
except Exception as load_error:
    # `Exception` instead of a bare `except` keeps the best-effort fallback for a
    # missing or unreadable pickle without swallowing KeyboardInterrupt/SystemExit.
    if 'best_model' not in globals():
        print("‚ùå No model found. Please run the model training sections first.")
        raise ValueError("Champion model not available") from load_error
    champion_model = best_model
    champion_name = best_model_name
    print(f"‚úÖ Using champion model: {champion_name}")
else:
    # The pickle does not carry a display name, and the later cells need
    # `champion_name`, so name the loaded model after its final estimator class.
    # Assumes a Pipeline with a 'model' step, as the info print below does.
    champion_name = type(champion_model.named_steps['model']).__name__
    print("‚úÖ Loaded saved champion model")

# Display model info
print(f"Champion Model: {champion_name}")
print(f"Model Type: {type(champion_model.named_steps['model']).__name__}")

In [None]:
# --- 8.2 Final Model Validation ---
print("\n8.2 FINAL MODEL VALIDATION")
print("-"*30)

from sklearn.model_selection import train_test_split

# Load fresh test data (simulate production scenario)
# NOTE(review): this is the processed dataset re-split with a different seed;
# if the champion was trained on the same file, some of these rows were seen
# during training and the metrics below are optimistic -- confirm.
df_fresh = pd.read_csv('../../../data/processed/merged.csv')

# Apply same preprocessing as training: normalise text columns and turn the
# -1 placeholders in the numeric columns into missing values
for text_column in ('city', 'region'):
    df_fresh[text_column] = df_fresh[text_column].astype(str).str.strip().str.lower()
for numeric_column in ('size', 'room_count', 'bathroom_count'):
    df_fresh[numeric_column] = df_fresh[numeric_column].replace(-1, np.nan)

# Apply region cleaning (from section 1)
df_fresh = clean_region_names(df_fresh, region_column='region')

# Scope filter: keep only the four cities the model covers
df_fresh = df_fresh[
    (df_fresh['city'].isin(['tunis', 'ariana', 'ben arous', 'la manouba']))
].copy()

# IQR cleaning (from section 1), applied per city
df_fresh['price_per_m2'] = df_fresh['price'] / df_fresh['size']
df_fresh = df_fresh.groupby('city', group_keys=False).apply(remove_outliers_iqr)

# KNN imputation (from section 2)
df_fresh = impute_region_with_knn(df_fresh)

# Feature engineering (from section 3); the target is modelled as log1p(price)
df_fresh['avg_room_size'] = df_fresh['size'] / df_fresh['room_count']
df_fresh['log_price'] = np.log1p(df_fresh['price'])

# Prepare features
features = ['city','region' ,'size', 'room_count', 'bathroom_count', 'avg_room_size']
X_fresh = df_fresh[features]
y_fresh = df_fresh['log_price']

# Split for validation: only the 20% hold-out part is used
_, X_prod_test, _, y_prod_test = train_test_split(
    X_fresh, y_fresh, test_size=0.2, random_state=123  # Different seed for production simulation
)

print(f"Production Test Set: {X_prod_test.shape[0]} samples")
print(f"Features: {X_prod_test.shape[1]}")

# Make predictions in log space, then map predictions and targets back to prices
y_pred_prod = champion_model.predict(X_prod_test)
y_pred_prod_real = np.expm1(y_pred_prod)
y_prod_real = np.expm1(y_prod_test)

# Calculate metrics: R2 on the log scale, MAE and RMSE on the price scale
r2_prod = r2_score(y_prod_test, y_pred_prod)
mae_prod = mean_absolute_error(y_prod_real, y_pred_prod_real)
rmse_prod = np.sqrt(mean_squared_error(y_prod_real, y_pred_prod_real))

# The original literal was split across two physical lines (a SyntaxError);
# the intended leading newline is restored as an escape sequence.
print("\nüìä Production Validation Results:")
print(f"   R¬≤ Score: {r2_prod:.4f}")
print(f"   MAE: {mae_prod:,.0f} TND")
print(f"   RMSE: {rmse_prod:,.0f} TND")
print(f"   Mean Price: {y_prod_real.mean():,.0f} TND")
print(f"   MAE as % of Mean Price: {(mae_prod/y_prod_real.mean()*100):.1f}%")

In [None]:
# --- 8.3 Save Champion Model ---
print("\n8.3 SAVING CHAMPION MODEL")
print("-"*30)

import json
import os

# Name-specific artefacts keep one file per champion
name_slug = champion_name.replace(" ", "_").lower()
model_filename = f'champion_model_{name_slug}.pkl'
metadata_filename = f'model_metadata_{name_slug}.json'

# Fixed names that predict_house_price(), the Streamlit app and the API load;
# without these copies none of the downstream consumers can find the model.
canonical_model_filename = 'champion_model.pkl'
canonical_metadata_filename = 'model_metadata.json'

# Save the model
for model_target in (model_filename, canonical_model_filename):
    joblib.dump(champion_model, model_target)

# Save model metadata (metrics cast to plain floats so they serialise as JSON numbers)
model_metadata = {
    'model_name': champion_name,
    'r2_score': float(r2_prod),
    'mae': float(mae_prod),
    'rmse': float(rmse_prod),
    'features': features,
    'preprocessing': 'RobustScaler + OneHotEncoder',
    'target_transformation': 'log1p',
    'creation_date': pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S'),
    'version': '2.0'
}

for metadata_target in (metadata_filename, canonical_metadata_filename):
    with open(metadata_target, 'w', encoding='utf-8') as metadata_file:
        json.dump(model_metadata, metadata_file, indent=4)

print(f"‚úÖ Model saved as: {model_filename}")
print(f"‚úÖ Metadata saved as: {metadata_filename}")

# Report on-disk sizes; the former one-liner used `os` without importing it and
# backslashes inside f-string expressions, which fail to parse before Python 3.12.
print("üìÅ File sizes:")
for saved_path in (model_filename, metadata_filename,
                   canonical_model_filename, canonical_metadata_filename):
    if os.path.exists(saved_path):
        size_label = f"{os.path.getsize(saved_path) / 1024 / 1024:.2f} MB"
    else:
        size_label = "N/A"
    print(f"   {saved_path}: {size_label}")

In [None]:
# --- 8.4 Create Prediction Function ---
print("\n8.4 CREATING PREDICTION FUNCTION")
print("-"*30)

def predict_house_price(city, region, size, room_count, bathroom_count, model_path='champion_model.pkl'):
    """
    Predict house price using the trained model.

    The model predicts log1p(price); the result is mapped back with expm1.
    The interval and the reported score come from the notebook-level
    validation results (`rmse_prod`, `r2_prod`, `champion_name`), so the
    validation section must have run before this function is called.

    Parameters:
    -----------
    city : str
        City name (tunis, ariana, ben arous, la manouba)
    region : str
        Region/neighborhood name
    size : float
        House size in square meters (must be positive and finite)
    room_count : int
        Number of rooms (must be at least 1)
    bathroom_count : int
        Number of bathrooms (must not be negative)
    model_path : str
        Path to saved model file

    Returns:
    --------
    dict : Predicted price, a rough 95% interval (prediction +/- 1.96 * RMSE,
        floored at 0), price per square meter, the inputs as given, the
        champion model name and its validation R2.

    Raises:
    -------
    ValueError
        If size, room_count or bathroom_count cannot be converted to a number
        or fall outside the ranges above.
    """

    # Normalise and validate first, so bad input fails fast with a clear message
    # instead of producing a division by zero or an infinite feature value.
    size_m2 = float(size)
    rooms = int(room_count)
    bathrooms = int(bathroom_count)
    if not (np.isfinite(size_m2) and size_m2 > 0):
        raise ValueError(f"size must be a positive, finite number of square meters, got {size!r}")
    if rooms <= 0:
        raise ValueError(f"room_count must be at least 1, got {room_count!r}")
    if bathrooms < 0:
        raise ValueError(f"bathroom_count must not be negative, got {bathroom_count!r}")

    # Load model
    model = joblib.load(model_path)

    # Prepare input data with the same text normalisation used for training
    input_data = pd.DataFrame({
        'city': [str(city).lower().strip()],
        'region': [str(region).lower().strip()],
        'size': [size_m2],
        'room_count': [rooms],
        'bathroom_count': [bathrooms]
    })

    # Add engineered features
    input_data['avg_room_size'] = input_data['size'] / input_data['room_count']

    # Make prediction; cast to a plain float so the result dict is JSON-friendly
    log_price_pred = model.predict(input_data)[0]
    price_pred = float(np.expm1(log_price_pred))

    # Estimate confidence interval (rough approximation)
    # Using RMSE from validation as error estimate
    error_margin = float(rmse_prod) * 1.96  # 95% confidence interval

    return {
        'predicted_price': round(price_pred, 2),
        'confidence_interval': {
            'lower': round(max(0, price_pred - error_margin), 2),
            'upper': round(price_pred + error_margin, 2)
        },
        # Divide by the normalised size so numeric strings such as "120" work too
        'price_per_m2': round(price_pred / size_m2, 2),
        'input_features': {
            'city': city,
            'region': region,
            'size': size,
            'room_count': room_count,
            'bathroom_count': bathroom_count
        },
        'model_info': champion_name,
        'r2_score': round(float(r2_prod), 4)
    }

# Test the prediction function
print("üß™ Testing Prediction Function:")
test_prediction = predict_house_price(
    city='tunis',
    region='centre ville',
    size=120,
    room_count=3,
    bathroom_count=2,
    # Point at the file written by the save step rather than relying on the
    # default 'champion_model.pkl', which may not exist in the working folder.
    model_path=model_filename
)

# Summarise the smoke-test result
interval = test_prediction['confidence_interval']
print(f"üè† Test Property: {test_prediction['input_features']}")
print(f"üí∞ Predicted Price: {test_prediction['predicted_price']:,.0f} TND")
print(f"üìä Price per m¬≤: {test_prediction['price_per_m2']:,.0f} TND")
print(f"üéØ Confidence Interval: {interval['lower']:,.0f} - {interval['upper']:,.0f} TND")

In [None]:
# --- 8.5 Create Simple Web Interface (Streamlit) ---
print("\n8.5 CREATING WEB INTERFACE")
print("-"*30)

# Source of the standalone Streamlit app. It is written to disk below and runs
# in its own process, so it must import everything it uses itself (numpy was
# missing although the app calls np.expm1).
streamlit_code = '''
import streamlit as st
import numpy as np
import pandas as pd
import joblib
import json

# Load model and metadata
@st.cache_resource
def load_model():
    model = joblib.load('champion_model.pkl')
    with open('model_metadata.json', 'r') as f:
        metadata = json.load(f)
    return model, metadata

def predict_price(city, region, size, rooms, bathrooms):
    """Prediction function"""
    # Load model
    model, metadata = load_model()
    
    # Prepare data (same text normalisation as training: lower-case, trimmed)
    data = pd.DataFrame({
        'city': [city.lower().strip()],
        'region': [region.lower().strip()],
        'size': [size],
        'room_count': [rooms],
        'bathroom_count': [bathrooms]
    })
    data['avg_room_size'] = data['size'] / data['room_count']
    
    # Predict (the model outputs log1p(price))
    log_pred = model.predict(data)[0]
    price_pred = np.expm1(log_pred)
    
    return price_pred

# Streamlit UI
st.title("üè† Grand Tunis House Price Predictor")
st.markdown("**Expert AI Model for Real Estate Valuation**")

# Sidebar
st.sidebar.header("Model Information")
model, metadata = load_model()
st.sidebar.metric("Model R¬≤ Score", f"{metadata['r2_score']:.3f}")
st.sidebar.metric("Mean Absolute Error", f"{metadata['mae']:,.0f} TND")

# Main interface
col1, col2 = st.columns(2)

with col1:
    st.subheader("Property Details")
    city = st.selectbox("City", ["tunis", "ariana", "ben arous", "la manouba"])
    region = st.text_input("Region/Neighborhood", "centre ville")
    size = st.slider("Size (m¬≤)", 30, 500, 100)
    rooms = st.slider("Number of Rooms", 1, 10, 3)
    bathrooms = st.slider("Number of Bathrooms", 1, 5, 2)

with col2:
    st.subheader("Prediction Results")
    if st.button("üîÆ Predict Price", type="primary"):
        try:
            prediction = predict_price(city, region, size, rooms, bathrooms)
            
            st.metric("üè† Estimated Price", f"{prediction:,.0f} TND")
            st.metric("üìä Price per m¬≤", f"{prediction/size:,.0f} TND")
            
            # Confidence interval
            error_margin = metadata['rmse'] * 1.96
            st.metric("üéØ Confidence Range", 
                     f"{max(0, prediction-error_margin):,.0f} - {prediction+error_margin:,.0f} TND")
            
        except Exception as e:
            st.error(f"Prediction failed: {str(e)}")

# Footer
st.markdown("---")
st.markdown("*Built with advanced machine learning for accurate real estate valuation*")
'''

# Save Streamlit app
with open('house_price_predictor.py', 'w', encoding='utf-8') as f:
    f.write(streamlit_code)

print("‚úÖ Created Streamlit web app: house_price_predictor.py")
print("üöÄ To run: streamlit run house_price_predictor.py")

In [None]:
# --- 8.6 Create API Endpoint (FastAPI) ---
print("\n8.6 CREATING API ENDPOINT")
print("-"*30)

# Source of the standalone FastAPI service, written to api.py below.
# Compared with a bare schema, the request model rejects non-positive sizes and
# room counts up front (they would otherwise cause a division by zero or an
# infinite feature), and /health reports whether the model is actually loaded.
fastapi_code = '''
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel, Field
import joblib
import pandas as pd
import numpy as np
import json
from typing import Dict, Any

app = FastAPI(title="Grand Tunis House Price Predictor API",
              description="AI-powered real estate valuation for Grand Tunis",
              version="2.0")

# Filled in by the startup hook; the defaults keep "/" and "/health" answerable
# even when the model has not been loaded.
model = None
metadata = {}

# Load model and metadata
@app.on_event("startup")
async def load_model():
    global model, metadata
    model = joblib.load('champion_model.pkl')
    with open('model_metadata.json', 'r') as f:
        metadata = json.load(f)

class HouseFeatures(BaseModel):
    city: str
    region: str
    size: float = Field(..., gt=0, description="Size in square meters")
    room_count: int = Field(..., gt=0, description="Number of rooms")
    bathroom_count: int = Field(..., ge=0, description="Number of bathrooms")

class PredictionResponse(BaseModel):
    predicted_price: float
    confidence_interval: Dict[str, float]
    price_per_m2: float
    input_features: Dict[str, Any]
    model_info: str
    r2_score: float

@app.get("/")
async def root():
    return {
        "message": "Grand Tunis House Price Predictor API",
        "version": "2.0",
        "model": metadata.get('model_name', 'Unknown'),
        "r2_score": metadata.get('r2_score', 0)
    }

@app.post("/predict", response_model=PredictionResponse)
async def predict_price(features: HouseFeatures):
    try:
        # Prepare input data
        input_data = pd.DataFrame({
            'city': [features.city.lower().strip()],
            'region': [features.region.lower().strip()],
            'size': [features.size],
            'room_count': [features.room_count],
            'bathroom_count': [features.bathroom_count]
        })
        
        # Add engineered features
        input_data['avg_room_size'] = input_data['size'] / input_data['room_count']
        
        # Make prediction (the model outputs log1p(price))
        log_price_pred = model.predict(input_data)[0]
        price_pred = float(np.expm1(log_price_pred))
        
        # Confidence interval
        error_margin = metadata['rmse'] * 1.96
        confidence_lower = max(0, price_pred - error_margin)
        confidence_upper = price_pred + error_margin
        
        return PredictionResponse(
            predicted_price=round(price_pred, 2),
            confidence_interval={
                "lower": round(confidence_lower, 2),
                "upper": round(confidence_upper, 2)
            },
            price_per_m2=round(price_pred / features.size, 2),
            input_features={
                "city": features.city,
                "region": features.region,
                "size": features.size,
                "room_count": features.room_count,
                "bathroom_count": features.bathroom_count
            },
            model_info=metadata.get('model_name', 'Unknown'),
            r2_score=round(metadata.get('r2_score', 0), 4)
        )
        
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Prediction failed: {str(e)}")

@app.get("/health")
async def health_check():
    return {"status": "healthy", "model_loaded": model is not None}
'''

# Save FastAPI app
with open('api.py', 'w', encoding='utf-8') as f:
    f.write(fastapi_code)

print("‚úÖ Created FastAPI endpoint: api.py")
print("üöÄ To run: uvicorn api:app --reload")
print("üìñ API docs: http://localhost:8000/docs")

In [None]:
# --- 8.7 Deployment Instructions ---
print("\n8.7 DEPLOYMENT INSTRUCTIONS")
print("-"*30)

# Markdown template for the deployment guide. It is rendered with str.format(),
# so the only single-brace fields are {r2_prod} and {mae_prod}; every literal
# brace in the embedded example code is doubled ({{ / }}) so that format()
# emits it verbatim instead of treating it as a replacement field.
deployment_instructions = '''
# Grand Tunis House Price Predictor - Deployment Guide

## Files Created:
- `champion_model.pkl` - Trained model
- `model_metadata.json` - Model information
- `house_price_predictor.py` - Streamlit web app
- `api.py` - FastAPI REST API

## Local Deployment:

### 1. Web App (Streamlit):
```bash
pip install streamlit
streamlit run house_price_predictor.py
```
Visit: http://localhost:8501

### 2. API (FastAPI):
```bash
pip install fastapi uvicorn
uvicorn api:app --reload
```
API: http://localhost:8000
Docs: http://localhost:8000/docs

## Cloud Deployment:

### Heroku (Web App):
1. Create `requirements.txt`:
```
streamlit
pandas
scikit-learn
joblib
numpy
matplotlib
seaborn
```

2. Create `Procfile`:
```
web: streamlit run house_price_predictor.py --server.port $PORT
```

3. Deploy to Heroku

### AWS/Docker (API):
1. Add `fastapi` and `uvicorn` to `requirements.txt`, then create `Dockerfile`:
```dockerfile
FROM python:3.9-slim
WORKDIR /app
COPY requirements.txt .
RUN pip install -r requirements.txt
COPY . .
EXPOSE 8000
CMD ["uvicorn", "api:app", "--host", "0.0.0.0", "--port", "8000"]
```

2. Build and deploy to AWS ECS/Fargate

## Usage Examples:

### API Call:
```python
import requests

response = requests.post("http://localhost:8000/predict", json={{
    "city": "tunis",
    "region": "centre ville",
    "size": 120,
    "room_count": 3,
    "bathroom_count": 2
}})

prediction = response.json()
print(f"Predicted Price: {{prediction['predicted_price']:,}} TND")
```

### Model Performance:
- R¬≤ Score: {r2_prod:.4f}
- MAE: {mae_prod:,.0f} TND
- Suitable for production use in real estate applications
'''

# Save deployment instructions, filling in the validation metrics
with open('DEPLOYMENT_README.md', 'w', encoding='utf-8') as f:
    f.write(deployment_instructions.format(r2_prod=r2_prod, mae_prod=mae_prod))

print("‚úÖ Created deployment guide: DEPLOYMENT_README.md")
print("\nüéâ DEPLOYMENT PIPELINE COMPLETE!")
print("="*50)
print("Your house price predictor is ready for production!")
print("üìÅ Check the generated files in this folder")

In [None]:
# --- 8.8 Final Summary ---
section_rule = "-" * 30
closing_rule = "=" * 50

print("\n8.8 DEPLOYMENT SUMMARY")
print(section_rule)

# Champion model and its validation metrics
champion_lines = [
    "üèÜ Champion Model Deployed:",
    f"   Model: {champion_name}",
    f"   R¬≤ Score: {r2_prod:.4f}",
    f"   MAE: {mae_prod:,.0f} TND",
]
print(*champion_lines, sep="\n")

# Files produced by the previous sections
asset_lines = [
    "\nüì¶ Deployment Assets Created:",
    "   ‚úÖ champion_model.pkl (model file)",
    "   ‚úÖ model_metadata.json (model info)",
    "   ‚úÖ house_price_predictor.py (web app)",
    "   ‚úÖ api.py (REST API)",
    "   ‚úÖ DEPLOYMENT_README.md (instructions)",
]
for asset_line in asset_lines:
    print(asset_line)

# Readiness checklist
readiness_lines = [
    "\nüöÄ Ready for Production!",
    "   - Web interface for users",
    "   - REST API for integrations",
    "   - Complete deployment guide",
    "   - Model validation complete",
]
for readiness_line in readiness_lines:
    print(readiness_line)

# Closing banner
print("\n" + closing_rule)
print("üéØ MISSION ACCOMPLISHED: House Price Predictor Deployed!")
print(closing_rule)