# 🏠 House Price Prediction - Interactive Jupyter Notebook

This notebook provides an interactive interface for predicting house prices using Random Forest.

## Step 1: Install Required Packages

In [None]:
# Run this cell first to install required packages
!pip install --upgrade ipywidgets pandas numpy scikit-learn matplotlib seaborn
print("\n‚úÖ Installation complete! Please restart the kernel if needed.")

## Step 2: Import Libraries

In [None]:
import os
import pickle
import traceback
import warnings

import ipywidgets as widgets
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import sklearn
from IPython.display import display, HTML, clear_output
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split

# Suppress only deprecation-style noise from the plotting/ML stack.
# A blanket `filterwarnings('ignore')` would also hide actionable warnings,
# e.g. the one scikit-learn emits when a pickled model was saved by a
# different scikit-learn version than the one installed.
for _category in (DeprecationWarning, FutureWarning):
    warnings.filterwarnings('ignore', category=_category)

# Set style. The seaborn-derived Matplotlib styles were renamed to
# 'seaborn-v0_8-*' in Matplotlib 3.6; fall back to the old name on earlier
# releases, and keep Matplotlib's default style if neither is available.
_preferred_style = 'seaborn-v0_8-darkgrid'
_legacy_style = 'seaborn-darkgrid'
if _preferred_style in plt.style.available:
    plt.style.use(_preferred_style)
elif _legacy_style in plt.style.available:
    plt.style.use(_legacy_style)
sns.set_palette("husl")

print("✅ All libraries imported successfully!")
print(f"📦 scikit-learn version: {sklearn.__version__}")

## Step 3: Define Training Function

In [None]:
def train_model(csv_path, model_path='house_price_model.pkl'):
    """Train a Random Forest regressor on the housing CSV and save it to disk.

    The data is encoded (yes/no columns to 1/0, ``furnishingstatus`` one-hot
    encoded with the first category dropped), split 70/30 with a fixed seed,
    fitted with 100 trees, evaluated on the held-out 30%, pickled together
    with its metrics, and finally a feature-importance chart is shown.

    Parameters
    ----------
    csv_path : str or path-like
        CSV file with a ``price`` target column, the yes/no columns
        ``mainroad``, ``guestroom``, ``basement``, ``hotwaterheating``,
        ``airconditioning`` and ``prefarea``, and a ``furnishingstatus``
        column. Every other column is used as a feature unchanged.
    model_path : str or path-like, optional
        Destination of the pickled bundle (a dict with the keys ``model``,
        ``metrics``, ``feature_names`` and ``sklearn_version``).

    Returns
    -------
    tuple
        ``(rf_model, metrics, feature_names)`` where ``metrics`` is a dict
        with the keys ``r2_score``, ``rmse`` and ``mae`` and
        ``feature_names`` is the list of training columns, in order.

    Raises
    ------
    FileNotFoundError
        Propagated from ``pd.read_csv`` when ``csv_path`` does not exist.
    ValueError
        When a yes/no column holds a non-missing value other than yes/no.
    """
    print("📊 Loading data...")
    raw_df = pd.read_csv(csv_path)
    print(f"✅ Data loaded! Shape: {raw_df.shape}")

    # Display first few rows
    print("\n📋 First 5 rows of data:")
    display(raw_df.head())

    print("\n🔧 Preprocessing data...")
    X, y = _prepare_features(raw_df)
    feature_names = X.columns.tolist()

    print("📊 Splitting data (70% train, 30% test)...")
    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, random_state=42)

    print("🌲 Training Random Forest model...")
    rf_model = RandomForestRegressor(
        n_estimators=100,
        max_depth=None,
        random_state=42,
        n_jobs=-1
    )
    rf_model.fit(X_train, y_train)

    print("📈 Evaluating model...")
    y_pred = rf_model.predict(X_test)
    metrics = {
        'r2_score': r2_score(y_test, y_pred),
        'rmse': np.sqrt(mean_squared_error(y_test, y_pred)),
        'mae': mean_absolute_error(y_test, y_pred)
    }

    # Display metrics
    print("\n" + "="*50)
    print("📊 MODEL PERFORMANCE METRICS")
    print("="*50)
    print(f"R² Score:  {metrics['r2_score']:.4f}")
    print(f"RMSE:      ${metrics['rmse']:,.2f}")
    print(f"MAE:       ${metrics['mae']:,.2f}")
    print("="*50)

    # The scikit-learn version is stored next to the model so a later load
    # can tell which release produced the pickle.
    model_data = {
        'model': rf_model,
        'metrics': metrics,
        'feature_names': feature_names,
        'sklearn_version': sklearn.__version__
    }

    # Saving is best-effort: a failure is reported but the trained model is
    # still returned to the caller.
    try:
        # Use protocol 4 for better compatibility
        with open(model_path, 'wb') as f:
            pickle.dump(model_data, f, protocol=4)
        print("\n💾 Model saved successfully!")
    except Exception as e:
        print(f"⚠️ Warning: Could not save model - {e}")

    _plot_feature_importance(feature_names, rf_model.feature_importances_)

    return rf_model, metrics, feature_names


def _prepare_features(raw_df):
    """Encode the categorical columns of ``raw_df`` and return ``(X, y)``."""
    df = raw_df.copy()

    binary_cols = [
        'mainroad', 'guestroom', 'basement',
        'hotwaterheating', 'airconditioning', 'prefarea'
    ]
    yes_no = {'yes': 1, 'no': 0}

    for col in binary_cols:
        original = df[col]
        # Normalise case and surrounding whitespace so 'Yes' or ' no' are
        # accepted; genuinely missing entries are left missing.
        normalized = original.where(
            original.isna(),
            original.astype(str).str.strip().str.lower()
        )
        encoded = normalized.map(yes_no)

        # A plain dict mapping would silently turn unexpected values into
        # NaN; surface them instead so bad data is not trained on unnoticed.
        unrecognised = encoded.isna() & original.notna()
        if unrecognised.any():
            bad_values = sorted(set(original[unrecognised].astype(str)))
            raise ValueError(
                f"Column '{col}' must contain only 'yes'/'no' values; found: {bad_values}"
            )
        df[col] = encoded

    # One-Hot Encoding
    df = pd.get_dummies(df, columns=['furnishingstatus'], drop_first=True)

    # Split features and target
    return df.drop("price", axis=1), df["price"]


def _plot_feature_importance(feature_names, importances):
    """Show a horizontal bar chart of feature importances, largest first."""
    feature_importance = pd.DataFrame({
        'Feature': feature_names,
        'Importance': importances
    }).sort_values('Importance', ascending=False)

    fig, ax = plt.subplots(figsize=(12, 6))
    sns.barplot(data=feature_importance, x='Importance', y='Feature', ax=ax)
    ax.set_title('Feature Importance in House Price Prediction', fontsize=16, fontweight='bold')
    ax.set_xlabel('Importance', fontsize=12)
    ax.set_ylabel('Feature', fontsize=12)
    fig.tight_layout()
    plt.show()

def load_model_safe(model_path='house_price_model.pkl'):
    """Load the pickled model bundle, returning ``None`` instead of raising.

    Parameters
    ----------
    model_path : str or path-like, optional
        Location of the pickle file to read.

    Returns
    -------
    dict or None
        The saved bundle when it could be read and contains the ``model``,
        ``metrics`` and ``feature_names`` entries; ``None`` otherwise. A
        message explaining the outcome is printed in every case.
    """
    required_keys = ('model', 'metrics', 'feature_names')

    try:
        # SECURITY: unpickling can execute arbitrary code. Only load model
        # files that you created yourself or otherwise trust.
        with open(model_path, 'rb') as f:
            model_data = pickle.load(f)
    except ModuleNotFoundError as e:
        print(f"❌ Error loading model: {e}")
        print("\n🔧 This usually means the model was saved with a different scikit-learn version.")
        print("   Solution: Retrain the model by running the next cell.")
        return None
    except FileNotFoundError:
        print("⚠️ No saved model found. Please train the model first.")
        return None
    except Exception as e:
        print(f"❌ Unexpected error: {e}")
        return None

    # Reject payloads the rest of the notebook cannot use, rather than
    # reporting success and failing later with a KeyError/TypeError.
    if not isinstance(model_data, dict):
        print(f"❌ Unexpected error: saved file holds a {type(model_data).__name__}, not a model dictionary.")
        print("   Solution: Retrain the model by running the next cell.")
        return None
    missing_keys = [key for key in required_keys if key not in model_data]
    if missing_keys:
        print(f"❌ Unexpected error: saved model is missing required entries: {missing_keys}")
        print("   Solution: Retrain the model by running the next cell.")
        return None

    print("✅ Model loaded successfully!")
    saved_version = model_data.get('sklearn_version')
    if saved_version is not None:
        print(f"   Model trained with scikit-learn: {saved_version}")

        # Compare against the installed release explicitly, so a mismatch is
        # reported even when warnings are filtered. The import is guarded so
        # the version check is simply skipped if scikit-learn is unavailable.
        try:
            import sklearn
            current_version = sklearn.__version__
        except ImportError:
            current_version = None
        if current_version is not None and current_version != saved_version:
            print(f"⚠️ Installed scikit-learn is {current_version}; predictions may differ or fail.")
            print("   Consider retraining the model by running the next cell.")
    return model_data

print("✅ Functions defined successfully!")

## Step 4: Load or Train Model

### Option A: Try to load existing model

In [None]:
# Try to load an existing model bundle from disk.
model_data = load_model_safe()

# Unpack the bundle only when it is a dict carrying every entry the later
# steps read; otherwise fall through to the "train a new model" hint instead
# of failing with a KeyError.
_required_keys = ('model', 'metrics', 'feature_names')
if isinstance(model_data, dict) and all(key in model_data for key in _required_keys):
    model = model_data['model']
    metrics = model_data['metrics']
    feature_names = model_data['feature_names']

    print("\n📊 Model Performance:")
    print(f"R² Score: {metrics['r2_score']:.4f}")
    print(f"RMSE: ${metrics['rmse']:,.2f}")
    print(f"MAE: ${metrics['mae']:,.2f}")
    print("\n✅ Model is ready! Skip to Step 5 to start predicting.")
else:
    print("\n⚠️ No model loaded. Please run Option B below to train a new model.")

### Option B: Train a new model (Run this if Option A failed)

In [None]:
# ⚠️ IMPORTANT: Update this path to your CSV file location
csv_file_path = 'House Price.csv'  # ← Change this to your file path

# Examples:
# csv_file_path = '/path/to/your/House Price.csv'
# csv_file_path = 'C:/Users/YourName/Downloads/House Price.csv'  # Windows
# csv_file_path = '/Users/YourName/Downloads/House Price.csv'   # Mac/Linux

print(f"📂 Looking for file: {csv_file_path}")

try:
    # On success these three names are what Steps 5-7 read.
    model, metrics, feature_names = train_model(csv_file_path)
    print("\n🎉 Model training complete!")
    print("✅ You can now proceed to Step 5 for predictions.")
except FileNotFoundError:
    print(f"\n❌ Error: File '{csv_file_path}' not found!")
    print("\n💡 Solutions:")
    print("   1. Make sure the file exists in the current directory")
    print("   2. Update csv_file_path with the full path to your file")
    print("   3. Upload the file to the same directory as this notebook")
    print("\n📍 Current directory:", os.getcwd())

    # List candidate CSV files (any letter case of the extension) to help
    # spot a misspelled file name, and say so explicitly when there are none.
    print("📁 CSV files in current directory:")
    csv_files = sorted(name for name in os.listdir('.') if name.lower().endswith('.csv'))
    for name in csv_files:
        print(f"   • {name}")
    if not csv_files:
        print("   (none found)")
except Exception as e:
    # Any other failure (bad columns, invalid values, ...) is shown with its
    # traceback so the cause can be diagnosed.
    print(f"\n❌ Error: {e}")
    traceback.print_exc()

## Step 5: Interactive Prediction Interface

### Run this cell to create the interactive controls

In [None]:
# Check that Step 4 has provided both the model and its training column list.
if 'model' not in globals() or 'feature_names' not in globals():
    print("❌ Model not loaded! Please run Step 4 first.")
else:
    print("🎛️ Creating interactive controls...\n")

    def _make_slider(description, value, min_value, max_value, step=1):
        """Build an IntSlider with the label style and width shared by this form."""
        return widgets.IntSlider(
            value=value,
            min=min_value,
            max=max_value,
            step=step,
            description=description,
            style={'description_width': 'initial'},
            layout=widgets.Layout(width='500px')
        )

    # Numerical inputs
    area_slider = _make_slider('Area (sq ft):', value=5000, min_value=500, max_value=20000, step=100)
    bedrooms_slider = _make_slider('Bedrooms:', value=3, min_value=1, max_value=10)
    bathrooms_slider = _make_slider('Bathrooms:', value=2, min_value=1, max_value=5)
    stories_slider = _make_slider('Stories:', value=2, min_value=1, max_value=4)
    parking_slider = _make_slider('Parking:', value=2, min_value=0, max_value=5)

    # Binary inputs
    mainroad_check = widgets.Checkbox(value=True, description='Main Road Access')
    guestroom_check = widgets.Checkbox(value=False, description='Guest Room')
    basement_check = widgets.Checkbox(value=False, description='Basement')
    hotwaterheating_check = widgets.Checkbox(value=False, description='Hot Water Heating')
    airconditioning_check = widgets.Checkbox(value=True, description='Air Conditioning')
    prefarea_check = widgets.Checkbox(value=True, description='Preferred Area')

    # Furnishing status
    furnishing_dropdown = widgets.Dropdown(
        options=['furnished', 'semi-furnished', 'unfurnished'],
        value='furnished',
        description='Furnishing:',
        style={'description_width': 'initial'}
    )

    # Predict button
    predict_button = widgets.Button(
        description='🔮 Predict Price',
        button_style='success',
        layout=widgets.Layout(width='200px', height='50px')
    )

    # Output area: everything the callback prints or plots is rendered here.
    output_area = widgets.Output()

    def predict_price(b):
        """Button callback: predict a price for the current form values and show it.

        ``b`` is the clicked button (unused). The result, an input summary and
        two charts are written into ``output_area``, replacing earlier output.
        """
        with output_area:
            clear_output()

            # Collect the form values, encoded the same way as the training
            # data (yes/no -> 1/0, furnishing status as dummy columns with
            # 'furnished' as the dropped baseline).
            input_data = {
                'area': area_slider.value,
                'bedrooms': bedrooms_slider.value,
                'bathrooms': bathrooms_slider.value,
                'stories': stories_slider.value,
                'mainroad': 1 if mainroad_check.value else 0,
                'guestroom': 1 if guestroom_check.value else 0,
                'basement': 1 if basement_check.value else 0,
                'hotwaterheating': 1 if hotwaterheating_check.value else 0,
                'airconditioning': 1 if airconditioning_check.value else 0,
                'parking': parking_slider.value,
                'prefarea': 1 if prefarea_check.value else 0,
                'furnishingstatus_semi-furnished': 1 if furnishing_dropdown.value == 'semi-furnished' else 0,
                'furnishingstatus_unfurnished': 1 if furnishing_dropdown.value == 'unfurnished' else 0
            }

            # The model can only score the columns it was trained on, so
            # refuse early with a readable message if the form lacks any.
            missing_features = [name for name in feature_names if name not in input_data]
            if missing_features:
                print(f"❌ The loaded model expects features this form does not provide: {missing_features}")
                return

            # Select the columns in the exact order used for training instead
            # of relying on the dict's insertion order matching it.
            input_df = pd.DataFrame([input_data])[list(feature_names)]

            # Make prediction
            prediction = model.predict(input_df)[0]

            # Display results
            display(HTML(f"""
            <div style='background-color: #e8f4f8; padding: 30px; border-radius: 10px; border: 3px solid #1f77b4; text-align: center;'>
                <h2 style='color: #555; margin: 0;'>Predicted House Price</h2>
                <h1 style='color: #1f77b4; font-size: 3em; margin: 10px 0;'>${prediction:,.2f}</h1>
            </div>
            """))

            # Display input summary
            print("\n📋 Input Summary:")
            print("="*50)
            display(input_df.T)

            # Create visualization
            fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

            # Plot 1: Feature values
            feature_values = pd.Series(input_data)
            feature_values.plot(kind='barh', ax=ax1, color='skyblue')
            ax1.set_title('Input Feature Values', fontsize=14, fontweight='bold')
            ax1.set_xlabel('Value', fontsize=12)
            ax1.grid(axis='x', alpha=0.3)

            # Plot 2: Feature importance
            importance_df = pd.DataFrame({
                'Feature': feature_names,
                'Importance': model.feature_importances_
            }).sort_values('Importance', ascending=False).head(10)

            importance_df.plot(x='Feature', y='Importance', kind='barh', ax=ax2, color='coral', legend=False)
            ax2.set_title('Top 10 Feature Importance', fontsize=14, fontweight='bold')
            ax2.set_xlabel('Importance', fontsize=12)
            ax2.grid(axis='x', alpha=0.3)

            plt.tight_layout()
            plt.show()

    predict_button.on_click(predict_price)

    # Display interface
    display(HTML("<h2 style='color: #1f77b4;'>🏠 House Price Prediction Interface</h2>"))
    display(HTML("<h3>Numerical Features</h3>"))
    display(area_slider, bedrooms_slider, bathrooms_slider, stories_slider, parking_slider)

    display(HTML("<h3>Amenities</h3>"))
    display(widgets.HBox([mainroad_check, guestroom_check, basement_check]))
    display(widgets.HBox([hotwaterheating_check, airconditioning_check, prefarea_check]))

    display(HTML("<h3>Furnishing Status</h3>"))
    display(furnishing_dropdown)

    display(HTML("<br>"))
    display(predict_button)
    display(output_area)

    print("\n✅ Interface ready! Adjust the values and click 'Predict Price' button above.")

## Step 6: Model Analysis (Optional)

In [None]:
# This cell reads `model`, `metrics` and `feature_names`, so require all
# three (Step 4 defines them together) rather than `model` alone.
if all(name in globals() for name in ('model', 'metrics', 'feature_names')):
    print("🌲 Random Forest Model Information")
    print("="*50)
    print(f"Number of trees: {model.n_estimators}")
    print(f"Number of features: {model.n_features_in_}")
    print(f"Feature names: {feature_names}")
    print("\n📊 Model Performance:")
    print(f"R² Score: {metrics['r2_score']:.4f}")
    print(f"RMSE: ${metrics['rmse']:,.2f}")
    print(f"MAE: ${metrics['mae']:,.2f}")
else:
    print("❌ Model not loaded. Please run Step 4 first.")

## Step 7: Batch Predictions (Optional)

In [None]:
# Both the model and its training column list (from Step 4) are needed here.
if all(name in globals() for name in ('model', 'feature_names')):
    # Example: Make predictions for multiple houses at once.
    # Values are already encoded as for training: yes/no -> 1/0, furnishing
    # status as dummy columns with 'furnished' as the all-zero baseline.
    sample_houses = pd.DataFrame([
        {'area': 5000, 'bedrooms': 3, 'bathrooms': 2, 'stories': 2, 'mainroad': 1, 'guestroom': 0,
         'basement': 0, 'hotwaterheating': 0, 'airconditioning': 1, 'parking': 2, 'prefarea': 1,
         'furnishingstatus_semi-furnished': 0, 'furnishingstatus_unfurnished': 0},
        {'area': 7500, 'bedrooms': 4, 'bathrooms': 3, 'stories': 3, 'mainroad': 1, 'guestroom': 1,
         'basement': 1, 'hotwaterheating': 1, 'airconditioning': 1, 'parking': 3, 'prefarea': 1,
         'furnishingstatus_semi-furnished': 0, 'furnishingstatus_unfurnished': 0},
        {'area': 3000, 'bedrooms': 2, 'bathrooms': 1, 'stories': 1, 'mainroad': 0, 'guestroom': 0,
         'basement': 0, 'hotwaterheating': 0, 'airconditioning': 0, 'parking': 1, 'prefarea': 0,
         'furnishingstatus_semi-furnished': 0, 'furnishingstatus_unfurnished': 1}
    ])

    # Pass the columns in the exact order used for training rather than
    # relying on the dict key order above matching it.
    predictions = model.predict(sample_houses[list(feature_names)])
    sample_houses['Predicted_Price'] = predictions

    print("🏘️ Batch Predictions:")
    print("="*80)

    # Format the output nicely
    result_df = sample_houses[['area', 'bedrooms', 'bathrooms', 'stories', 'Predicted_Price']].copy()
    result_df['Predicted_Price'] = result_df['Predicted_Price'].apply(lambda x: f'${x:,.2f}')
    display(result_df)
else:
    print("❌ Model not loaded. Please run Step 4 first.")