In [None]:
import os
import joblib
import pandas as pd
import requests
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split

# File the fitted pipeline is serialized to by train_and_save_model() and
# read back from by get_model(). The path is relative, so it resolves against
# the current working directory of the process.
model_path = "house_price_model.joblib"

# 1. Train and save model function
def train_and_save_model():
    """Fit a price-prediction pipeline on synthetic data and save it to disk.

    Builds a 1000-row synthetic housing dataset, fits a pipeline (numeric
    columns passed through, categorical columns one-hot encoded, followed by
    a 100-tree ``RandomForestRegressor``) on 80% of the rows, and serializes
    the fitted pipeline to ``model_path`` with joblib.

    NOTE: every column, including ``Price``, is drawn independently at
    random, so the fitted model carries no real predictive signal. This is
    demonstration data only.

    Returns:
        The fitted ``sklearn.pipeline.Pipeline``.
    """
    # Use a private generator instead of np.random.seed(42): it produces the
    # same draw sequence, but does not reseed the process-wide NumPy RNG, so
    # later np.random calls elsewhere (e.g. the simulated market multiplier)
    # are not made deterministic as a side effect of training.
    rng = np.random.RandomState(42)
    n_samples = 1000

    # Synthetic dataset; the draw order below determines the generated values.
    data = {
        "Area": rng.normal(1500, 500, n_samples),
        "Age": rng.randint(1, 50, n_samples),
        "Bedrooms": rng.randint(1, 6, n_samples),
        "Bathrooms": rng.randint(1, 5, n_samples),
        "HallRooms": rng.randint(1, 4, n_samples),
        "Kitchens": rng.randint(1, 3, n_samples),
        "LoanAmount": rng.normal(500000, 200000, n_samples),
        "Direction": rng.choice(["North", "South", "East", "West"], n_samples),
        "GovtApproved": rng.choice(["Yes", "No"], n_samples),
        "Location": rng.choice(["Downtown", "Suburb", "Rural"], n_samples),
        "Price": rng.normal(2000000, 1000000, n_samples),
    }
    df = pd.DataFrame(data)

    # Separate the regression target from the feature columns.
    X = df.drop("Price", axis=1)
    y = df["Price"]

    # Hold out 20% of the rows. The held-out part is not used in this
    # function, so it is discarded rather than bound to unused names.
    X_train, _, y_train, _ = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    categorical_features = ["Direction", "GovtApproved", "Location"]
    numeric_features = [
        "Area",
        "Age",
        "Bedrooms",
        "Bathrooms",
        "HallRooms",
        "Kitchens",
        "LoanAmount",
    ]

    # Numeric columns go through unchanged; categorical columns are one-hot
    # encoded, with categories unseen during fit encoded as all zeros.
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', 'passthrough', numeric_features),
            ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
        ]
    )

    pipeline = Pipeline([
        ('preprocessor', preprocessor),
        ('model', RandomForestRegressor(n_estimators=100, random_state=42)),
    ])

    pipeline.fit(X_train, y_train)

    # Persist the whole pipeline so prediction reuses the fitted encoder.
    joblib.dump(pipeline, model_path)

    print(f"Model trained and saved to {model_path}")
    return pipeline

# 2. Fetch real-time market multiplier
def fetch_market_multiplier(lat, lon, api_key="YOUR_API_KEY"):
    """Return a market-adjustment multiplier for a location (simulated).

    This is a stand-in for a real market-data lookup: ``lat``, ``lon`` and
    ``api_key`` are accepted but not used, and the result is a uniform random
    draw from the global NumPy RNG.

    Args:
        lat: Latitude of the property (currently unused).
        lon: Longitude of the property (currently unused).
        api_key: API key for the market-data service (currently unused).

    Returns:
        A float in the range 0.8 to 1.2, or 1.0 if the lookup raises.
    """
    # Neutral multiplier used when the lookup fails.
    fallback = 1.0

    # Placeholder for a real lookup. The intended shape was an HTTP GET via
    # requests with lat/lon/apikey query parameters, raising on HTTP errors,
    # and returning average_sale_value / base_value from the JSON response.
    try:
        simulated = np.random.uniform(0.8, 1.2)
    except Exception as e:
        print(f"Error fetching market multiplier: {e}")
        return fallback
    return simulated

# 3. Load trained model or train if not exists
def get_model():
    """Return the price-prediction pipeline, training it first if needed.

    If a file exists at ``model_path`` it is loaded with joblib; otherwise a
    new model is trained and saved via ``train_and_save_model()``.

    Returns:
        The object loaded from ``model_path``, or the freshly trained pipeline.
    """
    # Fast path: reuse the previously saved model.
    if os.path.exists(model_path):
        return joblib.load(model_path)

    print(f"Model file {model_path} not found. Training a new model...")
    return train_and_save_model()

# 4. Prompt user for input and predict
def _prompt_number(prompt, cast):
    """Ask with *prompt* until the reply parses with *cast*; return the value."""
    while True:
        reply = input(prompt)
        try:
            return cast(reply)
        except ValueError:
            # Re-ask instead of aborting, so one typo does not discard the
            # answers already entered.
            print(f"Invalid number: {reply!r}. Please try again.")


def _prompt_choice(prompt, options):
    """Read a reply and return the entry of *options* it matches, ignoring case.

    A reply matching none of *options* is returned stripped but otherwise
    unchanged.
    """
    reply = input(prompt).strip()
    canonical = {option.casefold(): option for option in options}
    return canonical.get(reply.casefold(), reply)


def prompt_and_predict():
    """Interactively collect house details and print the predicted price.

    Obtains the model via ``get_model()`` (which trains one if none is
    saved), reads the feature values from standard input, predicts a base
    price, scales it by ``fetch_market_multiplier(latitude, longitude)`` and
    prints both figures.

    Numeric prompts are repeated until the reply parses. Direction and
    government-approval replies are matched case-insensitively against the
    options shown in the prompt and converted to that exact spelling, so
    that e.g. "north" is passed to the model as "North".

    Returns:
        None. Results are printed.
    """
    # Get the model (train if needed)
    model = get_model()

    print("Enter house details to predict price:")
    area = _prompt_number("Area (sqft): ", float)
    age = _prompt_number("Age (years): ", float)
    bedrooms = _prompt_number("Number of bedrooms: ", int)
    bathrooms = _prompt_number("Number of bathrooms: ", int)
    hall_rooms = _prompt_number("Number of hall rooms: ", int)
    kitchens = _prompt_number("Number of kitchens: ", int)
    loan = _prompt_number("Loan amount on house: ", float)
    direction = _prompt_choice(
        "Direction (North/South/East/West): ",
        ("North", "South", "East", "West"),
    )
    govt = _prompt_choice(
        "Government approved layout? (Yes/No): ", ("Yes", "No")
    )
    # Free-form text: only surrounding whitespace is removed.
    location = input("Location (neighborhood name): ").strip()
    latitude = _prompt_number("Latitude: ", float)
    longitude = _prompt_number("Longitude: ", float)

    # Single-row frame whose column names match the training features.
    input_df = pd.DataFrame([{
        "Area": area,
        "Age": age,
        "Bedrooms": bedrooms,
        "Bathrooms": bathrooms,
        "HallRooms": hall_rooms,
        "Kitchens": kitchens,
        "LoanAmount": loan,
        "Direction": direction,
        "GovtApproved": govt,
        "Location": location,
    }])

    base_price = model.predict(input_df)[0]
    multiplier = fetch_market_multiplier(latitude, longitude)
    updated_price = base_price * multiplier

    print(f"\nBase Predicted Price: ₹{base_price:,.2f}")
    print(f"Updated Price (market-adjusted): ₹{updated_price:,.2f}")

# Start the interactive prediction prompt only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    prompt_and_predict()

Model file house_price_model.joblib not found. Training a new model...
Model trained and saved to house_price_model.joblib
Enter house details to predict price:
