# In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# --- Data -------------------------------------------------------------------
# Read the housing table from disk (edit the path if the CSV lives elsewhere).
file_path = 'Housing.csv'
data = pd.read_csv(file_path)

# 'price' is the regression target; everything else stays in the feature frame.
y = data['price']
X = data.drop(columns=['price'])

# --- Feature groups ---------------------------------------------------------
# Numeric columns are handed to the regressor unchanged; the remaining listed
# columns hold text categories and are one-hot encoded.
numerical_cols = ['area', 'bedrooms', 'bathrooms', 'stories', 'parking']
categorical_cols = [
    'mainroad',
    'guestroom',
    'basement',
    'hotwaterheating',
    'airconditioning',
    'prefarea',
    'furnishingstatus',
]

# --- Preprocessing + model --------------------------------------------------
# Each entry is (step name, transformer, columns it applies to).
# drop='first' and handle_unknown='ignore' are the encoder options chosen
# here; see the scikit-learn OneHotEncoder docs for how they interact.
preprocessor = ColumnTransformer([
    ('num', 'passthrough', numerical_cols),
    (
        'cat',
        OneHotEncoder(drop='first', handle_unknown='ignore'),
        categorical_cols,
    ),
])

# Chain preprocessing and the linear regressor so that fit/predict accept the
# raw feature frame directly.
model = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression()),
])

# --- Train ------------------------------------------------------------------
# Hold out 20% of the rows; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

# Fit the whole pipeline on the training portion only.
model.fit(X_train, y_train)

# ✅ Function to predict house price
def _ask_number(prompt, field, convert, *, allow_zero):
    """Prompt for one numeric feature and return it validated.

    Args:
        prompt: Text shown to the user.
        field: Feature name, used only in error messages.
        convert: ``int`` or ``float``; applied to the raw answer.
        allow_zero: Whether ``0`` is an acceptable answer.

    Returns:
        The converted value.

    Raises:
        ValueError: If the answer is not a number of the requested kind, is
            negative, is zero when zero is not allowed, or is NaN/infinite.
    """
    raw = input(prompt).strip()
    try:
        value = convert(raw)
    except ValueError:
        kind = "a whole number" if convert is int else "a number"
        # Replace the low-level "invalid literal ..." text with a message
        # that names the offending field.
        raise ValueError(
            f"Invalid input for '{field}'. Please enter {kind}."
        ) from None

    above_minimum = value >= 0 if allow_zero else value > 0
    # Comparisons with NaN are always False and infinity fails the upper
    # bound, so this single check also rejects 'nan' and 'inf'.
    if not (above_minimum and value < float('inf')):
        requirement = "zero or greater" if allow_zero else "greater than zero"
        raise ValueError(
            f"Invalid input for '{field}'. Please enter a finite value "
            f"{requirement}."
        )
    return value


def _ask_choice(prompt, field, allowed):
    """Prompt for one categorical feature and return the normalized answer.

    The answer is stripped and lower-cased before being compared against
    ``allowed``.

    Raises:
        ValueError: If the normalized answer is not one of ``allowed``.
    """
    answer = input(prompt).strip().lower()
    if answer not in allowed:
        quoted = [f"'{option}'" for option in allowed]
        if len(quoted) > 2:
            options = ", ".join(quoted[:-1]) + f", or {quoted[-1]}"
        else:
            options = " or ".join(quoted)
        raise ValueError(
            f"Invalid input for '{field}'. Please enter {options}."
        )
    return answer


def predict_price():
    """Interactively collect one house's features and print its predicted price.

    Prompts on stdin for the five numeric and seven categorical features,
    validating each answer as soon as it is entered, then runs the module-level
    ``model`` on a single-row DataFrame and prints the prediction.

    Any failure (invalid answer or prediction error) is reported on stdout as
    "Prediction failed: ..." instead of being raised. Returns ``None``.
    """
    yes_no = ('yes', 'no')
    furnishing_options = ('furnished', 'semi-furnished', 'unfurnished')

    try:
        # Dict entries are evaluated top to bottom, so the prompts appear in
        # this order and the first invalid answer aborts immediately.
        features = {
            'area': _ask_number(
                "Enter area (in sqft): ", 'area', float, allow_zero=False),
            'bedrooms': _ask_number(
                "Enter number of bedrooms: ", 'bedrooms', int,
                allow_zero=True),
            'bathrooms': _ask_number(
                "Enter number of bathrooms: ", 'bathrooms', int,
                allow_zero=True),
            'stories': _ask_number(
                "Enter number of stories: ", 'stories', int,
                allow_zero=True),
            'parking': _ask_number(
                "Enter number of parking spaces: ", 'parking', int,
                allow_zero=True),
            'mainroad': _ask_choice(
                "Is it on the main road? (yes/no): ", 'mainroad', yes_no),
            'guestroom': _ask_choice(
                "Does it have a guestroom? (yes/no): ", 'guestroom', yes_no),
            'basement': _ask_choice(
                "Does it have a basement? (yes/no): ", 'basement', yes_no),
            'hotwaterheating': _ask_choice(
                "Does it have hot water heating? (yes/no): ",
                'hotwaterheating', yes_no),
            'airconditioning': _ask_choice(
                "Does it have air conditioning? (yes/no): ",
                'airconditioning', yes_no),
            'prefarea': _ask_choice(
                "Is it in a preferred area? (yes/no): ", 'prefarea', yes_no),
            'furnishingstatus': _ask_choice(
                "Furnishing status (furnished/semi-furnished/unfurnished): ",
                'furnishingstatus', furnishing_options),
        }

        # Build the row by column name rather than by position so each value
        # is tied to its feature regardless of list ordering. Categorical
        # answers are already str, so no extra cast is needed.
        input_data = pd.DataFrame(
            [features], columns=numerical_cols + categorical_cols
        )

        # ✅ Predict the price
        predicted_price = model.predict(input_data)[0]
        print(f"\n🏠 Predicted House Price: ₹{predicted_price:.2f}")

    except Exception as e:
        # Deliberately broad: this is the interactive top-level boundary, and
        # every failure is reported to the user instead of crashing the cell.
        print(f"\n❌ Prediction failed: {e}")

# ✅ Run the interactive prompt only when this file is executed directly (as a
# script or in a notebook cell), so importing it does not block on input().
if __name__ == "__main__":
    predict_price()


# Sample session (captured notebook cell output):
#
# Enter area (in sqft): 23456
# Enter number of bedrooms: 4
# Enter number of bathrooms: 5
# Enter number of stories: 2
# Enter number of parking spaces: 4
# Is it on the main road? (yes/no): yes
# Does it have a guestroom? (yes/no): yes
# Does it have a basement? (yes/no): yes
# Does it have hot water heating? (yes/no): yes
# Does it have air conditioning? (yes/no): yes
# Is it in a preferred area? (yes/no): yes
# Furnishing status (furnished/semi-furnished/unfurnished): furnished
#
# 🏠 Predicted House Price: ₹16384324.56
