In [8]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Load the data
data = pd.read_csv('house_prices.csv')

# Feature selection
X = data[['area', 'bedrooms', 'bathrooms', 'location']]
y = data['price']

# Splitting the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Preprocessing the data
numeric_features = ['area', 'bedrooms', 'bathrooms']
categorical_features = ['location']

numeric_transformer = Pipeline(steps=[
    ('scaler', StandardScaler())])

categorical_transformer = Pipeline(steps=[
    ('onehot', OneHotEncoder(handle_unknown='ignore'))])

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)])

# Define the model
model = Pipeline(steps=[('preprocessor', preprocessor),
                        ('regressor', LinearRegression())])

# Train the model
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')

# For better interpretation, print the coefficients
if isinstance(model.named_steps['regressor'], LinearRegression):
    print("Coefficients: ", model.named_steps['regressor'].coef_)


Mean Squared Error: 49058417158.05315
Coefficients:  [ 1077558.70923687   662096.77419355   662096.77419355 -1026164.87455197
     1254.48028674  1024910.39426523]


In [9]:
import pandas as pd
import numpy as np

# Load the trained model and preprocessor
import joblib
joblib.dump(model, 'house_price_model.pkl')

# Load the model
model = joblib.load('house_price_model.pkl')

# Function to predict house prices
def predict_price(area, bedrooms, bathrooms, location):
    input_data = pd.DataFrame({
        'area': [area],
        'bedrooms': [bedrooms],
        'bathrooms': [bathrooms],
        'location': [location]
    })
    predicted_price = model.predict(input_data)
    return predicted_price[0]

# User input
area = float(input("Enter the area in square feet: "))
bedrooms = int(input("Enter the number of bedrooms: "))
bathrooms = int(input("Enter the number of bathrooms: "))
location = input("Enter the location: ")

# Predict the price
predicted_price = predict_price(area, bedrooms, bathrooms, location)
print(f"The predicted price for a house with area {area} sq ft, {bedrooms} bedrooms, {bathrooms} bathrooms in {location} is: {predicted_price}")


Enter the area in square feet: 100
Enter the number of bedrooms: 3
Enter the number of bathrooms: 2
Enter the location: mumbai
The predicted price for a house with area 100.0 sq ft, 3 bedrooms, 2 bathrooms in mumbai is: 3364874.5519713317
