In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

In [9]:
# Load the raw housing table. The path is relative, so it is resolved against
# the kernel's current working directory.
csv_path = "Housing.csv"
data = pd.read_csv(csv_path)

In [10]:
# Columns that get converted to integer codes before modelling.
# NOTE(review): presumably these hold text labels (e.g. yes/no) in the CSV --
# confirm against Housing.csv.
categorical_cols = [
    'mainroad',
    'guestroom',
    'basement',
    'hotwaterheating',
    'airconditioning',
    'prefarea',
    'furnishingstatus',
]

# Encoder instance used by the encoding step.
le = LabelEncoder()

In [11]:
# Replace each categorical column with integer codes, keeping one fitted
# encoder per column in `encoders`.
#
# Refitting a single shared encoder on every column would overwrite its
# `classes_` each time, so only the last column's label -> code mapping would
# survive. With one encoder per column, new raw rows can be encoded with
# `encoders[col].transform(...)` and codes decoded with
# `encoders[col].inverse_transform(...)`.
#
# Caveat: `data` is overwritten in place. Re-running this cell without
# reloading `data` refits on the already-encoded integers, so `encoders`
# would then describe integer codes rather than the original labels.
encoders = {}
for col in categorical_cols:
    le = LabelEncoder()  # fresh encoder so this column's mapping is preserved
    data[col] = le.fit_transform(data[col])
    encoders[col] = le
# After the loop `le` is the encoder fitted on the last column, which is the
# same fitted state the shared encoder ended up in previously.

# Feature matrix: every column except the target. Target: the 'price' column.
X = data.drop('price', axis=1)
y = data['price']

In [12]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit ordinary least squares on the training split (fit() returns the
# estimator itself), then predict on the held-out rows.
model = LinearRegression().fit(X_train, y_train)
y_pred = model.predict(X_test)

In [14]:
# Print each hold-out metric as "<label> <value>", comparing the true test
# targets with the model's predictions.
metric_report = (
    ("R² Score:", r2_score),
    ("Mean Absolute Error:", mean_absolute_error),
    ("Mean Squared Error:", mean_squared_error),
)
for label, metric_fn in metric_report:
    print(label, metric_fn(y_test, y_pred))

R² Score: 0.6494754192267803
Mean Absolute Error: 979679.6912959901
Mean Squared Error: 1771751116594.0352


In [16]:
# Column labels the model was fitted with; the new row must use the same ones.
feature_names = X_train.columns

# A single hypothetical house. Values are positional: the i-th value is paired
# with the i-th entry of `feature_names`. Categorical features must be given as
# integer codes, because the model was trained on the encoded columns.
# NOTE(review): confirm the value order against the actual column order of
# Housing.csv.
new_house = [
    [8000, 4, 3, 2, 1, 1, 1, 0, 1, 2, 1, 2],
]

# Wrap the row in a DataFrame carrying the training column labels.
new_house_df = pd.DataFrame(data=new_house, columns=feature_names)

Predicted price: 9037271.108671479


In [18]:
# Predict for the one-row frame; the result is array-like with one entry per
# input row, so index 0 is the estimate for the new house.
predicted_price = model.predict(X=new_house_df)

# Display with thousands separators and two decimal places.
print(f"\nPredicted Price for new house: ₹{predicted_price[0]:,.2f}")


Predicted Price for new house: ₹9,037,271.11
