In [7]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [8]:
# Load the raw listings and immediately discard the two columns that the
# rest of the notebook does not use.
df = pd.read_csv("house_rent_dataset.csv").drop(
    columns=["Posted On", "Point of Contact"]
)

# Clean the Floor column: keep the leading floor number, mapping "ground" to 0 and "basement"/"lower" to -1

def convert_floor(floor_str):
    """Convert a raw ``Floor`` entry into an integer floor number.

    Parameters
    ----------
    floor_str : object
        Raw cell value. It is passed through ``str()`` and lower-cased
        before any matching is done.

    Returns
    -------
    int
        ``0`` if the text contains "ground", ``-1`` if it contains
        "basement" or "lower", otherwise the integer value of the first
        whitespace-separated token. ``0`` is also the fallback when that
        token is absent or is not an integer (e.g. blank text, or a missing
        value whose string form is "nan").
    """
    text = str(floor_str).lower()
    if "ground" in text:
        return 0
    if "basement" in text or "lower" in text:
        return -1
    try:
        return int(text.split()[0])
    except (IndexError, ValueError):
        # IndexError: blank text has no tokens. ValueError: the first token is
        # not an integer. Catching only these two keeps the best-effort
        # fallback without swallowing unrelated errors (KeyboardInterrupt,
        # SystemExit, genuine bugs) the way a bare ``except:`` would.
        return 0

# Replace every raw Floor entry with its numeric floor level.
df["Floor"] = df["Floor"].map(convert_floor)

# Drop each row that still has a null in any column (rows, not columns).
# Floor cannot be null at this point: convert_floor falls back to 0 for
# values it cannot parse, so missing floors are kept as 0 rather than dropped.
df = df.dropna(axis=0, how="any")

In [9]:
# One-hot encode the categorical columns. drop_first=True omits the first
# level of each column; that level becomes the baseline the remaining dummy
# coefficients are measured against.
# NOTE(review): "Area Locality" looks high-cardinality (the coefficient listing
# printed further down is dominated by its dummies) - confirm that encoding it
# this way is intended.
categorical_cols = [
    "Area Type",
    "Area Locality",
    "City",
    "Furnishing Status",
    "Tenant Preferred",
]
df_encoded = pd.get_dummies(data=df, columns=categorical_cols, drop_first=True)

# Separate the target (Rent) from the feature matrix used for training and testing.
y = df_encoded["Rent"]
X = df_encoded.drop(columns="Rent")

In [10]:
# Hold out 20% of the rows for testing and train on the other 80%; the fixed
# random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=50,
)

# Fit a linear regression on the training split (fit() returns the estimator).
model = LinearRegression().fit(X_train, y_train)

# Predict rents for the held-out rows and score them with mean squared error.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)

In [11]:
# List every feature next to the weight the fitted model assigned to it.
# Coefficients are stored in the same order as the columns of X.
print("Model Coefficients (Weights):")
for position, feature_name in enumerate(X.columns):
    print(f"{feature_name}: {model.coef_[position]:.4f}")

Model Coefficients (Weights):
BHK: 242.9929
Size: 33.4900
Floor: 556.1005
Bathroom: 9043.7708
Area Type_Carpet Area: -4776.3564
Area Type_Super Area: -4950.6775
Area Locality_ in Boduppal, NH 2 2: -20421.5624
Area Locality_ in Erragadda, NH 9: -22954.5178
Area Locality_ in Miyapur, NH 9: -0.0000
Area Locality_117 Residency, Chembur East: 14229.0429
Area Locality_2 BHK: -1722.4545
Area Locality_2nd Main Road: -15267.2950
Area Locality_355 konnur highroad Ayanavaram: 1884.2586
Area Locality_5-20 Adharshnagar: -25993.9746
Area Locality_5000: 6924.2676
Area Locality_58 block: 0.0000
Area Locality_7 Bungalow, Seven Bungalows: 27996.2842
Area Locality_7 Bungalows Andheri West: 75688.7412
Area Locality_700051: -0.0000
Area Locality_90 ft road: 0.0000
Area Locality_A 307 Blossom Heights: -58386.2749
Area Locality_A Narayanapura, Mahadevapura: -45369.9604
Area Locality_AGCR Enclave, Anand Vihar: -31861.0671
Area Locality_AGS Colony-Velachery: -12548.8809
Area Locality_AH Block, Salt Lake: 11472

In [12]:
# Report the fitted intercept and the mean squared error on the held-out rows.
# MSE is in squared target units; its square root gives an error on the same
# scale as Rent.
fitted_intercept = model.intercept_
print(f"\nIntercept: {fitted_intercept:.4f}")
print(f"Mean Squared Error (MSE) on Test Set: {mse:.4f}\n")


Intercept: 13889.9695
Mean Squared Error (MSE) on Test Set: 2483860309.5735



In [13]:
# Show each held-out rent beside the model's prediction for that row.
# Actual values are truncated to int; predictions are rounded to the nearest
# integer first. Both are printed with thousands separators.
print("Actual Rent vs Predicted Rent on Test Set:")
for true_rent, predicted_rent in zip(y_test, y_pred):
    actual_text = f"{int(true_rent):,}"
    predicted_text = f"{int(round(predicted_rent)):,}"
    print(f"Actual: {actual_text}  Predicted: {predicted_text}")

Actual Rent vs Predicted Rent on Test Set:
Actual: 24,000  Predicted: -272
Actual: 15,000  Predicted: 22,022
Actual: 10,000  Predicted: 18,481
Actual: 15,500  Predicted: -7,219
Actual: 80,000  Predicted: 127,649
Actual: 25,000  Predicted: 54,523
Actual: 21,500  Predicted: 65,300
Actual: 7,500  Predicted: 31,912
Actual: 7,500  Predicted: -22,900
Actual: 12,000  Predicted: 727
Actual: 250,000  Predicted: 304,164
Actual: 75,000  Predicted: 148,969
Actual: 14,000  Predicted: 7,451
Actual: 10,500  Predicted: 31,900
Actual: 30,000  Predicted: 39,296
Actual: 3,500  Predicted: -23,254
Actual: 9,000  Predicted: 16,869
Actual: 8,500  Predicted: -14,318
Actual: 5,500  Predicted: 5,650
Actual: 20,000  Predicted: -3,072
Actual: 20,000  Predicted: 29,043
Actual: 20,000  Predicted: 32,246
Actual: 45,000  Predicted: 31,103
Actual: 16,000  Predicted: 39,118
Actual: 15,000  Predicted: 25,852
Actual: 25,000  Predicted: 23,833
Actual: 6,500  Predicted: 5,326
Actual: 30,000  Predicted: 41,870
Actual: 10,50