### Import the libraries

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Apply seaborn's default theme to all subsequent matplotlib figures.
# `set_theme` is the preferred name; `sns.set` is only kept as an alias for it.
sns.set_theme()

### Import the dataset

In [4]:
# Read the listings file and discard the "Posted On" and "Area Locality"
# columns, neither of which is used by the rest of the notebook.
dataset = pd.read_csv("House_Rent_Dataset.csv").drop(
    columns=["Posted On", "Area Locality"]
)

### Encoding categorical data

In [6]:
from sklearn.preprocessing import OneHotEncoder

# Columns holding string categories; each one is expanded into 0/1 indicator
# columns below.
# NOTE(review): "Floor" is treated as a plain category, so every distinct
# floor string (e.g. "1 out of 10") becomes its own column - confirm that
# this is intended rather than parsing it into numeric fields.
categorical_columns = [
    "Floor",
    "Area Type",
    "City",
    "Furnishing Status",
    "Tenant Preferred",
    "Point of Contact",
]

# Dense output so the encoded matrix can be wrapped in a DataFrame directly.
encoder = OneHotEncoder(sparse_output=False)
encoded_features = encoder.fit_transform(dataset[categorical_columns])

# Label the indicator columns "<source column>_<category>".
encoded_df = pd.DataFrame(
    encoded_features,
    columns=encoder.get_feature_names_out(categorical_columns),
)

# Replace the raw categorical columns with their indicator columns. Both
# frames are re-indexed from 0 so the column-wise concat lines up row by row.
new_dataset = pd.concat(
    [
        dataset.drop(columns=categorical_columns).reset_index(drop=True),
        encoded_df.reset_index(drop=True),
    ],
    axis=1,
)

new_dataset

Unnamed: 0,Rent,BHK,Bathroom,Size,Floor_1,Floor_1 out of 1,Floor_1 out of 10,Floor_1 out of 11,Floor_1 out of 12,Floor_1 out of 13,...,City_Mumbai,Furnishing Status_Furnished,Furnishing Status_Semi-Furnished,Furnishing Status_Unfurnished,Tenant Preferred_Bachelors,Tenant Preferred_Bachelors/Family,Tenant Preferred_Family,Point of Contact_Contact Agent,Point of Contact_Contact Builder,Point of Contact_Contact Owner
0,10000,2,2,1100,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0
1,20000,2,1,800,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
2,17000,2,1,1000,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
3,10000,2,1,800,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0
4,7500,2,1,850,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4741,15000,2,2,1000,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
4742,29000,3,3,2000,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
4743,35000,3,3,1750,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
4744,45000,3,2,1500,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0


### Split the dataset into training and test sets

In [8]:
from sklearn.model_selection import train_test_split

# Select the target by column name rather than by position, so the split
# stays correct even if the column order of `new_dataset` changes.
y = new_dataset["Rent"].values
x = new_dataset.drop(columns="Rent").values

# Hold out 30% of the rows for testing; the fixed seed keeps the split
# reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=0
)

### Feature Scaling

In [16]:
from sklearn.preprocessing import StandardScaler

# Boolean mask over the feature columns (everything except the "Rent" target)
# marking those that did NOT come out of the one-hot encoder. Only these are
# standardised; the 0/1 indicator columns are left untouched.
feature_names = new_dataset.columns.drop("Rent")
numeric_mask = ~feature_names.isin(encoded_df.columns)

sc = StandardScaler()

# Fit the scaler on the training rows only, then reuse the same mean/std for
# the test rows so no test-set statistics leak into the transformation.
x_train[:, numeric_mask] = sc.fit_transform(x_train[:, numeric_mask])
x_test[:, numeric_mask] = sc.transform(x_test[:, numeric_mask])

### Training the Random Forest Regression model on the training set

In [19]:
from sklearn.ensemble import RandomForestRegressor

# Random forest with 10 trees; the fixed random_state makes the fitted model
# reproducible from run to run.
regressor = RandomForestRegressor(
    random_state=0,
    n_estimators=10,
)

# Fit on the training split.
regressor.fit(x_train, y_train)

### Predict the result

In [22]:
# Generate predictions for the held-out test rows with the fitted regressor.
y_pred = regressor.predict(x_test)

### Evaluating the model

In [25]:
from sklearn.metrics import mean_squared_error, r2_score

# Score the test-set predictions: R^2 and mean squared error.
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

# Shown as a (mse, r2) tuple in the cell output.
(mse, r2)

(9774050820.053831, 0.23547437788528303)

### Comparing the original and predicted rent

In [28]:
# Round into a separate array so `y_pred` itself stays a numeric array:
# replacing it with formatted strings would break any later numeric use of
# the predictions and would make this cell raise if it were run a second time.
y_pred_rounded = np.round(y_pred).astype(int)

# Side-by-side view of the true test targets and the rounded predictions.
# Column labels are kept exactly as they were (including the "Pice" spelling)
# so that any code indexing this frame by label keeps working.
comparison_df = pd.DataFrame({
    "Original Pice": y_test,
    "Predicted Price": y_pred_rounded
})

comparison_df.head(50)

Unnamed: 0,Original Pice,Predicted Price
0,50000,24490
1,15000,30700
2,12000,13350
3,13000,13287
4,48000,45100
5,17000,19667
6,18000,15400
7,12000,9150
8,14000,11050
9,75000,62650
