### HOUSE RENT DATASET

In [28]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
import sklearn.preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import f1_score, classification_report, confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
import warnings
warnings.filterwarnings('ignore')
from sklearn import svm

In [29]:
df=pd.read_csv("/content/drive/MyDrive/ML/Class5/House_Rent_Dataset.csv")
df.head()

Unnamed: 0,Posted On,BHK,Rent,Size,Floor,Area Type,Area Locality,City,Furnishing Status,Tenant Preferred,Bathroom,Point of Contact
0,2022-05-18,2,10000,1100,Ground out of 2,Super Area,Bandel,Kolkata,Unfurnished,Bachelors/Family,2,Contact Owner
1,2022-05-13,2,20000,800,1 out of 3,Super Area,"Phool Bagan, Kankurgachi",Kolkata,Semi-Furnished,Bachelors/Family,1,Contact Owner
2,2022-05-16,2,17000,1000,1 out of 3,Super Area,Salt Lake City Sector 2,Kolkata,Semi-Furnished,Bachelors/Family,1,Contact Owner
3,2022-07-04,2,10000,800,1 out of 2,Super Area,Dumdum Park,Kolkata,Unfurnished,Bachelors/Family,1,Contact Owner
4,2022-05-09,2,7500,850,1 out of 2,Carpet Area,South Dum Dum,Kolkata,Unfurnished,Bachelors,1,Contact Owner


In [30]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4746 entries, 0 to 4745
Data columns (total 12 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   Posted On          4746 non-null   object
 1   BHK                4746 non-null   int64 
 2   Rent               4746 non-null   int64 
 3   Size               4746 non-null   int64 
 4   Floor              4746 non-null   object
 5   Area Type          4746 non-null   object
 6   Area Locality      4746 non-null   object
 7   City               4746 non-null   object
 8   Furnishing Status  4746 non-null   object
 9   Tenant Preferred   4746 non-null   object
 10  Bathroom           4746 non-null   int64 
 11  Point of Contact   4746 non-null   object
dtypes: int64(4), object(8)
memory usage: 445.1+ KB


In [31]:
df.isnull().sum()

Posted On            0
BHK                  0
Rent                 0
Size                 0
Floor                0
Area Type            0
Area Locality        0
City                 0
Furnishing Status    0
Tenant Preferred     0
Bathroom             0
Point of Contact     0
dtype: int64

In [32]:
target = list()
for val in df['BHK']:
  if val < 2:
    target.append(0)
  elif val >= 2 and val < 4:
    target.append(1)
  else:
    target.append(2)



In [33]:
df["target"] = target
df.head()

Unnamed: 0,Posted On,BHK,Rent,Size,Floor,Area Type,Area Locality,City,Furnishing Status,Tenant Preferred,Bathroom,Point of Contact,target
0,2022-05-18,2,10000,1100,Ground out of 2,Super Area,Bandel,Kolkata,Unfurnished,Bachelors/Family,2,Contact Owner,1
1,2022-05-13,2,20000,800,1 out of 3,Super Area,"Phool Bagan, Kankurgachi",Kolkata,Semi-Furnished,Bachelors/Family,1,Contact Owner,1
2,2022-05-16,2,17000,1000,1 out of 3,Super Area,Salt Lake City Sector 2,Kolkata,Semi-Furnished,Bachelors/Family,1,Contact Owner,1
3,2022-07-04,2,10000,800,1 out of 2,Super Area,Dumdum Park,Kolkata,Unfurnished,Bachelors/Family,1,Contact Owner,1
4,2022-05-09,2,7500,850,1 out of 2,Carpet Area,South Dum Dum,Kolkata,Unfurnished,Bachelors,1,Contact Owner,1


In [34]:
area_type = {
    'Super Area': 0, 
    'Carpet Area': 1, 
    'Built Area': 2
}

fur_status = {
    'Semi-Furnished': 0, 
    'Unfurnished': 1, 
    'Furnished': 2
}

city = {
 'Kolkata': 0,
 'Bangalore': 1,
 'Mumbai': 2,
 'Chennai': 3,
 'Hyderabad': 4,
 'Delhi': 5,
}

tenant = {
    'Bachelors/Family': 0,
    'Bachelors': 1,
    'Family': 2
}
contact = {
    'Contact Owner': 0,
    'Contact Agent': 1,
    'Contact Builder': 2
}

In [35]:
df['Area Type'] = df['Area Type'].apply(lambda x: area_type[x])
df['Furnishing Status'] = df['Furnishing Status'].apply(lambda x: fur_status[x])
df['City'] = df['City'].apply(lambda x: city[x])
df['Tenant Preferred'] = df['Tenant Preferred'].apply(lambda x: tenant[x])
df['Point of Contact'] = df['Point of Contact'].apply(lambda x: contact[x])

In [36]:
df.head()

Unnamed: 0,Posted On,BHK,Rent,Size,Floor,Area Type,Area Locality,City,Furnishing Status,Tenant Preferred,Bathroom,Point of Contact,target
0,2022-05-18,2,10000,1100,Ground out of 2,0,Bandel,0,1,0,2,0,1
1,2022-05-13,2,20000,800,1 out of 3,0,"Phool Bagan, Kankurgachi",0,0,0,1,0,1
2,2022-05-16,2,17000,1000,1 out of 3,0,Salt Lake City Sector 2,0,0,0,1,0,1
3,2022-07-04,2,10000,800,1 out of 2,0,Dumdum Park,0,1,0,1,0,1
4,2022-05-09,2,7500,850,1 out of 2,1,South Dum Dum,0,1,1,1,0,1


In [37]:
df.corr()

Unnamed: 0,BHK,Rent,Size,Area Type,City,Furnishing Status,Tenant Preferred,Bathroom,Point of Contact,target
BHK,1.0,0.369718,0.716145,0.153225,0.091828,-0.072916,0.123609,0.794885,0.229764,0.865674
Rent,0.369718,1.0,0.413551,0.214867,-0.023734,0.022612,0.082496,0.441215,0.338966,0.295662
Size,0.716145,0.413551,1.0,0.079705,0.060177,-0.089949,0.109498,0.740703,0.213436,0.590853
Area Type,0.153225,0.214867,0.079705,1.0,-0.087328,0.025982,0.30865,0.183012,0.558047,0.106028
City,0.091828,-0.023734,0.060177,-0.087328,1.0,-0.001296,-0.012251,0.131302,-0.020005,0.074525
Furnishing Status,-0.072916,0.022612,-0.089949,0.025982,-0.001296,1.0,0.002795,-0.063716,0.014395,-0.068883
Tenant Preferred,0.123609,0.082496,0.109498,0.30865,-0.012251,0.002795,1.0,0.144298,0.242072,0.09421
Bathroom,0.794885,0.441215,0.740703,0.183012,0.131302,-0.063716,0.144298,1.0,0.32649,0.67468
Point of Contact,0.229764,0.338966,0.213436,0.558047,-0.020005,0.014395,0.242072,0.32649,1.0,0.141743
target,0.865674,0.295662,0.590853,0.106028,0.074525,-0.068883,0.09421,0.67468,0.141743,1.0


In [40]:
#df.drop(['Posted On', 'Area Locality', 'Floor'], axis = 'columns', inplace = True)
#df

area_locality = df['Area Locality'].value_counts().to_dict()
for i, k in enumerate(area_locality):
  area_locality[k] = i

df['Area Locality'] = df['Area Locality'].apply(lambda x: area_locality[x])
df.corr()


Unnamed: 0,BHK,Rent,Size,Area Type,Area Locality,City,Furnishing Status,Tenant Preferred,Bathroom,Point of Contact,target
BHK,1.0,0.369718,0.716145,0.153225,-0.027987,0.091828,-0.072916,0.123609,0.794885,0.229764,0.865674
Rent,0.369718,1.0,0.413551,0.214867,0.047623,-0.023734,0.022612,0.082496,0.441215,0.338966,0.295662
Size,0.716145,0.413551,1.0,0.079705,-0.059827,0.060177,-0.089949,0.109498,0.740703,0.213436,0.590853
Area Type,0.153225,0.214867,0.079705,1.0,0.141167,-0.087328,0.025982,0.30865,0.183012,0.558047,0.106028
Area Locality,-0.027987,0.047623,-0.059827,0.141167,1.0,-0.121784,0.041867,0.039508,0.011115,0.03362,-0.036818
City,0.091828,-0.023734,0.060177,-0.087328,-0.121784,1.0,-0.001296,-0.012251,0.131302,-0.020005,0.074525
Furnishing Status,-0.072916,0.022612,-0.089949,0.025982,0.041867,-0.001296,1.0,0.002795,-0.063716,0.014395,-0.068883
Tenant Preferred,0.123609,0.082496,0.109498,0.30865,0.039508,-0.012251,0.002795,1.0,0.144298,0.242072,0.09421
Bathroom,0.794885,0.441215,0.740703,0.183012,0.011115,0.131302,-0.063716,0.144298,1.0,0.32649,0.67468
Point of Contact,0.229764,0.338966,0.213436,0.558047,0.03362,-0.020005,0.014395,0.242072,0.32649,1.0,0.141743


In [42]:
floor = df['Floor'].value_counts().to_dict()
for i, k in enumerate(floor):
  floor[k] = i

df['Floor'] = df['Floor'].apply(lambda x: floor[x])
df.corr()


Unnamed: 0,BHK,Rent,Size,Floor,Area Type,Area Locality,City,Furnishing Status,Tenant Preferred,Bathroom,Point of Contact,target
BHK,1.0,0.369718,0.716145,0.210955,0.153225,-0.027987,0.091828,-0.072916,0.123609,0.794885,0.229764,0.865674
Rent,0.369718,1.0,0.413551,0.298625,0.214867,0.047623,-0.023734,0.022612,0.082496,0.441215,0.338966,0.295662
Size,0.716145,0.413551,1.0,0.171089,0.079705,-0.059827,0.060177,-0.089949,0.109498,0.740703,0.213436,0.590853
Floor,0.210955,0.298625,0.171089,1.0,0.261584,0.124971,-0.072745,0.017383,0.135703,0.312276,0.409726,0.148961
Area Type,0.153225,0.214867,0.079705,0.261584,1.0,0.141167,-0.087328,0.025982,0.30865,0.183012,0.558047,0.106028
Area Locality,-0.027987,0.047623,-0.059827,0.124971,0.141167,1.0,-0.121784,0.041867,0.039508,0.011115,0.03362,-0.036818
City,0.091828,-0.023734,0.060177,-0.072745,-0.087328,-0.121784,1.0,-0.001296,-0.012251,0.131302,-0.020005,0.074525
Furnishing Status,-0.072916,0.022612,-0.089949,0.017383,0.025982,0.041867,-0.001296,1.0,0.002795,-0.063716,0.014395,-0.068883
Tenant Preferred,0.123609,0.082496,0.109498,0.135703,0.30865,0.039508,-0.012251,0.002795,1.0,0.144298,0.242072,0.09421
Bathroom,0.794885,0.441215,0.740703,0.312276,0.183012,0.011115,0.131302,-0.063716,0.144298,1.0,0.32649,0.67468


In [43]:
df.drop(['Posted On', 'BHK'], axis = 'columns', inplace = True)
df.head()

Unnamed: 0,Rent,Size,Floor,Area Type,Area Locality,City,Furnishing Status,Tenant Preferred,Bathroom,Point of Contact,target
0,10000,1100,1,0,507,0,1,0,2,0,1
1,20000,800,4,0,465,0,0,0,1,0,1
2,17000,1000,4,0,13,0,0,0,1,0,1
3,10000,800,0,0,737,0,1,0,1,0,1
4,7500,850,0,1,1251,0,1,1,1,0,1


In [44]:
x = df.iloc[:,:-1]
y = df.iloc[:,-1]

In [45]:
X_train, X_test, Y_train, Y_test = train_test_split(x, y, test_size = 0.2, random_state = 2000)

In [47]:
model = svm.SVC()
model.fit(X_train, Y_train)

In [48]:
model.score(X_test, Y_test)

0.7305263157894737

In [49]:
classifier = DecisionTreeClassifier()
classifier.fit(X_train, Y_train)
classifier.score(X_test, Y_test)

0.8557894736842105