# Data Cleaning

## Flat dataset cleaning

In [546]:
import numpy as np
import pandas as pd
import re

In [29]:
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

In [30]:
df = pd.read_csv('flats.csv')
df.head()

Unnamed: 0,property_name,link,society,price,area,areaWithType,bedRoom,bathroom,balcony,additionalRoom,address,floorNum,facing,agePossession,nearbyLocations,description,furnishDetails,features,rating,property_id
0,2 BHK Flat in Krishna Colony,https://www.99acres.com/2-bhk-bedroom-apartmen...,maa bhagwati residency,45 Lac,"₹ 5,000/sq.ft.",Carpet area: 900 (83.61 sq.m.),2 Bedrooms,2 Bathrooms,1 Balcony,,"Krishna Colony, Gurgaon, Haryana",4th of 4 Floors,West,1 to 5 Year Old,"['Chintapurni Mandir', 'State bank ATM', 'Pear...",So with lift.Maa bhagwati residency is one of ...,"['3 Fan', '4 Light', '1 Wardrobe', 'No AC', 'N...","['Feng Shui / Vaastu Compliant', 'Security / F...","['Environment4 out of 5', 'Safety4 out of 5', ...",C68850746
1,2 BHK Flat in Ashok Vihar,https://www.99acres.com/2-bhk-bedroom-apartmen...,Apna Enclave,50 Lac,"₹ 7,692/sq.ft.",Carpet area: 650 (60.39 sq.m.),2 Bedrooms,2 Bathrooms,1 Balcony,,"46b, Ashok Vihar, Gurgaon, Haryana",1st of 3 Floors,West,10+ Year Old,"['Chintapurni Mandir', 'Sheetla Mata Mandir', ...","Property situated on main road, railway statio...","['3 Wardrobe', '4 Fan', '1 Exhaust Fan', '1 Ge...","['Security / Fire Alarm', 'Maintenance Staff',...","['Environment4 out of 5', 'Safety4 out of 5', ...",H68850564
2,2 BHK Flat in Sohna,https://www.99acres.com/2-bhk-bedroom-apartmen...,Tulsiani Easy in Homes,40 Lac,"₹ 6,722/sq.ft.",Carpet area: 595 (55.28 sq.m.),2 Bedrooms,2 Bathrooms,3 Balconies,,"Sohna, Gurgaon, Haryana",12nd of 14 Floors,,0 to 1 Year Old,"['Huda City Metro', 'Golf Course extn road', '...","This property is 15 km away from badshapur, gu...",,"['Power Back-up', 'Feng Shui / Vaastu Complian...","['Environment4 out of 5', 'Safety4 out of 5', ...",J68850120
3,2 BHK Flat in Sector 61 Gurgaon,https://www.99acres.com/2-bhk-bedroom-apartmen...,Smart World Orchard,1.47 Crore,"₹ 12,250/sq.ft.",Carpet area: 1200 (111.48 sq.m.),2 Bedrooms,2 Bathrooms,2 Balconies,Study Room,"Sector 61 Gurgaon, Gurgaon, Haryana",2nd of 4 Floors,,Dec 2023,"['Sector 55-56 Metro station', 'Bestech Centra...",Near to metro station of sector 56 and opposit...,,"['Security / Fire Alarm', 'Private Garden / Te...",,S68849476
4,2 BHK Flat in Sector 92 Gurgaon,https://www.99acres.com/2-bhk-bedroom-apartmen...,Parkwood Westend,70 Lac,"₹ 5,204/sq.ft.",Super Built up area 1345(124.95 sq.m.),2 Bedrooms,2 Bathrooms,3 Balconies,Study Room,"Sector 92 Gurgaon, Gurgaon, Haryana",5th of 8 Floors,,Under Construction,"['Yadav Clinic', 'Bangali Clinic', 'Dr. J. S. ...",We are the proud owners of this 2 bhk alongwit...,[],,"['Environment5 out of 5', 'Safety3 out of 5', ...",L47956793


In [31]:
df.shape

(3017, 20)

In [32]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3017 entries, 0 to 3016
Data columns (total 20 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   property_name    3017 non-null   object
 1   link             3017 non-null   object
 2   society          3016 non-null   object
 3   price            3007 non-null   object
 4   area             3004 non-null   object
 5   areaWithType     3008 non-null   object
 6   bedRoom          3008 non-null   object
 7   bathroom         3008 non-null   object
 8   balcony          3008 non-null   object
 9   additionalRoom   1694 non-null   object
 10  address          3002 non-null   object
 11  floorNum         3006 non-null   object
 12  facing           2127 non-null   object
 13  agePossession    3007 non-null   object
 14  nearbyLocations  2913 non-null   object
 15  description      3008 non-null   object
 16  furnishDetails   2203 non-null   object
 17  features         2594 non-null   

In [33]:
# check for duplicates :-
df.duplicated().sum()

0

In [34]:
# check for missing values :-
df.isnull().sum()

property_name         0
link                  0
society               1
price                10
area                 13
areaWithType          9
bedRoom               9
bathroom              9
balcony               9
additionalRoom     1323
address              15
floorNum             11
facing              890
agePossession        10
nearbyLocations     104
description           9
furnishDetails      814
features            423
rating              341
property_id           9
dtype: int64

In [35]:
# drop unnecessary columns :-
df.drop(columns = ['link','property_id'],inplace = True)

In [36]:
df.shape

(3017, 18)

In [37]:
df.head()

Unnamed: 0,property_name,society,price,area,areaWithType,bedRoom,bathroom,balcony,additionalRoom,address,floorNum,facing,agePossession,nearbyLocations,description,furnishDetails,features,rating
0,2 BHK Flat in Krishna Colony,maa bhagwati residency,45 Lac,"₹ 5,000/sq.ft.",Carpet area: 900 (83.61 sq.m.),2 Bedrooms,2 Bathrooms,1 Balcony,,"Krishna Colony, Gurgaon, Haryana",4th of 4 Floors,West,1 to 5 Year Old,"['Chintapurni Mandir', 'State bank ATM', 'Pear...",So with lift.Maa bhagwati residency is one of ...,"['3 Fan', '4 Light', '1 Wardrobe', 'No AC', 'N...","['Feng Shui / Vaastu Compliant', 'Security / F...","['Environment4 out of 5', 'Safety4 out of 5', ..."
1,2 BHK Flat in Ashok Vihar,Apna Enclave,50 Lac,"₹ 7,692/sq.ft.",Carpet area: 650 (60.39 sq.m.),2 Bedrooms,2 Bathrooms,1 Balcony,,"46b, Ashok Vihar, Gurgaon, Haryana",1st of 3 Floors,West,10+ Year Old,"['Chintapurni Mandir', 'Sheetla Mata Mandir', ...","Property situated on main road, railway statio...","['3 Wardrobe', '4 Fan', '1 Exhaust Fan', '1 Ge...","['Security / Fire Alarm', 'Maintenance Staff',...","['Environment4 out of 5', 'Safety4 out of 5', ..."
2,2 BHK Flat in Sohna,Tulsiani Easy in Homes,40 Lac,"₹ 6,722/sq.ft.",Carpet area: 595 (55.28 sq.m.),2 Bedrooms,2 Bathrooms,3 Balconies,,"Sohna, Gurgaon, Haryana",12nd of 14 Floors,,0 to 1 Year Old,"['Huda City Metro', 'Golf Course extn road', '...","This property is 15 km away from badshapur, gu...",,"['Power Back-up', 'Feng Shui / Vaastu Complian...","['Environment4 out of 5', 'Safety4 out of 5', ..."
3,2 BHK Flat in Sector 61 Gurgaon,Smart World Orchard,1.47 Crore,"₹ 12,250/sq.ft.",Carpet area: 1200 (111.48 sq.m.),2 Bedrooms,2 Bathrooms,2 Balconies,Study Room,"Sector 61 Gurgaon, Gurgaon, Haryana",2nd of 4 Floors,,Dec 2023,"['Sector 55-56 Metro station', 'Bestech Centra...",Near to metro station of sector 56 and opposit...,,"['Security / Fire Alarm', 'Private Garden / Te...",
4,2 BHK Flat in Sector 92 Gurgaon,Parkwood Westend,70 Lac,"₹ 5,204/sq.ft.",Super Built up area 1345(124.95 sq.m.),2 Bedrooms,2 Bathrooms,3 Balconies,Study Room,"Sector 92 Gurgaon, Gurgaon, Haryana",5th of 8 Floors,,Under Construction,"['Yadav Clinic', 'Bangali Clinic', 'Dr. J. S. ...",We are the proud owners of this 2 bhk alongwit...,[],,"['Environment5 out of 5', 'Safety3 out of 5', ..."


In [38]:
# rename some columns :- 
df.rename(columns={'area':'price_per_sqft'},inplace = True)

In [39]:
df.head(1)

Unnamed: 0,property_name,society,price,price_per_sqft,areaWithType,bedRoom,bathroom,balcony,additionalRoom,address,floorNum,facing,agePossession,nearbyLocations,description,furnishDetails,features,rating
0,2 BHK Flat in Krishna Colony,maa bhagwati residency,45 Lac,"₹ 5,000/sq.ft.",Carpet area: 900 (83.61 sq.m.),2 Bedrooms,2 Bathrooms,1 Balcony,,"Krishna Colony, Gurgaon, Haryana",4th of 4 Floors,West,1 to 5 Year Old,"['Chintapurni Mandir', 'State bank ATM', 'Pear...",So with lift.Maa bhagwati residency is one of ...,"['3 Fan', '4 Light', '1 Wardrobe', 'No AC', 'N...","['Feng Shui / Vaastu Compliant', 'Security / F...","['Environment4 out of 5', 'Safety4 out of 5', ..."


In [40]:
df['society'].value_counts()

SS The Leaf3.8 ★                                     73
Tulip Violet4.3 ★                                    40
Shapoorji Pallonji Joyville Gurugram4.0 ★            39
Signature Global Park4.0 ★                           36
Shree Vardhman Victoria3.8 ★                         35
Tulip Violet4.2 ★                                    33
Emaar MGF Emerald Floors Premier3.8 ★                32
Smart World Orchard                                  32
Smart World Gems                                     32
Paras Dews                                           31
DLF The Ultima4.0 ★                                  31
DLF Regal Gardens3.9 ★                               30
M3M Woodshire4.0 ★                                   29
Shree Vardhman Flora3.8 ★                            29
La Vida by Tata Housing                              28
Signature Global Solera3.7 ★                         28
Godrej Nature Plus                                   27
Emaar Gurgaon Greens4.1 ★                       

In [41]:
df['society'].value_counts().shape

(638,)

In [196]:
# Remove (*) marks from the name of the society

In [197]:
def remove_star(text):
    text = text
    string = re.sub(r'\★','',text)
    return string

In [199]:
df['society'] = df['society'].apply(lambda name : remove_star(str(name))).str.lower().str.strip()

In [200]:
df['society']

0                                maa bhagwati residency
1                                          apna enclave
2                                tulsiani easy in homes
3                                   smart world orchard
4                                      parkwood westend
5                        signature global infinity mall
6                                            the cocoon
7                                        ats triumph3.9
8                                     vatika xpressions
9                                        raheja revanta
10                             signature global city 81
11                                 hcbs sports ville3.6
12                                   supertech araville
13                                               godrej
14                              shree vardhman flora3.8
15                                           sobha city
16                                         senate court
17              shapoorji pallonji joyville guru

In [201]:
df.head()

Unnamed: 0,property_name,society,price,price_per_sqft,areaWithType,bedRoom,bathroom,balcony,additionalRoom,address,floorNum,facing,agePossession,nearbyLocations,description,furnishDetails,features,rating
0,2 BHK Flat in Krishna Colony,maa bhagwati residency,45 Lac,"₹ 5,000/sq.ft.",Carpet area: 900 (83.61 sq.m.),2 Bedrooms,2 Bathrooms,1 Balcony,,"Krishna Colony, Gurgaon, Haryana",4th of 4 Floors,West,1 to 5 Year Old,"['Chintapurni Mandir', 'State bank ATM', 'Pear...",So with lift.Maa bhagwati residency is one of ...,"['3 Fan', '4 Light', '1 Wardrobe', 'No AC', 'N...","['Feng Shui / Vaastu Compliant', 'Security / F...","['Environment4 out of 5', 'Safety4 out of 5', ..."
1,2 BHK Flat in Ashok Vihar,apna enclave,50 Lac,"₹ 7,692/sq.ft.",Carpet area: 650 (60.39 sq.m.),2 Bedrooms,2 Bathrooms,1 Balcony,,"46b, Ashok Vihar, Gurgaon, Haryana",1st of 3 Floors,West,10+ Year Old,"['Chintapurni Mandir', 'Sheetla Mata Mandir', ...","Property situated on main road, railway statio...","['3 Wardrobe', '4 Fan', '1 Exhaust Fan', '1 Ge...","['Security / Fire Alarm', 'Maintenance Staff',...","['Environment4 out of 5', 'Safety4 out of 5', ..."
2,2 BHK Flat in Sohna,tulsiani easy in homes,40 Lac,"₹ 6,722/sq.ft.",Carpet area: 595 (55.28 sq.m.),2 Bedrooms,2 Bathrooms,3 Balconies,,"Sohna, Gurgaon, Haryana",12nd of 14 Floors,,0 to 1 Year Old,"['Huda City Metro', 'Golf Course extn road', '...","This property is 15 km away from badshapur, gu...",,"['Power Back-up', 'Feng Shui / Vaastu Complian...","['Environment4 out of 5', 'Safety4 out of 5', ..."
3,2 BHK Flat in Sector 61 Gurgaon,smart world orchard,1.47 Crore,"₹ 12,250/sq.ft.",Carpet area: 1200 (111.48 sq.m.),2 Bedrooms,2 Bathrooms,2 Balconies,Study Room,"Sector 61 Gurgaon, Gurgaon, Haryana",2nd of 4 Floors,,Dec 2023,"['Sector 55-56 Metro station', 'Bestech Centra...",Near to metro station of sector 56 and opposit...,,"['Security / Fire Alarm', 'Private Garden / Te...",
4,2 BHK Flat in Sector 92 Gurgaon,parkwood westend,70 Lac,"₹ 5,204/sq.ft.",Super Built up area 1345(124.95 sq.m.),2 Bedrooms,2 Bathrooms,3 Balconies,Study Room,"Sector 92 Gurgaon, Gurgaon, Haryana",5th of 8 Floors,,Under Construction,"['Yadav Clinic', 'Bangali Clinic', 'Dr. J. S. ...",We are the proud owners of this 2 bhk alongwit...,[],,"['Environment5 out of 5', 'Safety3 out of 5', ..."


In [261]:
# remove all the numbers from the sciety name
df['society'] = df['society'].apply(lambda name : re.sub(r'\d*\.\d*','',str(name)))
df['society'].head()

0    maa bhagwati residency
1              apna enclave
2    tulsiani easy in homes
3       smart world orchard
4          parkwood westend
Name: society, dtype: object

In [266]:
df['price'].value_counts()

1.25 Crore     79
1.1 Crore      61
1.4 Crore      60
1.2 Crore      59
1.5 Crore      59
90 Lac         58
1.3 Crore      57
95 Lac         53
2 Crore        51
1.75 Crore     47
1 Crore        46
1.6 Crore      43
1.35 Crore     41
1.9 Crore      40
1.55 Crore     40
75 Lac         38
1.65 Crore     38
1.7 Crore      37
1.8 Crore      37
80 Lac         36
2.2 Crore      34
1.15 Crore     33
50 Lac         33
1.45 Crore     31
85 Lac         31
1.05 Crore     30
2.5 Crore      29
40 Lac         29
60 Lac         29
2.1 Crore      26
45 Lac         26
65 Lac         25
1.85 Crore     23
35 Lac         23
2.35 Crore     23
3 Crore        22
70 Lac         21
2.25 Crore     20
55 Lac         20
3.5 Crore      19
2.4 Crore      18
2.3 Crore      18
30 Lac         17
2.65 Crore     16
1.38 Crore     15
2.6 Crore      15
2.15 Crore     14
3.4 Crore      14
2.45 Crore     14
2.7 Crore      13
92 Lac         13
78 Lac         13
3.25 Crore     13
82 Lac         13
1.32 Crore     13
38 Lac    

In [227]:
df = df[df['price'] != 'Price on Request']

In [267]:
df['price'].head()

0        45 Lac
1        50 Lac
2        40 Lac
3    1.47 Crore
4        70 Lac
Name: price, dtype: object

In [282]:
def treat_price(text):
    if type(text) == float:
        return text
    else:
        if text[1] == 'Lac':
            return round(float(text[0])/100,2)
        else:
            return round(float(text[0]),2)

In [284]:
df['price'] = df['price'].str.split(' ').apply(treat_price)

In [286]:
df.head()

Unnamed: 0,property_name,society,price,price_per_sqft,areaWithType,bedRoom,bathroom,balcony,additionalRoom,address,floorNum,facing,agePossession,nearbyLocations,description,furnishDetails,features,rating
0,2 BHK Flat in Krishna Colony,maa bhagwati residency,0.45,"₹ 5,000/sq.ft.",Carpet area: 900 (83.61 sq.m.),2 Bedrooms,2 Bathrooms,1 Balcony,,"Krishna Colony, Gurgaon, Haryana",4th of 4 Floors,West,1 to 5 Year Old,"['Chintapurni Mandir', 'State bank ATM', 'Pear...",So with lift.Maa bhagwati residency is one of ...,"['3 Fan', '4 Light', '1 Wardrobe', 'No AC', 'N...","['Feng Shui / Vaastu Compliant', 'Security / F...","['Environment4 out of 5', 'Safety4 out of 5', ..."
1,2 BHK Flat in Ashok Vihar,apna enclave,0.5,"₹ 7,692/sq.ft.",Carpet area: 650 (60.39 sq.m.),2 Bedrooms,2 Bathrooms,1 Balcony,,"46b, Ashok Vihar, Gurgaon, Haryana",1st of 3 Floors,West,10+ Year Old,"['Chintapurni Mandir', 'Sheetla Mata Mandir', ...","Property situated on main road, railway statio...","['3 Wardrobe', '4 Fan', '1 Exhaust Fan', '1 Ge...","['Security / Fire Alarm', 'Maintenance Staff',...","['Environment4 out of 5', 'Safety4 out of 5', ..."
2,2 BHK Flat in Sohna,tulsiani easy in homes,0.4,"₹ 6,722/sq.ft.",Carpet area: 595 (55.28 sq.m.),2 Bedrooms,2 Bathrooms,3 Balconies,,"Sohna, Gurgaon, Haryana",12nd of 14 Floors,,0 to 1 Year Old,"['Huda City Metro', 'Golf Course extn road', '...","This property is 15 km away from badshapur, gu...",,"['Power Back-up', 'Feng Shui / Vaastu Complian...","['Environment4 out of 5', 'Safety4 out of 5', ..."
3,2 BHK Flat in Sector 61 Gurgaon,smart world orchard,1.47,"₹ 12,250/sq.ft.",Carpet area: 1200 (111.48 sq.m.),2 Bedrooms,2 Bathrooms,2 Balconies,Study Room,"Sector 61 Gurgaon, Gurgaon, Haryana",2nd of 4 Floors,,Dec 2023,"['Sector 55-56 Metro station', 'Bestech Centra...",Near to metro station of sector 56 and opposit...,,"['Security / Fire Alarm', 'Private Garden / Te...",
4,2 BHK Flat in Sector 92 Gurgaon,parkwood westend,0.7,"₹ 5,204/sq.ft.",Super Built up area 1345(124.95 sq.m.),2 Bedrooms,2 Bathrooms,3 Balconies,Study Room,"Sector 92 Gurgaon, Gurgaon, Haryana",5th of 8 Floors,,Under Construction,"['Yadav Clinic', 'Bangali Clinic', 'Dr. J. S. ...",We are the proud owners of this 2 bhk alongwit...,[],,"['Environment5 out of 5', 'Safety3 out of 5', ..."


In [291]:
df['price_per_sqft'].value_counts()

₹ 10,000/sq.ft.     19
₹ 8,000/sq.ft.      16
₹ 12,500/sq.ft.     16
₹ 6,666/sq.ft.      13
₹ 5,000/sq.ft.      13
₹ 7,500/sq.ft.      12
₹ 8,333/sq.ft.      12
₹ 6,000/sq.ft.      11
₹ 8,461/sq.ft.       9
₹ 12,000/sq.ft.      8
₹ 7,000/sq.ft.       8
₹ 9,000/sq.ft.       7
₹ 9,230/sq.ft.       6
₹ 8,928/sq.ft.       6
₹ 5,500/sq.ft.       6
₹ 11,500/sq.ft.      6
₹ 11,111/sq.ft.      6
₹ 6,578/sq.ft.       6
₹ 8,888/sq.ft.       6
₹ 8,205/sq.ft.       5
₹ 7,692/sq.ft.       5
₹ 8,571/sq.ft.       5
₹ 4,666/sq.ft.       5
₹ 5,556/sq.ft.       5
₹ 4,615/sq.ft.       5
₹ 7,641/sq.ft.       5
₹ 5,384/sq.ft.       5
₹ 14,242/sq.ft.      5
₹ 7,142/sq.ft.       5
₹ 10,714/sq.ft.      5
₹ 4,444/sq.ft.       5
₹ 5,600/sq.ft.       5
₹ 7,407/sq.ft.       5
₹ 6,500/sq.ft.       5
₹ 11,428/sq.ft.      5
₹ 16,000/sq.ft.      5
₹ 7,250/sq.ft.       4
₹ 9,822/sq.ft.       4
₹ 13,913/sq.ft.      4
₹ 5,660/sq.ft.       4
₹ 8,043/sq.ft.       4
₹ 8,500/sq.ft.       4
₹ 9,210/sq.ft.       4
₹ 15,000/sq

In [325]:
df['price_per_sqft'] = df['price_per_sqft'].str.split("/").str.get(0).str.replace('₹', '').str.replace(',','').str.strip().astype('float')

In [326]:
df.head(2)

Unnamed: 0,property_name,society,price,price_per_sqft,areaWithType,bedRoom,bathroom,balcony,additionalRoom,address,floorNum,facing,agePossession,nearbyLocations,description,furnishDetails,features,rating
0,2 BHK Flat in Krishna Colony,maa bhagwati residency,0.45,5000.0,Carpet area: 900 (83.61 sq.m.),2 Bedrooms,2 Bathrooms,1 Balcony,,"Krishna Colony, Gurgaon, Haryana",4th of 4 Floors,West,1 to 5 Year Old,"['Chintapurni Mandir', 'State bank ATM', 'Pear...",So with lift.Maa bhagwati residency is one of ...,"['3 Fan', '4 Light', '1 Wardrobe', 'No AC', 'N...","['Feng Shui / Vaastu Compliant', 'Security / F...","['Environment4 out of 5', 'Safety4 out of 5', ..."
1,2 BHK Flat in Ashok Vihar,apna enclave,0.5,7692.0,Carpet area: 650 (60.39 sq.m.),2 Bedrooms,2 Bathrooms,1 Balcony,,"46b, Ashok Vihar, Gurgaon, Haryana",1st of 3 Floors,West,10+ Year Old,"['Chintapurni Mandir', 'Sheetla Mata Mandir', ...","Property situated on main road, railway statio...","['3 Wardrobe', '4 Fan', '1 Exhaust Fan', '1 Ge...","['Security / Fire Alarm', 'Maintenance Staff',...","['Environment4 out of 5', 'Safety4 out of 5', ..."


In [329]:
df['bedRoom'].value_counts()

3 Bedrooms    1437
2 Bedrooms     944
4 Bedrooms     478
1 Bedroom      104
5 Bedrooms      31
6 Bedrooms       3
Name: bedRoom, dtype: int64

In [340]:
df = df[~df['bedRoom'].isnull()]

In [344]:
df['bedRoom'] = df['bedRoom'].str.split(" ").str.get(0).astype('int')

In [346]:
df.head(2)

Unnamed: 0,property_name,society,price,price_per_sqft,areaWithType,bedRoom,bathroom,balcony,additionalRoom,address,floorNum,facing,agePossession,nearbyLocations,description,furnishDetails,features,rating
0,2 BHK Flat in Krishna Colony,maa bhagwati residency,0.45,5000.0,Carpet area: 900 (83.61 sq.m.),2,2 Bathrooms,1 Balcony,,"Krishna Colony, Gurgaon, Haryana",4th of 4 Floors,West,1 to 5 Year Old,"['Chintapurni Mandir', 'State bank ATM', 'Pear...",So with lift.Maa bhagwati residency is one of ...,"['3 Fan', '4 Light', '1 Wardrobe', 'No AC', 'N...","['Feng Shui / Vaastu Compliant', 'Security / F...","['Environment4 out of 5', 'Safety4 out of 5', ..."
1,2 BHK Flat in Ashok Vihar,apna enclave,0.5,7692.0,Carpet area: 650 (60.39 sq.m.),2,2 Bathrooms,1 Balcony,,"46b, Ashok Vihar, Gurgaon, Haryana",1st of 3 Floors,West,10+ Year Old,"['Chintapurni Mandir', 'Sheetla Mata Mandir', ...","Property situated on main road, railway statio...","['3 Wardrobe', '4 Fan', '1 Exhaust Fan', '1 Ge...","['Security / Fire Alarm', 'Maintenance Staff',...","['Environment4 out of 5', 'Safety4 out of 5', ..."


In [351]:
df['bathroom'] = df['bathroom'].str.split(' ').str.get(0).astype('int')

In [355]:
df['balcony'].value_counts()

3 Balconies     974
3+ Balconies    862
2 Balconies     749
1 Balcony       315
No Balcony       97
Name: balcony, dtype: int64

In [356]:
df['balcony'].isnull().sum()

0

In [363]:
df['balcony'] = df['balcony'].str.split(' ').str.get(0).str.replace('No','0')

In [364]:
df.head(2)

Unnamed: 0,property_name,society,price,price_per_sqft,areaWithType,bedRoom,bathroom,balcony,additionalRoom,address,floorNum,facing,agePossession,nearbyLocations,description,furnishDetails,features,rating
0,2 BHK Flat in Krishna Colony,maa bhagwati residency,0.45,5000.0,Carpet area: 900 (83.61 sq.m.),2,2,1,,"Krishna Colony, Gurgaon, Haryana",4th of 4 Floors,West,1 to 5 Year Old,"['Chintapurni Mandir', 'State bank ATM', 'Pear...",So with lift.Maa bhagwati residency is one of ...,"['3 Fan', '4 Light', '1 Wardrobe', 'No AC', 'N...","['Feng Shui / Vaastu Compliant', 'Security / F...","['Environment4 out of 5', 'Safety4 out of 5', ..."
1,2 BHK Flat in Ashok Vihar,apna enclave,0.5,7692.0,Carpet area: 650 (60.39 sq.m.),2,2,1,,"46b, Ashok Vihar, Gurgaon, Haryana",1st of 3 Floors,West,10+ Year Old,"['Chintapurni Mandir', 'Sheetla Mata Mandir', ...","Property situated on main road, railway statio...","['3 Wardrobe', '4 Fan', '1 Exhaust Fan', '1 Ge...","['Security / Fire Alarm', 'Maintenance Staff',...","['Environment4 out of 5', 'Safety4 out of 5', ..."


In [366]:
df['additionalRoom'].value_counts()

Servant Room                                     629
Study Room                                       232
Others                                           179
Pooja Room                                       132
Study Room,Servant Room                           81
Store Room                                        76
Pooja Room,Servant Room                           60
Servant Room,Others                               52
Servant Room,Pooja Room                           30
Study Room,Others                                 27
Pooja Room,Study Room,Servant Room,Others         25
Pooja Room,Study Room,Servant Room                24
Servant Room,Store Room                           19
Pooja Room,Study Room                             13
Pooja Room,Study Room,Servant Room,Store Room     12
Study Room,Pooja Room                              8
Servant Room,Study Room                            8
Study Room,Servant Room,Store Room                 7
Pooja Room,Store Room                         

In [367]:
df['additionalRoom'].isnull().sum()

1305

In [368]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2997 entries, 0 to 3016
Data columns (total 18 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   property_name    2997 non-null   object 
 1   society          2997 non-null   object 
 2   price            2996 non-null   float64
 3   price_per_sqft   2996 non-null   float64
 4   areaWithType     2997 non-null   object 
 5   bedRoom          2997 non-null   int64  
 6   bathroom         2997 non-null   int64  
 7   balcony          2997 non-null   object 
 8   additionalRoom   1692 non-null   object 
 9   address          2991 non-null   object 
 10  floorNum         2995 non-null   object 
 11  facing           2123 non-null   object 
 12  agePossession    2996 non-null   object 
 13  nearbyLocations  2906 non-null   object 
 14  description      2997 non-null   object 
 15  furnishDetails   2200 non-null   object 
 16  features         2590 non-null   object 
 17  rating        

In [371]:
df['additionalRoom'].fillna('not available',inplace = True)

In [372]:
df['additionalRoom']

0                                       not available
1                                       not available
2                                       not available
3                                          Study Room
4                                          Study Room
5                                       not available
6                                       not available
7                                       not available
8                                          Study Room
9                                        Servant Room
10                                      not available
11                                      not available
12                                      not available
13                                      not available
14                                         Study Room
15                                      not available
16                                      not available
17                                      not available
18                          

In [374]:
df['additionalRoom'] = df['additionalRoom'].str.lower()

In [375]:
df.head(2)

Unnamed: 0,property_name,society,price,price_per_sqft,areaWithType,bedRoom,bathroom,balcony,additionalRoom,address,floorNum,facing,agePossession,nearbyLocations,description,furnishDetails,features,rating
0,2 BHK Flat in Krishna Colony,maa bhagwati residency,0.45,5000.0,Carpet area: 900 (83.61 sq.m.),2,2,1,not available,"Krishna Colony, Gurgaon, Haryana",4th of 4 Floors,West,1 to 5 Year Old,"['Chintapurni Mandir', 'State bank ATM', 'Pear...",So with lift.Maa bhagwati residency is one of ...,"['3 Fan', '4 Light', '1 Wardrobe', 'No AC', 'N...","['Feng Shui / Vaastu Compliant', 'Security / F...","['Environment4 out of 5', 'Safety4 out of 5', ..."
1,2 BHK Flat in Ashok Vihar,apna enclave,0.5,7692.0,Carpet area: 650 (60.39 sq.m.),2,2,1,not available,"46b, Ashok Vihar, Gurgaon, Haryana",1st of 3 Floors,West,10+ Year Old,"['Chintapurni Mandir', 'Sheetla Mata Mandir', ...","Property situated on main road, railway statio...","['3 Wardrobe', '4 Fan', '1 Exhaust Fan', '1 Ge...","['Security / Fire Alarm', 'Maintenance Staff',...","['Environment4 out of 5', 'Safety4 out of 5', ..."


In [377]:
df['floorNum'].value_counts()

2nd   of 4 Floors           74
3rd   of 4 Floors           71
4th   of 4 Floors           62
1st   of 4 Floors           61
12nd   of 14 Floors         49
14th   of 14 Floors         48
Ground of 14 Floors         40
10th   of 14 Floors         35
7th   of 14 Floors          35
8th   of 14 Floors          34
4th   of 14 Floors          28
6th   of 14 Floors          27
2nd   of 2 Floors           26
1st   of 14 Floors          26
3rd   of 3 Floors           26
3rd   of 14 Floors          24
5th   of 14 Floors          24
8th   of 19 Floors          24
11st   of 14 Floors         23
1st   of 1 Floors           23
9th   of 14 Floors          23
9th   of 9 Floors           23
5th   of 12 Floors          22
2nd   of 3 Floors           22
2nd   of 14 Floors          21
8th   of 18 Floors          20
10th   of 19 Floors         18
6th   of 18 Floors          18
10th   of 18 Floors         17
9th   of 13 Floors          17
7th   of 15 Floors          17
12nd   of 12 Floors         17
Ground o

In [378]:
df['floorNum'].isnull().sum()

2

In [379]:
df[df['floorNum'].isnull()]

Unnamed: 0,property_name,society,price,price_per_sqft,areaWithType,bedRoom,bathroom,balcony,additionalRoom,address,floorNum,facing,agePossession,nearbyLocations,description,furnishDetails,features,rating
181,3 BHK Flat in Dwarka Expressway Gurgaon,experion heartsong,1.08,6150.0,Built Up area: 1758 (163.32 sq.m.),3,3,0,not available,"604, Tower B-3, 6th Floor,Sector 108, Dwarka E...",,,Under Construction,,A property by one of the most reputed builders...,[],,
2766,2 BHK Flat in Sector 78 Gurgaon,,0.6,3692.0,Built Up area: 1625 (150.97 sq.m.),2,2,0,not available,"Gurgaon, Sector 78 Gurgaon, Gurgaon, Haryana",,,Under Construction,,The property is under construction it's by rah...,[],,"['Safety4 out of 5', 'Lifestyle4 out of 5', 'E..."


In [388]:
df['floorNum'] = df['floorNum'].str.split(' ').str.get(0).replace('Ground','0').str.replace('Basement','-1').str.replace('Lower','0').str.extract(r'(\d+)')

In [389]:
df.head(2)

Unnamed: 0,property_name,society,price,price_per_sqft,areaWithType,bedRoom,bathroom,balcony,additionalRoom,address,floorNum,facing,agePossession,nearbyLocations,description,furnishDetails,features,rating
0,2 BHK Flat in Krishna Colony,maa bhagwati residency,0.45,5000.0,Carpet area: 900 (83.61 sq.m.),2,2,1,not available,"Krishna Colony, Gurgaon, Haryana",4,West,1 to 5 Year Old,"['Chintapurni Mandir', 'State bank ATM', 'Pear...",So with lift.Maa bhagwati residency is one of ...,"['3 Fan', '4 Light', '1 Wardrobe', 'No AC', 'N...","['Feng Shui / Vaastu Compliant', 'Security / F...","['Environment4 out of 5', 'Safety4 out of 5', ..."
1,2 BHK Flat in Ashok Vihar,apna enclave,0.5,7692.0,Carpet area: 650 (60.39 sq.m.),2,2,1,not available,"46b, Ashok Vihar, Gurgaon, Haryana",1,West,10+ Year Old,"['Chintapurni Mandir', 'Sheetla Mata Mandir', ...","Property situated on main road, railway statio...","['3 Wardrobe', '4 Fan', '1 Exhaust Fan', '1 Ge...","['Security / Fire Alarm', 'Maintenance Staff',...","['Environment4 out of 5', 'Safety4 out of 5', ..."


In [390]:
df.drop(columns = ['facing'],inplace = True)

In [392]:
df.head(2)

Unnamed: 0,property_name,society,price,price_per_sqft,areaWithType,bedRoom,bathroom,balcony,additionalRoom,address,floorNum,agePossession,nearbyLocations,description,furnishDetails,features,rating
0,2 BHK Flat in Krishna Colony,maa bhagwati residency,0.45,5000.0,Carpet area: 900 (83.61 sq.m.),2,2,1,not available,"Krishna Colony, Gurgaon, Haryana",4,1 to 5 Year Old,"['Chintapurni Mandir', 'State bank ATM', 'Pear...",So with lift.Maa bhagwati residency is one of ...,"['3 Fan', '4 Light', '1 Wardrobe', 'No AC', 'N...","['Feng Shui / Vaastu Compliant', 'Security / F...","['Environment4 out of 5', 'Safety4 out of 5', ..."
1,2 BHK Flat in Ashok Vihar,apna enclave,0.5,7692.0,Carpet area: 650 (60.39 sq.m.),2,2,1,not available,"46b, Ashok Vihar, Gurgaon, Haryana",1,10+ Year Old,"['Chintapurni Mandir', 'Sheetla Mata Mandir', ...","Property situated on main road, railway statio...","['3 Wardrobe', '4 Fan', '1 Exhaust Fan', '1 Ge...","['Security / Fire Alarm', 'Maintenance Staff',...","['Environment4 out of 5', 'Safety4 out of 5', ..."


In [395]:
# insert some new columns in the data sets 
df.insert(loc=1,column = 'Property_type',value='flat')

In [396]:
df

Unnamed: 0,property_name,Property_type,society,price,price_per_sqft,areaWithType,bedRoom,bathroom,balcony,additionalRoom,address,floorNum,agePossession,nearbyLocations,description,furnishDetails,features,rating
0,2 BHK Flat in Krishna Colony,flat,maa bhagwati residency,0.45,5000.0,Carpet area: 900 (83.61 sq.m.),2,2,1,not available,"Krishna Colony, Gurgaon, Haryana",4.0,1 to 5 Year Old,"['Chintapurni Mandir', 'State bank ATM', 'Pear...",So with lift.Maa bhagwati residency is one of ...,"['3 Fan', '4 Light', '1 Wardrobe', 'No AC', 'N...","['Feng Shui / Vaastu Compliant', 'Security / F...","['Environment4 out of 5', 'Safety4 out of 5', ..."
1,2 BHK Flat in Ashok Vihar,flat,apna enclave,0.5,7692.0,Carpet area: 650 (60.39 sq.m.),2,2,1,not available,"46b, Ashok Vihar, Gurgaon, Haryana",1.0,10+ Year Old,"['Chintapurni Mandir', 'Sheetla Mata Mandir', ...","Property situated on main road, railway statio...","['3 Wardrobe', '4 Fan', '1 Exhaust Fan', '1 Ge...","['Security / Fire Alarm', 'Maintenance Staff',...","['Environment4 out of 5', 'Safety4 out of 5', ..."
2,2 BHK Flat in Sohna,flat,tulsiani easy in homes,0.4,6722.0,Carpet area: 595 (55.28 sq.m.),2,2,3,not available,"Sohna, Gurgaon, Haryana",12.0,0 to 1 Year Old,"['Huda City Metro', 'Golf Course extn road', '...","This property is 15 km away from badshapur, gu...",,"['Power Back-up', 'Feng Shui / Vaastu Complian...","['Environment4 out of 5', 'Safety4 out of 5', ..."
3,2 BHK Flat in Sector 61 Gurgaon,flat,smart world orchard,1.47,12250.0,Carpet area: 1200 (111.48 sq.m.),2,2,2,study room,"Sector 61 Gurgaon, Gurgaon, Haryana",2.0,Dec 2023,"['Sector 55-56 Metro station', 'Bestech Centra...",Near to metro station of sector 56 and opposit...,,"['Security / Fire Alarm', 'Private Garden / Te...",
4,2 BHK Flat in Sector 92 Gurgaon,flat,parkwood westend,0.7,5204.0,Super Built up area 1345(124.95 sq.m.),2,2,3,study room,"Sector 92 Gurgaon, Gurgaon, Haryana",5.0,Under Construction,"['Yadav Clinic', 'Bangali Clinic', 'Dr. J. S. ...",We are the proud owners of this 2 bhk alongwit...,[],,"['Environment5 out of 5', 'Safety3 out of 5', ..."
5,2 BHK Flat in Sector 36 Gurgaon,flat,signature global infinity mall,0.41,6269.0,Built Up area: 654 (60.76 sq.m.),2,2,3,not available,"Sohna Sector 36, Sector 36 Gurgaon, Gurgaon, H...",3.0,undefined,,Best in class property available at sector 36 ...,,,
6,3 BHK Flat in Dwarka Expressway Gurgaon,flat,the cocoon,2.0,13333.0,Super Built up area 1500(139.35 sq.m.),3,3,3,not available,"Dwarka Expressway Gurgaon, Gurgaon, Haryana",5.0,0 to 1 Year Old,"['Shri Multispeciality Hospital', 'Esic Hospit...",Residential apartment for sell.The property co...,[],,
7,3 BHK Flat in Sector 104 Gurgaon,flat,ats triumph,1.8,7860.0,Carpet area: 2290 (212.75 sq.m.),3,4,3,not available,"Sector 104 Gurgaon, Gurgaon, Haryana",14.0,0 to 1 Year Old,"['IFFCO Chowk Metro Station', 'The Esplanade M...",Ats triumph is one of gurgaon's most sought af...,[],"['Power Back-up', 'Intercom Facility', 'Lift(s...","['Green Area4 out of 5', 'Amenities4.5 out of ..."
8,2 BHK Flat in Sector 88B Gurgaon,flat,vatika xpressions,1.1,8148.0,Built Up area: 1350 (125.42 sq.m.)Carpet area:...,2,4,3+,study room,"H 23, Sector 88B Gurgaon, Gurgaon, Haryana",2.0,Under Construction,"['Dwarka Expressway', 'Adarsh public school,Ga...",We are the proud owners of this 2 bhk apartmen...,,"['Feng Shui / Vaastu Compliant', 'Intercom Fac...",
9,3 BHK Flat in Sector 78 Gurgaon,flat,raheja revanta,4.75,16885.0,Built Up area: 2813 (261.34 sq.m.),3,3,2,servant room,"Sector 78 Gurgaon, Gurgaon, Haryana",31.0,Dec 2023,"['Sapphire 83 Mall', 'NH-8', 'Bal Bharti Publi...","This 3 bhk flat is located in raheja revanta, ...",,"['Security / Fire Alarm', 'Intercom Facility',...","['Environment4 out of 5', 'Safety4 out of 5', ..."


In [397]:
# New area column is also added to the data set
df.insert(loc = 5, column = 'area', value = round(df['price']*10000000/ df['price_per_sqft']))

In [400]:
df.head(2)

Unnamed: 0,property_name,Property_type,society,price,price_per_sqft,area,areaWithType,bedRoom,bathroom,balcony,additionalRoom,address,floorNum,agePossession,nearbyLocations,description,furnishDetails,features,rating
0,2 BHK Flat in Krishna Colony,flat,maa bhagwati residency,0.45,5000.0,900.0,Carpet area: 900 (83.61 sq.m.),2,2,1,not available,"Krishna Colony, Gurgaon, Haryana",4,1 to 5 Year Old,"['Chintapurni Mandir', 'State bank ATM', 'Pear...",So with lift.Maa bhagwati residency is one of ...,"['3 Fan', '4 Light', '1 Wardrobe', 'No AC', 'N...","['Feng Shui / Vaastu Compliant', 'Security / F...","['Environment4 out of 5', 'Safety4 out of 5', ..."
1,2 BHK Flat in Ashok Vihar,flat,apna enclave,0.5,7692.0,650.0,Carpet area: 650 (60.39 sq.m.),2,2,1,not available,"46b, Ashok Vihar, Gurgaon, Haryana",1,10+ Year Old,"['Chintapurni Mandir', 'Sheetla Mata Mandir', ...","Property situated on main road, railway statio...","['3 Wardrobe', '4 Fan', '1 Exhaust Fan', '1 Ge...","['Security / Fire Alarm', 'Maintenance Staff',...","['Environment4 out of 5', 'Safety4 out of 5', ..."


In [401]:
# Export this cleaned file 
df.to_csv('flat_cleaned.csv',index = False)

# House Dataset Cleaning

In [402]:
df = pd.read_csv('houses.csv')

In [403]:
df

Unnamed: 0,property_name,link,society,price,rate,area,areaWithType,bedRoom,bathroom,balcony,additionalRoom,address,noOfFloor,facing,agePossession,nearbyLocations,description,furnishDetails,features,rating,property_id
0,5 Bedroom House for sale in Sector 70A Gurgaon,https://www.99acres.com/5-bhk-bedroom-independ...,Bptp Visionnaire,5.25 Crore,"₹ 20,115/sq.ft.",(242 sq.m.) Plot Area,Plot area 290(242.48 sq.m.),5 Bedrooms,4 Bathrooms,3+ Balconies,Servant Room,"29b, Sector 70A Gurgaon, Gurgaon, Haryana",3 Floors,North-East,0 to 1 Year Old,"['Good Earth City Center 2', 'Kunskapsskolan I...",Do you wish to buy an independent house in sec...,"['1 Wardrobe', '1 Fan', '1 Exhaust Fan', '1 Ge...","['Feng Shui / Vaastu Compliant', 'Private Gard...","['Environment5 out of 5', 'Lifestyle4 out of 5...",K70037724
1,5 Bedroom House for sale in Sector 21A Faridabad,https://www.99acres.com/5-bhk-bedroom-independ...,,5.7 Crore,"₹ 105,751/sq.ft.",(50 sq.m.) Plot Area,Plot area 539(50.07 sq.m.),5 Bedrooms,4 Bathrooms,2 Balconies,"Store Room,Pooja Room,Servant Room","Sector 21A Faridabad, Gurgaon, Haryana",2 Floors,,5 to 10 Year Old,,"Hi, we have an independent house/villa availab...","['1 Water Purifier', '5 Fan', '1 Exhaust Fan',...","['Private Garden / Terrace', 'Park', 'Visitor ...",,E69288322
2,10 Bedroom House for sale in Sushant Lok Phase 1,https://www.99acres.com/10-bhk-bedroom-indepen...,,2.1 Crore,"₹ 38,251/sq.ft.",(51 sq.m.) Plot Area,Plot area 61(51 sq.m.),10 Bedrooms,10 Bathrooms,3+ Balconies,Servant Room,"Sushant Lok Phase 1, Gurgaon, Haryana",5 Floors,West,0 to 1 Year Old,"['Sector 42-43 metro station', 'Huda city cent...","Monthly rental income is rs1,40,000/- Best opt...","['10 Bed', '3 Fan', '10 Geyser', '2 Light', 'N...","['Maintenance Staff', 'Water Storage', 'Visito...","['Environment5 out of 5', 'Lifestyle5 out of 5...",F69536898
3,21 Bedroom House for sale in Sector 54 Gurgaon,https://www.99acres.com/21-bhk-bedroom-indepen...,,5 Crore,"₹ 43,066/sq.ft.",(108 sq.m.) Plot Area,Plot area 129(107.86 sq.m.),21 Bedrooms,21 Bathrooms,3+ Balconies,Servant Room,"Sector 54 Gurgaon, Gurgaon, Haryana",5 Floors,North,1 to 5 Year Old,"['Sector 53-54 metro station', 'Sector 54 chow...","129 sq yd plot size. 5 floors built up , fully...","['1 Water Purifier', '21 Fan', '1 Fridge', '1 ...","['Feng Shui / Vaastu Compliant', 'Private Gard...","['Environment4 out of 5', 'Lifestyle5 out of 5...",R69483164
4,12 Bedroom House for sale in Sushant Lok Phase 1,https://www.99acres.com/12-bhk-bedroom-indepen...,,3 Crore,"₹ 53,763/sq.ft.",(52 sq.m.) Plot Area,Plot area 62(51.84 sq.m.),12 Bedrooms,12 Bathrooms,3+ Balconies,Others,"1228, Sushant Lok Phase 1, Gurgaon, Haryana",5 Floors,West,Within 6 months,"['Sector 42-43 metro station', 'Huda city cent...",Best for investment purpose. Fully furnished b...,"['1 Water Purifier', '1 Fridge', '12 Fan', '1 ...","['Maintenance Staff', 'Water Storage', 'Visito...","['Environment5 out of 5', 'Lifestyle5 out of 5...",M69381272
5,20 Bedroom House for sale in Sushant Lok Phase 1,https://www.99acres.com/20-bhk-bedroom-indepen...,,4.5 Crore,"₹ 39,062/sq.ft.",(107 sq.m.) Plot Area,Plot area 128(107.02 sq.m.),20 Bedrooms,20 Bathrooms,3+ Balconies,Servant Room,"Sushant Lok Phase 1, Gurgaon, Haryana",4 Floors,East,1 to 5 Year Old,"['Huda city centre metro station', 'New Life C...",Best for investment purpose. 3l rupees monthly...,"['1 Water Purifier', '1 Fridge', '20 Fan', '1 ...","['Water Storage', 'Park', 'Visitor Parking', '...","['Environment5 out of 5', 'Lifestyle5 out of 5...",V69218808
6,5 Bedroom House for sale in Sector 66 Gurgaon,https://www.99acres.com/5-bhk-bedroom-independ...,Emaar MGF Marbella,12 Crore,"₹ 37,037/sq.ft.",(301 sq.m.) Plot Area,Plot area 360(301.01 sq.m.),5 Bedrooms,5 Bathrooms,3+ Balconies,"Servant Room,Pooja Room,Store Room","Sector 66 Gurgaon, Gurgaon, Haryana",3 Floors,East,0 to 1 Year Old,"['Sector 55-56 Rapid Metro Station', 'HUB 66',...",Available for sale 5bhk independent house in e...,"['4 Wardrobe', '1 Water Purifier', '6 Fan', '1...","['Security / Fire Alarm', 'Feng Shui / Vaastu ...","['Environment3 out of 5', 'Lifestyle4 out of 5...",G55931988
7,16 Bedroom House for sale in DLF Phase 1,https://www.99acres.com/16-bhk-bedroom-indepen...,Luxury DLF City Floors,20 Crore,"₹ 48,889/sq.ft.",(418 sq.m.) Plot Area,Plot area 500(418.06 sq.m.),16 Bedrooms,16 Bathrooms,3+ Balconies,Servant Room,"DLF Phase 1, Gurgaon, Haryana",4 Floors,,0 to 1 Year Old,"['Sikandarpur metro station', 'Sikanderpur met...",Prime location on dlf phase - 1 builder floor ...,"['16 Wardrobe', '26 Fan', '1 Exhaust Fan', '16...","['High Ceiling Height', 'Park', 'Visitor Parki...","['Environment5 out of 5', 'Lifestyle5 out of 5...",E67952012
8,6 Bedroom House for sale in DLF Phase 2,https://www.99acres.com/6-bhk-bedroom-independ...,,10.85 Crore,"₹ 38,150/sq.ft.",(264 sq.m.) Plot Area,Plot area 316(264.22 sq.m.),6 Bedrooms,6 Bathrooms,1 Balcony,Store Room,"DLF Phase 2, Gurgaon, Haryana",3 Floors,North-East,1 to 5 Year Old,"['Vodafone belvedere towers metro station', 'D...","6 bhk kothi in dlf phase 2, gurgaon available ...","['4 Wardrobe', '1 Water Purifier', '5 Fan', '1...","['Security / Fire Alarm', 'Feng Shui / Vaastu ...","['Environment5 out of 5', 'Lifestyle5 out of 5...",U69218678
9,3 Bedroom House for sale in Sushant Lok Phase 1,https://www.99acres.com/3-bhk-bedroom-independ...,Sushant Lok 1 Builder Floors,1.95 Crore,"₹ 10,077/sq.ft.",(180 sq.m.) Plot Area,Plot area 215(179.77 sq.m.),3 Bedrooms,4 Bathrooms,3+ Balconies,Servant Room,"C Block Small Nos, Sushant Lok Phase 1, Gurgao...",4 Floors,West,0 to 1 Year Old,"['Huda city centre metro station', 'Sector 42-...",Brand new builder floor at very reasonable pri...,"['3 Wardrobe', '5 Fan', '1 Exhaust Fan', '3 Ge...","['Feng Shui / Vaastu Compliant', 'High Ceiling...","['Environment5 out of 5', 'Lifestyle5 out of 5...",L69688060


In [404]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1044 entries, 0 to 1043
Data columns (total 21 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   property_name    1044 non-null   object
 1   link             1044 non-null   object
 2   society          453 non-null    object
 3   price            968 non-null    object
 4   rate             1005 non-null   object
 5   area             1044 non-null   object
 6   areaWithType     987 non-null    object
 7   bedRoom          987 non-null    object
 8   bathroom         987 non-null    object
 9   balcony          987 non-null    object
 10  additionalRoom   589 non-null    object
 11  address          1031 non-null   object
 12  noOfFloor        967 non-null    object
 13  facing           674 non-null    object
 14  agePossession    987 non-null    object
 15  nearbyLocations  913 non-null    object
 16  description      1036 non-null   object
 17  furnishDetails   743 non-null    

In [408]:
df.shape

(1044, 21)

In [410]:
df.duplicated().sum()

13

In [413]:
df = df.drop_duplicates()

In [415]:
df.duplicated().sum()

0

In [416]:
df.isnull().sum()

property_name        0
link                 0
society            586
price               76
rate                39
area                 0
areaWithType        57
bedRoom             57
bathroom            57
balcony             57
additionalRoom     453
address             13
noOfFloor           76
facing             369
agePossession       57
nearbyLocations    131
description          8
furnishDetails     299
features           369
rating             137
property_id          8
dtype: int64

In [417]:
df.drop(columns = ['link','property_id'],inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop(columns = ['link','property_id'],inplace = True)


In [418]:
df.shape

(1031, 19)

In [419]:
df.head(2)

Unnamed: 0,property_name,society,price,rate,area,areaWithType,bedRoom,bathroom,balcony,additionalRoom,address,noOfFloor,facing,agePossession,nearbyLocations,description,furnishDetails,features,rating
0,5 Bedroom House for sale in Sector 70A Gurgaon,Bptp Visionnaire,5.25 Crore,"₹ 20,115/sq.ft.",(242 sq.m.) Plot Area,Plot area 290(242.48 sq.m.),5 Bedrooms,4 Bathrooms,3+ Balconies,Servant Room,"29b, Sector 70A Gurgaon, Gurgaon, Haryana",3 Floors,North-East,0 to 1 Year Old,"['Good Earth City Center 2', 'Kunskapsskolan I...",Do you wish to buy an independent house in sec...,"['1 Wardrobe', '1 Fan', '1 Exhaust Fan', '1 Ge...","['Feng Shui / Vaastu Compliant', 'Private Gard...","['Environment5 out of 5', 'Lifestyle4 out of 5..."
1,5 Bedroom House for sale in Sector 21A Faridabad,,5.7 Crore,"₹ 105,751/sq.ft.",(50 sq.m.) Plot Area,Plot area 539(50.07 sq.m.),5 Bedrooms,4 Bathrooms,2 Balconies,"Store Room,Pooja Room,Servant Room","Sector 21A Faridabad, Gurgaon, Haryana",2 Floors,,5 to 10 Year Old,,"Hi, we have an independent house/villa availab...","['1 Water Purifier', '5 Fan', '1 Exhaust Fan',...","['Private Garden / Terrace', 'Park', 'Visitor ...",


In [422]:
df.rename(columns={'rate':'price_per_sqft'},inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'rate':'price_per_sqft'},inplace = True)


In [423]:
df.head(1)

Unnamed: 0,property_name,society,price,price_per_sqft,area,areaWithType,bedRoom,bathroom,balcony,additionalRoom,address,noOfFloor,facing,agePossession,nearbyLocations,description,furnishDetails,features,rating
0,5 Bedroom House for sale in Sector 70A Gurgaon,Bptp Visionnaire,5.25 Crore,"₹ 20,115/sq.ft.",(242 sq.m.) Plot Area,Plot area 290(242.48 sq.m.),5 Bedrooms,4 Bathrooms,3+ Balconies,Servant Room,"29b, Sector 70A Gurgaon, Gurgaon, Haryana",3 Floors,North-East,0 to 1 Year Old,"['Good Earth City Center 2', 'Kunskapsskolan I...",Do you wish to buy an independent house in sec...,"['1 Wardrobe', '1 Fan', '1 Exhaust Fan', '1 Ge...","['Feng Shui / Vaastu Compliant', 'Private Gard...","['Environment5 out of 5', 'Lifestyle4 out of 5..."


In [425]:
df['society'].isnull().sum()

586

In [426]:
df['society'].value_counts()

International City by SOBHA Phase 2                  26
Emaar MGF Marbella                                   26
Vipul Tatvam Villa                                   24
International City by Sobha Phase 1                  23
Unitech Uniworld Resorts                             13
DLF City Plots                                       11
DLF City Plots Phase 2                               11
Unitech Espace                                       11
Eldeco Mansionz                                       9
Bptp Visionnaire                                      8
Not Applicable                                        7
Uppal Southend4.0 ★                                   7
Sushant Lok 1 Builder Floors                          6
Ansals Palam Vihar                                    6
DLF City Phase 1                                      5
Experion Windchants3.8 ★                              5
DLF Alameda                                           4
Unitech Aspen Greens                            

In [431]:
df['society'] = df['society'].apply(lambda name : re.sub(r'\★','',str(name))).str.lower().str.strip()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['society'] = df['society'].apply(lambda name : re.sub(r'\★','',str(name))).str.lower().str.strip()


In [435]:
df['society'] = df['society'].str.replace('nan','independent')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['society'] = df['society'].str.replace('nan','independent')


In [437]:
df['society'].isnull().sum()

0

In [442]:
df.head(2)

Unnamed: 0,property_name,society,price,price_per_sqft,area,areaWithType,bedRoom,bathroom,balcony,additionalRoom,address,noOfFloor,facing,agePossession,nearbyLocations,description,furnishDetails,features,rating
0,5 Bedroom House for sale in Sector 70A Gurgaon,bptp visionnaire,5.25 Crore,"₹ 20,115/sq.ft.",(242 sq.m.) Plot Area,Plot area 290(242.48 sq.m.),5 Bedrooms,4 Bathrooms,3+ Balconies,Servant Room,"29b, Sector 70A Gurgaon, Gurgaon, Haryana",3 Floors,North-East,0 to 1 Year Old,"['Good Earth City Center 2', 'Kunskapsskolan I...",Do you wish to buy an independent house in sec...,"['1 Wardrobe', '1 Fan', '1 Exhaust Fan', '1 Ge...","['Feng Shui / Vaastu Compliant', 'Private Gard...","['Environment5 out of 5', 'Lifestyle4 out of 5..."
1,5 Bedroom House for sale in Sector 21A Faridabad,independent,5.7 Crore,"₹ 105,751/sq.ft.",(50 sq.m.) Plot Area,Plot area 539(50.07 sq.m.),5 Bedrooms,4 Bathrooms,2 Balconies,"Store Room,Pooja Room,Servant Room","Sector 21A Faridabad, Gurgaon, Haryana",2 Floors,,5 to 10 Year Old,,"Hi, we have an independent house/villa availab...","['1 Water Purifier', '5 Fan', '1 Exhaust Fan',...","['Private Garden / Terrace', 'Park', 'Visitor ...",


In [449]:
df['price'].value_counts()

3.5 Crore           19
5.5 Crore           17
6.5 Crore           16
6 Crore             14
5 Crore             13
4.5 Crore           13
70 Lac              13
8 Crore             13
50 Lac              13
75 Lac              13
8.5 Crore           12
7 Crore             12
85 Lac              12
1.5 Crore           11
4 Crore             11
10 Crore            10
Price on Request    10
45 Lac              10
2.25 Crore          10
2.5 Crore           10
60 Lac              10
90 Lac              10
9 Crore             10
9.5 Crore            9
12 Crore             9
80 Lac               9
1.7 Crore            9
2 Crore              9
1.1 Crore            9
1 Crore              8
6.75 Crore           8
11 Crore             8
1.6 Crore            8
4.6 Crore            8
1.3 Crore            7
95 Lac               7
12.5 Crore           7
3.75 Crore           7
1.2 Crore            7
55 Lac               7
2.1 Crore            7
65 Lac               7
40 Lac               7
1.35 Crore 

In [454]:
df = df[df['price'] != 'Price on Request']

In [455]:
df.head(2)

Unnamed: 0,property_name,society,price,price_per_sqft,area,areaWithType,bedRoom,bathroom,balcony,additionalRoom,address,noOfFloor,facing,agePossession,nearbyLocations,description,furnishDetails,features,rating
0,5 Bedroom House for sale in Sector 70A Gurgaon,bptp visionnaire,5.25 Crore,"₹ 20,115/sq.ft.",(242 sq.m.) Plot Area,Plot area 290(242.48 sq.m.),5 Bedrooms,4 Bathrooms,3+ Balconies,Servant Room,"29b, Sector 70A Gurgaon, Gurgaon, Haryana",3 Floors,North-East,0 to 1 Year Old,"['Good Earth City Center 2', 'Kunskapsskolan I...",Do you wish to buy an independent house in sec...,"['1 Wardrobe', '1 Fan', '1 Exhaust Fan', '1 Ge...","['Feng Shui / Vaastu Compliant', 'Private Gard...","['Environment5 out of 5', 'Lifestyle4 out of 5..."
1,5 Bedroom House for sale in Sector 21A Faridabad,independent,5.7 Crore,"₹ 105,751/sq.ft.",(50 sq.m.) Plot Area,Plot area 539(50.07 sq.m.),5 Bedrooms,4 Bathrooms,2 Balconies,"Store Room,Pooja Room,Servant Room","Sector 21A Faridabad, Gurgaon, Haryana",2 Floors,,5 to 10 Year Old,,"Hi, we have an independent house/villa availab...","['1 Water Purifier', '5 Fan', '1 Exhaust Fan',...","['Private Garden / Terrace', 'Park', 'Visitor ...",


In [456]:
df.shape

(1021, 19)

In [460]:
df['price'] = df['price'].str.split(' ').apply(treat_price)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['price'] = df['price'].str.split(' ').apply(treat_price)


In [462]:
df['price'].isnull().sum()

76

In [463]:
df['price_per_sqft'].value_counts()

₹ 22,222/sq.ft.     14
₹ 11,111/sq.ft.     11
₹ 33,333/sq.ft.     11
₹ 10,000/sq.ft.     10
₹ 26,667/sq.ft.      6
₹ 13,333/sq.ft.      6
₹ 37,037/sq.ft.      6
₹ 26,235/sq.ft.      6
₹ 30,556/sq.ft.      6
₹ 31,481/sq.ft.      6
₹ 9,444/sq.ft.       5
₹ 23,333/sq.ft.      5
₹ 23,148/sq.ft.      5
₹ 11,100/sq.ft.      5
₹ 8,000/sq.ft.       5
₹ 8,889/sq.ft.       5
₹ 5,000/sq.ft.       5
₹ 35,000/sq.ft.      4
₹ 32,407/sq.ft.      4
₹ 9,722/sq.ft.       4
₹ 18,889/sq.ft.      4
₹ 24,691/sq.ft.      4
₹ 25,000/sq.ft.      4
₹ 27,778/sq.ft.      4
₹ 21,605/sq.ft.      4
₹ 34,722/sq.ft.      4
₹ 6,944/sq.ft.       4
₹ 36,111/sq.ft.      3
₹ 13,889/sq.ft.      3
₹ 20,000/sq.ft.      3
₹ 16,058/sq.ft.      3
₹ 8,235/sq.ft.       3
₹ 60,000/sq.ft.      3
₹ 12,222/sq.ft.      3
₹ 8,333/sq.ft.       3
₹ 14,815/sq.ft.      3
₹ 12,000/sq.ft.      3
₹ 6,666/sq.ft.       3
₹ 15,555/sq.ft.      3
₹ 10,556/sq.ft.      3
₹ 7,500/sq.ft.       3
₹ 11,250/sq.ft.      3
₹ 15,625/sq.ft.      3
₹ 25,926/sq

In [470]:
df['price_per_sqft'] = df['price_per_sqft'].str.split('/').str.get(0).str.replace('₹','').str.replace(',','').astype('float')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['price_per_sqft'] = df['price_per_sqft'].str.split('/').str.get(0).str.replace('₹','').str.replace(',','').astype('float')


In [471]:
df.head()

Unnamed: 0,property_name,society,price,price_per_sqft,area,areaWithType,bedRoom,bathroom,balcony,additionalRoom,address,noOfFloor,facing,agePossession,nearbyLocations,description,furnishDetails,features,rating
0,5 Bedroom House for sale in Sector 70A Gurgaon,bptp visionnaire,5.25,20115.0,(242 sq.m.) Plot Area,Plot area 290(242.48 sq.m.),5 Bedrooms,4 Bathrooms,3+ Balconies,Servant Room,"29b, Sector 70A Gurgaon, Gurgaon, Haryana",3 Floors,North-East,0 to 1 Year Old,"['Good Earth City Center 2', 'Kunskapsskolan I...",Do you wish to buy an independent house in sec...,"['1 Wardrobe', '1 Fan', '1 Exhaust Fan', '1 Ge...","['Feng Shui / Vaastu Compliant', 'Private Gard...","['Environment5 out of 5', 'Lifestyle4 out of 5..."
1,5 Bedroom House for sale in Sector 21A Faridabad,independent,5.7,105751.0,(50 sq.m.) Plot Area,Plot area 539(50.07 sq.m.),5 Bedrooms,4 Bathrooms,2 Balconies,"Store Room,Pooja Room,Servant Room","Sector 21A Faridabad, Gurgaon, Haryana",2 Floors,,5 to 10 Year Old,,"Hi, we have an independent house/villa availab...","['1 Water Purifier', '5 Fan', '1 Exhaust Fan',...","['Private Garden / Terrace', 'Park', 'Visitor ...",
2,10 Bedroom House for sale in Sushant Lok Phase 1,independent,2.1,38251.0,(51 sq.m.) Plot Area,Plot area 61(51 sq.m.),10 Bedrooms,10 Bathrooms,3+ Balconies,Servant Room,"Sushant Lok Phase 1, Gurgaon, Haryana",5 Floors,West,0 to 1 Year Old,"['Sector 42-43 metro station', 'Huda city cent...","Monthly rental income is rs1,40,000/- Best opt...","['10 Bed', '3 Fan', '10 Geyser', '2 Light', 'N...","['Maintenance Staff', 'Water Storage', 'Visito...","['Environment5 out of 5', 'Lifestyle5 out of 5..."
3,21 Bedroom House for sale in Sector 54 Gurgaon,independent,5.0,43066.0,(108 sq.m.) Plot Area,Plot area 129(107.86 sq.m.),21 Bedrooms,21 Bathrooms,3+ Balconies,Servant Room,"Sector 54 Gurgaon, Gurgaon, Haryana",5 Floors,North,1 to 5 Year Old,"['Sector 53-54 metro station', 'Sector 54 chow...","129 sq yd plot size. 5 floors built up , fully...","['1 Water Purifier', '21 Fan', '1 Fridge', '1 ...","['Feng Shui / Vaastu Compliant', 'Private Gard...","['Environment4 out of 5', 'Lifestyle5 out of 5..."
4,12 Bedroom House for sale in Sushant Lok Phase 1,independent,3.0,53763.0,(52 sq.m.) Plot Area,Plot area 62(51.84 sq.m.),12 Bedrooms,12 Bathrooms,3+ Balconies,Others,"1228, Sushant Lok Phase 1, Gurgaon, Haryana",5 Floors,West,Within 6 months,"['Sector 42-43 metro station', 'Huda city cent...",Best for investment purpose. Fully furnished b...,"['1 Water Purifier', '1 Fridge', '12 Fan', '1 ...","['Maintenance Staff', 'Water Storage', 'Visito...","['Environment5 out of 5', 'Lifestyle5 out of 5..."


In [473]:
df['bedRoom'].value_counts()

4 Bedrooms     232
5 Bedrooms     194
3 Bedrooms     146
2 Bedrooms      88
6 Bedrooms      81
9 Bedrooms      45
1 Bedroom       38
8 Bedrooms      33
7 Bedrooms      31
12 Bedrooms     28
10 Bedrooms     21
16 Bedrooms     12
13 Bedrooms      4
18 Bedrooms      2
19 Bedrooms      2
11 Bedrooms      2
20 Bedrooms      1
21 Bedrooms      1
36 Bedrooms      1
34 Bedrooms      1
14 Bedrooms      1
Name: bedRoom, dtype: int64

In [479]:
df[df['bedRoom'].isnull()]

Unnamed: 0,property_name,society,price,price_per_sqft,area,areaWithType,bedRoom,bathroom,balcony,additionalRoom,address,noOfFloor,facing,agePossession,nearbyLocations,description,furnishDetails,features,rating
62,5 Bedroom House for sale in Sector 109 Gurgaon,international city by sobha phase 2,,13622.0,(467-681 sq.m.) Built-up Area,,,,,,"Sector 109 Gurgaon, Gurgaon, Haryana",,,,"['Dwarka Sector 21 Metro Station', 'Pacific D2...",International city by sobha phase 2 in sector-...,,,"['Environment5 out of 5', 'Lifestyle4 out of 5..."
63,4 Bedroom House for sale in Sector 109 Gurgaon,international city by sobha phase 2,,13909.0,(384-392 sq.m.) Built-up Area,,,,,,"Sector 109 Gurgaon, Gurgaon, Haryana",,,,"['Dwarka Sector 21 Metro Station', 'Pacific D2...",International city by sobha phase 2 in sector-...,,,"['Environment5 out of 5', 'Lifestyle4 out of 5..."
66,5 Bedroom House for sale in Sector 66 Gurgaon,emaar mgf marbella,,16058.0,(606-754 sq.m.) Built-up Area,,,,,,"Sector 66 Gurgaon, Gurgaon, Haryana",,,,"['Sector 55-56 Rapid Metro Station', 'HUB 66',...","Emaar mgf marbella in sector-66, gurgaon by em...",,,"['Environment3 out of 5', 'Lifestyle4 out of 5..."
67,4 Bedroom House for sale in Sector 66 Gurgaon,emaar mgf marbella,,16058.0,(521 sq.m.) Built-up Area,,,,,,"Sector 66 Gurgaon, Gurgaon, Haryana",,,,"['Sector 55-56 Rapid Metro Station', 'HUB 66',...","Emaar mgf marbella in sector-66, gurgaon by em...",,,"['Environment3 out of 5', 'Lifestyle4 out of 5..."
68,5 Bedroom House for sale in Sector 109 Gurgaon,international city by sobha phase 2,,13623.0,(467-681 sq.m.) Built-up Area,,,,,,"Sector 109 Gurgaon, Gurgaon, Haryana",,,,"['Dwarka Sector 21 Metro Station', 'Pacific D2...",International city by sobha phase 2 in sector-...,,,"['Environment5 out of 5', 'Lifestyle4 out of 5..."
69,4 Bedroom House for sale in Sector 109 Gurgaon,international city by sobha phase 2,,13910.0,(294-418 sq.m.) Built-up Area,,,,,,"Sector 109 Gurgaon, Gurgaon, Haryana",,,,"['Dwarka Sector 21 Metro Station', 'Pacific D2...",International city by sobha phase 2 in sector-...,,,"['Environment5 out of 5', 'Lifestyle4 out of 5..."
70,3 Bedroom House for sale in Sector 109 Gurgaon,international city by sobha phase 2,,13910.0,(293 sq.m.) Built-up Area,,,,,,"Sector 109 Gurgaon, Gurgaon, Haryana",,,,"['Dwarka Sector 21 Metro Station', 'Pacific D2...",International city by sobha phase 2 in sector-...,,,"['Environment5 out of 5', 'Lifestyle4 out of 5..."
71,4 Bedroom House for sale in Sector 112 Gurgaon,experion windchants3.8,,12027.0,(590-687 sq.m.) Built-up Area,,,,,,"Sector 112 Gurgaon, Gurgaon, Haryana",,,,"['Ansal Plaza', 'Dwaraka Expressway', 'First S...","Experion windchants in sector 112, gurgaon by ...",,,"['Environment5 out of 5', 'Lifestyle4 out of 5..."
72,4 Bedroom House for sale in Sector 66 Gurgaon,emaar mgf marbella,,15700.0,(521 sq.m.) Built-up Area,,,,,,"Sector 66 Gurgaon, Gurgaon, Haryana",,,,"['Sector 55-56 Rapid Metro Station', 'HUB 66',...","Emaar mgf marbella in sector-66, gurgaon by em...",,,"['Environment3 out of 5', 'Lifestyle4 out of 5..."
73,5 Bedroom House for sale in Sector 66 Gurgaon,emaar mgf marbella,,19708.0,(606 sq.m.) Built-up Area,,,,,,"Sector 66 Gurgaon, Gurgaon, Haryana",,,,"['Sector 55-56 Rapid Metro Station', 'HUB 66',...","Emaar mgf marbella in sector-66, gurgaon by em...",,,"['Environment3 out of 5', 'Lifestyle4 out of 5..."


In [487]:
df = df[~df['bedRoom'].isnull()]

In [489]:
df['bedRoom'] = df['bedRoom'].str.split(' ').str.get(0)

In [490]:
df.shape

(964, 19)

In [493]:
df['bathroom'].value_counts()

4 Bathrooms     228
3 Bathrooms     165
5 Bathrooms     137
2 Bathrooms     114
6 Bathrooms      82
1 Bathroom       65
9 Bathrooms      42
7 Bathrooms      39
8 Bathrooms      28
12 Bathrooms     22
10 Bathrooms     11
16 Bathrooms      8
18 Bathrooms      4
13 Bathrooms      4
11 Bathrooms      4
20 Bathrooms      3
17 Bathrooms      3
14 Bathrooms      2
21 Bathrooms      1
36 Bathrooms      1
34 Bathrooms      1
Name: bathroom, dtype: int64

In [496]:
 df['bathroom'] = df['bathroom'].str.split(' ').str.get(0)

In [497]:
df.head(2)

Unnamed: 0,property_name,society,price,price_per_sqft,area,areaWithType,bedRoom,bathroom,balcony,additionalRoom,address,noOfFloor,facing,agePossession,nearbyLocations,description,furnishDetails,features,rating
0,5 Bedroom House for sale in Sector 70A Gurgaon,bptp visionnaire,5.25,20115.0,(242 sq.m.) Plot Area,Plot area 290(242.48 sq.m.),5,4,3+ Balconies,Servant Room,"29b, Sector 70A Gurgaon, Gurgaon, Haryana",3 Floors,North-East,0 to 1 Year Old,"['Good Earth City Center 2', 'Kunskapsskolan I...",Do you wish to buy an independent house in sec...,"['1 Wardrobe', '1 Fan', '1 Exhaust Fan', '1 Ge...","['Feng Shui / Vaastu Compliant', 'Private Gard...","['Environment5 out of 5', 'Lifestyle4 out of 5..."
1,5 Bedroom House for sale in Sector 21A Faridabad,independent,5.7,105751.0,(50 sq.m.) Plot Area,Plot area 539(50.07 sq.m.),5,4,2 Balconies,"Store Room,Pooja Room,Servant Room","Sector 21A Faridabad, Gurgaon, Haryana",2 Floors,,5 to 10 Year Old,,"Hi, we have an independent house/villa availab...","['1 Water Purifier', '5 Fan', '1 Exhaust Fan',...","['Private Garden / Terrace', 'Park', 'Visitor ...",


In [498]:
df['balcony'].value_counts()

3+ Balconies    365
2 Balconies     220
3 Balconies     159
No Balcony      115
1 Balcony       105
Name: balcony, dtype: int64

In [502]:
df['balcony'] = df['balcony'].str.split(' ').str.get(0).str.replace('No','0')

In [503]:
df.head(2)

Unnamed: 0,property_name,society,price,price_per_sqft,area,areaWithType,bedRoom,bathroom,balcony,additionalRoom,address,noOfFloor,facing,agePossession,nearbyLocations,description,furnishDetails,features,rating
0,5 Bedroom House for sale in Sector 70A Gurgaon,bptp visionnaire,5.25,20115.0,(242 sq.m.) Plot Area,Plot area 290(242.48 sq.m.),5,4,3+,Servant Room,"29b, Sector 70A Gurgaon, Gurgaon, Haryana",3 Floors,North-East,0 to 1 Year Old,"['Good Earth City Center 2', 'Kunskapsskolan I...",Do you wish to buy an independent house in sec...,"['1 Wardrobe', '1 Fan', '1 Exhaust Fan', '1 Ge...","['Feng Shui / Vaastu Compliant', 'Private Gard...","['Environment5 out of 5', 'Lifestyle4 out of 5..."
1,5 Bedroom House for sale in Sector 21A Faridabad,independent,5.7,105751.0,(50 sq.m.) Plot Area,Plot area 539(50.07 sq.m.),5,4,2,"Store Room,Pooja Room,Servant Room","Sector 21A Faridabad, Gurgaon, Haryana",2 Floors,,5 to 10 Year Old,,"Hi, we have an independent house/villa availab...","['1 Water Purifier', '5 Fan', '1 Exhaust Fan',...","['Private Garden / Terrace', 'Park', 'Visitor ...",


In [505]:
df['additionalRoom'].value_counts()

Servant Room                                     81
Pooja Room,Study Room,Servant Room,Store Room    60
Others                                           58
Pooja Room                                       38
Pooja Room,Study Room,Servant Room               33
Pooja Room,Study Room,Servant Room,Others        32
Store Room                                       31
Pooja Room,Servant Room                          24
Study Room                                       19
Study Room,Servant Room                          19
Servant Room,Store Room                          15
Pooja Room,Study Room                            11
Pooja Room,Store Room                            11
Pooja Room,Others                                11
Servant Room,Pooja Room                           8
Servant Room,Others                               8
Pooja Room,Store Room,Study Room,Servant Room     8
Pooja Room,Servant Room,Store Room                7
Store Room,Servant Room                           6
Study Room,S

In [506]:
df['additionalRoom'].isnull().sum()

386

In [510]:
df['additionalRoom'].fillna('not available',inplace = True)

In [511]:
df['additionalRoom'].isnull().sum()

0

In [513]:
df['additionalRoom'] = df['additionalRoom'].str.lower()

In [514]:
df.head(2)

Unnamed: 0,property_name,society,price,price_per_sqft,area,areaWithType,bedRoom,bathroom,balcony,additionalRoom,address,noOfFloor,facing,agePossession,nearbyLocations,description,furnishDetails,features,rating
0,5 Bedroom House for sale in Sector 70A Gurgaon,bptp visionnaire,5.25,20115.0,(242 sq.m.) Plot Area,Plot area 290(242.48 sq.m.),5,4,3+,servant room,"29b, Sector 70A Gurgaon, Gurgaon, Haryana",3 Floors,North-East,0 to 1 Year Old,"['Good Earth City Center 2', 'Kunskapsskolan I...",Do you wish to buy an independent house in sec...,"['1 Wardrobe', '1 Fan', '1 Exhaust Fan', '1 Ge...","['Feng Shui / Vaastu Compliant', 'Private Gard...","['Environment5 out of 5', 'Lifestyle4 out of 5..."
1,5 Bedroom House for sale in Sector 21A Faridabad,independent,5.7,105751.0,(50 sq.m.) Plot Area,Plot area 539(50.07 sq.m.),5,4,2,"store room,pooja room,servant room","Sector 21A Faridabad, Gurgaon, Haryana",2 Floors,,5 to 10 Year Old,,"Hi, we have an independent house/villa availab...","['1 Water Purifier', '5 Fan', '1 Exhaust Fan',...","['Private Garden / Terrace', 'Park', 'Visitor ...",


In [515]:
df['noOfFloor'].value_counts()

3 Floors     304
2 Floors     288
1 Floors     181
4 Floors     136
5 Floors       9
0 Floors       6
14 Floors      3
6 Floors       3
11 Floors      2
12 Floors      2
10 Floors      2
16 Floors      1
51 Floors      1
41 Floors      1
27 Floors      1
20 Floors      1
13 Floors      1
21 Floors      1
02 Floors      1
33 Floors      1
Name: noOfFloor, dtype: int64

In [516]:
df['noOfFloor'].isnull().sum()

19

In [519]:
df['noOfFloor'] = df['noOfFloor'].str.split(' ').str.get(0)

In [520]:
df.rename(columns = {'noOfFloor' :  'floorNum'},inplace = True)

In [521]:
df.head(2)

Unnamed: 0,property_name,society,price,price_per_sqft,area,areaWithType,bedRoom,bathroom,balcony,additionalRoom,address,floorNum,facing,agePossession,nearbyLocations,description,furnishDetails,features,rating
0,5 Bedroom House for sale in Sector 70A Gurgaon,bptp visionnaire,5.25,20115.0,(242 sq.m.) Plot Area,Plot area 290(242.48 sq.m.),5,4,3+,servant room,"29b, Sector 70A Gurgaon, Gurgaon, Haryana",3,North-East,0 to 1 Year Old,"['Good Earth City Center 2', 'Kunskapsskolan I...",Do you wish to buy an independent house in sec...,"['1 Wardrobe', '1 Fan', '1 Exhaust Fan', '1 Ge...","['Feng Shui / Vaastu Compliant', 'Private Gard...","['Environment5 out of 5', 'Lifestyle4 out of 5..."
1,5 Bedroom House for sale in Sector 21A Faridabad,independent,5.7,105751.0,(50 sq.m.) Plot Area,Plot area 539(50.07 sq.m.),5,4,2,"store room,pooja room,servant room","Sector 21A Faridabad, Gurgaon, Haryana",2,,5 to 10 Year Old,,"Hi, we have an independent house/villa availab...","['1 Water Purifier', '5 Fan', '1 Exhaust Fan',...","['Private Garden / Terrace', 'Park', 'Visitor ...",


In [522]:
df.drop(columns = ['facing'],inplace = True)

In [523]:
df.head(2)

Unnamed: 0,property_name,society,price,price_per_sqft,area,areaWithType,bedRoom,bathroom,balcony,additionalRoom,address,floorNum,agePossession,nearbyLocations,description,furnishDetails,features,rating
0,5 Bedroom House for sale in Sector 70A Gurgaon,bptp visionnaire,5.25,20115.0,(242 sq.m.) Plot Area,Plot area 290(242.48 sq.m.),5,4,3+,servant room,"29b, Sector 70A Gurgaon, Gurgaon, Haryana",3,0 to 1 Year Old,"['Good Earth City Center 2', 'Kunskapsskolan I...",Do you wish to buy an independent house in sec...,"['1 Wardrobe', '1 Fan', '1 Exhaust Fan', '1 Ge...","['Feng Shui / Vaastu Compliant', 'Private Gard...","['Environment5 out of 5', 'Lifestyle4 out of 5..."
1,5 Bedroom House for sale in Sector 21A Faridabad,independent,5.7,105751.0,(50 sq.m.) Plot Area,Plot area 539(50.07 sq.m.),5,4,2,"store room,pooja room,servant room","Sector 21A Faridabad, Gurgaon, Haryana",2,5 to 10 Year Old,,"Hi, we have an independent house/villa availab...","['1 Water Purifier', '5 Fan', '1 Exhaust Fan',...","['Private Garden / Terrace', 'Park', 'Visitor ...",


In [524]:
df.insert(loc = 1,column = 'Property_type',value = 'house')

In [525]:
df.head(2)

Unnamed: 0,property_name,Property_type,society,price,price_per_sqft,area,areaWithType,bedRoom,bathroom,balcony,additionalRoom,address,floorNum,agePossession,nearbyLocations,description,furnishDetails,features,rating
0,5 Bedroom House for sale in Sector 70A Gurgaon,house,bptp visionnaire,5.25,20115.0,(242 sq.m.) Plot Area,Plot area 290(242.48 sq.m.),5,4,3+,servant room,"29b, Sector 70A Gurgaon, Gurgaon, Haryana",3,0 to 1 Year Old,"['Good Earth City Center 2', 'Kunskapsskolan I...",Do you wish to buy an independent house in sec...,"['1 Wardrobe', '1 Fan', '1 Exhaust Fan', '1 Ge...","['Feng Shui / Vaastu Compliant', 'Private Gard...","['Environment5 out of 5', 'Lifestyle4 out of 5..."
1,5 Bedroom House for sale in Sector 21A Faridabad,house,independent,5.7,105751.0,(50 sq.m.) Plot Area,Plot area 539(50.07 sq.m.),5,4,2,"store room,pooja room,servant room","Sector 21A Faridabad, Gurgaon, Haryana",2,5 to 10 Year Old,,"Hi, we have an independent house/villa availab...","['1 Water Purifier', '5 Fan', '1 Exhaust Fan',...","['Private Garden / Terrace', 'Park', 'Visitor ...",


In [526]:
df['area'] = round((df['price']*10000000)/df['price_per_sqft'])

In [527]:
df.head(2)

Unnamed: 0,property_name,Property_type,society,price,price_per_sqft,area,areaWithType,bedRoom,bathroom,balcony,additionalRoom,address,floorNum,agePossession,nearbyLocations,description,furnishDetails,features,rating
0,5 Bedroom House for sale in Sector 70A Gurgaon,house,bptp visionnaire,5.25,20115.0,2610.0,Plot area 290(242.48 sq.m.),5,4,3+,servant room,"29b, Sector 70A Gurgaon, Gurgaon, Haryana",3,0 to 1 Year Old,"['Good Earth City Center 2', 'Kunskapsskolan I...",Do you wish to buy an independent house in sec...,"['1 Wardrobe', '1 Fan', '1 Exhaust Fan', '1 Ge...","['Feng Shui / Vaastu Compliant', 'Private Gard...","['Environment5 out of 5', 'Lifestyle4 out of 5..."
1,5 Bedroom House for sale in Sector 21A Faridabad,house,independent,5.7,105751.0,539.0,Plot area 539(50.07 sq.m.),5,4,2,"store room,pooja room,servant room","Sector 21A Faridabad, Gurgaon, Haryana",2,5 to 10 Year Old,,"Hi, we have an independent house/villa availab...","['1 Water Purifier', '5 Fan', '1 Exhaust Fan',...","['Private Garden / Terrace', 'Park', 'Visitor ...",


In [528]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 964 entries, 0 to 1043
Data columns (total 19 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   property_name    964 non-null    object 
 1   Property_type    964 non-null    object 
 2   society          964 non-null    object 
 3   price            945 non-null    float64
 4   price_per_sqft   945 non-null    float64
 5   area             945 non-null    float64
 6   areaWithType     964 non-null    object 
 7   bedRoom          964 non-null    object 
 8   bathroom         964 non-null    object 
 9   balcony          964 non-null    object 
 10  additionalRoom   964 non-null    object 
 11  address          959 non-null    object 
 12  floorNum         945 non-null    object 
 13  agePossession    964 non-null    object 
 14  nearbyLocations  848 non-null    object 
 15  description      964 non-null    object 
 16  furnishDetails   729 non-null    object 
 17  features       

In [529]:
df.to_csv('house_cleaned.csv',index = False)

## Merge both the data set flat  and house

In [532]:
flat = pd.read_csv('flat_cleaned.csv')
house = pd.read_csv('house_cleaned.csv')

In [533]:
flat.head(1)

Unnamed: 0,property_name,Property_type,society,price,price_per_sqft,area,areaWithType,bedRoom,bathroom,balcony,additionalRoom,address,floorNum,agePossession,nearbyLocations,description,furnishDetails,features,rating
0,2 BHK Flat in Krishna Colony,flat,maa bhagwati residency,0.45,5000.0,900.0,Carpet area: 900 (83.61 sq.m.),2,2,1,not available,"Krishna Colony, Gurgaon, Haryana",4.0,1 to 5 Year Old,"['Chintapurni Mandir', 'State bank ATM', 'Pear...",So with lift.Maa bhagwati residency is one of ...,"['3 Fan', '4 Light', '1 Wardrobe', 'No AC', 'N...","['Feng Shui / Vaastu Compliant', 'Security / F...","['Environment4 out of 5', 'Safety4 out of 5', ..."


In [534]:
house.head(1)

Unnamed: 0,property_name,Property_type,society,price,price_per_sqft,area,areaWithType,bedRoom,bathroom,balcony,additionalRoom,address,floorNum,agePossession,nearbyLocations,description,furnishDetails,features,rating
0,5 Bedroom House for sale in Sector 70A Gurgaon,house,bptp visionnaire,5.25,20115.0,2610.0,Plot area 290(242.48 sq.m.),5,4,3+,servant room,"29b, Sector 70A Gurgaon, Gurgaon, Haryana",3.0,0 to 1 Year Old,"['Good Earth City Center 2', 'Kunskapsskolan I...",Do you wish to buy an independent house in sec...,"['1 Wardrobe', '1 Fan', '1 Exhaust Fan', '1 Ge...","['Feng Shui / Vaastu Compliant', 'Private Gard...","['Environment5 out of 5', 'Lifestyle4 out of 5..."


In [535]:
print(flat.shape)
print(house.shape)

(2997, 19)
(964, 19)


In [543]:
merged = pd.concat([flat,house],ignore_index = True)

In [537]:
merged.shape

(3961, 19)

In [542]:
merged.sample()

Unnamed: 0,property_name,Property_type,society,price,price_per_sqft,area,areaWithType,bedRoom,bathroom,balcony,additionalRoom,address,floorNum,agePossession,nearbyLocations,description,furnishDetails,features,rating
1879,4 BHK Flat in Sector 62 Gurgaon,flat,pioneer araya,9.95,20995.0,4739.0,Super Built up area 4739(440.27 sq.m.)Carpet a...,4,6,3,"pooja room,servant room","Sector 62 Gurgaon, Gurgaon, Haryana",12.0,1 to 5 Year Old,"['Paras Trinity Shopping Mall', 'Golf Course E...",Living at the 54 really is like living in a wo...,"['6 Fan', '10 Light', '7 AC', '1 Chimney', '1 ...","['Security / Fire Alarm', 'Power Back-up', 'Pr...","['Green Area5 out of 5', 'Construction4 out of..."


In [544]:
merged.to_csv('gurgaon_properties.csv',index = False)

##  clean this merged data

In [548]:
df = pd.read_csv('gurgaon_properties.csv')

In [550]:
df.head()

Unnamed: 0,property_name,Property_type,society,price,price_per_sqft,area,areaWithType,bedRoom,bathroom,balcony,additionalRoom,address,floorNum,agePossession,nearbyLocations,description,furnishDetails,features,rating
0,2 BHK Flat in Krishna Colony,flat,maa bhagwati residency,0.45,5000.0,900.0,Carpet area: 900 (83.61 sq.m.),2,2,1,not available,"Krishna Colony, Gurgaon, Haryana",4.0,1 to 5 Year Old,"['Chintapurni Mandir', 'State bank ATM', 'Pear...",So with lift.Maa bhagwati residency is one of ...,"['3 Fan', '4 Light', '1 Wardrobe', 'No AC', 'N...","['Feng Shui / Vaastu Compliant', 'Security / F...","['Environment4 out of 5', 'Safety4 out of 5', ..."
1,2 BHK Flat in Ashok Vihar,flat,apna enclave,0.5,7692.0,650.0,Carpet area: 650 (60.39 sq.m.),2,2,1,not available,"46b, Ashok Vihar, Gurgaon, Haryana",1.0,10+ Year Old,"['Chintapurni Mandir', 'Sheetla Mata Mandir', ...","Property situated on main road, railway statio...","['3 Wardrobe', '4 Fan', '1 Exhaust Fan', '1 Ge...","['Security / Fire Alarm', 'Maintenance Staff',...","['Environment4 out of 5', 'Safety4 out of 5', ..."
2,2 BHK Flat in Sohna,flat,tulsiani easy in homes,0.4,6722.0,595.0,Carpet area: 595 (55.28 sq.m.),2,2,3,not available,"Sohna, Gurgaon, Haryana",12.0,0 to 1 Year Old,"['Huda City Metro', 'Golf Course extn road', '...","This property is 15 km away from badshapur, gu...",,"['Power Back-up', 'Feng Shui / Vaastu Complian...","['Environment4 out of 5', 'Safety4 out of 5', ..."
3,2 BHK Flat in Sector 61 Gurgaon,flat,smart world orchard,1.47,12250.0,1200.0,Carpet area: 1200 (111.48 sq.m.),2,2,2,study room,"Sector 61 Gurgaon, Gurgaon, Haryana",2.0,Dec 2023,"['Sector 55-56 Metro station', 'Bestech Centra...",Near to metro station of sector 56 and opposit...,,"['Security / Fire Alarm', 'Private Garden / Te...",
4,2 BHK Flat in Sector 92 Gurgaon,flat,parkwood westend,0.7,5204.0,1345.0,Super Built up area 1345(124.95 sq.m.),2,2,3,study room,"Sector 92 Gurgaon, Gurgaon, Haryana",5.0,Under Construction,"['Yadav Clinic', 'Bangali Clinic', 'Dr. J. S. ...",We are the proud owners of this 2 bhk alongwit...,[],,"['Environment5 out of 5', 'Safety3 out of 5', ..."


In [552]:
df.duplicated().sum()

2

In [553]:
df.shape

(3961, 19)

In [554]:
df.insert(loc=3,column='sector',value=df['property_name'].str.split('in').str.get(1).str.replace('Gurgaon','').str.strip())

In [555]:
df.head()

Unnamed: 0,property_name,Property_type,society,sector,price,price_per_sqft,area,areaWithType,bedRoom,bathroom,balcony,additionalRoom,address,floorNum,agePossession,nearbyLocations,description,furnishDetails,features,rating
0,2 BHK Flat in Krishna Colony,flat,maa bhagwati residency,Krishna Colony,0.45,5000.0,900.0,Carpet area: 900 (83.61 sq.m.),2,2,1,not available,"Krishna Colony, Gurgaon, Haryana",4.0,1 to 5 Year Old,"['Chintapurni Mandir', 'State bank ATM', 'Pear...",So with lift.Maa bhagwati residency is one of ...,"['3 Fan', '4 Light', '1 Wardrobe', 'No AC', 'N...","['Feng Shui / Vaastu Compliant', 'Security / F...","['Environment4 out of 5', 'Safety4 out of 5', ..."
1,2 BHK Flat in Ashok Vihar,flat,apna enclave,Ashok Vihar,0.5,7692.0,650.0,Carpet area: 650 (60.39 sq.m.),2,2,1,not available,"46b, Ashok Vihar, Gurgaon, Haryana",1.0,10+ Year Old,"['Chintapurni Mandir', 'Sheetla Mata Mandir', ...","Property situated on main road, railway statio...","['3 Wardrobe', '4 Fan', '1 Exhaust Fan', '1 Ge...","['Security / Fire Alarm', 'Maintenance Staff',...","['Environment4 out of 5', 'Safety4 out of 5', ..."
2,2 BHK Flat in Sohna,flat,tulsiani easy in homes,Sohna,0.4,6722.0,595.0,Carpet area: 595 (55.28 sq.m.),2,2,3,not available,"Sohna, Gurgaon, Haryana",12.0,0 to 1 Year Old,"['Huda City Metro', 'Golf Course extn road', '...","This property is 15 km away from badshapur, gu...",,"['Power Back-up', 'Feng Shui / Vaastu Complian...","['Environment4 out of 5', 'Safety4 out of 5', ..."
3,2 BHK Flat in Sector 61 Gurgaon,flat,smart world orchard,Sector 61,1.47,12250.0,1200.0,Carpet area: 1200 (111.48 sq.m.),2,2,2,study room,"Sector 61 Gurgaon, Gurgaon, Haryana",2.0,Dec 2023,"['Sector 55-56 Metro station', 'Bestech Centra...",Near to metro station of sector 56 and opposit...,,"['Security / Fire Alarm', 'Private Garden / Te...",
4,2 BHK Flat in Sector 92 Gurgaon,flat,parkwood westend,Sector 92,0.7,5204.0,1345.0,Super Built up area 1345(124.95 sq.m.),2,2,3,study room,"Sector 92 Gurgaon, Gurgaon, Haryana",5.0,Under Construction,"['Yadav Clinic', 'Bangali Clinic', 'Dr. J. S. ...",We are the proud owners of this 2 bhk alongwit...,[],,"['Environment5 out of 5', 'Safety3 out of 5', ..."


In [556]:
df['sector'] = df['sector'].str.lower()

In [557]:
df['sector'] = df['sector'].str.replace('dharam colony','sector 12')
df['sector'] = df['sector'].str.replace('krishna colony','sector 7')
df['sector'] = df['sector'].str.replace('suncity','sector 54')
df['sector'] = df['sector'].str.replace('prem nagar','sector 13')
df['sector'] = df['sector'].str.replace('mg road','sector 28')
df['sector'] = df['sector'].str.replace('gandhi nagar','sector 28')
df['sector'] = df['sector'].str.replace('laxmi garden','sector 11')
df['sector'] = df['sector'].str.replace('shakti nagar','sector 11')
df['sector'] = df['sector'].str.replace('baldev nagar','sector 7')
df['sector'] = df['sector'].str.replace('shivpuri','sector 7')
df['sector'] = df['sector'].str.replace('garhi harsaru','sector 17')
df['sector'] = df['sector'].str.replace('imt manesar','manesar')
df['sector'] = df['sector'].str.replace('adarsh nagar','sector 12')
df['sector'] = df['sector'].str.replace('shivaji nagar','sector 11')
df['sector'] = df['sector'].str.replace('bhim nagar','sector 6')
df['sector'] = df['sector'].str.replace('madanpuri','sector 7')
df['sector'] = df['sector'].str.replace('saraswati vihar','sector 28')
df['sector'] = df['sector'].str.replace('arjun nagar','sector 8')
df['sector'] = df['sector'].str.replace('ravi nagar','sector 9')
df['sector'] = df['sector'].str.replace('vishnu garden','sector 105')
df['sector'] = df['sector'].str.replace('bhondsi','sector 11')
df['sector'] = df['sector'].str.replace('surya vihar','sector 21')
df['sector'] = df['sector'].str.replace('devilal colony','sector 9')
df['sector'] = df['sector'].str.replace('valley view estate','gwal pahari')
df['sector'] = df['sector'].str.replace('mehrauli  road','sector 14')
df['sector'] = df['sector'].str.replace('jyoti park','sector 7')
df['sector'] = df['sector'].str.replace('ansal plaza','sector 23')
df['sector'] = df['sector'].str.replace('dayanand colony','sector 6')
df['sector'] = df['sector'].str.replace('sushant lok phase 2','sector 55')
df['sector'] = df['sector'].str.replace('chakkarpur','sector 28')
df['sector'] = df['sector'].str.replace('greenwood city','sector 45')
df['sector'] = df['sector'].str.replace('subhash nagar','sector 12')
df['sector'] = df['sector'].str.replace('sohna road road','sohna road')
df['sector'] = df['sector'].str.replace('malibu town','sector 47')
df['sector'] = df['sector'].str.replace('surat nagar 1','sector 104')
df['sector'] = df['sector'].str.replace('new colony','sector 7')
df['sector'] = df['sector'].str.replace('mianwali colony','sector 12')
df['sector'] = df['sector'].str.replace('jacobpura','sector 12')
df['sector'] = df['sector'].str.replace('rajiv nagar','sector 13')
df['sector'] = df['sector'].str.replace('ashok vihar','sector 3')
df['sector'] = df['sector'].str.replace('dlf phase 1','sector 26')
df['sector'] = df['sector'].str.replace('nirvana country','sector 50')
df['sector'] = df['sector'].str.replace('palam vihar','sector 2')
df['sector'] = df['sector'].str.replace('dlf phase 2','sector 25')
df['sector'] = df['sector'].str.replace('sushant lok phase 1','sector 43')
df['sector'] = df['sector'].str.replace('laxman vihar','sector 4')
df['sector'] = df['sector'].str.replace('dlf phase 4','sector 28')
df['sector'] = df['sector'].str.replace('dlf phase 3','sector 24')
df['sector'] = df['sector'].str.replace('sushant lok phase 3','sector 57')
df['sector'] = df['sector'].str.replace('dlf phase 5','sector 43')
df['sector'] = df['sector'].str.replace('rajendra park','sector 105')
df['sector'] = df['sector'].str.replace('uppals southend','sector 49')
df['sector'] = df['sector'].str.replace('sohna','sohna road')
df['sector'] = df['sector'].str.replace('ashok vihar phase 3 extension','sector 5')
df['sector'] = df['sector'].str.replace('south city 1','sector 41')
df['sector'] = df['sector'].str.replace('ashok vihar phase 2','sector 5')

In [558]:
a = df['sector'].value_counts()[df['sector'].value_counts() >= 3]
df = df[df['sector'].isin(a.index)]

In [559]:
df['sector'] = df['sector'].str.replace('sector 95a','sector 95')
df['sector'] = df['sector'].str.replace('sector 23a','sector 23')
df['sector'] = df['sector'].str.replace('sector 12a','sector 12')
df['sector'] = df['sector'].str.replace('sector 3a','sector 3')
df['sector'] = df['sector'].str.replace('sector 110 a','sector 110')
df['sector'] = df['sector'].str.replace('patel nagar','sector 15')
df['sector'] = df['sector'].str.replace('a block sector 43','sector 43')
df['sector'] = df['sector'].str.replace('maruti kunj','sector 12')
df['sector'] = df['sector'].str.replace('b block sector 43','sector 43')
df['sector'] = df['sector'].str.replace('sector-33 sohna road','sector 33')
df['sector'] = df['sector'].str.replace('sector 1 manesar','manesar')
df['sector'] = df['sector'].str.replace('sector 4 phase 2','sector 4')
df['sector'] = df['sector'].str.replace('sector 1a manesar','manesar')
df['sector'] = df['sector'].str.replace('c block sector 43','sector 43')
df['sector'] = df['sector'].str.replace('sector 89 a','sector 89')
df['sector'] = df['sector'].str.replace('sector 2 extension','sector 2')
df['sector'] = df['sector'].str.replace('sector 36 sohna road','sector 36')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['sector'] = df['sector'].str.replace('sector 95a','sector 95')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['sector'] = df['sector'].str.replace('sector 23a','sector 23')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['sector'] = df['sector'].str.replace('sector 12a','sector 12')
A value

In [560]:
df[df['sector'] == 'new']

Unnamed: 0,property_name,Property_type,society,sector,price,price_per_sqft,area,areaWithType,bedRoom,bathroom,balcony,additionalRoom,address,floorNum,agePossession,nearbyLocations,description,furnishDetails,features,rating
2045,4 BHK Flat in New Gurgaon,flat,dlf 76,new,4.0,11428.0,3500.0,Carpet area: 3500 (325.16 sq.m.),4,4,2,"study room,servant room","New Gurgaon, Gurgaon, Haryana",4.0,Jun 2027,"['Shri Balaji Hospital and Trauma Center', 'S....",This lovely 4 bhk apartment/flat in new gurgao...,"['6 Wardrobe', '1 Fridge', '8 Fan', '1 Exhaust...","['Security / Fire Alarm', 'Feng Shui / Vaastu ...",
2803,2 BHK Flat in New Gurgaon,flat,takshila heights sector 37 c,new,0.67,5583.0,1200.0,Super Built up area 1200(111.48 sq.m.),2,2,2,not available,"New Gurgaon, Gurgaon, Haryana",3.0,1 to 5 Year Old,"['Shri Balaji Hospital and Trauma Center', 'S....",Check out this 2 bhk apartment for sale in tak...,[],"['Lift(s)', 'Swimming Pool', 'Visitor Parking'...",
2895,2 BHK Flat in New Gurgaon,flat,green court,new,0.38,5507.0,690.0,Carpet area: 690 (64.1 sq.m.),2,2,1,not available,"New Gurgaon, Gurgaon, Haryana",7.0,Under Construction,"['Ing bank ATM', 'Dcb bank ATM', 'Indus ind ba...",We are the proud owners of this 2 bhk apartmen...,[],"['Intercom Facility', 'Lift(s)', 'Maintenance ...",
2960,4 BHK Flat in New Gurgaon,flat,sare homes,new,0.85,4786.0,1776.0,Super Built up area 1776(165 sq.m.),4,4,3,not available,"New Gurgaon, Gurgaon, Haryana",3.0,5 to 10 Year Old,"['Columbia Asia Hospital', 'Apex Multi Special...",Located in the popular residential address of ...,[],,


In [561]:
df.loc[2803,'sector'] = 'sector 37'
df.loc[2960,'sector'] = 'sector 92'
df.loc[2895,'sector'] = 'sector 90'
df.loc[2045,'sector'] = 'sector 76'

In [562]:
df[df['sector'] == 'new sector 2']

Unnamed: 0,property_name,Property_type,society,sector,price,price_per_sqft,area,areaWithType,bedRoom,bathroom,balcony,additionalRoom,address,floorNum,agePossession,nearbyLocations,description,furnishDetails,features,rating
30,2 BHK Flat in New Palam Vihar,flat,ompee k s residency,new sector 2,1.6,26936.0,594.0,Carpet area: 66 (55.18 sq.m.),2,2,2,not available,"New Palam Vihar, Gurgaon, Haryana",1.0,1 to 5 Year Old,"['Palam Vihar Vyapar kendra', 'Palam triangle'...",We are the proud owners of this 2 bhk apartmen...,,,"['Environment4 out of 5', 'Safety4 out of 5', ..."
88,2 BHK Flat in New Palam Vihar,flat,my home,new sector 2,0.28,3166.0,884.0,Carpet area: 900 (83.61 sq.m.),2,1,1,others,"F 150/b, New Palam Vihar, Gurgaon, Haryana",2.0,1 to 5 Year Old,"['Palam Vihar Vyapar kendra', 'Palam triangle'...","2 bhk room with wooden coverd ,1 drawing room,...","['3 Wardrobe', '5 Light', '1 Chimney', '1 Modu...","['Water Storage', 'Park']","['Environment4 out of 5', 'Safety4 out of 5', ..."
2842,2 BHK Flat in New Palam Vihar,flat,my home,new sector 2,0.22,4400.0,500.0,Carpet area: 500 (46.45 sq.m.),2,2,1,not available,"New Palam Vihar, Gurgaon, Haryana",1.0,0 to 1 Year Old,"['Palam Vihar Vyapar kendra', 'Palam triangle'...",Cctv surveillance are provided here. There is ...,"['3 Fan', '1 Exhaust Fan', '15 Light', '1 Modu...",,"['Safety4 out of 5', 'Lifestyle4 out of 5', 'E..."
3168,2 Bedroom House for sale in New Palam Vihar,house,my home,new sector 2,0.34,12592.0,270.0,Plot area 270(25.08 sq.m.),2,2,2,not available,"Ez-19 A, New Palam Vihar, Gurgaon, Haryana",3.0,5 to 10 Year Old,"['Palam Vihar Vyapar kendra', 'Palam triangle'...",There are availability of various facilities l...,"['1 Wardrobe', '3 Fan', '6 Light', 'No AC', 'N...","['Water Storage', 'Park', 'Visitor Parking']","['Environment4 out of 5', 'Lifestyle4 out of 5..."
3274,3 Bedroom House for sale in New Palam Vihar,house,independent,new sector 2,1.0,8796.0,1137.0,Plot area 120(100.34 sq.m.)Built Up area: 120 ...,3,2,2,pooja room,"Q-148, New Palam Vihar, Phase-2, Near Royal Oa...",1.0,10+ Year Old,"['Palam Vihar Vyapar kendra', 'Palam triangle'...","Ground and first floor, Ground floor: Ground f...",,,"['Environment4 out of 5', 'Lifestyle4 out of 5..."


In [563]:
df.loc[[30,88,2842,3168,3274],'sector'] = 'sector 110'

In [564]:
df.shape

(3803, 20)

In [565]:

df.duplicated().sum()

2

In [566]:
# features to drop -> property_name, address, description, rating
df.drop(columns=['property_name', 'address', 'description', 'rating'],inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop(columns=['property_name', 'address', 'description', 'rating'],inplace=True)


In [567]:
df.duplicated().sum()

124

In [568]:
df.to_csv('gurgaon_properties_cleaned_v1.csv',index=False)