In [1]:
import pandas as pd
import sklearn

In [2]:
sklearn.__version__

'1.5.0'

In [3]:
from pathlib import Path
df = pd.read_csv(Path("../data/raw/flats.csv"))

In [4]:
df.head()

Unnamed: 0,property_name,link,society,price,area,areaWithType,bedRoom,bathroom,balcony,additionalRoom,address,floorNum,facing,agePossession,nearbyLocations,description,furnishDetails,features,rating,property_id
0,2 BHK Flat in Krishna Colony,https://www.99acres.com/2-bhk-bedroom-apartmen...,maa bhagwati residency,45 Lac,"₹ 5,000/sq.ft.",Carpet area: 900 (83.61 sq.m.),2 Bedrooms,2 Bathrooms,1 Balcony,,"Krishna Colony, Gurgaon, Haryana",4th of 4 Floors,West,1 to 5 Year Old,"['Chintapurni Mandir', 'State bank ATM', 'Pear...",So with lift.Maa bhagwati residency is one of ...,"['3 Fan', '4 Light', '1 Wardrobe', 'No AC', 'N...","['Feng Shui / Vaastu Compliant', 'Security / F...","['Environment4 out of 5', 'Safety4 out of 5', ...",C68850746
1,2 BHK Flat in Ashok Vihar,https://www.99acres.com/2-bhk-bedroom-apartmen...,Apna Enclave,50 Lac,"₹ 7,692/sq.ft.",Carpet area: 650 (60.39 sq.m.),2 Bedrooms,2 Bathrooms,1 Balcony,,"46b, Ashok Vihar, Gurgaon, Haryana",1st of 3 Floors,West,10+ Year Old,"['Chintapurni Mandir', 'Sheetla Mata Mandir', ...","Property situated on main road, railway statio...","['3 Wardrobe', '4 Fan', '1 Exhaust Fan', '1 Ge...","['Security / Fire Alarm', 'Maintenance Staff',...","['Environment4 out of 5', 'Safety4 out of 5', ...",H68850564
2,2 BHK Flat in Sohna,https://www.99acres.com/2-bhk-bedroom-apartmen...,Tulsiani Easy in Homes,40 Lac,"₹ 6,722/sq.ft.",Carpet area: 595 (55.28 sq.m.),2 Bedrooms,2 Bathrooms,3 Balconies,,"Sohna, Gurgaon, Haryana",12nd of 14 Floors,,0 to 1 Year Old,"['Huda City Metro', 'Golf Course extn road', '...","This property is 15 km away from badshapur, gu...",,"['Power Back-up', 'Feng Shui / Vaastu Complian...","['Environment4 out of 5', 'Safety4 out of 5', ...",J68850120
3,2 BHK Flat in Sector 61 Gurgaon,https://www.99acres.com/2-bhk-bedroom-apartmen...,Smart World Orchard,1.47 Crore,"₹ 12,250/sq.ft.",Carpet area: 1200 (111.48 sq.m.),2 Bedrooms,2 Bathrooms,2 Balconies,Study Room,"Sector 61 Gurgaon, Gurgaon, Haryana",2nd of 4 Floors,,Dec 2023,"['Sector 55-56 Metro station', 'Bestech Centra...",Near to metro station of sector 56 and opposit...,,"['Security / Fire Alarm', 'Private Garden / Te...",,S68849476
4,2 BHK Flat in Sector 92 Gurgaon,https://www.99acres.com/2-bhk-bedroom-apartmen...,Parkwood Westend,70 Lac,"₹ 5,204/sq.ft.",Super Built up area 1345(124.95 sq.m.),2 Bedrooms,2 Bathrooms,3 Balconies,Study Room,"Sector 92 Gurgaon, Gurgaon, Haryana",5th of 8 Floors,,Under Construction,"['Yadav Clinic', 'Bangali Clinic', 'Dr. J. S. ...",We are the proud owners of this 2 bhk alongwit...,[],,"['Environment5 out of 5', 'Safety3 out of 5', ...",L47956793


In [5]:
df.shape

(3028, 20)

In [6]:
# to drop link col no need 
df.drop(columns=['link','property_id'],inplace=True)

In [7]:
# Now let's clean each column one by one
# rename columns
df.rename(columns={'area':'price_per_sqft'},inplace=True)

In [8]:
df['society'].value_counts()

society
SS The Leaf3.8 ★                             73
Tulip Violet4.3 ★                            40
Shapoorji Pallonji Joyville Gurugram4.0 ★    39
Signature Global Park4.0 ★                   36
Shree Vardhman Victoria3.8 ★                 35
                                             ..
shree kirpalu gi sector 52                    1
Meditech Apartment                            1
Mariners Home                                 1
IMT View Society                              1
Spire Woods Now Ananda by Alpha corp          1
Name: count, Length: 639, dtype: int64

In [9]:
import re

class DataCleaning:
    
    def __init__(self,df) -> None:
        self.df = df
    
    def split_bhk_area(self,prop_name):
        split_function = lambda x: pd.Series([x.split(' ')[0], ' '.join(x.split(' ')[1:])])
        
        df[['BHK','society1']] = df[prop_name].apply(split_function)
        df.drop(columns=[prop_name],inplace=True)
        
        return df
    
    def conv_price_unit_to_cr(self,prop_name):
        
        checks = ['Lacs','lac','LAKHS','lakh','Lakhs','lakhs','lacs','LACS','Lac']
        
        
        def convert_price(price):
            try:
                parts = str(price).split(' ')
                if len(parts)==2 and parts[1] in checks:
                    return float(parts[0])/100
                elif len(parts)==2 :
                    return parts[0]
                return price
            except (ValueError, AttributeError):
                return price
        
        self.df[prop_name] = self.df[prop_name].apply(convert_price)
        #renaming the column name
        self.df.rename(columns={prop_name:'price_in_cr'},inplace=True)
        # checked value_counts and got to know that this string was problem in the logic
        self.df = self.df[self.df['price_in_cr'] != 'Price on Request']
        return self.df
    
    def area_clean(self, area_feature):
        cleaned_values = []
        for val in df[area_feature]:
            try:
                # Ensuring the value is a string 
                val_str = str(val)
                # using the regular expression to remove unwanted characters
                cleaned_val = re.sub(r'[₹\s,/sq.ft]', '', val_str)
                # convert the cleaned value to integer
                cleaned_int = int(cleaned_val)
                cleaned_values.append(cleaned_int)
            except ValueError:
                #Handle the case where conversion to int fails
                cleaned_values.append(None)
            
        df['price_per_sqft'] = cleaned_values
        
        return df

      
  

In [10]:
DC = DataCleaning(df)
df = DC.split_bhk_area("property_name")

In [11]:
df.head()

Unnamed: 0,society,price,price_per_sqft,areaWithType,bedRoom,bathroom,balcony,additionalRoom,address,floorNum,facing,agePossession,nearbyLocations,description,furnishDetails,features,rating,BHK,society1
0,maa bhagwati residency,45 Lac,"₹ 5,000/sq.ft.",Carpet area: 900 (83.61 sq.m.),2 Bedrooms,2 Bathrooms,1 Balcony,,"Krishna Colony, Gurgaon, Haryana",4th of 4 Floors,West,1 to 5 Year Old,"['Chintapurni Mandir', 'State bank ATM', 'Pear...",So with lift.Maa bhagwati residency is one of ...,"['3 Fan', '4 Light', '1 Wardrobe', 'No AC', 'N...","['Feng Shui / Vaastu Compliant', 'Security / F...","['Environment4 out of 5', 'Safety4 out of 5', ...",2,BHK Flat in Krishna Colony
1,Apna Enclave,50 Lac,"₹ 7,692/sq.ft.",Carpet area: 650 (60.39 sq.m.),2 Bedrooms,2 Bathrooms,1 Balcony,,"46b, Ashok Vihar, Gurgaon, Haryana",1st of 3 Floors,West,10+ Year Old,"['Chintapurni Mandir', 'Sheetla Mata Mandir', ...","Property situated on main road, railway statio...","['3 Wardrobe', '4 Fan', '1 Exhaust Fan', '1 Ge...","['Security / Fire Alarm', 'Maintenance Staff',...","['Environment4 out of 5', 'Safety4 out of 5', ...",2,BHK Flat in Ashok Vihar
2,Tulsiani Easy in Homes,40 Lac,"₹ 6,722/sq.ft.",Carpet area: 595 (55.28 sq.m.),2 Bedrooms,2 Bathrooms,3 Balconies,,"Sohna, Gurgaon, Haryana",12nd of 14 Floors,,0 to 1 Year Old,"['Huda City Metro', 'Golf Course extn road', '...","This property is 15 km away from badshapur, gu...",,"['Power Back-up', 'Feng Shui / Vaastu Complian...","['Environment4 out of 5', 'Safety4 out of 5', ...",2,BHK Flat in Sohna
3,Smart World Orchard,1.47 Crore,"₹ 12,250/sq.ft.",Carpet area: 1200 (111.48 sq.m.),2 Bedrooms,2 Bathrooms,2 Balconies,Study Room,"Sector 61 Gurgaon, Gurgaon, Haryana",2nd of 4 Floors,,Dec 2023,"['Sector 55-56 Metro station', 'Bestech Centra...",Near to metro station of sector 56 and opposit...,,"['Security / Fire Alarm', 'Private Garden / Te...",,2,BHK Flat in Sector 61 Gurgaon
4,Parkwood Westend,70 Lac,"₹ 5,204/sq.ft.",Super Built up area 1345(124.95 sq.m.),2 Bedrooms,2 Bathrooms,3 Balconies,Study Room,"Sector 92 Gurgaon, Gurgaon, Haryana",5th of 8 Floors,,Under Construction,"['Yadav Clinic', 'Bangali Clinic', 'Dr. J. S. ...",We are the proud owners of this 2 bhk alongwit...,[],,"['Environment5 out of 5', 'Safety3 out of 5', ...",2,BHK Flat in Sector 92 Gurgaon


In [12]:
df = DC.conv_price_unit_to_cr('price')


In [13]:
df.sample(2)

Unnamed: 0,society,price_in_cr,price_per_sqft,areaWithType,bedRoom,bathroom,balcony,additionalRoom,address,floorNum,facing,agePossession,nearbyLocations,description,furnishDetails,features,rating,BHK,society1
41,Ashiana Mulberry,0.74,"₹ 6,200/sq.ft.",Carpet area: 1210 (112.41 sq.m.),2 Bedrooms,2 Bathrooms,3+ Balconies,,"Sohna, Gurgaon, Haryana",12nd of 17 Floors,North-West,0 to 1 Year Old,"['Discount Department Store', 'Badshahpur Sohn...",Available with fully furnished kitchen and mas...,"['1 Modular Kitchen', 'No AC', 'No Bed', 'No C...","['Security / Fire Alarm', 'Intercom Facility',...","['Environment4 out of 5', 'Safety4 out of 5', ...",2,BHK Flat in Sohna
1893,La Vida by Tata Housing,2.35,"₹ 10,599/sq.ft.",Super Built up area 2217(205.97 sq.m.)Built Up...,3 Bedrooms,3 Bathrooms,3 Balconies,Servant Room,"Sector 113 Gurgaon , Gurgaon, Haryana",11st of 25 Floors,North-East,0 to 1 Year Old,"['Dwarka Sector 21', 'Pacific D21 Mall', 'Bajg...",This 3 bhk flat is located in la vida by tata ...,"['1 Geyser', 'No AC', 'No Bed', 'No Chimney', ...","['Centrally Air Conditioned', 'Water purifier'...",,3,BHK Flat in Sector 113 Gurgaon


In [14]:
df['price_per_sqft'].value_counts()

price_per_sqft
₹ 10,000/sq.ft.    19
₹ 12,500/sq.ft.    16
₹ 8,000/sq.ft.     16
₹ 6,666/sq.ft.     13
₹ 5,000/sq.ft.     13
                   ..
₹ 12,401/sq.ft.     1
₹ 6,224/sq.ft.      1
₹ 8,179/sq.ft.      1
₹ 12,802/sq.ft.     1
₹ 35,222/sq.ft.     1
Name: count, Length: 2131, dtype: int64

In [15]:
df = DC.area_clean("price_per_sqft")


In [16]:
df.sample(2)

Unnamed: 0,society,price_in_cr,price_per_sqft,areaWithType,bedRoom,bathroom,balcony,additionalRoom,address,floorNum,facing,agePossession,nearbyLocations,description,furnishDetails,features,rating,BHK,society1
881,Bestech Park View Residency3.9 ★,0.981,6932.0,Super Built up area 1415(131.46 sq.m.),2 Bedrooms,2 Bathrooms,3 Balconies,,"Palam Vihar, Gurgaon, Haryana",11st of 14 Floors,East,5 to 10 Year Old,"['Dwarka Sector 21 Metro Station', 'HUDA Marke...",Bestech park view residency is one of gurgaon'...,"['2 Wardrobe', '4 Fan', '1 Geyser', '5 Light',...","['Security / Fire Alarm', 'Power Back-up', 'Li...","['Green Area5 out of 5', 'Construction4 out of...",2,BHK Flat in Palam Vihar
2860,Uphaar Homes,0.45,4736.0,Carpet area: 950 (88.26 sq.m.),3 Bedrooms,2 Bathrooms,1 Balcony,,"Sector 105 Gurgaon, Gurgaon, Haryana",1st of 4 Floors,,1 to 5 Year Old,"['Palam Vihar Vyapar kendra', 'Palam triangle'...","Well guarded, peace of mind, 24x7 water, nearb...","['5 Fan', '1 Exhaust Fan', '1 Geyser', '14 Lig...","['Power Back-up', 'Feng Shui / Vaastu Complian...","['Safety4 out of 5', 'Lifestyle4 out of 5', 'E...",3,BHK Flat in Sector 105 Gurgaon


In [17]:
df.isnull().sum()
# 20 flats are their about which price is not provided 
df = df[~df['price_in_cr'].isnull()]

In [18]:
df.isnull().sum()

society               1
price_in_cr           0
price_per_sqft        1
areaWithType          0
bedRoom               0
bathroom              0
balcony               0
additionalRoom     1304
address               6
floorNum              2
facing              874
agePossession         1
nearbyLocations      91
description           0
furnishDetails      796
features            407
rating              328
BHK                   0
society1              0
dtype: int64

In [19]:
df =df[~df['price_per_sqft'].isnull()]

In [20]:
df['bedRoom'].value_counts()
# easy to cleamn

bedRoom
3 Bedrooms    1437
2 Bedrooms     943
4 Bedrooms     478
1 Bedroom      104
5 Bedrooms      31
6 Bedrooms       3
Name: count, dtype: int64

In [21]:
df['bedRoom'] = df['bedRoom'].apply(lambda x: int(x.split(' ')[0]) if x else 0)
df['bathroom'] = df['bathroom'].apply(lambda x: int(x.split(' ')[0]) if x else 0)


In [22]:
#df['balcony'] = df['balcony'].apply(lambda x: int(x.split(' ')[0]) if x else 0)
df['balcony'].value_counts()

balcony
3 Balconies     973
3+ Balconies    862
2 Balconies     749
1 Balcony       315
No Balcony       97
Name: count, dtype: int64

In [23]:
def bal(count):
    if count.split(' ')[0]=="3+":
        return 3
    elif count.split(' ')[0]=="No":
        return 0
    else:
        return int(count.split(' ')[0])
        

In [24]:
df['balcony']= df['balcony'].apply(bal)

In [25]:
df.head()

Unnamed: 0,society,price_in_cr,price_per_sqft,areaWithType,bedRoom,bathroom,balcony,additionalRoom,address,floorNum,facing,agePossession,nearbyLocations,description,furnishDetails,features,rating,BHK,society1
0,maa bhagwati residency,0.45,5000.0,Carpet area: 900 (83.61 sq.m.),2,2,1,,"Krishna Colony, Gurgaon, Haryana",4th of 4 Floors,West,1 to 5 Year Old,"['Chintapurni Mandir', 'State bank ATM', 'Pear...",So with lift.Maa bhagwati residency is one of ...,"['3 Fan', '4 Light', '1 Wardrobe', 'No AC', 'N...","['Feng Shui / Vaastu Compliant', 'Security / F...","['Environment4 out of 5', 'Safety4 out of 5', ...",2,BHK Flat in Krishna Colony
1,Apna Enclave,0.5,7692.0,Carpet area: 650 (60.39 sq.m.),2,2,1,,"46b, Ashok Vihar, Gurgaon, Haryana",1st of 3 Floors,West,10+ Year Old,"['Chintapurni Mandir', 'Sheetla Mata Mandir', ...","Property situated on main road, railway statio...","['3 Wardrobe', '4 Fan', '1 Exhaust Fan', '1 Ge...","['Security / Fire Alarm', 'Maintenance Staff',...","['Environment4 out of 5', 'Safety4 out of 5', ...",2,BHK Flat in Ashok Vihar
2,Tulsiani Easy in Homes,0.4,6722.0,Carpet area: 595 (55.28 sq.m.),2,2,3,,"Sohna, Gurgaon, Haryana",12nd of 14 Floors,,0 to 1 Year Old,"['Huda City Metro', 'Golf Course extn road', '...","This property is 15 km away from badshapur, gu...",,"['Power Back-up', 'Feng Shui / Vaastu Complian...","['Environment4 out of 5', 'Safety4 out of 5', ...",2,BHK Flat in Sohna
3,Smart World Orchard,1.47,12250.0,Carpet area: 1200 (111.48 sq.m.),2,2,2,Study Room,"Sector 61 Gurgaon, Gurgaon, Haryana",2nd of 4 Floors,,Dec 2023,"['Sector 55-56 Metro station', 'Bestech Centra...",Near to metro station of sector 56 and opposit...,,"['Security / Fire Alarm', 'Private Garden / Te...",,2,BHK Flat in Sector 61 Gurgaon
4,Parkwood Westend,0.7,5204.0,Super Built up area 1345(124.95 sq.m.),2,2,3,Study Room,"Sector 92 Gurgaon, Gurgaon, Haryana",5th of 8 Floors,,Under Construction,"['Yadav Clinic', 'Bangali Clinic', 'Dr. J. S. ...",We are the proud owners of this 2 bhk alongwit...,[],,"['Environment5 out of 5', 'Safety3 out of 5', ...",2,BHK Flat in Sector 92 Gurgaon


In [26]:
df.insert(loc=3,column='sector',value=df['society1'].str.split('in').str.get(1).str.replace('Gurgaon','').str.strip())

In [27]:
df['sector'] = df['sector'].str.replace('dharam colony','sector 12')
df['sector'] = df['sector'].str.replace('krishna colony','sector 7')
df['sector'] = df['sector'].str.replace('suncity','sector 54')
df['sector'] = df['sector'].str.replace('prem nagar','sector 13')
df['sector'] = df['sector'].str.replace('mg road','sector 28')
df['sector'] = df['sector'].str.replace('gandhi nagar','sector 28')
df['sector'] = df['sector'].str.replace('laxmi garden','sector 11')
df['sector'] = df['sector'].str.replace('shakti nagar','sector 11')

In [28]:
df['sector'] = df['sector'].str.replace('baldev nagar','sector 7')
df['sector'] = df['sector'].str.replace('shivpuri','sector 7')
df['sector'] = df['sector'].str.replace('garhi harsaru','sector 17')
df['sector'] = df['sector'].str.replace('imt manesar','manesar')
df['sector'] = df['sector'].str.replace('adarsh nagar','sector 12')
df['sector'] = df['sector'].str.replace('shivaji nagar','sector 11')
df['sector'] = df['sector'].str.replace('bhim nagar','sector 6')
df['sector'] = df['sector'].str.replace('madanpuri','sector 7')

In [29]:
df['sector'] = df['sector'].str.replace('saraswati vihar','sector 28')
df['sector'] = df['sector'].str.replace('arjun nagar','sector 8')
df['sector'] = df['sector'].str.replace('ravi nagar','sector 9')
df['sector'] = df['sector'].str.replace('vishnu garden','sector 105')
df['sector'] = df['sector'].str.replace('bhondsi','sector 11')
df['sector'] = df['sector'].str.replace('surya vihar','sector 21')
df['sector'] = df['sector'].str.replace('devilal colony','sector 9')
df['sector'] = df['sector'].str.replace('valley view estate','gwal pahari')

In [30]:
df['sector'] = df['sector'].str.replace('mehrauli  road','sector 14')
df['sector'] = df['sector'].str.replace('jyoti park','sector 7')
df['sector'] = df['sector'].str.replace('ansal plaza','sector 23')
df['sector'] = df['sector'].str.replace('dayanand colony','sector 6')
df['sector'] = df['sector'].str.replace('sushant lok phase 2','sector 55')
df['sector'] = df['sector'].str.replace('chakkarpur','sector 28')
df['sector'] = df['sector'].str.replace('greenwood city','sector 45')
df['sector'] = df['sector'].str.replace('subhash nagar','sector 12')

In [31]:
df['sector'] = df['sector'].str.replace('sohna road road','sohna road')
df['sector'] = df['sector'].str.replace('malibu town','sector 47')
df['sector'] = df['sector'].str.replace('surat nagar 1','sector 104')
df['sector'] = df['sector'].str.replace('new colony','sector 7')
df['sector'] = df['sector'].str.replace('mianwali colony','sector 12')
df['sector'] = df['sector'].str.replace('jacobpura','sector 12')
df['sector'] = df['sector'].str.replace('rajiv nagar','sector 13')
df['sector'] = df['sector'].str.replace('ashok vihar','sector 3')

In [32]:
df['sector'] = df['sector'].str.replace('dlf phase 1','sector 26')
df['sector'] = df['sector'].str.replace('nirvana country','sector 50')
df['sector'] = df['sector'].str.replace('palam vihar','sector 2')
df['sector'] = df['sector'].str.replace('dlf phase 2','sector 25')
df['sector'] = df['sector'].str.replace('sushant lok phase 1','sector 43')
df['sector'] = df['sector'].str.replace('laxman vihar','sector 4')
df['sector'] = df['sector'].str.replace('dlf phase 4','sector 28')
df['sector'] = df['sector'].str.replace('dlf phase 3','sector 24')

In [33]:
df['sector'] = df['sector'].str.replace('sushant lok phase 3','sector 57')
df['sector'] = df['sector'].str.replace('dlf phase 5','sector 43')
df['sector'] = df['sector'].str.replace('rajendra park','sector 105')
df['sector'] = df['sector'].str.replace('uppals southend','sector 49')
df['sector'] = df['sector'].str.replace('sohna','sohna road')
df['sector'] = df['sector'].str.replace('ashok vihar phase 3 extension','sector 5')
df['sector'] = df['sector'].str.replace('south city 1','sector 41')
df['sector'] = df['sector'].str.replace('ashok vihar phase 2','sector 5')

In [37]:
import re
df['society'] = df['society'].apply(lambda name: re.sub(r'\d+(\.\d+)?\s?★', '', str(name)).strip()).str.lower()

In [39]:
df.drop(columns=['society1','address'],inplace=True)

In [43]:
df[df['additionalRoom'].isnull()]

Unnamed: 0,society,price_in_cr,price_per_sqft,sector,areaWithType,bedRoom,bathroom,balcony,additionalRoom,floorNum,facing,agePossession,nearbyLocations,description,furnishDetails,features,rating,BHK
0,maa bhagwati residency,0.45,5000.0,Krishna Colony,Carpet area: 900 (83.61 sq.m.),2,2,1,,4th of 4 Floors,West,1 to 5 Year Old,"['Chintapurni Mandir', 'State bank ATM', 'Pear...",So with lift.Maa bhagwati residency is one of ...,"['3 Fan', '4 Light', '1 Wardrobe', 'No AC', 'N...","['Feng Shui / Vaastu Compliant', 'Security / F...","['Environment4 out of 5', 'Safety4 out of 5', ...",2
1,apna enclave,0.5,7692.0,Ashok Vihar,Carpet area: 650 (60.39 sq.m.),2,2,1,,1st of 3 Floors,West,10+ Year Old,"['Chintapurni Mandir', 'Sheetla Mata Mandir', ...","Property situated on main road, railway statio...","['3 Wardrobe', '4 Fan', '1 Exhaust Fan', '1 Ge...","['Security / Fire Alarm', 'Maintenance Staff',...","['Environment4 out of 5', 'Safety4 out of 5', ...",2
2,tulsiani easy in homes,0.4,6722.0,Sohna,Carpet area: 595 (55.28 sq.m.),2,2,3,,12nd of 14 Floors,,0 to 1 Year Old,"['Huda City Metro', 'Golf Course extn road', '...","This property is 15 km away from badshapur, gu...",,"['Power Back-up', 'Feng Shui / Vaastu Complian...","['Environment4 out of 5', 'Safety4 out of 5', ...",2
5,signature global infinity mall,0.41,6269.0,Sector 36,Built Up area: 654 (60.76 sq.m.),2,2,3,,3rd of 3 Floors,,undefined,,Best in class property available at sector 36 ...,,,,2
6,the cocoon,2,13333.0,Dwarka Expressway,Super Built up area 1500(139.35 sq.m.),3,3,3,,5th of 25 Floors,,0 to 1 Year Old,"['Shri Multispeciality Hospital', 'Esic Hospit...",Residential apartment for sell.The property co...,[],,,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3017,pareena mi casa,1.7,8500.0,Sector 68,Super Built up area 1999(185.71 sq.m.),4,4,3,,23rd of 36 Floors,,0 to 1 Year Old,"['Sector 55-56 Metro Station', 'Airia Mall', '...",The property is on the 23rd floor with an road...,"['1 Modular Kitchen', 'No AC', 'No Bed', 'No C...","['Security / Fire Alarm', 'Feng Shui / Vaastu ...","['Safety5 out of 5', 'Lifestyle4 out of 5', 'E...",4
3018,palam vihar society,0.4,8602.0,Palam Vihar,Super Built up area 465(43.2 sq.m.)Built Up ar...,1,1,1,,5th of 16 Floors,North,5 to 10 Year Old,"['Palam Vihar Vyapar kendra', 'Palam triangle'...","One of the best locations with malls, schools,...","['1 Water Purifier', '1 Fan', '1 Fridge', '1 E...","['Centrally Air Conditioned', 'Lift(s)', 'Park...","['Safety4 out of 5', 'Lifestyle5 out of 5', 'E...",1
3019,m3m woodshire,1.4,5929.0,Sector 107,Super Built up area 2361(219.34 sq.m.),3,4,0,,1st of 14 Floors,East,0 to 1 Year Old,,Looking for a good 3 bhk apartment in sector 1...,"['1 Light', 'No AC', 'No Bed', 'No Chimney', '...","['Power Back-up', 'Intercom Facility', 'Lift(s...","['Safety1 out of 5', 'Lifestyle2 out of 5', 'E...",3
3020,krishna appartment,0.35,3500.0,Dharam Colony,Super Built up area 1000(92.9 sq.m.),3,3,1,,2nd of 5 Floors,,10+ Year Old,"['Palam triangle', 'Palam Vihar Vyapar kendra'...",Residential apartment for sell.Located on 2nd ...,"['3 Wardrobe', '3 Fan', '3 Light', 'No AC', 'N...",,"['Safety4 out of 5', 'Lifestyle4 out of 5', 'E...",3


In [44]:
df['additionalRoom'].value_counts()

additionalRoom
Servant Room                                     629
Study Room                                       232
Others                                           179
Pooja Room                                       132
Study Room,Servant Room                           81
Store Room                                        76
Pooja Room,Servant Room                           60
Servant Room,Others                               52
Servant Room,Pooja Room                           30
Study Room,Others                                 27
Pooja Room,Study Room,Servant Room,Others         25
Pooja Room,Study Room,Servant Room                24
Servant Room,Store Room                           19
Pooja Room,Study Room                             13
Pooja Room,Study Room,Servant Room,Store Room     12
Study Room,Pooja Room                              8
Servant Room,Study Room                            8
Study Room,Servant Room,Store Room                 7
Pooja Room,Store Room          

In [47]:
df.isnull().sum()

society               0
price_in_cr           0
price_per_sqft        0
sector                0
areaWithType          0
bedRoom               0
bathroom              0
balcony               0
additionalRoom     1304
floorNum              2
facing              874
agePossession         1
nearbyLocations      91
description           0
furnishDetails      796
features            407
rating              328
BHK                   0
dtype: int64

In [34]:
df['areaWithType'].value_counts()

# carpet area is the area that is actually used for living  (internal wall thickness counted and external not counted)
# builtup area = carpet area + external walls + service shafts + balconies
# super built up area = everything included 

areaWithType
Super Built up area 1350(125.42 sq.m.)                                                                                    17
Super Built up area 1578(146.6 sq.m.)                                                                                     17
Super Built up area 1950(181.16 sq.m.)Carpet area: 1161 sq.ft. (107.86 sq.m.)                                             17
Super Built up area 1650(153.29 sq.m.)Carpet area: 1022.58 sq.ft. (95 sq.m.)                                              15
Super Built up area 2010(186.74 sq.m.)                                                                                    14
                                                                                                                          ..
Super Built up area 1976(183.58 sq.m.)Built Up area: 1600 sq.ft. (148.64 sq.m.)Carpet area: 1400 sq.ft. (130.06 sq.m.)     1
Carpet area: 4328 (402.08 sq.m.)                                                                                