In [239]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 

In [240]:
pd.set_option("display.max_rows", None) 
pd.set_option("display.max_columns", None)

In [241]:
# import the data 
flats_df = pd.read_csv("Datasets/flats.csv")
flats_df.head()

Unnamed: 0,property_name,link,society,price,area,areaWithType,bedRoom,bathroom,balcony,additionalRoom,address,floorNum,facing,agePossession,nearbyLocations,description,furnishDetails,features,rating,property_id
0,2 BHK Flat in Krishna Colony,https://www.99acres.com/2-bhk-bedroom-apartmen...,maa bhagwati residency,45 Lac,"₹ 5,000/sq.ft.",Carpet area: 900 (83.61 sq.m.),2 Bedrooms,2 Bathrooms,1 Balcony,,"Krishna Colony, Gurgaon, Haryana",4th of 4 Floors,West,1 to 5 Year Old,"['Chintapurni Mandir', 'State bank ATM', 'Pear...",So with lift.Maa bhagwati residency is one of ...,"['3 Fan', '4 Light', '1 Wardrobe', 'No AC', 'N...","['Feng Shui / Vaastu Compliant', 'Security / F...","['Environment4 out of 5', 'Safety4 out of 5', ...",C68850746
1,2 BHK Flat in Ashok Vihar,https://www.99acres.com/2-bhk-bedroom-apartmen...,Apna Enclave,50 Lac,"₹ 7,692/sq.ft.",Carpet area: 650 (60.39 sq.m.),2 Bedrooms,2 Bathrooms,1 Balcony,,"46b, Ashok Vihar, Gurgaon, Haryana",1st of 3 Floors,West,10+ Year Old,"['Chintapurni Mandir', 'Sheetla Mata Mandir', ...","Property situated on main road, railway statio...","['3 Wardrobe', '4 Fan', '1 Exhaust Fan', '1 Ge...","['Security / Fire Alarm', 'Maintenance Staff',...","['Environment4 out of 5', 'Safety4 out of 5', ...",H68850564
2,2 BHK Flat in Sohna,https://www.99acres.com/2-bhk-bedroom-apartmen...,Tulsiani Easy in Homes,40 Lac,"₹ 6,722/sq.ft.",Carpet area: 595 (55.28 sq.m.),2 Bedrooms,2 Bathrooms,3 Balconies,,"Sohna, Gurgaon, Haryana",12nd of 14 Floors,,0 to 1 Year Old,"['Huda City Metro', 'Golf Course extn road', '...","This property is 15 km away from badshapur, gu...",,"['Power Back-up', 'Feng Shui / Vaastu Complian...","['Environment4 out of 5', 'Safety4 out of 5', ...",J68850120
3,2 BHK Flat in Sector 61 Gurgaon,https://www.99acres.com/2-bhk-bedroom-apartmen...,Smart World Orchard,1.47 Crore,"₹ 12,250/sq.ft.",Carpet area: 1200 (111.48 sq.m.),2 Bedrooms,2 Bathrooms,2 Balconies,Study Room,"Sector 61 Gurgaon, Gurgaon, Haryana",2nd of 4 Floors,,Dec 2023,"['Sector 55-56 Metro station', 'Bestech Centra...",Near to metro station of sector 56 and opposit...,,"['Security / Fire Alarm', 'Private Garden / Te...",,S68849476
4,2 BHK Flat in Sector 92 Gurgaon,https://www.99acres.com/2-bhk-bedroom-apartmen...,Parkwood Westend,70 Lac,"₹ 5,204/sq.ft.",Super Built up area 1345(124.95 sq.m.),2 Bedrooms,2 Bathrooms,3 Balconies,Study Room,"Sector 92 Gurgaon, Gurgaon, Haryana",5th of 8 Floors,,Under Construction,"['Yadav Clinic', 'Bangali Clinic', 'Dr. J. S. ...",We are the proud owners of this 2 bhk alongwit...,[],,"['Environment5 out of 5', 'Safety3 out of 5', ...",L47956793


### Basic EDA 

In [242]:
# check for number of rows and columns 
flats_df.shape

(3008, 20)

In [243]:
# get the overall information 
flats_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3008 entries, 0 to 3007
Data columns (total 20 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   property_name    3008 non-null   object
 1   link             3008 non-null   object
 2   society          3007 non-null   object
 3   price            3007 non-null   object
 4   area             2996 non-null   object
 5   areaWithType     3008 non-null   object
 6   bedRoom          3008 non-null   object
 7   bathroom         3008 non-null   object
 8   balcony          3008 non-null   object
 9   additionalRoom   1694 non-null   object
 10  address          3002 non-null   object
 11  floorNum         3006 non-null   object
 12  facing           2127 non-null   object
 13  agePossession    3007 non-null   object
 14  nearbyLocations  2913 non-null   object
 15  description      3008 non-null   object
 16  furnishDetails   2203 non-null   object
 17  features         2594 non-null   

In [244]:
# check for null values 
flats_df.isna().sum()

property_name         0
link                  0
society               1
price                 1
area                 12
areaWithType          0
bedRoom               0
bathroom              0
balcony               0
additionalRoom     1314
address               6
floorNum              2
facing              881
agePossession         1
nearbyLocations      95
description           0
furnishDetails      805
features            414
rating              332
property_id           0
dtype: int64

In [245]:
# check for duplicate values(rows) 
flats_df.duplicated().sum()

0

In [246]:
# Drop the unnessary columns 
# dropping the link and property id column 
flats_df.drop(columns=["link", "property_id"], inplace=True)

In [247]:
# rename the columns 
flats_df.rename(columns={'area': "price_per_sqft"}, inplace=True)

### Data Cleaning 

In [248]:
flats_df.sample()

Unnamed: 0,property_name,society,price,price_per_sqft,areaWithType,bedRoom,bathroom,balcony,additionalRoom,address,floorNum,facing,agePossession,nearbyLocations,description,furnishDetails,features,rating
1264,3 BHK Flat in Sector 49 Gurgaon,Vatika City4.1 ★,1.72 Crore,"₹ 7,818/sq.ft.",Carpet area: 2200 (204.39 sq.m.),3 Bedrooms,3 Bathrooms,2 Balconies,,"Sector 49 Gurgaon, Gurgaon, Haryana",2nd of 17 Floors,North-East,10+ Year Old,"['BOB ATM', 'PNB ATM', 'HDFC bank ATM', 'YES b...",Vatika city is one of gurgaon's most sought af...,"['1 Bed', '4 Fan', '1 Exhaust Fan', '10 Light'...","['Security / Fire Alarm', 'Feng Shui / Vaastu ...","['Green Area5 out of 5', 'Construction4 out of..."


In [249]:
# cleaning the society column (too many categories)
# some of the society names are containing the ratings along with their names 
flats_df["society"].value_counts().shape

(636,)

In [250]:
import re 
flats_df["society"] = flats_df["society"].apply(lambda name: re.sub(r'\d+(\.\d+)?\s?★', '', str(name)).strip()).str.lower()

In [251]:
# number of categories are reduced 
flats_df["society"].value_counts().shape

(602,)

In [252]:
# cleaning the price column, align the price to same scale (crores)  
flats_df[flats_df["price"] == "Price on Request"]

Unnamed: 0,property_name,society,price,price_per_sqft,areaWithType,bedRoom,bathroom,balcony,additionalRoom,address,floorNum,facing,agePossession,nearbyLocations,description,furnishDetails,features,rating
25,5 BHK Flat in Sector 1 Gurgaon,the new people apartments,Price on Request,,Built Up area: 4500 (418.06 sq.m.),5 Bedrooms,1 Bathroom,No Balcony,,"Sector 1, Sector 1 Gurgaon, Gurgaon, Haryana",Ground of 1 Floors,,undefined,"['Pooja Clinic', 'Dr. Sahil Clinic', 'Prakash ...",Best in class property available at sector 1 l...,,,
1644,4 BHK Flat in Sector 50 Gurgaon,ss hibiscus,Price on Request,,Carpet area: 4800 (445.93 sq.m.),4 Bedrooms,4 Bathrooms,3 Balconies,Servant Room,"Sector 50 Gurgaon, Gurgaon, Haryana",7th of 18 Floors,East,5 to 10 Year Old,"['Baani Square', 'SS Plaza', 'Good Earth City ...",4bedroom 4800sq.Ft middle floor apartment avai...,"['4 Fan', '8 Light', 'No AC', 'No Bed', 'No Ch...","['Lift(s)', 'Water purifier', 'Swimming Pool',...","['Environment5 out of 5', 'Lifestyle5 out of 5..."
1972,1 BHK Flat in Sector 31 Gurgaon,raheja atlantis,Price on Request,,Carpet area: 650 (60.39 sq.m.),1 Bedroom,1 Bathroom,1 Balcony,"Pooja Room,Study Room,Servant Room","Yes, Sector 31 Gurgaon, Gurgaon, Haryana",3rd of 15 Floors,East,0 to 1 Year Old,"['Huda Metro Station', 'Reliance Fresh, Indira...",It is my property and good condition and good ...,,"['Feng Shui / Vaastu Compliant', 'Security / F...","['Green Area4 out of 5', 'Construction4 out of..."
2054,2 BHK Flat in Sector 79 Gurgaon,supertech araville,Price on Request,,Built Up area: 1295 (120.31 sq.m.),2 Bedrooms,2 Bathrooms,No Balcony,,"Sector 79, Sector 79 Gurgaon, Gurgaon, Haryana",15th of 15 Floors,,undefined,,"2 bath, unfurnished, 15th floor (Of 18), super...",,,"['Environment4 out of 5', 'Lifestyle3 out of 5..."
2357,2 BHK Flat in Sector 37D Gurgaon,the millennia 2,Price on Request,,Carpet area: 552 (51.28 sq.m.),2 Bedrooms,1 Bathroom,1 Balcony,,"Sector 37D Gurgaon, Gurgaon, Haryana",13rd of 13 Floors,,undefined,"['Shri Balaji Hospital and Trauma Center', 'Sh...","1 bath, unfurnished, 13th floor (Of 13), at se...",,,"['Environment4 out of 5', 'Lifestyle4 out of 5..."
2522,2 BHK Flat in Sector 95A Gurgaon,the roselia 2,Price on Request,,Carpet area: 593 (55.09 sq.m.),2 Bedrooms,2 Bathrooms,2 Balconies,,"Sector 95a, Pataudi Road, Sector 95A Gurgaon, ...",14th of 14 Floors,,undefined,,Best in class property available at sector 95a...,,,"['Environment5 out of 5', 'Lifestyle4.5 out of..."
2549,3 BHK Flat in Sector-2 Sohna,supertech hilltown,Price on Request,,Built Up area: 1720 (159.79 sq.m.),3 Bedrooms,3 Bathrooms,3 Balconies,,"Sohna Sector 2,gurgaon, Sector-2 Sohna, Gurgao...",4th of 4 Floors,North-East,undefined,,"3 bath, unfurnished, 4th floor (Of 4), overloo...",,,
2640,2 BHK Flat in Sector 95A Gurgaon,the roselia 2,Price on Request,,Carpet area: 593 (55.09 sq.m.),2 Bedrooms,2 Bathrooms,2 Balconies,,"Sector 95a, Pataudi Road, Sector 95A Gurgaon, ...",14th of 14 Floors,,undefined,,Best in class property available at sector 95a...,,,"['Environment5 out of 5', 'Safety5 out of 5', ..."
2950,3 BHK Flat in Sector 104 Gurgaon,godrej summit,Price on Request,,Super Built up area 1647(153.01 sq.m.)Carpet a...,3 Bedrooms,3 Bathrooms,3 Balconies,,"1402, Sector 104 Gurgaon, Gurgaon, Haryana",14th of 17 Floors,,1 to 5 Year Old,"['MG Road Metro Station', 'The Esplanade Mall'...",We are the proud owners of this 3 bhk apartmen...,[],"['Intercom Facility', 'Lift(s)', 'Maintenance ...","['Management3 out of 5', 'Green Area4.5 out of..."
2963,4 BHK Flat in Gwal Pahari,krrish provence estate,Price on Request,,Built Up area: 5800 (538.84 sq.m.),4 Bedrooms,5 Bathrooms,1 Balcony,,"Gwal Pahari, Gurgaon, Haryana",17th of 24 Floors,,Under Construction,"['Tniy Toons School', 'Pathways School Gurgoan...",Residential apartment for sell.Located on 17th...,,,


In [253]:
flats_df[flats_df["price"] == "Price on Request"]

Unnamed: 0,property_name,society,price,price_per_sqft,areaWithType,bedRoom,bathroom,balcony,additionalRoom,address,floorNum,facing,agePossession,nearbyLocations,description,furnishDetails,features,rating
25,5 BHK Flat in Sector 1 Gurgaon,the new people apartments,Price on Request,,Built Up area: 4500 (418.06 sq.m.),5 Bedrooms,1 Bathroom,No Balcony,,"Sector 1, Sector 1 Gurgaon, Gurgaon, Haryana",Ground of 1 Floors,,undefined,"['Pooja Clinic', 'Dr. Sahil Clinic', 'Prakash ...",Best in class property available at sector 1 l...,,,
1644,4 BHK Flat in Sector 50 Gurgaon,ss hibiscus,Price on Request,,Carpet area: 4800 (445.93 sq.m.),4 Bedrooms,4 Bathrooms,3 Balconies,Servant Room,"Sector 50 Gurgaon, Gurgaon, Haryana",7th of 18 Floors,East,5 to 10 Year Old,"['Baani Square', 'SS Plaza', 'Good Earth City ...",4bedroom 4800sq.Ft middle floor apartment avai...,"['4 Fan', '8 Light', 'No AC', 'No Bed', 'No Ch...","['Lift(s)', 'Water purifier', 'Swimming Pool',...","['Environment5 out of 5', 'Lifestyle5 out of 5..."
1972,1 BHK Flat in Sector 31 Gurgaon,raheja atlantis,Price on Request,,Carpet area: 650 (60.39 sq.m.),1 Bedroom,1 Bathroom,1 Balcony,"Pooja Room,Study Room,Servant Room","Yes, Sector 31 Gurgaon, Gurgaon, Haryana",3rd of 15 Floors,East,0 to 1 Year Old,"['Huda Metro Station', 'Reliance Fresh, Indira...",It is my property and good condition and good ...,,"['Feng Shui / Vaastu Compliant', 'Security / F...","['Green Area4 out of 5', 'Construction4 out of..."
2054,2 BHK Flat in Sector 79 Gurgaon,supertech araville,Price on Request,,Built Up area: 1295 (120.31 sq.m.),2 Bedrooms,2 Bathrooms,No Balcony,,"Sector 79, Sector 79 Gurgaon, Gurgaon, Haryana",15th of 15 Floors,,undefined,,"2 bath, unfurnished, 15th floor (Of 18), super...",,,"['Environment4 out of 5', 'Lifestyle3 out of 5..."
2357,2 BHK Flat in Sector 37D Gurgaon,the millennia 2,Price on Request,,Carpet area: 552 (51.28 sq.m.),2 Bedrooms,1 Bathroom,1 Balcony,,"Sector 37D Gurgaon, Gurgaon, Haryana",13rd of 13 Floors,,undefined,"['Shri Balaji Hospital and Trauma Center', 'Sh...","1 bath, unfurnished, 13th floor (Of 13), at se...",,,"['Environment4 out of 5', 'Lifestyle4 out of 5..."
2522,2 BHK Flat in Sector 95A Gurgaon,the roselia 2,Price on Request,,Carpet area: 593 (55.09 sq.m.),2 Bedrooms,2 Bathrooms,2 Balconies,,"Sector 95a, Pataudi Road, Sector 95A Gurgaon, ...",14th of 14 Floors,,undefined,,Best in class property available at sector 95a...,,,"['Environment5 out of 5', 'Lifestyle4.5 out of..."
2549,3 BHK Flat in Sector-2 Sohna,supertech hilltown,Price on Request,,Built Up area: 1720 (159.79 sq.m.),3 Bedrooms,3 Bathrooms,3 Balconies,,"Sohna Sector 2,gurgaon, Sector-2 Sohna, Gurgao...",4th of 4 Floors,North-East,undefined,,"3 bath, unfurnished, 4th floor (Of 4), overloo...",,,
2640,2 BHK Flat in Sector 95A Gurgaon,the roselia 2,Price on Request,,Carpet area: 593 (55.09 sq.m.),2 Bedrooms,2 Bathrooms,2 Balconies,,"Sector 95a, Pataudi Road, Sector 95A Gurgaon, ...",14th of 14 Floors,,undefined,,Best in class property available at sector 95a...,,,"['Environment5 out of 5', 'Safety5 out of 5', ..."
2950,3 BHK Flat in Sector 104 Gurgaon,godrej summit,Price on Request,,Super Built up area 1647(153.01 sq.m.)Carpet a...,3 Bedrooms,3 Bathrooms,3 Balconies,,"1402, Sector 104 Gurgaon, Gurgaon, Haryana",14th of 17 Floors,,1 to 5 Year Old,"['MG Road Metro Station', 'The Esplanade Mall'...",We are the proud owners of this 3 bhk apartmen...,[],"['Intercom Facility', 'Lift(s)', 'Maintenance ...","['Management3 out of 5', 'Green Area4.5 out of..."
2963,4 BHK Flat in Gwal Pahari,krrish provence estate,Price on Request,,Built Up area: 5800 (538.84 sq.m.),4 Bedrooms,5 Bathrooms,1 Balcony,,"Gwal Pahari, Gurgaon, Haryana",17th of 24 Floors,,Under Construction,"['Tniy Toons School', 'Pathways School Gurgoan...",Residential apartment for sell.Located on 17th...,,,


In [254]:
# drop the above rows, as there is no price mention so no need to take this rows in consideration 
flats_df = flats_df[flats_df["price"] != "Price on Request"]

In [255]:
# scale down the price to same scale (here scale down the price to crore scale) 
def scale_price(p):
    if type(p) == float:
        return p
    else:
        if p[1] == "Lac":
            return round(float(p[0])/100, 2) 
        else:
            return round(float(p[0]), 2)

In [256]:
flats_df["price"] = flats_df["price"].str.split(' ').apply(scale_price)

In [292]:
flats_df["price"]

0        0.45
1        0.50
2        0.40
3        1.47
4        0.70
5        0.41
6        2.00
7        1.80
8        1.10
9        4.75
10       0.96
11       0.29
12       1.35
13       0.95
14       0.70
15       3.95
16       0.90
17       1.05
18        NaN
19       2.20
20       1.01
21       1.85
22       0.86
23       1.85
24       2.85
26       0.42
27       6.15
28       6.25
29       0.42
30       1.35
31       1.60
32       3.25
33       0.45
34       0.85
35       0.75
36       0.82
37       1.80
38       0.30
39       0.78
40       2.00
41       0.74
42       3.20
43       1.30
44       0.25
45       0.95
46       1.99
47       1.05
48       1.83
49       2.25
50       2.80
51       1.60
52       0.83
53       0.83
54       0.80
55       1.25
56       0.23
57       0.30
58       1.55
59       0.79
60       0.50
61       1.01
62       0.95
63       0.99
64       1.10
65       1.90
66       1.00
67       2.50
68       1.10
69       0.29
70       0.55
71       0.65
72    

In [257]:
# cleaning price_per_sqft 
flats_df["price_per_sqft"] = flats_df["price_per_sqft"].str.split('/').str.get(0).str.replace('₹', '').str.replace(',', '').str.strip().astype("float")

In [258]:
# cleaning the bedroom column 
flats_df["bedRoom"].value_counts()

3 Bedrooms    1437
2 Bedrooms     944
4 Bedrooms     478
1 Bedroom      104
5 Bedrooms      31
6 Bedrooms       3
Name: bedRoom, dtype: int64

In [259]:
flats_df["bedRoom"] = flats_df["bedRoom"].str.split(' ').str.get(0).str.strip().astype("int")

In [260]:
# cleaning the bathroom column 
flats_df["bathroom"] = flats_df["bathroom"].str.split(' ').str.get(0).str.strip().astype("int")

In [261]:
# cleaning the balcony column 
flats_df["balcony"] = flats_df["balcony"].str.split(' ').str.get(0).str.replace('+', '').str.replace("No", "0")

  flats_df["balcony"] = flats_df["balcony"].str.split(' ').str.get(0).str.replace('+', '').str.replace("No", "0")


In [262]:
flats_df["balcony"].value_counts()

3    1836
2     749
1     315
0      97
Name: balcony, dtype: int64

In [263]:
# cleaning the additionalroom column (this column needs feature engineering)
flats_df["additionalRoom"].value_counts()

Servant Room                                     629
Study Room                                       232
Others                                           179
Pooja Room                                       132
Study Room,Servant Room                           81
Store Room                                        76
Pooja Room,Servant Room                           60
Servant Room,Others                               52
Servant Room,Pooja Room                           30
Study Room,Others                                 27
Pooja Room,Study Room,Servant Room,Others         25
Pooja Room,Study Room,Servant Room                24
Servant Room,Store Room                           19
Pooja Room,Study Room                             13
Pooja Room,Study Room,Servant Room,Store Room     12
Study Room,Pooja Room                              8
Servant Room,Study Room                            8
Study Room,Servant Room,Store Room                 7
Pooja Room,Store Room                         

In [264]:
flats_df["additionalRoom"].isna().sum() # flats with no additional room 

1305

In [265]:
# fill the NaN with not available in additionalRoom column 
flats_df["additionalRoom"].fillna("not available", inplace=True)

In [266]:
# converting all the categories to lower case (as this might reduce the number of categories)
flats_df["additionalRoom"] = flats_df["additionalRoom"].str.lower()

In [267]:
# this column needs feature engineering 
flats_df["address"]

0                        Krishna Colony, Gurgaon, Haryana
1                      46b, Ashok Vihar, Gurgaon, Haryana
2                                 Sohna, Gurgaon, Haryana
3                     Sector 61 Gurgaon, Gurgaon, Haryana
4                     Sector 92 Gurgaon, Gurgaon, Haryana
5       Sohna Sector 36, Sector 36 Gurgaon, Gurgaon, H...
6             Dwarka Expressway Gurgaon, Gurgaon, Haryana
7                    Sector 104 Gurgaon, Gurgaon, Haryana
8              H 23, Sector 88B Gurgaon, Gurgaon, Haryana
9                     Sector 78 Gurgaon, Gurgaon, Haryana
10                    Sector 81 Gurgaon, Gurgaon, Haryana
11                                Sohna, Gurgaon, Haryana
12                    Sector 79 Gurgaon, Gurgaon, Haryana
13      Flat No. :- 301, Sector 33 Gurgaon, Gurgaon, H...
14                    Sector 90 Gurgaon, Gurgaon, Haryana
15       Sector 108, Sector 108 Gurgaon, Gurgaon, Haryana
16      Senate Court, Sector 62, Gurgaon, Sector 62 Gu...
17            

In [268]:
# cleaning the floorNum column 
flats_df["floorNum"].value_counts()

2nd   of 4 Floors           74
3rd   of 4 Floors           71
4th   of 4 Floors           62
1st   of 4 Floors           61
12nd   of 14 Floors         49
14th   of 14 Floors         48
Ground of 14 Floors         40
10th   of 14 Floors         35
7th   of 14 Floors          35
8th   of 14 Floors          34
4th   of 14 Floors          28
6th   of 14 Floors          27
2nd   of 2 Floors           26
1st   of 14 Floors          26
3rd   of 3 Floors           26
3rd   of 14 Floors          24
5th   of 14 Floors          24
8th   of 19 Floors          24
11st   of 14 Floors         23
1st   of 1 Floors           23
9th   of 14 Floors          23
9th   of 9 Floors           23
5th   of 12 Floors          22
2nd   of 3 Floors           22
2nd   of 14 Floors          21
8th   of 18 Floors          20
10th   of 19 Floors         18
6th   of 18 Floors          18
10th   of 18 Floors         17
9th   of 13 Floors          17
7th   of 15 Floors          17
12nd   of 12 Floors         17
Ground o

In [275]:
flats_df["floorNum"] = flats_df["floorNum"].str.split(' ').str.get(0).replace("Ground", "0").str.replace("Basement", "-1").str.replace("Lower", "0").str.extract(r'(\d+)').astype("float")

In [276]:
# cleaning the facing column 
flats_df["facing"].value_counts()

North-East    505
East          490
North         301
South         203
West          183
North-West    162
South-East    144
South-West    135
Name: facing, dtype: int64

In [277]:
flats_df["facing"].isna().sum()

874

In [278]:
# filling the missing value with "NA"
flats_df["facing"].fillna("NA", inplace=True)

In [279]:
flats_df["facing"].value_counts()

NA            874
North-East    505
East          490
North         301
South         203
West          183
North-West    162
South-East    144
South-West    135
Name: facing, dtype: int64

In [280]:
flats_df.head()

Unnamed: 0,property_name,society,price,price_per_sqft,areaWithType,bedRoom,bathroom,balcony,additionalRoom,address,floorNum,facing,agePossession,nearbyLocations,description,furnishDetails,features,rating
0,2 BHK Flat in Krishna Colony,maa bhagwati residency,0.45,5000.0,Carpet area: 900 (83.61 sq.m.),2,2,1,not available,"Krishna Colony, Gurgaon, Haryana",4.0,West,1 to 5 Year Old,"['Chintapurni Mandir', 'State bank ATM', 'Pear...",So with lift.Maa bhagwati residency is one of ...,"['3 Fan', '4 Light', '1 Wardrobe', 'No AC', 'N...","['Feng Shui / Vaastu Compliant', 'Security / F...","['Environment4 out of 5', 'Safety4 out of 5', ..."
1,2 BHK Flat in Ashok Vihar,apna enclave,0.5,7692.0,Carpet area: 650 (60.39 sq.m.),2,2,1,not available,"46b, Ashok Vihar, Gurgaon, Haryana",1.0,West,10+ Year Old,"['Chintapurni Mandir', 'Sheetla Mata Mandir', ...","Property situated on main road, railway statio...","['3 Wardrobe', '4 Fan', '1 Exhaust Fan', '1 Ge...","['Security / Fire Alarm', 'Maintenance Staff',...","['Environment4 out of 5', 'Safety4 out of 5', ..."
2,2 BHK Flat in Sohna,tulsiani easy in homes,0.4,6722.0,Carpet area: 595 (55.28 sq.m.),2,2,3,not available,"Sohna, Gurgaon, Haryana",12.0,,0 to 1 Year Old,"['Huda City Metro', 'Golf Course extn road', '...","This property is 15 km away from badshapur, gu...",,"['Power Back-up', 'Feng Shui / Vaastu Complian...","['Environment4 out of 5', 'Safety4 out of 5', ..."
3,2 BHK Flat in Sector 61 Gurgaon,smart world orchard,1.47,12250.0,Carpet area: 1200 (111.48 sq.m.),2,2,2,study room,"Sector 61 Gurgaon, Gurgaon, Haryana",2.0,,Dec 2023,"['Sector 55-56 Metro station', 'Bestech Centra...",Near to metro station of sector 56 and opposit...,,"['Security / Fire Alarm', 'Private Garden / Te...",
4,2 BHK Flat in Sector 92 Gurgaon,parkwood westend,0.7,5204.0,Super Built up area 1345(124.95 sq.m.),2,2,3,study room,"Sector 92 Gurgaon, Gurgaon, Haryana",5.0,,Under Construction,"['Yadav Clinic', 'Bangali Clinic', 'Dr. J. S. ...",We are the proud owners of this 2 bhk alongwit...,[],,"['Environment5 out of 5', 'Safety3 out of 5', ..."


In [281]:
# calculating the area from price and price_per_sqft columns (creating new column area) 
flats_df.insert(loc=4, column="area", value=round((flats_df["price"]*10000000)/flats_df["price_per_sqft"]))

In [282]:
flats_df["area"]

0          900.0
1          650.0
2          595.0
3         1200.0
4         1345.0
5          654.0
6         1500.0
7         2290.0
8         1350.0
9         2813.0
10         983.0
11         519.0
12        1945.0
13        1385.0
14        1300.0
15        2343.0
16         639.0
17         915.0
18           NaN
19        2100.0
20        1103.0
21        2150.0
22        1103.0
23        2025.0
24        2217.0
26         531.0
27        3355.0
28        2572.0
29         750.0
30        1056.0
31         594.0
32        3200.0
33         626.0
34        1300.0
35        1300.0
36        1180.0
37        2030.0
38         555.0
39        1365.0
40        1320.0
41        1194.0
42        2000.0
43        2038.0
44         700.0
45        1211.0
46        2727.0
47        1385.0
48        1829.0
49        2100.0
50        1650.0
51        1100.0
52        1530.0
53        1120.0
54        1250.0
55        2651.0
56         460.0
57         450.0
58        1570.0
59        1198

In [285]:
# adding the extra column which tells whether the property is flat or house 
flats_df.insert(loc=1, column="property_type", value="flat")

In [287]:
# export the cleaned data 
# flats_df.to_csv("flata_cleaned_phase1", index=False)
flats_df.to_csv("flats_cleaned_phase1.csv", index=False)