In [7]:
# importing the libraries
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns
import pathlib
import os


In [15]:
# directory the kernel was started in, and the directory one level above it
working_directory = pathlib.Path.cwd()
working_parent_directory = working_directory.parent

# location of the flats CSV, resolved relative to the parent directory (kept as a str)
flat_dataset_location = os.path.join(working_parent_directory, 'dataset/flats.csv')


In [16]:
# read the CSV at flat_dataset_location into a pandas DataFrame named df
df = pd.read_csv(flat_dataset_location)

# print the (rows, columns) tuple of the loaded frame as a quick sanity check
print(f"Shape of the dataset: {df.shape}")


Shape of the dataset: (3017, 21)


In [17]:
# removing the view restriction from pandas dataframe
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)


In [18]:
# preview the top of the frame (head() returns the first 5 rows by default)
df.head()

Unnamed: 0.1,Unnamed: 0,property_name,link,society,price,area,areaWithType,bedRoom,bathroom,balcony,additionalRoom,address,floorNum,facing,agePossession,nearbyLocations,description,furnishDetails,features,rating,property_id
0,0,2 BHK Flat in Krishna Colony,https://www.99acres.com/2-bhk-bedroom-apartmen...,maa bhagwati residency,45 Lac,"₹ 5,000/sq.ft.",Carpet area: 900 (83.61 sq.m.),2 Bedrooms,2 Bathrooms,1 Balcony,,"Krishna Colony, Gurgaon, Haryana",4th of 4 Floors,West,1 to 5 Year Old,"['Chintapurni Mandir', 'State bank ATM', 'Pear...",So with lift.Maa bhagwati residency is one of ...,"['3 Fan', '4 Light', '1 Wardrobe', 'No AC', 'N...","['Feng Shui / Vaastu Compliant', 'Security / F...","['Environment4 out of 5', 'Safety4 out of 5', ...",C68850746
1,1,2 BHK Flat in Ashok Vihar,https://www.99acres.com/2-bhk-bedroom-apartmen...,Apna Enclave,50 Lac,"₹ 7,692/sq.ft.",Carpet area: 650 (60.39 sq.m.),2 Bedrooms,2 Bathrooms,1 Balcony,,"46b, Ashok Vihar, Gurgaon, Haryana",1st of 3 Floors,West,10+ Year Old,"['Chintapurni Mandir', 'Sheetla Mata Mandir', ...","Property situated on main road, railway statio...","['3 Wardrobe', '4 Fan', '1 Exhaust Fan', '1 Ge...","['Security / Fire Alarm', 'Maintenance Staff',...","['Environment4 out of 5', 'Safety4 out of 5', ...",H68850564
2,2,2 BHK Flat in Sohna,https://www.99acres.com/2-bhk-bedroom-apartmen...,Tulsiani Easy in Homes,40 Lac,"₹ 6,722/sq.ft.",Carpet area: 595 (55.28 sq.m.),2 Bedrooms,2 Bathrooms,3 Balconies,,"Sohna, Gurgaon, Haryana",12nd of 14 Floors,,0 to 1 Year Old,"['Huda City Metro', 'Golf Course extn road', '...","This property is 15 km away from badshapur, gu...",,"['Power Back-up', 'Feng Shui / Vaastu Complian...","['Environment4 out of 5', 'Safety4 out of 5', ...",J68850120
3,3,2 BHK Flat in Sector 61 Gurgaon,https://www.99acres.com/2-bhk-bedroom-apartmen...,Smart World Orchard,1.47 Crore,"₹ 12,250/sq.ft.",Carpet area: 1200 (111.48 sq.m.),2 Bedrooms,2 Bathrooms,2 Balconies,Study Room,"Sector 61 Gurgaon, Gurgaon, Haryana",2nd of 4 Floors,,Dec 2023,"['Sector 55-56 Metro station', 'Bestech Centra...",Near to metro station of sector 56 and opposit...,,"['Security / Fire Alarm', 'Private Garden / Te...",,S68849476
4,4,2 BHK Flat in Sector 92 Gurgaon,https://www.99acres.com/2-bhk-bedroom-apartmen...,Parkwood Westend,70 Lac,"₹ 5,204/sq.ft.",Super Built up area 1345(124.95 sq.m.),2 Bedrooms,2 Bathrooms,3 Balconies,Study Room,"Sector 92 Gurgaon, Gurgaon, Haryana",5th of 8 Floors,,Under Construction,"['Yadav Clinic', 'Bangali Clinic', 'Dr. J. S. ...",We are the proud owners of this 2 bhk alongwit...,[],,"['Environment5 out of 5', 'Safety3 out of 5', ...",L47956793


In [19]:
# summarise the frame: one line per column with its non-null count and dtype,
# which shows both the datatypes and where values are missing
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3017 entries, 0 to 3016
Data columns (total 21 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Unnamed: 0       3017 non-null   int64 
 1   property_name    3017 non-null   object
 2   link             3017 non-null   object
 3   society          3016 non-null   object
 4   price            3007 non-null   object
 5   area             3004 non-null   object
 6   areaWithType     3008 non-null   object
 7   bedRoom          3008 non-null   object
 8   bathroom         3008 non-null   object
 9   balcony          3008 non-null   object
 10  additionalRoom   1694 non-null   object
 11  address          3002 non-null   object
 12  floorNum         3006 non-null   object
 13  facing           2127 non-null   object
 14  agePossession    3007 non-null   object
 15  nearbyLocations  2913 non-null   object
 16  description      3008 non-null   object
 17  furnishDetails   2203 non-null   

In [21]:
# checking for duplicate listings
# NOTE: 'Unnamed: 0' appears to be a per-row counter, and 'link' / 'property_id'
# presumably identify a single listing, so comparing entire rows would report 0
# duplicates even if the same flat were present more than once. Compare only the
# remaining descriptive columns instead.
# errors='ignore' keeps this cell runnable if those columns were already dropped.
df.drop(columns=['Unnamed: 0', 'link', 'property_id'], errors='ignore').duplicated().sum()

0

In [26]:
# count of null entries per column, listed from the most to the least affected
(
    df.isnull()
    .sum()
    .sort_values(ascending=False)
)

additionalRoom     1323
facing              890
furnishDetails      814
features            423
rating              341
nearbyLocations     104
address              15
area                 13
floorNum             11
price                10
agePossession        10
description           9
property_id           9
balcony               9
bathroom              9
bedRoom               9
areaWithType          9
society               1
property_name         0
link                  0
Unnamed: 0            0
dtype: int64

In [27]:
# dropping the columns which will not help in data analysis and machine learning
# Reassigning df (instead of inplace=True) together with errors='ignore' makes this
# cell idempotent: re-running it after the columns are already gone no longer
# raises KeyError.
df = df.drop(columns=['link', 'property_id', 'Unnamed: 0'], errors='ignore')

In [28]:
# checking few samples after dropping the columns
# random_state fixes the draw so the same 5 rows are shown on every run, keeping
# the displayed output reproducible after Restart & Run All
df.sample(5, random_state=42)

Unnamed: 0,property_name,society,price,area,areaWithType,bedRoom,bathroom,balcony,additionalRoom,address,floorNum,facing,agePossession,nearbyLocations,description,furnishDetails,features,rating
2589,2 BHK Flat in Sector 10A Gurgaon,Maxworth City Residences,90 Lac,"₹ 7,083/sq.ft.",Super Built up area 1200(111.48 sq.m.),2 Bedrooms,2 Bathrooms,3 Balconies,Others,"Maxworth City Residences, Sector 10A Gurgaon, ...",9th of 11 Floors,East,0 to 1 Year Old,"['Rajiv Chowk Mosque', 'Yadav Hospital Gurgoan...",2 bhk apartment in very good locality. Just 2....,[],"['Security / Fire Alarm', 'Intercom Facility',...","['Environment4 out of 5', 'Safety5 out of 5', ..."
790,2 BHK Flat in Sector 99A Gurgaon,Prime Habitat,27.1 Lac,"₹ 6,049/sq.ft.",Carpet area: 448 (41.62 sq.m.),2 Bedrooms,2 Bathrooms,1 Balcony,,"Sector 99A Gurgaon, Gurgaon, Haryana",10th of 12 Floors,South,1 to 5 Year Old,"['The Hive Mall', 'Omaxe Gurgaon Mall', 'Gurug...",This lovely 2 bhk apartment/flat in sector 99a...,,"['Security / Fire Alarm', 'Lift(s)', 'Maintena...","['Environment4 out of 5', 'Lifestyle3 out of 5..."
828,3 BHK Flat in Sector 69 Gurgaon,Tulip Violet4.2 ★,1.43 Crore,"₹ 8,886/sq.ft.",Super Built up area 1608(149.39 sq.m.),3 Bedrooms,3 Bathrooms,2 Balconies,Pooja Room,"Sector 69 Gurgaon, Gurgaon, Haryana",1st of 14 Floors,North-West,1 to 5 Year Old,"['Airia Mall Sector 68', 'Southern Peripheral ...",This beautiful 3 bhk flat in sector 69 gurgaon...,"['1 Modular Kitchen', 'No AC', 'No Bed', 'No C...","['Power Back-up', 'Feng Shui / Vaastu Complian...","['Green Area5 out of 5', 'Construction4 out of..."
1388,2 BHK Flat in Sector 83 Gurgaon,Vatika Gurgaon 213.7 ★,83 Lac,"₹ 6,666/sq.ft.",Super Built up area 1245(115.66 sq.m.)Built Up...,2 Bedrooms,2 Bathrooms,2 Balconies,Pooja Room,"Tower F1, Sector 83 Gurgaon, Gurgaon, Haryana",12nd of 18 Floors,North,1 to 5 Year Old,"['Huda Metro Station (Gurugram)', 'Sapphire 83...",Vatika gurgaon 21 is one of the most popular d...,,"['Water purifier', 'Security / Fire Alarm', 'P...","['Green Area5 out of 5', 'Construction4.5 out ..."
693,3 BHK Flat in Sector 109 Gurgaon,ATS Kocoon,1.75 Crore,"₹ 10,028/sq.ft.",Super Built up area 1745(162.12 sq.m.)Built Up...,3 Bedrooms,3 Bathrooms,3 Balconies,Pooja Room,"Middle, Sector 109 Gurgaon, Gurgaon, Haryana",10th of 25 Floors,North-East,1 to 5 Year Old,"['Jagdish Super Market', 'Dwarka', 'Euro Inter...",This beautiful 3 bhk flat in sector 109 gurgao...,"['1 Water Purifier', '5 Fan', '1 Exhaust Fan',...","['Feng Shui / Vaastu Compliant', 'Security / F...","['Green Area5 out of 5', 'Construction5 out of..."
