In [36]:
import numpy as np
import pandas as pd
import re
from dotenv import load_dotenv
import os

# Read data

In [2]:
# Read the Singapore rental listings from csv (path is relative to the working directory)
df = pd.read_csv("data/rental_prices_singapore.csv")

In [3]:
# Inspect column dtypes and non-null counts of the freshly loaded data
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5360 entries, 0 to 5359
Data columns (total 9 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   name                           5360 non-null   object 
 1   address                        5011 non-null   object 
 2   price                          5360 non-null   object 
 3   size                           5360 non-null   object 
 4   bedrooms                       5360 non-null   object 
 5   bathrooms                      4941 non-null   float64
 6   property_type_furnishing_year  5360 non-null   object 
 7   mrt_distance                   4641 non-null   object 
 8   agent_description              5360 non-null   object 
dtypes: float64(1), object(8)
memory usage: 377.0+ KB


In [4]:
# Show top and bottom five rows (pandas truncates long frames to head and tail by default)
df

Unnamed: 0,name,address,price,size,bedrooms,bathrooms,property_type_furnishing_year,mrt_distance,agent_description
0,"Brand new Attic Studio, in a Peranakan Conserv...",Lorong 34 Geylang,3000,400 sqft,1,1.0,\nApartment\nFully Furnished\n,,One and only attic studio! Beautifully done up...
1,Astor,51C Lengkong Empat,2000,1130 sqft,Room,,\nApartment\nFully Furnished\n,11 mins (810 m) to DT28 Kaki Bukit MRT,Comes with In House Maid
2,Springhill Terrace,Sunrise avenue,7400,3800 sqft,5,4.0,\nApartment\nFully Furnished\n,,"Close to MRT and short drive to French, Austra..."
3,704 Yishun Avenue 5,704 Yishun Avenue 5,1000,120 sqft,Room,,\nApartment\nFully Furnished\n,9 mins (700 m) to NS13 Yishun MRT,Room for 1 or 2 single ladies
4,Espada,48 Saint Thomas Walk,4300,689 sqft,1,1.0,\nApartment\nFully Furnished\n,6 mins (420 m) to NS23 Somerset MRT,All units virtual online viewing available! An...
...,...,...,...,...,...,...,...,...,...
5355,Avenue South Residence,13 Silat Avenue,7500,947 sqft,3,2.0,\nBungalow House\nPartially Furnished\n,,Newly TOP! Be the first to stay near town!
5356,453B Fernvale Road,453B Fernvale Road,999,150 sqft,Room,,\nBungalow House\nPartially Furnished\n,4 mins (290 m) to SW5 Fernvale LRT,Close to fernvale LRT
5357,Riviere,1 Jiak Kim Street,8000,840 sqft,2,2.0,\nBungalow House\nPartially Furnished\n,5 mins (370 m) to TE16 Havelock MRT,Brand new pool facing unit!
5358,Bukit Villas,1 Rasok Drive,7100,"3025 sqft (floor), 2256 sqft (land)",5,4.0,\nBungalow House\nPartially Furnished\n,,"Tranquil surroundings, facilities included! Po..."


# Remove duplicates

In [5]:
# Diagnose duplicates: count fully identical rows (True = repeat of an earlier row)
df.duplicated().value_counts()

False    5082
True      278
dtype: int64

In [6]:
# Remove duplicates, keeping the first occurrence of each row.
# The index is not reset, so the original row labels are retained.
df = df.drop_duplicates().copy()

In [7]:
# Re-check row count and non-null counts after dropping duplicates
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 5082 entries, 0 to 5359
Data columns (total 9 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   name                           5082 non-null   object 
 1   address                        4754 non-null   object 
 2   price                          5082 non-null   object 
 3   size                           5082 non-null   object 
 4   bedrooms                       5082 non-null   object 
 5   bathrooms                      4680 non-null   float64
 6   property_type_furnishing_year  5082 non-null   object 
 7   mrt_distance                   4418 non-null   object 
 8   agent_description              5082 non-null   object 
dtypes: float64(1), object(8)
memory usage: 397.0+ KB


# Extract features

## Property type

In [8]:
# Create function to extract property type
def extract_type(string):
    """Return the property type named in a combined listing string.

    Parameters
    ----------
    string : str
        Text that may contain one of the known property type labels.

    Returns
    -------
    str or float
        The first label (in priority order) found in ``string``, or
        ``np.nan`` when no label matches or ``string`` is not a str.
    """
    # Order matters: a label that contains another label as a substring must
    # be tested first. "Executive Condominium" contains "Condominium" and
    # "Semi-Detached House" contains "Detached House".
    property_types = (
        "Executive Condominium",
        "Condominium",
        "Apartment",
        "HDB Flat",
        "Semi-Detached House",
        "Good Class Bungalow",
        "Corner Terrace",
        "Detached House",
        "Terraced House",
        "Bungalow House",
        "Cluster House",
    )

    # Missing values (e.g. NaN floats) cannot be searched; treat them as "no type"
    if not isinstance(string, str):
        return np.nan

    for property_type in property_types:
        if property_type in string:
            return property_type
    return np.nan

In [9]:
# Create the property type column by mapping extract_type over every raw value
df["property_type"] = df["property_type_furnishing_year"].map(extract_type)

In [10]:
# Frequencies of property types, most common first (missing values are not counted)
df["property_type"].value_counts()

Condominium            2770
Apartment               905
HDB Flat                763
Semi-Detached House     188
Good Class Bungalow     174
Corner Terrace          103
Detached House           79
Terraced House           40
Bungalow House           40
Cluster House            20
Name: property_type, dtype: int64

## Furnishing

In [11]:
# Create function to extract information about furnishing
def extract_furnishing(string):
    """Return the furnishing level mentioned in ``string``.

    The levels are checked in the order "Fully Furnished",
    "Partially Furnished", "Unfurnished"; the first one contained in
    ``string`` is returned. ``np.nan`` is returned when none is present.
    """
    for level in ("Fully Furnished", "Partially Furnished", "Unfurnished"):
        if level in string:
            return level
    return np.nan

In [12]:
# Create the furnishing column by mapping extract_furnishing over every raw value
df["furnishing"] = df["property_type_furnishing_year"].map(extract_furnishing)

In [13]:
# Frequencies of furnishing levels (rows without a recognised level are NaN and not counted)
df["furnishing"].value_counts()

Partially Furnished    3058
Fully Furnished        1600
Unfurnished              84
Name: furnishing, dtype: int64

## Built year

In [14]:
# Create function to extract built year
def extract_year(string):
    """Return the first standalone four-digit number in ``string``.

    The match is returned as a string (e.g. "2010"), not an int. Any run of
    exactly four digits delimited by word boundaries qualifies; it is not
    checked to be a plausible year. ``np.nan`` is returned when there is no
    such number.
    """
    match = re.search(r"\b\d{4}\b", string)
    return match.group() if match else np.nan

In [15]:
# Create the built year column: map extract_year over every raw value,
# then cast to nullable Int32 so rows without a year stay missing
df["year"] = df["property_type_furnishing_year"].map(extract_year).astype("Int32")

In [16]:
# Frequencies of built year, most common first (missing years are not counted)
df["year"].value_counts()

2023    391
2024    314
2013    297
2017    293
2010    248
2011    239
2014    178
2007    167
2016    160
2009    145
2022    142
2015    140
2021    140
2012    112
2008     99
1997     87
2019     80
2005     80
1995     80
1999     80
2000     63
2001     62
1984     60
1993     60
1985     60
2004     60
1994     60
1986     43
1996     40
1998     40
1987     40
2018     40
1981     40
2003     40
1975     38
1992     20
1971     20
1991     20
2006     20
1983     20
1974     20
1979     20
2002     20
1977     20
Name: year, dtype: Int64

In [17]:
# Delete "property_type_furnishing_year" column now that its features are extracted.
# Reassign rather than mutate in place so the frame is not modified behind
# any other reference to it.
df = df.drop(columns="property_type_furnishing_year")

## Meters to MRT

In [18]:
# Extract MRT distance in meters, e.g. "11 mins (810 m) to DT28 Kaki Bukit MRT" -> 810
text_before_unit = df["mrt_distance"].str.split(r"m\)").str[0]  # keep everything before "m)"
meters_text = text_before_unit.str.split(r"\(").str[1]  # keep what follows the "("
df["meters_to_mrt"] = meters_text.astype("Int32")

In [19]:
# Delete "mrt_distance" column now that "meters_to_mrt" has been derived from it.
# Reassign rather than mutate in place so the frame is not modified behind
# any other reference to it.
df = df.drop(columns="mrt_distance")

## Renovated

In [20]:
# Create function to extract information about renovation
def extract_renovated(string):
    """Return True when ``string`` mentions "renovated" or "renovation".

    The check is a case-insensitive substring match, so longer words that
    contain either keyword also count.
    """
    lowered = string.lower()
    return "renovated" in lowered or "renovation" in lowered

In [21]:
# Create the renovated column by mapping extract_renovated over every agent description
df["renovated"] = df["agent_description"].map(extract_renovated)

## Pool

In [22]:
# Create function to extract information about pool
def extract_pool(string):
    """Return True when ``string`` contains "pool" (case-insensitive substring match)."""
    return "pool" in string.lower()

In [23]:
# Create the pool column by mapping extract_pool over every agent description
df["pool"] = df["agent_description"].map(extract_pool)

## Sea view

In [24]:
# Create function to extract information about sea view
def extract_sea_view(string):
    """Return True when ``string`` contains "seaview" or "sea view" (case-insensitive)."""
    lowered = string.lower()
    return any(phrase in lowered for phrase in ("seaview", "sea view"))

In [25]:
# Create the sea view column by mapping extract_sea_view over every agent description
df["sea_view"] = df["agent_description"].map(extract_sea_view)

## Penthouse

In [26]:
# Create function to extract penthouse information
def extract_penthouse(string):
    """Return True when ``string`` contains "penthouse" (case-insensitive substring match)."""
    lowered = string.lower()
    return "penthouse" in lowered

In [27]:
# Create the penthouse column by mapping extract_penthouse over every agent description
df["penthouse"] = df["agent_description"].map(extract_penthouse)

In [28]:
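# Delete "agent_description" column -- currently disabled: the column is still
# used further down to inspect properties with a missing address
# df.drop("agent_description", axis=1, inplace=True)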

## Latitude and longitude

In [38]:
# Load environment variables from .env file so the API key can be read with os.getenv
load_dotenv()

True

In [39]:
# Get google maps api key from the environment (populated from .env);
# the value is None when the variable is not set
google_maps_api_key = os.environ.get("google_maps_api_key")

# Convert data types

In [29]:
# Convert price from str to int: remove commas, then cast to nullable Int32
price_without_commas = df["price"].str.replace(",", "")
df["price"] = price_without_commas.astype("Int32")

In [30]:
# Convert size (sqft) from str to int: keep only the text before the first "sqft".
# For values such as "3025 sqft (floor), 2256 sqft (land)" this keeps the first figure (3025).
size_before_unit = df["size"].str.split("sqft").str[0]
df["size"] = size_before_unit.astype("Int32")

In [31]:
# Convert bathrooms from float to nullable int (missing values stay missing)
df["bathrooms"] = df["bathrooms"].astype("Int32")

# Check data

In [32]:
# Verify dtypes and non-null counts after feature extraction and type conversion
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 5082 entries, 0 to 5359
Data columns (total 15 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   name               5082 non-null   object
 1   address            4754 non-null   object
 2   price              5082 non-null   Int32 
 3   size               5082 non-null   Int32 
 4   bedrooms           5082 non-null   object
 5   bathrooms          4680 non-null   Int32 
 6   agent_description  5082 non-null   object
 7   property_type      5082 non-null   object
 8   furnishing         4742 non-null   object
 9   year               4398 non-null   Int32 
 10  meters_to_mrt      4418 non-null   Int32 
 11  renovated          5082 non-null   bool  
 12  pool               5082 non-null   bool  
 13  sea_view           5082 non-null   bool  
 14  penthouse          5082 non-null   bool  
dtypes: Int32(5), bool(4), object(6)
memory usage: 421.8+ KB


In [33]:
# Print the agent descriptions of properties with missing address
missing_address = df["address"].isna()
filtered_names = df.loc[missing_address, "agent_description"]

for description in filtered_names:
    print(description)

Lovely tropical resort good class bungalow holland village mrt station
Modern Bungalow in Braddell Heights Estate for Rent
High floor. Bright and Windy. Available end Sept.
High floor, fully furnished unit! Avail from 22 Aug
Modern, Spacious Detached House at Prime Location
Tudor Close, Cosy Bungallow Walk to MRT, Supermarkets and Restaurants
Spacious Unit w/ 4 Ensuite Bedrooms For Rent, In The Heart Of Orchard!
New listing hot from the oven!!
Brand new freehold 3 bedroom duplex penthouse for immediate occupancy
Modern nicely renovated gcb for lease
Monthly Flexible Rental Apartment Near Somerset Station in Singapore
Rare Charming Bungalow with Great Indoor/Outdoor Lifestyle
Monthly Flexible Rental Apartment Near Sentosa Station in Singapore
Expat Choice! 3 bedroom for rent! Located at a fantastic place
Dalvey: Highly Sought After Locale
Monthly Flexible Rental Apartment Near Haw Par Villa Station
Cheap! Hill top Good Class Bungalow for immediate rent!
Beautiful colonial style 2 bedroo

In [34]:
# Show the full rows of properties with missing address
df[df["address"].isna()]

Unnamed: 0,name,address,price,size,bedrooms,bathrooms,agent_description,property_type,furnishing,year,meters_to_mrt,renovated,pool,sea_view,penthouse
31,Belmont/Morley Tropical Resort Style Good Clas...,,40000,8500,5,6,Lovely tropical resort good class bungalow hol...,Condominium,Partially Furnished,2019,900,False,False,False,False
113,Braddell Heights Estate,,28000,7200,6,6,Modern Bungalow in Braddell Heights Estate for...,Condominium,Fully Furnished,1995,530,False,False,False,False
118,J Gateway,,4200,506,1,1,High floor. Bright and Windy. Available end Sept.,Condominium,Fully Furnished,1995,260,False,False,False,False
198,Angullia Park Residences @ Orchard,,6800,1001,3,2,"High floor, fully furnished unit! Avail from 2...",Condominium,,2023,340,False,False,False,False
209,Renovated Detached within 1km of Nanyang Primary,,36500,7000,6,5,"Modern, Spacious Detached House at Prime Location",Condominium,,2022,340,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5237,Tudor Close,,17000,4000,4,4,"Tudor Close, Cosy Bungallow Walk to MRT, Super...",Condominium,Partially Furnished,2024,930,False,False,False,False
5238,Monthly Flexible Rental Apartment Near Harbou...,,3800,110,1,1,Monthly Flexible Rental Apartment Near Harbour...,Condominium,Partially Furnished,2024,330,False,False,False,False
5310,3 Storey Semi-D in Watten Estate,,12999,4500,5,5,Rare Tastefully Renovated 3 Storey Semi-D in W...,Condominium,Partially Furnished,2011,730,True,False,False,False
5339,Monthly Flexible Rental Apartment Near Novena...,,4760,264,1,1,Monthly Flexible Rental Apartment Near Novena ...,Condominium,Partially Furnished,2014,960,False,False,False,False


In [35]:
# Show top and bottom five rows of the processed data (pandas truncates long frames by default)
df

Unnamed: 0,name,address,price,size,bedrooms,bathrooms,agent_description,property_type,furnishing,year,meters_to_mrt,renovated,pool,sea_view,penthouse
0,"Brand new Attic Studio, in a Peranakan Conserv...",Lorong 34 Geylang,3000,400,1,1,One and only attic studio! Beautifully done up...,Apartment,Fully Furnished,,,False,False,False,False
1,Astor,51C Lengkong Empat,2000,1130,Room,,Comes with In House Maid,Apartment,Fully Furnished,,810,False,False,False,False
2,Springhill Terrace,Sunrise avenue,7400,3800,5,4,"Close to MRT and short drive to French, Austra...",Apartment,Fully Furnished,,,False,False,False,False
3,704 Yishun Avenue 5,704 Yishun Avenue 5,1000,120,Room,,Room for 1 or 2 single ladies,Apartment,Fully Furnished,,700,False,False,False,False
4,Espada,48 Saint Thomas Walk,4300,689,1,1,All units virtual online viewing available! An...,Apartment,Fully Furnished,,420,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5355,Avenue South Residence,13 Silat Avenue,7500,947,3,2,Newly TOP! Be the first to stay near town!,Bungalow House,Partially Furnished,,,False,False,False,False
5356,453B Fernvale Road,453B Fernvale Road,999,150,Room,,Close to fernvale LRT,Bungalow House,Partially Furnished,,290,False,False,False,False
5357,Riviere,1 Jiak Kim Street,8000,840,2,2,Brand new pool facing unit!,Bungalow House,Partially Furnished,,370,False,True,False,False
5358,Bukit Villas,1 Rasok Drive,7100,3025,5,4,"Tranquil surroundings, facilities included! Po...",Bungalow House,Partially Furnished,,,False,True,False,False


# To Do
- Get distance to attractive locations in Singapore (e.g., Marina Bay, Orchard, Botanic Gardens)
- Handle missing values
  - address
  - bathrooms
  - mrt distance
  - furnishing
  - built year