# Import

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import requests
import json

In [11]:
my_key = "8f8e4f67-63b7-47c6-8ce8-b9fb0e4a09b1"
headers = {'AccessKey': my_key, 'User-Agent': 'Mozilla/5.0'}

def get_token():
    response = requests.get("https://www.ura.gov.sg/uraDataService/insertNewToken.action",
                             headers=headers, data=headers, json=headers)

    if response.status_code == 200:
        return response.json()["Result"]
    else:
        return "Error"


my_token = get_token()

# Private Non-Landed Residential Properties Median Rentals by Name

This data service will return past 3 years of median rentals of private non-landed residential 
properties with at least 10 rental contracts for the reference period in JSON format.\
Update Frequency: End of day of every 15th of the month. If it is a public holiday, the data 
will be updated on the following working day.

In [12]:
headers = {'AccessKey': my_key, "Token": my_token, 'User-Agent': 'Mozilla/5.0'}
response = requests.get("https://www.ura.gov.sg/uraDataService/invokeUraDS?service=PMI_Resi_Rental_Median", headers=headers)
if response.status_code == 200:
    result = response.json()
else:
    print("Unsuccessful")

In [14]:
median_rentals = pd.DataFrame()
for ele in result["Result"]:
    # print(ele.keys())
    curr_details = pd.DataFrame(ele["rentalMedian"]) # refPeriod, psg75, median, psf25, district
    curr_details["street"] = ele["street"]
    curr_details["x"] = ele["x"]
    curr_details["y"] = ele["y"]
    curr_details["project"] = ele["project"]
    median_rentals = pd.concat([median_rentals, curr_details], axis=0)

# casting the correct datatypes
for col in ['psf75', 'median', 'psf25', 'x', 'y']:
    median_rentals[col] = median_rentals[col].astype(float)

median_rentals = median_rentals.reset_index(drop=True)
median_rentals

Unnamed: 0,refPeriod,psf75,median,psf25,district,street,x,y,project
0,2021Q4,2.40,2.17,1.98,23,HILLVIEW AVENUE,19806.57039,37591.36252,MERAWOODS
1,2021Q2,2.28,2.06,1.86,23,HILLVIEW AVENUE,19806.57039,37591.36252,MERAWOODS
2,2022Q1,2.43,2.07,1.94,23,HILLVIEW AVENUE,19806.57039,37591.36252,MERAWOODS
3,2022Q3,2.94,2.60,2.38,23,HILLVIEW AVENUE,19806.57039,37591.36252,MERAWOODS
4,2022Q4,3.25,3.01,2.80,23,HILLVIEW AVENUE,19806.57039,37591.36252,MERAWOODS
...,...,...,...,...,...,...,...,...,...
7304,2022Q3,6.00,4.97,4.79,20,SIN MING ROAD,28255.63458,37271.68720,THOMSON V TWO
7305,2022Q2,6.83,6.54,5.89,08,PERUMAL ROAD,30477.87296,32796.58779,UPTOWN @ FARRER
7306,2022Q1,6.63,6.39,6.01,08,PERUMAL ROAD,30477.87296,32796.58779,UPTOWN @ FARRER
7307,2023Q4,7.43,7.15,6.49,08,PERUMAL ROAD,30477.87296,32796.58779,UPTOWN @ FARRER


# Private Residential Properties Rental Contract

This data service will return past 3 years of private residential properties with rental contracts 
submitted to IRAS for Stamp Duty assessment in JSON format. As the rental records > 3 years 
ago could be modified/aborted, we would advise to refresh your database on a monthly basis 
and just retain the latest 3 years record for better accuracy. \
Update Frequency: End of day of every 4th Friday of January, April, July and October. If it 
is a public holiday, the data will be updated on the following working 
day. In other words, quarterly.


Here are the descriptions of some of the data fields. Those not mentioned are self-explanatory, or can refer to docs:

 Parameter Name | Description 
 -------------- | ----------- 
 project | The name of the project 
 street | The street name that the project is on. 
 x | The x coordinates of the address of the property in SVY21 format. Important: This is the location of the property and does not represent the location of the transacted unit. 
 y | The y coordinates of the address of the property in SVY21 format. Important: This is the location of the property and does not represent the location of the transacted unit. 
 propertyType | The property type of the transacted property. Note that there are properties with a mixture of property types. 
 district | The postal district that the transacted property falls in. Note that there are properties that fall across multiple postal district. 
 noOfBedRoom | The number of bed rooms. Information is only available for non-landed property. Empty value for non-landed properties means that the information was not provided for this property. 
 leaseDate | The lease commencement date of the rental. 

## Store data in dataframe
- Some observations do not have x and y coords. Will fill with 0.
- Get data belonging to all the `refPeriod` from the previous dataframe so it can be joined with the previous dataframe
- Null noOfBedRoom will be filled with -1. 

In [18]:
def get_rental_contracts_per_refPeriod(period):
    """period should be a string e.g. '2014Q1' means 2014 1st quarter"""
    refPeriod = period[2:].lower() # format should be like 14q1 instead of 2014Q1
    # refPeriod is mandatory parameter
    headers = {'AccessKey': my_key, "Token": my_token, 'User-Agent': 'Mozilla/5.0'}
    response = requests.get(f"https://www.ura.gov.sg/uraDataService/invokeUraDS?service=PMI_Resi_Rental&refPeriod={refPeriod}", headers=headers)
    if response.status_code == 200:
        result = response.json()
    else:
        print("Unsuccessful")
    rental_contracts_per_refPeriod = pd.DataFrame()
    for ele in result["Result"]:
        curr_details = pd.DataFrame(ele["rental"]) # areaSqm	leaseDate	propertyType	district	areaSqft	noOfBedRoom	rent
        curr_details["street"] = ele["street"]

        # some dont have x and y fields. Set to 0.
        try:
            curr_details["x"] = float(ele["x"])
        except KeyError:
            curr_details["x"] = 0
        try:
            curr_details["y"] = float(ele["y"])
        except KeyError:
            curr_details["y"] = 0
        curr_details["project"] = ele["project"]
        rental_contracts_per_refPeriod = pd.concat([rental_contracts_per_refPeriod, curr_details], axis=0)
    rental_contracts_per_refPeriod["refPeriod"] = period
    return rental_contracts_per_refPeriod.reset_index(drop=True)

rental_contracts = pd.DataFrame()
for period in median_rentals["refPeriod"].value_counts().index:
    print(period)
    rental_contracts = pd.concat([rental_contracts, get_rental_contracts_per_refPeriod(period)], axis=0)


2021Q3


2022Q3
2021Q4
2023Q3
2021Q2
2022Q1
2021Q1
2022Q4
2022Q2
2023Q2
2023Q1
2023Q4


In [19]:
# fill NA noOfBedRoom with -1
rental_contracts["noOfBedRoom"] = rental_contracts.apply(lambda row: -1 if row["noOfBedRoom"] == "NA" else row["noOfBedRoom"], axis=1)

In [20]:
# casting the correct datatypes
for col in ['rent', 'x', 'y']:
    rental_contracts[col] = rental_contracts[col].astype(float)

for col in ['noOfBedRoom']:
    rental_contracts[col] = rental_contracts[col].astype(int)

rental_contracts = rental_contracts.reset_index(drop=True)
rental_contracts

Unnamed: 0,areaSqm,leaseDate,propertyType,district,areaSqft,noOfBedRoom,rent,street,x,y,project,refPeriod
0,120-130,0721,Non-landed Properties,15,1300-1400,3,4200.0,ELLIOT ROAD,38865.17182,32621.92134,ELLIOT AT THE EAST COAST,2021Q3
1,90-100,0721,Non-landed Properties,15,900-1000,2,3200.0,ELLIOT ROAD,38865.17182,32621.92134,ELLIOT AT THE EAST COAST,2021Q3
2,190-200,0721,Non-landed Properties,15,2100-2200,3,5000.0,ELLIOT ROAD,38865.17182,32621.92134,ELLIOT AT THE EAST COAST,2021Q3
3,120-130,0721,Non-landed Properties,15,1300-1400,3,3500.0,ELLIOT ROAD,38865.17182,32621.92134,ELLIOT AT THE EAST COAST,2021Q3
4,240-250,0921,Non-landed Properties,15,2600-2700,4,5500.0,ELLIOT ROAD,38865.17182,32621.92134,ELLIOT AT THE EAST COAST,2021Q3
...,...,...,...,...,...,...,...,...,...,...,...,...
281596,60-70,1223,Non-landed Properties,08,600-700,3,5500.0,PERUMAL ROAD,30477.87296,32796.58779,UPTOWN @ FARRER,2023Q4
281597,40-50,1023,Non-landed Properties,08,500-600,2,4000.0,PERUMAL ROAD,30477.87296,32796.58779,UPTOWN @ FARRER,2023Q4
281598,70-80,1023,Non-landed Properties,08,700-800,3,5300.0,PERUMAL ROAD,30477.87296,32796.58779,UPTOWN @ FARRER,2023Q4
281599,40-50,1123,Non-landed Properties,08,500-600,2,4000.0,PERUMAL ROAD,30477.87296,32796.58779,UPTOWN @ FARRER,2023Q4


# Join

Left join `rental_contracts` and `median_rentals`. \
Resulting dataframe has columns: \
'areaSqm', 'leaseDate', 'propertyType', 'district', 'areaSqft',
       'noOfBedRoom', 'rent', 'street', 'x', 'y', 'project', 'refPeriod',
       'psf75', 'median', 'psf25'


In [23]:
merged_df = pd.merge(left=rental_contracts, right=median_rentals, how="left", on=["project", "street", "district", "x", "y", "refPeriod"])
merged_df

Unnamed: 0,areaSqm,leaseDate,propertyType,district,areaSqft,noOfBedRoom,rent,street,x,y,project,refPeriod,psf75,median,psf25
0,120-130,0721,Non-landed Properties,15,1300-1400,3,4200.0,ELLIOT ROAD,38865.17182,32621.92134,ELLIOT AT THE EAST COAST,2021Q3,3.09,2.78,2.62
1,90-100,0721,Non-landed Properties,15,900-1000,2,3200.0,ELLIOT ROAD,38865.17182,32621.92134,ELLIOT AT THE EAST COAST,2021Q3,3.09,2.78,2.62
2,190-200,0721,Non-landed Properties,15,2100-2200,3,5000.0,ELLIOT ROAD,38865.17182,32621.92134,ELLIOT AT THE EAST COAST,2021Q3,3.09,2.78,2.62
3,120-130,0721,Non-landed Properties,15,1300-1400,3,3500.0,ELLIOT ROAD,38865.17182,32621.92134,ELLIOT AT THE EAST COAST,2021Q3,3.09,2.78,2.62
4,240-250,0921,Non-landed Properties,15,2600-2700,4,5500.0,ELLIOT ROAD,38865.17182,32621.92134,ELLIOT AT THE EAST COAST,2021Q3,3.09,2.78,2.62
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
281778,60-70,1223,Non-landed Properties,08,600-700,3,5500.0,PERUMAL ROAD,30477.87296,32796.58779,UPTOWN @ FARRER,2023Q4,7.43,7.15,6.49
281779,40-50,1023,Non-landed Properties,08,500-600,2,4000.0,PERUMAL ROAD,30477.87296,32796.58779,UPTOWN @ FARRER,2023Q4,7.43,7.15,6.49
281780,70-80,1023,Non-landed Properties,08,700-800,3,5300.0,PERUMAL ROAD,30477.87296,32796.58779,UPTOWN @ FARRER,2023Q4,7.43,7.15,6.49
281781,40-50,1123,Non-landed Properties,08,500-600,2,4000.0,PERUMAL ROAD,30477.87296,32796.58779,UPTOWN @ FARRER,2023Q4,7.43,7.15,6.49


In [26]:
merged_df.to_csv("../data/URA_data.csv", index=False)

In [24]:
merged_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 281783 entries, 0 to 281782
Data columns (total 15 columns):
 #   Column        Non-Null Count   Dtype  
---  ------        --------------   -----  
 0   areaSqm       281783 non-null  object 
 1   leaseDate     281783 non-null  object 
 2   propertyType  281783 non-null  object 
 3   district      281783 non-null  object 
 4   areaSqft      281783 non-null  object 
 5   noOfBedRoom   281783 non-null  int32  
 6   rent          281783 non-null  float64
 7   street        281783 non-null  object 
 8   x             281783 non-null  float64
 9   y             281783 non-null  float64
 10  project       281783 non-null  object 
 11  refPeriod     281783 non-null  object 
 12  psf75         154640 non-null  float64
 13  median        154640 non-null  float64
 14  psf25         154640 non-null  float64
dtypes: float64(6), int32(1), object(8)
memory usage: 33.3+ MB
