In [12]:
import json
import os
from urllib.parse import parse_qsl

import pandas as pd
import requests

# Obtain the data using the Google Places API

In [13]:
# --- Google Places Text Search configuration ---------------------------------
# The key is read from the environment so the secret never lives in the
# notebook. Export GOOGLE_PLACES_API_KEY before starting the kernel. Any key
# that was previously committed in this cell should be treated as compromised
# and rotated.
API_KEY = os.environ.get("GOOGLE_PLACES_API_KEY", "")
base_url = "https://maps.googleapis.com/maps/api/place/textsearch/json?"

# Place types of interest for this dataset.
types = [
    "amusement_park",
    "aquarium",
    "art_gallery",
    "bakery",
    "bar",
    "beauty_salon",
    "book_store",
    "bowling_alley",
    "cafe",
    "clothing_store",
    "convenience_store",
    "department_store",
    "gym",
    "hardware_store",
    "hindu_temple",
    "home_goods_store",
    "library",
    "mosque",
    "movie_theater",
    "museum",
    "night_club",
    "park",
    "restaurant",
    "rv_park",
    "shoe_store",
    "shopping_mall",
    "spa",
    "stadium",
    "store",
    "subway_station",
    "supermarket",
    "tourist_attraction",
    "train_station",
    "zoo"
]

# NOTE(review): this packs every type into one comma-separated `type` value.
# The Places documentation describes `type` as single-valued, so confirm the
# intended behaviour before sending `type_query` in a request.
type_query = "&type=" + ",".join(types)

# Centre of Singapore. The two names previously held each other's values
# (1.3521 is the latitude, 103.8198 the longitude); the API expects
# "latitude,longitude", which is the order used below.
singapore_centre_lat = 1.3521
singapore_centre_lon = 103.8198
radius = 27000

# Pre-formatted query-string fragments. None of them carries a leading "&"
# except `key_query`, so callers must add the separators themselves.
location_query = "location=" + str(singapore_centre_lat) + "," + str(singapore_centre_lon) + "&radius=" + str(radius)
region_code = "region=sg"

key_query = "&key=" + API_KEY

In [23]:
# Planning areas of Singapore; each one is used both as part of the search
# text and as part of the per-query JSON file name.
# "Central Water Catchment" was previously misspelt "Centre Water Catchment";
# JSON files already saved under the old spelling must be re-fetched or
# renamed before the merge step is re-run.
planning_areas = [
    "Ang Mo Kio",
    "Bedok",
    "Bishan",
    "Boon Lay",
    "Bukit Batok",
    "Bukit Merah",
    "Bukit Panjang",
    "Bukit Timah",
    "Central Water Catchment",
    "Changi",
    "Changi Bay",
    "Choa Chu Kang",
    "Clementi",
    "Downtown Core",
    "Geylang",
    "Hougang",
    "Jurong East",
    "Jurong West",
    "Kallang",
    "Lim Chu Kang",
    "Mandai",
    "Marina East",
    "Marina South",
    "Marine Parade",
    "Museum",
    "Newton",
    "North-Eastern Islands",
    "Novena",
    "Orchard",
    "Outram",
    "Pasir Ris",
    "Paya Lebar",
    "Pioneer",
    "Punggol",
    "Queenstown",
    "River Valley",
    "Rochor",
    "Seletar",
    "Sembawang",
    "Sengkang",
    "Serangoon",
    "Simpang",
    "Singapore River",
    "Southern Islands",
    "Straits View",
    "Sungei Kadut",
    "Tampines",
    "Tanglin",
    "Tengah",
    "Toa Payoh",
    "Tuas",
    "Western Islands",
    "Western Water Catchment",
    "Woodlands",
    "Yishun"
]

In [22]:
# Free-text search terms; the fetch loop pairs each one with every planning area.
queries = (
    "restaurant cafe library park shopping_mall shop "
    "cinema art_gallery museum gym tourist_attraction"
).split()

In [27]:
# Download one Text Search response per (planning area, search term) pair and
# save it verbatim as "<query>_in_<planning_area>.json" for the merge step.
if not API_KEY:
    raise RuntimeError("API_KEY is empty; set a Google Places API key before fetching.")

# `location_query` and `region_code` are ready-made "k=v&k=v" fragments.
# Decode them once so that requests URL-encodes every parameter itself; the
# previous hand-built URL had no "&" between fragments, which glued
# "location=..." onto the search text and sent radius/region twice.
common_params = dict(parse_qsl(f"{location_query}&{region_code}"))

for planning_area in planning_areas:
    for query in queries:
        # No `type` filter is sent: the old '"&type" + query' fragment lacked
        # the "=", so it never filtered anything, and several search terms
        # (e.g. "shop", "cinema") are not Place type names. The free-text
        # query already carries the category.
        params = {
            **common_params,
            "query": f"{query} Singapore {planning_area}",
            "key": API_KEY,
        }
        res = requests.get(base_url, params=params, timeout=30)
        res.raise_for_status()
        results = res.json()

        # The API reports request-level failures inside the JSON body.
        status = results.get("status")
        if status not in ("OK", "ZERO_RESULTS"):
            print(
                f"[warn] {query!r} in {planning_area!r}: "
                f"status={status} {results.get('error_message', '')}"
            )

        # Always write the file so the merge step finds one per pair.
        # NOTE: only the first page of results is stored; `next_page_token`
        # is not followed.
        with open(f"{query}_in_{planning_area}.json", "w") as f:
            json.dump(results, f, indent=4)

In [None]:
# Place type names, listed in the order in which the merge step emits their
# 0/1 indicator columns.
unique_types = [
    "point_of_interest", "cafe", "car_wash", "movie_theater",
    "spa", "pet_store", "movie_rental", "airport",
    "city_hall", "bus_station", "florist", "department_store",
    "pharmacy", "general_contractor", "health", "local_government_office",
    "finance", "transit_station", "school", "meal_takeaway",
    "furniture_store", "physiotherapist", "grocery_or_supermarket", "bar",
    "shoe_store", "food", "library", "museum",
    "bakery", "subway_station", "store", "home_goods_store",
    "park", "restaurant", "clothing_store", "electronics_store",
    "night_club", "supermarket", "gas_station", "tourist_attraction",
    "travel_agency", "stadium", "shopping_mall", "convenience_store",
    "lodging", "meal_delivery", "parking", "route",
    "jewelry_store", "gym", "post_office", "aquarium",
    "amusement_park", "establishment", "hair_care", "art_gallery",
    "beauty_salon", "liquor_store", "zoo", "book_store",
]

# Merge all JSON files into one CSV

In [44]:
def _flatten_place(result, type_names):
    """Flatten one Places API result into a single flat row.

    Args:
        result: One element of the ``results`` list of a saved response.
        type_names: Place type names to expand into 0/1 indicator columns.

    Returns:
        dict mapping column name to value. Optional fields absent from
        ``result`` are filled with the placeholder "NIL" ("nil" for the
        photo reference, matching the existing CSV convention).

    Raises:
        KeyError: if ``name``, ``place_id`` or ``geometry.location`` is missing.
    """
    location = result["geometry"]["location"]
    plus_code = result.get("plus_code") or {}
    photos = result.get("photos") or []
    place_types = set(result.get("types", []))

    row = {
        "name": result["name"],
        "business_status": result.get("business_status", "NIL"),
        "formatted_address": result.get("formatted_address", "NIL"),
        "lat": location["lat"],
        "lng": location["lng"],
        "icon": result.get("icon", "NIL"),
        "icon_bg_color": result.get("icon_background_color", "NIL"),
        "icon_mask_base_uri": result.get("icon_mask_base_uri", "NIL"),
        # Only the first photo reference is kept.
        "photo": photos[0].get("photo_reference", "nil") if photos else "nil",
        "place_id": result["place_id"],
        "compound_code": plus_code.get("compound_code", "NIL"),
        "global_code": plus_code.get("global_code", "NIL"),
        "rating": result.get("rating", "NIL"),
        "reference": result.get("reference", "NIL"),
        "ratings_cnt": result.get("user_ratings_total", "NIL"),
    }
    # One indicator column per known type: 1 if the place carries it, else 0.
    row.update({t: 1 if t in place_types else 0 for t in type_names})
    return row


# Collect rows as plain dicts and build the DataFrame once at the end.
records = []
# De-duplicate on place_id. The earlier name-based set was never added to, so
# nothing was filtered, and a name is not unique anyway (chains share names
# across outlets).
seen_place_ids = set()
for planning_area in planning_areas:
    for query in queries:
        with open(f"{query}_in_{planning_area}.json", "r") as f:
            data = json.load(f)
        # A saved error payload may have no "results" key; treat it as empty.
        for result in data.get("results", []):
            place_id = result["place_id"]
            if place_id in seen_place_ids:
                continue
            seen_place_ids.add(place_id)
            records.append(_flatten_place(result, unique_types))

df = pd.DataFrame(records)
df.to_csv("planlah_data_raw.csv")

# Categorize each place in our dataset

In [43]:
# Display the Place type names that the merge step expands into 0/1
# indicator columns.
print(unique_types)

{'point_of_interest', 'cafe', 'car_wash', 'movie_theater', 'spa', 'pet_store', 'movie_rental', 'airport', 'city_hall', 'bus_station', 'florist', 'department_store', 'pharmacy', 'general_contractor', 'health', 'local_government_office', 'finance', 'transit_station', 'school', 'meal_takeaway', 'furniture_store', 'physiotherapist', 'grocery_or_supermarket', 'bar', 'shoe_store', 'food', 'library', 'museum', 'bakery', 'subway_station', 'store', 'home_goods_store', 'park', 'restaurant', 'clothing_store', 'electronics_store', 'night_club', 'supermarket', 'gas_station', 'tourist_attraction', 'travel_agency', 'stadium', 'shopping_mall', 'convenience_store', 'lodging', 'meal_delivery', 'parking', 'route', 'jewelry_store', 'gym', 'post_office', 'aquarium', 'amusement_park', 'establishment', 'hair_care', 'art_gallery', 'beauty_salon', 'liquor_store', 'zoo', 'book_store'}


In [42]:
df = pd.read_csv("planlah_data_raw.csv")
df =

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)

