In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import folium

In [2]:
# Load the amenity records into a DataFrame; lines=True makes pandas read the
# gzipped file as one JSON object per line.
data = pd.read_json("osm/amenities-vancouver.json.gz", lines=True)

In [3]:
# Preview the first rows of the loaded frame to check the columns.
data.head()

Unnamed: 0,lat,lon,timestamp,amenity,name,tags
0,49.260812,-123.125736,2020-03-20T18:22:12.000-07:00,cafe,Starbucks,"{'brand:wikidata': 'Q37158', 'official_name': ..."
1,49.260953,-123.125704,2019-08-02T18:11:20.000-07:00,fast_food,Salad Loop,{'opening_hours': 'Mo-Fr 07:00-17:00; Sa 10:00...
2,49.373423,-123.291894,2016-10-10T02:14:29.000-07:00,toilets,,{}
3,49.249848,-122.959708,2011-09-06T03:52:10.000-07:00,bbq,,{}
4,49.370898,-123.280448,2015-05-03T00:42:25.000-07:00,place_of_worship,St. Monica's Anglican Church,"{'addr:housenumber': '6404', 'addr:street': 'W..."


In [4]:
# Amenity types of interest, grouped into four categories. These lists are
# matched against the values of the 'amenity' column in the cells below.
food_lst = [
    'cafe',
    'restaurant',
    'fast_food',
]
trans_lst = [
    'parking',
    'bus_station',
]
enter_lst = [
    'pub',
    'cinema',
]
shop_lst = [
    'atm',
    'bank',
    'marketplace',
]

In [5]:
# Tally how many rows carry each amenity type listed in the four category lists.
# value_counts() counts the whole column in one vectorised pass, which avoids
# building a Series per row with iterrows(). Types that never occur keep a
# count of 0, and the dict preserves the order food -> trans -> enter -> shop.
amenity_counts = data['amenity'].value_counts()
count_dict = {
    k: int(amenity_counts.get(k, 0))  # plain int, as the original counter produced
    for k in food_lst + trans_lst + enter_lst + shop_lst
}

In [6]:
# Create the base map with a fixed centre coordinate and initial zoom level;
# the cells below add their circles to this object.
data_map = folium.Map(location=[49.121383503296705, -122.67246901153845], zoom_start=10)

In [7]:
# Draw a red circle for every row whose amenity is listed in food_lst.
# The frame is filtered once with isin() and only the two coordinate columns
# are iterated, instead of materialising every row through iterrows() and
# testing membership in Python. Circles are added in the same row order.
food_rows = data[data['amenity'].isin(food_lst)]
for lat, lon in zip(food_rows['lat'], food_rows['lon']):
    folium.Circle(
        radius=10,
        location=[lat, lon],
        color='red',
    ).add_to(data_map)

In [8]:
# Draw a yellow circle for every row whose amenity is listed in trans_lst.
# The frame is filtered once with isin() and only the two coordinate columns
# are iterated, instead of materialising every row through iterrows() and
# testing membership in Python. Circles are added in the same row order.
trans_rows = data[data['amenity'].isin(trans_lst)]
for lat, lon in zip(trans_rows['lat'], trans_rows['lon']):
    folium.Circle(
        radius=10,
        location=[lat, lon],
        color='yellow',
    ).add_to(data_map)

In [9]:
# Draw a blue circle for every row whose amenity is listed in enter_lst.
# The frame is filtered once with isin() and only the two coordinate columns
# are iterated, instead of materialising every row through iterrows() and
# testing membership in Python. Circles are added in the same row order.
enter_rows = data[data['amenity'].isin(enter_lst)]
for lat, lon in zip(enter_rows['lat'], enter_rows['lon']):
    folium.Circle(
        radius=10,
        location=[lat, lon],
        color='blue',
    ).add_to(data_map)

In [10]:
# Draw a green circle for every row whose amenity is listed in shop_lst.
# The frame is filtered once with isin() and only the two coordinate columns
# are iterated, instead of materialising every row through iterrows() and
# testing membership in Python. Circles are added in the same row order.
shop_rows = data[data['amenity'].isin(shop_lst)]
for lat, lon in zip(shop_rows['lat'], shop_rows['lon']):
    folium.Circle(
        radius=10,
        location=[lat, lon],
        color='green',
    ).add_to(data_map)

In [11]:
# Show the map with the circles added so far as this cell's output.
data_map

In [12]:
from sklearn.cluster import KMeans

# Cluster the selected amenities by coordinate and record, for every cluster,
# the mean (lat, lon) of the points assigned to it.
cluster_num = 6
clean_data = data[data['amenity'].isin(food_lst + trans_lst + enter_lst + shop_lst)]
# X has one row per selected amenity: column 0 is lat, column 1 is lon.
X = np.stack([clean_data['lat'], clean_data['lon']], axis=1)
# KMeans starts from random centres; a fixed random_state makes the clusters
# (and therefore the markers drawn from mean_points) reproducible across runs.
model = KMeans(n_clusters=cluster_num, random_state=0)
y = model.fit_predict(X)
# y[j] is the cluster label of row j of X, so a boolean mask selects each
# cluster's members directly. This replaces a nested iterrows() scan that
# re-walked the whole frame once per cluster. Values are converted to plain
# floats so str(mean_points[i]) stays a simple "(lat, lon)" string.
mean_points = [
    tuple(float(v) for v in X[y == i].mean(axis=0))
    for i in range(cluster_num)
]
            

In [13]:
# Rebind data_map to a new, empty map with the same centre and zoom; the
# circles added to the previous map object are not carried over.
data_map = folium.Map(location=[49.121383503296705, -122.67246901153845], zoom_start=10)

In [14]:
# Place one blue marker per cluster at its entry in mean_points; the popup
# text is that same coordinate tuple rendered as a string.
for idx in range(cluster_num):
    centre = mean_points[idx]
    marker = folium.Marker(
        location=centre,
        popup=str(centre),
        icon=folium.Icon(color='blue'),
    )
    marker.add_to(data_map)

In [15]:
# Show the map with the cluster markers as this cell's output.
data_map

In [16]:
# Load the listings table; last_review is parsed as datetimes so the .dt
# accessor can be used on it when filtering.
air_data = pd.read_csv('listings.csv', parse_dates=['last_review'])

In [17]:
# Keep only listings that satisfy all three conditions:
#   - the last review's year is later than 2020,
#   - the minimum stay is shorter than 3 nights,
#   - reviews_per_month is greater than 1.
air_data_clean = air_data[
    (air_data['last_review'].dt.year > 2020)
    & (air_data['minimum_nights'] < 3)
    & (air_data['reviews_per_month'] > 1)
]

In [18]:
# Draw a green circle on the current map for every listing in air_data_clean.
# Only the two coordinate columns are needed, so they are zipped directly
# instead of building a full row Series per listing with iterrows().
for lat, lon in zip(air_data_clean['latitude'], air_data_clean['longitude']):
    folium.Circle(
        radius=10,
        location=[lat, lon],
        color='green',
    ).add_to(data_map)

In [19]:
# Show the map, now holding the cluster markers and the listing circles.
data_map

In [20]:
from sklearn.cluster import KMeans

# Re-run the amenity clustering (this cell repeats the computation of cell
# In [12]) and record the mean (lat, lon) of every cluster.
cluster_num = 6
clean_data = data[data['amenity'].isin(food_lst + trans_lst + enter_lst + shop_lst)]
# X has one row per selected amenity: column 0 is lat, column 1 is lon.
X = np.stack([clean_data['lat'], clean_data['lon']], axis=1)
# KMeans starts from random centres; without a fixed random_state this repeat
# run could produce different clusters from the earlier one.
model = KMeans(n_clusters=cluster_num, random_state=0)
y = model.fit_predict(X)
# y[j] is the cluster label of row j of X, so a boolean mask selects each
# cluster's members directly. This replaces a nested iterrows() scan that
# re-walked the whole frame once per cluster. Values are converted to plain
# floats so str(mean_points[i]) stays a simple "(lat, lon)" string.
mean_points = [
    tuple(float(v) for v in X[y == i].mean(axis=0))
    for i in range(cluster_num)
]

In [21]:
# Rebind data_map to a new, empty map with the same centre and zoom; the
# markers and circles on the previous map object are not carried over.
data_map = folium.Map(location=[49.121383503296705, -122.67246901153845], zoom_start=10)

In [22]:
# Place one blue marker per cluster at its entry in mean_points; the popup
# text is that same coordinate tuple rendered as a string.
for idx in range(cluster_num):
    centre = mean_points[idx]
    marker = folium.Marker(
        location=centre,
        popup=str(centre),
        icon=folium.Icon(color='blue'),
    )
    marker.add_to(data_map)

In [23]:
# Same three conditions as air_data_clean (last review year later than 2020,
# minimum stay shorter than 3 nights, more than 1 review per month), plus a
# price greater than 200.
new_air_data_clean = air_data.loc[
    (air_data['last_review'].dt.year > 2020)
    & (air_data['minimum_nights'] < 3)
    & (air_data['reviews_per_month'] > 1)
    & (air_data['price'] > 200)
]

In [24]:
# Draw a green circle on the current map for every listing in new_air_data_clean.
# Only the two coordinate columns are needed, so they are zipped directly
# instead of building a full row Series per listing with iterrows().
for lat, lon in zip(new_air_data_clean['latitude'], new_air_data_clean['longitude']):
    folium.Circle(
        radius=10,
        location=[lat, lon],
        color='green',
    ).add_to(data_map)

In [25]:
# Show the final map: cluster markers plus the price-filtered listing circles.
data_map