In [None]:
import folium
from folium import plugins
from folium.plugins import HeatMap, HeatMapWithTime

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from datetime import datetime, timedelta
from sklearn.cluster import KMeans

import branca
import branca.colormap as cm
%matplotlib inline

# Read Data

In [None]:
# Load the three raw inputs. The transaction and geocoding tables both get an
# 'address' column so they can be joined on it later.
poi_data = pd.read_csv('POI_price_multi_index_sheet.csv', index_col=0)  # header=[0, 1]
house_data = pd.read_csv('1118transaction.csv').rename(columns={'Address': 'address'})
long_lat_data = pd.read_csv('geoinfo_0_4048.csv').rename(columns={'Unnamed: 0': 'address'})

In [None]:
poi_data = poi_data.iloc[2: , :]

In [None]:
# get dataframe of specific distance
poi_data = poi_data.reset_index()
# address = poi_data['address']
# poi_500 = poi_data.loc[:,(slice(None),'500')]
# poi_500['address'] = address
# poi_1000 = poi_data.loc[:,(slice(None),'1000')]
# poi_1000['address'] = address
# poi_3000 = poi_data.loc[:,(slice(None),'3000')]
# poi_3000['address'] = address

In [None]:
poi_data.rename(columns = {'POI':'address'}, inplace = True)

In [None]:
# Merge dataframe of two data csv files
house_data = pd.merge(house_data,long_lat_data[['address','latitude','longitude']],
              on='address', how='inner')

In [None]:
house_data

# Process Data

In [None]:
house_data.rename(columns = {'Address':'address'}, inplace = True)

In [None]:
house_data.columns

In [None]:
# Merge dataframe of two data csv files
df = pd.merge(poi_data,house_data
              [['address','latitude','longitude','Transaction Date','Total Cost(NTD)',
                'Cost per Sqaure Meter','Building Completion Date','Transacted Floor',
                'Total Floor','Total Area Transacted (Sqaure Meter)','Bed Rooms',
                'Living/Dining Rooms', 'Bathrooms','Main construction Method',
                'Type of Building']],
              on='address', how='left')

In [None]:
df = df[~df['Transacted Floor'].str.contains('，')]

In [None]:
df['Type of Building'].value_counts()

In [None]:
df.loc[df['Type of Building'] == "Store (1F)(店鋪)", "Type of Building"] = 'Others'
df.loc[df['Type of Building'] == "Office Building ", "Type of Building"] = 'Others'

In [None]:
df

In [None]:
# Construction methods that are collapsed into the single category 'Other'.
construction_method_other = [
    'Brick',
    'Steel Constructurer',
    'Reinforced Concrete and Reinforced Brick',
    'Prestressed Concrete',
]
df.loc[
    df["Main construction Method"].isin(construction_method_other),
    "Main construction Method",
] = 'Other'

In [None]:
drop_list = ['全','三11','地下1', '三13','地下層']
for i in drop_list:
    df = df[~df['Transacted Floor'].str.contains(i)]

In [None]:
df['Transacted Floor']=df['Transacted Floor'].astype('int')

In [None]:
# clean #value error rows 
df = df[~df['Building Completion Date'].str.contains('#VALUE!')]

In [None]:
df.loc[:,'Building Completion Date'] = pd.to_datetime(df['Building Completion Date'])

In [None]:
df['transaction year'] = df['Transaction Date'].apply(lambda x:x[:4])
df['transaction year'] = df['transaction year'].astype('int')
df = df[df['transaction year']<=2022]
df['Transaction Date'] = pd.to_datetime(df['Transaction Date'])
df['time delta'] = df['Transaction Date'] - df['Building Completion Date']

In [None]:
df['time delta year'] = df['time delta'].astype("timedelta64[Y]")

# Initial Map

In [None]:
taipei_center = [25.048651234895363, 121.54614479464894]
m = folium.Map(taipei_center, zoom_start=12)
marker_colors = ['red','blue','gray','orange','green','purple','darkpurple','pink','cadetblue','lightgray','black']

In [None]:
m

## Raw data based on Total Price

In [None]:
# One circle per transaction, coloured by total price.
df_price = df[['latitude','longitude','Total Cost(NTD)','address']].copy()
start_coord = (25.048651234895363, 121.54614479464894)

# Colour scale for prices between 5,000,000 and 80,000,000.
colormap = cm.LinearColormap(colors=['lightblue','blue','darkblue','red'],vmin=5000000,vmax=80000000)

maps = folium.Map(location=start_coord, zoom_start=12)

for _, row in df_price.iterrows():
    # Each field goes on its own line and latitude/longitude are comma
    # separated; previously everything was concatenated with no separators.
    pop_info = (
        f"Address: {row['address']}<br>"
        f"Total Cost (NTD): {row['Total Cost(NTD)']}<br>"
        f"Lat/Lon: {row['latitude']}, {row['longitude']}"
    )
    folium.CircleMarker(
        [row['latitude'], row['longitude']],
        radius=2,
        color=colormap(row['Total Cost(NTD)']),
        popup=pop_info,
    ).add_to(maps)
maps.add_child(colormap)
display(maps)

## Raw data based on Cost per square meter

In [None]:
# One circle per transaction, coloured by price per square metre.
df_price_m2 = df[['latitude','longitude','Cost per Sqaure Meter','address']].copy()
start_coord = (25.048651234895363, 121.54614479464894)

# Colour scale for unit prices between 100,000 and 400,000.
colormap = cm.LinearColormap(colors=['lightblue','blue','red'],vmin=100000,vmax=400000)
maps2 = folium.Map(location=start_coord, zoom_start=12)

for _, row in df_price_m2.iterrows():
    # The popup used to label this value "Total Cost (NTD)"; it is the
    # per-square-metre price. Fields are also separated now.
    pop_info = (
        f"Address: {row['address']}<br>"
        f"Cost per Square Meter: {row['Cost per Sqaure Meter']}<br>"
        f"Lat/Lon: {row['latitude']}, {row['longitude']}"
    )
    folium.CircleMarker(
        [row['latitude'], row['longitude']],
        radius=2,
        color=colormap(row['Cost per Sqaure Meter']),
        popup=pop_info,
    ).add_to(maps2)
maps2.add_child(colormap)
display(maps2)

# Clustering

## Only POI Cluster

### POI 500m 

In [None]:
def draw_map(df, marker_colors, label, target_map=None):
    """Add one circle marker per row of ``df``, coloured by its cluster id.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'latitude', 'longitude', 'address',
        'Total Cost(NTD)', 'Cost per Sqaure Meter' and ``label``.
    marker_colors : sequence of str
        Colour per cluster id; cluster ``i`` is drawn with ``marker_colors[i]``.
    label : str
        Name of the column holding the cluster id of each row.
    target_map : folium.Map, optional
        Map that receives the markers. When omitted, the notebook-level map
        ``m`` is used, which means repeated calls keep stacking markers on
        that same map. Pass a fresh map to get an independent picture.

    Returns
    -------
    The map the markers were added to.

    Rows whose cluster id has no entry in ``marker_colors`` are skipped.
    """
    if target_map is None:
        target_map = m

    # Direct lookup replaces scanning every colour index for every row.
    color_by_cluster = dict(enumerate(marker_colors))

    for _, row in df.iterrows():
        color = color_by_cluster.get(row[label])
        if color is None:
            continue
        folium.CircleMarker(
            [row['latitude'], row['longitude']],
            radius=3,
            color=color,
            tooltip=(row['address'], row['Total Cost(NTD)'], row['Cost per Sqaure Meter']),
        ).add_to(target_map)
    return target_map

In [None]:
df_poi_500 = df.loc[:, ['police','hospital','shopping_mall',
                 'subway_station','supermarket','park','church','library',
                'night_club','university','primary_school','bus_station']]
K_cluster = 4
kmeans = KMeans(init="random", n_clusters= K_cluster, n_init=10, max_iter=300, random_state=42)
kmeans.fit(df_poi_500)
df['500_label'] = kmeans.labels_

In [None]:
draw_map(df, marker_colors, '500_label')

### POI 1000m / 1km

In [None]:
df_poi_1km = df.loc[:, ['police.1','hospital.1','shopping_mall.1',
                 'subway_station.1','supermarket.1','park.1','church.1','library.1',
                'night_club.1','university.1','primary_school.1','bus_station.1']]
K_cluster = 4
kmeans = KMeans(init="random", n_clusters= K_cluster, n_init=10, max_iter=300, random_state=42)
kmeans.fit(df_poi_1km)
df['1km_label'] = kmeans.labels_

In [None]:
draw_map(df, marker_colors, '1km_label')

### POI 3000m / 3km

In [None]:
df_poi_3km = df.loc[:, ['police.2','hospital.2','shopping_mall.2',
                 'subway_station.2','supermarket.2','park.2','church.2','library.2',
                'night_club.2','university.2','primary_school.2','bus_station.2']]
K_cluster = 4
kmeans = KMeans(init="random", n_clusters= K_cluster, n_init=10, max_iter=300, random_state=42)
kmeans.fit(df_poi_3km)
df['3km_label'] = kmeans.labels_

In [None]:
draw_map(df, marker_colors, '3km_label')

## POI + Price (Cost/m2)

### POI 500m + Price 

In [None]:
df_poi_500_price = df.loc[:, ['Cost per Sqaure Meter','police','hospital','shopping_mall',
                 'subway_station','supermarket','park','church','library',
                'night_club','university','primary_school','bus_station']]
K_cluster = 4
kmeans = KMeans(init="random", n_clusters= K_cluster, n_init=10, max_iter=300, random_state=42)
kmeans.fit(df_poi_500_price)
df['500_price_label'] = kmeans.labels_

In [None]:
draw_map(df, marker_colors, '500_price_label')

### POI 1000m / 1km + Price

In [None]:
df_poi_1km_price = df.loc[:, ['Cost per Sqaure Meter','shopping_mall.1',
                 'subway_station.1','supermarket.1','park.1','church.1','library.1',
                'night_club.1','university.1','primary_school.1','bus_station.1']]
K_cluster = 4
kmeans = KMeans(init="random", n_clusters= K_cluster, n_init=10, max_iter=300, random_state=42)
kmeans.fit(df_poi_1km_price)
df['1km_price_label'] = kmeans.labels_

In [None]:
draw_map(df, marker_colors, '1km_price_label')

### POI 3000m / 3km + Price

In [None]:
df_poi_3km_price = df.loc[:, ['Cost per Sqaure Meter','police.2','hospital.2','shopping_mall.2',
                 'subway_station.2','supermarket.2','park.2','church.2','library.2',
                'night_club.2','university.2','primary_school.2','bus_station.2']]
K_cluster = 4
kmeans = KMeans(init="random", n_clusters= K_cluster, n_init=10, max_iter=300, random_state=42)
kmeans.fit(df_poi_3km_price)
df['3km_price_label'] = kmeans.labels_

In [None]:
draw_map(df, marker_colors, '3km_price_label')

## Only House Properties 

In [None]:
df.columns

In [None]:
# Merge dataframe of two data csv files
house_df = df[['address','latitude', 'longitude', 'Total Cost(NTD)',
       'Cost per Sqaure Meter', 'Transacted Floor',
       'Total Floor', 'Total Area Transacted (Sqaure Meter)', 'Bed Rooms',
       'Living/Dining Rooms', 'Bathrooms', 'Main construction Method',
       'Type of Building','time delta year']].copy()

In [None]:
house_df = pd.get_dummies(house_df, columns = ['Main construction Method','Type of Building'])

In [None]:
house_df_temp = house_df.copy()
house_df_temp = house_df_temp.drop(['address','latitude', 'longitude', 'Total Cost(NTD)','Cost per Sqaure Meter','Total Floor'], axis=1)

In [None]:
house_df_temp

In [None]:
K_cluster = 3
kmeans = KMeans(init="random", n_clusters= K_cluster, n_init=10, max_iter=300, random_state=42)
kmeans.fit(house_df_temp)
df['house_no_price_label'] = kmeans.labels_

In [None]:
draw_map(df, marker_colors, 'house_no_price_label')

In [None]:
house_df_temp = house_df.copy()
house_df_temp = house_df_temp.drop(['address','latitude', 'longitude', 'Total Cost(NTD)','Total Floor'], axis=1)

In [None]:
K_cluster = 3
kmeans = KMeans(init="random", n_clusters= K_cluster, n_init=10, max_iter=300, random_state=42)
kmeans.fit(house_df_temp)
df['house_price_label'] = kmeans.labels_

In [None]:
draw_map(df, marker_colors, 'house_price_label')

In [None]:
import json
import requests
taipei_district = pd.read_csv("2020_november_distrcit_activity.csv")
url = 'https://od.moi.gov.tw/api/v1/rest/datastore/301000100G-000887-003.json'
with open('G97_A_CADIST_P.geojson', 'r') as f:
    taipei_geo = json.load(f)    #for state level data

In [None]:
taipei_district

In [None]:
taipei_district['TOWN_ID'] = taipei_district['TOWN_ID'].astype(str)
taipei_district['CPID'] = '630' + taipei_district['TOWN_ID'].str[5:7]

In [None]:
import geopandas as gpd
pdf_Taipei = gpd.read_file('G97_A_CADIST_P.geojson')
pdf_Taipei

In [None]:
# Reference example: US unemployment choropleth built from the folium sample data.
url = (
    "https://raw.githubusercontent.com/python-visualization/folium/master/examples/data"
)
state_geo = f"{url}/us-states.json"
state_unemployment = f"{url}/US_Unemployment_Oct2012.csv"
state_data = pd.read_csv(state_unemployment)

# Use a dedicated name instead of `m`: draw_map() writes to the notebook-level
# `m`, so rebinding it here would make any re-run clustering cell draw onto
# this US map.
us_map = folium.Map(location=[48, -102], zoom_start=3)

folium.Choropleth(
    geo_data=state_geo,
    name="choropleth",
    data=state_data,
    columns=["State", "Unemployment"],
    key_on="feature.id",
    fill_color="YlGn",
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name="Unemployment Rate (%)",
).add_to(us_map)

folium.LayerControl().add_to(us_map)
us_map

In [None]:
# Choropleth of the NIGHT_WORK column per district, joined on CPID.
taipei_city = folium.Map(location=[25.048651234895363, 121.54614479464894], zoom_start=12)
folium.Choropleth(
    geo_data = pdf_Taipei[['CPID','geometry']],                  #json
    name ='choropleth',
    data = taipei_district,
    columns = ['CPID', 'NIGHT_WORK'], #columns to work on
    key_on ='feature.properties.CPID',
    fill_color ='YlGnBu',     # yellow -> green -> blue palette
    # A fill opacity of 0 made the coloured districts invisible, leaving only
    # outlines; use the same value as the US example.
    fill_opacity = 0.7,
    line_opacity = 0.5,
    # The legend used to read "Unemployment scale", copied from the US
    # example; the plotted column is NIGHT_WORK.
    legend_name = "NIGHT_WORK"
).add_to(taipei_city)
folium.LayerControl().add_to(taipei_city)
taipei_city

In [None]:
layer = folium.GeoJson(
    data=(open('G97_A_CADIST_P.geojson', "r").read()),
    name='geogeo',
).add_to(taipei_city) # 1. keep a reference to GeoJSON layer

taipei_city.fit_bounds(layer.get_bounds())

In [None]:
taipei_city

In [None]:
# Same choropleth as before, reading the boundaries straight from the file path.
taipei_city = folium.Map(location=[25.048651234895363, 121.54614479464894], zoom_start=12)

folium.Choropleth(
    geo_data = 'G97_A_CADIST_P.geojson',                  #json
    name ='choropleth',
    data = taipei_district,
    columns = ['CPID', 'NIGHT_WORK'], #columns to work on
    key_on ='feature.properties.CPID',
    fill_color ='YlGnBu',     # yellow -> green -> blue palette
    # Fill and line opacity were both 0, so the layer rendered nothing at all;
    # use the values from the US example.
    fill_opacity = 0.7,
    line_opacity = 0.2,
    # The legend used to read "Unemployment scale", copied from the US
    # example; the plotted column is NIGHT_WORK.
    legend_name = "NIGHT_WORK"
).add_to(taipei_city)
taipei_city

In [None]:
taipei_city = folium.Map(location=[25.048651234895363, 121.54614479464894], zoom_start=12)
taipei_city

In [None]:
taipei_city.geo_json(geo_path='G97_A_CADIST_P.geojson')

In [None]:
taipei_district