In [None]:
from datetime import datetime, timedelta

import branca
import branca.colormap as cm
import folium
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from folium import plugins
from folium.plugins import HeatMap, HeatMapWithTime
from sklearn.cluster import KMeans

%matplotlib inline

# Read Data

In [None]:
# Load the three raw inputs: POI counts, transaction records and geocoding
# results. The address column is renamed on load so that the later merges can
# all join on a column called 'address'.
poi_data = pd.read_csv('POI_price_multi_index_sheet.csv', index_col=0)  # header=[0, 1]
house_data = pd.read_csv('1118transaction.csv').rename(columns={'Address': 'address'})
long_lat_data = pd.read_csv('geoinfo_0_4048.csv').rename(columns={'Unnamed: 0': 'address'})

In [None]:
# Discard the first two rows of the POI sheet.
# NOTE(review): presumably leftover header rows from the multi-index export -- confirm.
# Not idempotent: re-running this cell drops two more rows each time.
poi_data = poi_data.iloc[2:]

In [None]:
# Turn the index (the CSV's first column) back into an ordinary column.
# The old per-distance MultiIndex slicing was removed: the clustering cells
# select the distance bands through the flat '.1' / '.2' suffixed columns.
poi_data = poi_data.reset_index()

In [None]:
# Give the POI table the same join key name ('address') as the other frames.
poi_data = poi_data.rename(columns={'POI': 'address'})

In [None]:
# Attach latitude/longitude to every transaction. The inner join keeps only
# addresses that appear in both the transaction and the geocoding tables.
# Not idempotent: a second run merges the coordinate columns in again.
coordinate_cols = ['address', 'latitude', 'longitude']
house_data = house_data.merge(long_lat_data[coordinate_cols], on='address', how='inner')

In [None]:
house_data

# Process Data

In [None]:
# No-op on a top-to-bottom run: 'Address' was already renamed to 'address'
# right after the transaction CSV was loaded.
house_data.rename(columns = {'Address':'address'}, inplace = True)

In [None]:
house_data.columns

In [None]:
# Join the POI counts with the transaction attributes on 'address'.
# how='left' keeps every POI row; POI rows without a matching transaction get
# NaN in all of the transaction columns listed below.
transaction_cols = [
    'address', 'latitude', 'longitude', 'Transaction Date', 'Total Cost(NTD)',
    'Cost per Sqaure Meter', 'Building Completion Date', 'Transacted Floor',
    'Total Floor', 'Total Area Transacted (Sqaure Meter)', 'Bed Rooms',
    'Living/Dining Rooms', 'Bathrooms', 'Main construction Method',
    'Type of Building',
]
df = poi_data.merge(house_data[transaction_cols], on='address', how='left')

In [None]:
# Drop rows whose floor field contains the full-width comma '，'
# (presumably transactions spanning several floors -- confirm).
# The left join above can leave NaN in 'Transacted Floor'; str.contains would
# then return NaN and make `~mask` fail, so missing floors are treated as
# matches (na=True) and dropped here. They could not be cast to int later
# anyway. regex=False: the comma is a literal, not a pattern.
has_usable_floor = ~df['Transacted Floor'].str.contains('，', regex=False, na=True)
df = df[has_usable_floor]

In [None]:
df['Type of Building'].value_counts()

In [None]:
# Fold the rare building types into a single 'Others' category.
# "Office Building " is matched with its trailing space, exactly as before.
rare_building_types = ["Store (1F)(店鋪)", "Office Building "]
is_rare_type = df['Type of Building'].isin(rare_building_types)
df.loc[is_rare_type, "Type of Building"] = 'Others'

In [None]:
df

In [None]:
# Fold the listed construction methods into a single 'Other' category.
construction_method_other = ['Brick', 'Steel Constructurer', 'Reinforced Concrete and Reinforced Brick', 'Prestressed Concrete']
is_other_method = df["Main construction Method"].isin(construction_method_other)
df.loc[is_other_method, "Main construction Method"] = 'Other'

In [None]:
# Remove rows whose floor description contains any of these fragments; such
# values cannot be cast to int in the next step.
drop_list = ['全','三11','地下1', '三13','地下層']
# The fragments contain no regex metacharacters, so joining them with '|'
# yields one pattern that matches exactly when any single fragment matches.
# This needs one pass over the column instead of one filter per fragment.
# na=True also drops rows with a missing floor rather than letting NaN
# poison the boolean mask.
not_a_plain_floor = df['Transacted Floor'].str.contains('|'.join(drop_list), na=True)
df = df[~not_a_plain_floor]

In [None]:
# After the filters above, the remaining floor values are cast to integers.
floor_as_int = df['Transacted Floor'].astype('int')
df = df.assign(**{'Transacted Floor': floor_as_int})

In [None]:
# Remove rows whose completion date holds the spreadsheet error marker '#VALUE!'.
has_value_error = df['Building Completion Date'].str.contains('#VALUE!')
df = df.loc[~has_value_error]

In [None]:
# Parse the completion date into datetime64.
# Plain column assignment is used on purpose: `df.loc[:, col] = values` writes
# into the existing (object-dtype) column in place on current pandas, which can
# leave the column as object instead of datetime64. That breaks the date
# arithmetic done in the next cells.
df['Building Completion Date'] = pd.to_datetime(df['Building Completion Date'])

In [None]:
# The transaction year is the first four characters of the raw date string.
df['transaction year'] = df['Transaction Date'].str[:4].astype('int')
# Keep transactions up to and including 2022. The explicit .copy() makes the
# filtered frame independent, so the column assignments below do not trigger
# SettingWithCopyWarning.
df = df[df['transaction year'] <= 2022].copy()
df['Transaction Date'] = pd.to_datetime(df['Transaction Date'])
# Time between building completion and the transaction.
df['time delta'] = df['Transaction Date'] - df['Building Completion Date']

In [None]:
# Time between completion and transaction in whole years, stored as float
# (NaT becomes NaN).
# `astype("timedelta64[Y]")` is rejected by pandas 2.x, which only supports the
# 's', 'ms', 'us' and 'ns' resolutions. The conversion is therefore done
# explicitly with the same year length numpy uses for the 'Y' unit
# (365.2425 days).
SECONDS_PER_YEAR = 365.2425 * 24 * 60 * 60
df['time delta year'] = np.floor(df['time delta'].dt.total_seconds() / SECONDS_PER_YEAR)

# Initial Map

In [None]:
# Shared map centre and the colour assigned to each cluster index.
taipei_center = [25.048651234895390, 121.54614479464894]
m = folium.Map(location=taipei_center, zoom_start=11.5)
marker_colors = [
    'red', 'blue', 'gray', 'orange', 'green', 'purple',
    'darkpurple', 'pink', 'cadetblue', 'lightgray', 'black',
]

In [None]:
m

## Raw data based on Total Price

In [None]:
# One circle per transaction, coloured by total price.
df_price = df[['latitude','longitude','Total Cost(NTD)','address']].copy()
start_coord = (25.048651234895390, 121.54614479464894)

# Colour scale for total price, running from vmin to vmax (NTD).
colormap = cm.LinearColormap(colors=['lightblue','blue','darkblue','red'],vmin=5000000,vmax=80000000)

maps = folium.Map(location=start_coord, zoom_start=11.5)

for _, row in df_price.iterrows():
    # The popup is rendered as HTML. Each field goes on its own line and
    # lat/lon are separated; previously all fields ran together and the two
    # coordinates were concatenated into one unreadable number.
    pop_info = (
        f"Address: {row['address']}<br>"
        f"Total Cost (NTD): {row['Total Cost(NTD)']}<br>"
        f"Lat/Lon: {row['latitude']}, {row['longitude']}"
    )
    folium.CircleMarker([row['latitude'], row['longitude']],
                        radius=2, color=colormap(row['Total Cost(NTD)']), popup=pop_info).add_to(maps)
maps.add_child(colormap)
display(maps)

## Raw data based on Cost per Square Meter

In [None]:
# One circle per transaction, coloured by cost per square meter.
df_price_m2 = df[['latitude','longitude','Cost per Sqaure Meter','address']].copy()
start_coord = (25.048651234895363, 121.54614479464894)

colormap = cm.LinearColormap(colors=['lightblue','blue','red'],vmin=100000,vmax=400000)
maps2 = folium.Map(location=start_coord, zoom_start=12)

for _, row in df_price_m2.iterrows():
    # The popup used to label this value "Total Cost (NTD)" although it shows
    # the per-square-meter price. It also ran all fields (and the two
    # coordinates) together with no separators.
    pop_info = (
        f"Address: {row['address']}<br>"
        f"Cost per Square Meter (NTD): {row['Cost per Sqaure Meter']}<br>"
        f"Lat/Lon: {row['latitude']}, {row['longitude']}"
    )
    folium.CircleMarker([row['latitude'], row['longitude']],
                        radius=2, color=colormap(row['Cost per Sqaure Meter']), popup=pop_info).add_to(maps2)
maps2.add_child(colormap)
# NOTE(review): the Stamen tile sets may no longer be available as built-in
# names in recent folium releases -- confirm against the installed version.
folium.TileLayer('stamentoner').add_to(maps2)
display(maps2)

# Clustering

## Only POI Cluster

### POI 500m 

In [None]:
def draw_map(df, marker_colors, label, base_map=None):
    """Plot every row of ``df`` as a circle marker coloured by its cluster label.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns 'latitude', 'longitude', 'address',
        'Total Cost(NTD)', 'Cost per Sqaure Meter' and ``label``.
    marker_colors : sequence of str
        Colour names; a row with cluster label ``i`` is drawn in
        ``marker_colors[i]``.
    label : str
        Name of the column holding the integer cluster label.
    base_map : folium.Map, optional
        Map to draw on. When omitted, a fresh Taipei-centred map is created
        for this call. The previous implementation always drew on the
        notebook-global ``m``, so the markers of every earlier clustering
        piled up underneath the current one.

    Returns
    -------
    folium.Map
        The map that received the markers.
    """
    if base_map is None:
        base_map = folium.Map([25.048651234895390, 121.54614479464894], zoom_start=11.5)

    n_colors = len(marker_colors)
    for _, row in df.iterrows():
        cluster = row[label]
        # Rows whose label is not a valid colour index (out of range, NaN or
        # non-integral) are skipped, as they were before.
        if not (0 <= cluster < n_colors) or cluster != int(cluster):
            continue
        folium.CircleMarker(
            [row['latitude'], row['longitude']],
            radius=3,
            color=marker_colors[int(cluster)],
            tooltip=(row['address'], row['Total Cost(NTD)'],
                     row['Cost per Sqaure Meter'], row[label]),
        ).add_to(base_map)
    return base_map

In [None]:
# K-means on the unsuffixed POI columns (the 500 m set of this section).
poi_500_cols = [
    'police', 'hospital', 'shopping_mall', 'subway_station', 'supermarket',
    'park', 'church', 'library', 'night_club', 'university',
    'primary_school', 'bus_station',
]
df_poi_500 = df[poi_500_cols]
K_cluster = 4
kmeans = KMeans(n_clusters=K_cluster, init="random", n_init=10, max_iter=300, random_state=42)
# fit() returns the estimator itself, so the labels can be read off directly.
df['500_label'] = kmeans.fit(df_poi_500).labels_

In [None]:
draw_map(df, marker_colors, '500_label')

### POI 1000m / 1km

In [None]:
# K-means on the '.1'-suffixed POI columns (the 1 km set of this section).
poi_names = [
    'police', 'hospital', 'shopping_mall', 'subway_station', 'supermarket',
    'park', 'church', 'library', 'night_club', 'university',
    'primary_school', 'bus_station',
]
df_poi_1km = df.loc[:, [f'{name}.1' for name in poi_names]]
K_cluster = 4
kmeans = KMeans(n_clusters=K_cluster, init="random", n_init=10, max_iter=300, random_state=42)
kmeans.fit(df_poi_1km)
df['1km_label'] = kmeans.labels_

In [None]:
draw_map(df, marker_colors, '1km_label')

### POI 3000m / 3km

In [None]:
# K-means on the '.2'-suffixed POI columns (the 3 km set of this section).
poi_3km_cols = [
    name + '.2'
    for name in ('police', 'hospital', 'shopping_mall', 'subway_station',
                 'supermarket', 'park', 'church', 'library', 'night_club',
                 'university', 'primary_school', 'bus_station')
]
df_poi_3km = df.loc[:, poi_3km_cols]
K_cluster = 4
kmeans = KMeans(n_clusters=K_cluster, init="random", n_init=10, max_iter=300, random_state=42)
kmeans.fit(df_poi_3km)
df['3km_label'] = kmeans.labels_

In [None]:
draw_map(df, marker_colors, '3km_label')

## POI + Price (Cost/m2)

### POI 500m + Price 

In [None]:
# K-means on price per square meter together with the unsuffixed POI columns.
# NOTE(review): the features are not standardised; if price is on a much larger
# scale than the POI columns it will dominate the distances -- confirm this is
# intended.
poi_500_price_cols = ['Cost per Sqaure Meter'] + [
    'police', 'hospital', 'shopping_mall', 'subway_station', 'supermarket',
    'park', 'church', 'library', 'night_club', 'university',
    'primary_school', 'bus_station',
]
df_poi_500_price = df[poi_500_price_cols]
K_cluster = 4
kmeans = KMeans(n_clusters=K_cluster, init="random", n_init=10, max_iter=300, random_state=42)
df['500_price_label'] = kmeans.fit(df_poi_500_price).labels_

In [None]:
draw_map(df, marker_colors, '500_price_label')

### POI 1000m / 1km + Price

In [None]:
# K-means on price per square meter together with the '.1'-suffixed POI columns.
# 'police.1' and 'hospital.1' were missing from this feature list although
# every other clustering in the notebook uses all twelve POI types; they are
# included so the three "POI + price" clusterings are comparable.
# NOTE(review): confirm the omission was not deliberate.
# NOTE(review): the features are not standardised; if price is on a much larger
# scale than the POI columns it will dominate the distances.
df_poi_1km_price = df.loc[:, ['Cost per Sqaure Meter','police.1','hospital.1','shopping_mall.1',
                 'subway_station.1','supermarket.1','park.1','church.1','library.1',
                'night_club.1','university.1','primary_school.1','bus_station.1']]
K_cluster = 4
kmeans = KMeans(init="random", n_clusters= K_cluster, n_init=10, max_iter=300, random_state=42)
kmeans.fit(df_poi_1km_price)
df['1km_price_label'] = kmeans.labels_

In [None]:
draw_map(df, marker_colors, '1km_price_label')

### POI 3000m / 3km + Price

In [None]:
# K-means on price per square meter together with the '.2'-suffixed POI columns.
# NOTE(review): the features are not standardised; if price is on a much larger
# scale than the POI columns it will dominate the distances -- confirm this is
# intended.
poi_3km_price_cols = ['Cost per Sqaure Meter'] + [
    name + '.2'
    for name in ('police', 'hospital', 'shopping_mall', 'subway_station',
                 'supermarket', 'park', 'church', 'library', 'night_club',
                 'university', 'primary_school', 'bus_station')
]
df_poi_3km_price = df.loc[:, poi_3km_price_cols]
K_cluster = 4
kmeans = KMeans(n_clusters=K_cluster, init="random", n_init=10, max_iter=300, random_state=42)
kmeans.fit(df_poi_3km_price)
df['3km_price_label'] = kmeans.labels_

In [None]:
draw_map(df, marker_colors, '3km_price_label')

## Only House Properties 

In [None]:
df.columns

In [None]:
# Independent copy of the house-property columns used for clustering
# (a column selection, not a merge).
house_property_cols = [
    'address', 'latitude', 'longitude', 'Total Cost(NTD)',
    'Cost per Sqaure Meter', 'Transacted Floor',
    'Total Floor', 'Total Area Transacted (Sqaure Meter)', 'Bed Rooms',
    'Living/Dining Rooms', 'Bathrooms', 'Main construction Method',
    'Type of Building', 'time delta year',
]
house_df = df.loc[:, house_property_cols].copy()

In [None]:
# One-hot encode the two categorical columns so k-means can consume them.
# Not idempotent: once encoded, the original columns no longer exist.
categorical_cols = ['Main construction Method', 'Type of Building']
house_df = pd.get_dummies(house_df, columns=categorical_cols)

In [None]:
# Feature matrix without identifiers, coordinates or either price column.
excluded_cols = ['address', 'latitude', 'longitude', 'Total Cost(NTD)', 'Cost per Sqaure Meter', 'Total Floor']
house_df_temp = house_df.drop(columns=excluded_cols)

In [None]:
house_df_temp

In [None]:
# Three clusters on the house properties alone (no price information).
K_cluster = 3
kmeans = KMeans(n_clusters=K_cluster, init="random", n_init=10, max_iter=300, random_state=42)
df['house_no_price_label'] = kmeans.fit(house_df_temp).labels_

In [None]:
draw_map(df, marker_colors, 'house_no_price_label')

In [None]:
# Column means of cluster 0. numeric_only=True is required on pandas 2.x,
# where mean() no longer silently skips the text and date columns of df.
df.loc[df['house_no_price_label'] == 0].mean(numeric_only=True)

In [None]:
# Same feature matrix as before, but this time 'Cost per Sqaure Meter' is kept.
excluded_cols = ['address', 'latitude', 'longitude', 'Total Cost(NTD)', 'Total Floor']
house_df_temp = house_df.drop(columns=excluded_cols)

In [None]:
# Three clusters on the house properties including price per square meter.
K_cluster = 3
kmeans = KMeans(n_clusters=K_cluster, init="random", n_init=10, max_iter=300, random_state=42)
df['house_price_label'] = kmeans.fit(house_df_temp).labels_

In [None]:
draw_map(df, marker_colors, 'house_price_label')

# Social Economic Factors

## Geojson Data

In [None]:
# EPSG:3824 is the coordinate reference system (TWD97) of the district boundaries.
dfSubdist = gpd.read_file('subdistrict.json')
district_cols = ['COUNTYNAME', 'TOWNNAME', 'VILLNAME', 'VILLENG','TOWNID', 'TOWNCODE', 'geometry']
in_taipei = dfSubdist.COUNTYNAME == '臺北市'
dfTaipeiDistrict = dfSubdist.loc[in_taipei, district_cols].reset_index()

# English district names, indexed by the CSV's second column and matched
# against TOWNNAME in the join below.
dfName = pd.read_csv('district_name_translation.csv', index_col=1)['TOWNNAME_EN']
dfTaipeiDistrict = dfTaipeiDistrict.join(dfName, on='TOWNNAME')

In [None]:
# Activity data; TOWN_ID is handled as text so it can be sliced and joined.
df_activity = pd.read_csv("2020_november_distrcit_activity.csv")
town_id_text = df_activity['TOWN_ID'].astype(str)
# CPID = '630' followed by characters 5-6 of TOWN_ID.
df_activity = df_activity.assign(TOWN_ID=town_id_text, CPID='630' + town_id_text.str[5:7])
df_activity

In [None]:
def getSubdistrictTuple(col1,col2,col3):
    """Return the three string parts joined by underscores: 'col1_col2_col3'."""
    parts = (col1, col2, col3)
    separator = '_'
    return separator.join(parts)

## Activity Flow Data

In [None]:
# Attach the village polygons to the activity rows (TOWN_ID <-> TOWNCODE) and
# wrap the result as a GeoDataFrame.
activity_with_geometry = pd.merge(
    df_activity, dfTaipeiDistrict,
    how='left', left_on='TOWN_ID', right_on='TOWNCODE',
)
dfTaipeiActivity = gpd.GeoDataFrame(activity_with_geometry)


In [None]:
# 'district' is the county_town_village key the choropleths join on.
dfTaipeiActivity['district'] = dfTaipeiActivity.apply(
    lambda row: getSubdistrictTuple(row.COUNTYNAME, row.TOWNNAME, row.VILLNAME),
    axis=1,
)
# Weekday-over-weekend ratios for day and night, then day-over-night on workdays.
dfTaipeiActivity['daytime_ratio'] = dfTaipeiActivity['DAY_WORK'].div(dfTaipeiActivity['DAY_WEEKEND'])
dfTaipeiActivity['nighttime_ratio'] = dfTaipeiActivity['NIGHT_WORK'].div(dfTaipeiActivity['NIGHT_WEEKEND'])
dfTaipeiActivity['work_daynight_ratio'] = dfTaipeiActivity['DAY_WORK'].div(dfTaipeiActivity['NIGHT_WORK'])
dfTaipeiActivity.columns

In [None]:
colormap_act = cm.LinearColormap(colors=['blue','white','red'],vmin=0,vmax=5)

### Draw District Activity Map

In [None]:
# Base map for the activity layers, centred on Taipei.
m_activity = folium.Map(location=[25.048651234895390, 121.54614479464894], zoom_start=11.5)

# The choropleth geometry only needs the join key and the polygons.
dfTemp = dfTaipeiActivity[['district','geometry']]

# (layer name, ratio column, legend text) of every choropleth on this map.
# The previously copy-pasted Choropleth calls differed only in these values.
activity_layers = [
    ("Day Time Ratio", "daytime_ratio", "Daytime Ratio (Weekday/Weekend)"),
    # Disabled: ("Night Time Ratio", "nighttime_ratio", "Night Ratio (Weekday/Weekend)"),
    ("work_daynight_ratio", "work_daynight_ratio", "work_daynight_ratio"),
]
for layer_name, ratio_col, legend_text in activity_layers:
    folium.Choropleth(
        geo_data=dfTemp,
        name=layer_name,
        data=dfTaipeiActivity,
        columns=["district", ratio_col],
        key_on="feature.properties.district",
        fill_color="PuOr",
        fill_opacity=0.7,
        line_opacity=0.2,
        legend_name=legend_text,
    ).add_to(m_activity)

# Transaction markers coloured by total price.
colormap = cm.LinearColormap(colors=['azure','lightcyan','lightblue','deepskyblue','dodgerblue','blue'],vmin=5000000,vmax=80000000)

for _, row in df_price.iterrows():
    # HTML popup with one field per line. Previously the fields, and the two
    # coordinates, were concatenated without any separator.
    pop_info = (
        f"Address: {row['address']}<br>"
        f"Total Cost (NTD): {row['Total Cost(NTD)']}<br>"
        f"Lat/Lon: {row['latitude']}, {row['longitude']}"
    )
    folium.CircleMarker([row['latitude'], row['longitude']],
                        radius=2, color=colormap(row['Total Cost(NTD)']), opacity=0.4, popup=pop_info).add_to(m_activity)
m_activity.add_child(colormap)

folium.LayerControl().add_to(m_activity)

m_activity

In [None]:
def style_function(feature):
    """Resting style: faint white fill with a hairline black outline."""
    return {'fillColor': '#ffffff',
            'color': '#000000',
            'fillOpacity': 0.1,
            'weight': 0.1}


def highlight_function(feature):
    """Hover style: half-opaque black fill."""
    return {'fillColor': '#000000',
            'color': '#000000',
            'fillOpacity': 0.50,
            'weight': 0.1}


# Tooltip shown while hovering over a village polygon.
hover_tooltip = folium.features.GeoJsonTooltip(
    fields=['TOWN','VILLENG','DAY_WORK','NIGHT_WORK','daytime_ratio','nighttime_ratio'],
    aliases=['District: ','Village','Workday Daytime: ', 'Workday Nighttime: ','Day Ratio:','Night Ratio: '],
    style=("background-color: white; color: #333333; font-family: arial; font-size: 12px; padding: 10px;")
)
# Interaction-only overlay: control=False keeps it out of the layer control.
NIL = folium.features.GeoJson(
    dfTaipeiActivity,
    style_function=style_function,
    control=False,
    highlight_function=highlight_function,
    tooltip=hover_tooltip,
)
m_activity.add_child(NIL)
# Keep the overlay above the other layers.
m_activity.keep_in_front(NIL)
m_activity

## Income Data 

### Read in income data

In [None]:
# Income rows for Taipei, right-joined onto the village polygons so that every
# polygon is kept even when it has no income row.
dfIncome = pd.read_csv('income.csv')
village_keys = ['COUNTYNAME', 'TOWNNAME', 'VILLNAME']
taipei_income = dfIncome[dfIncome['COUNTYNAME'] == '臺北市']
dfTaipeiIncome = gpd.GeoDataFrame(
    taipei_income.merge(dfTaipeiDistrict, how='right', left_on=village_keys, right_on=village_keys)
)

In [None]:
def getSubdistrictTuple(col1,col2,col3):
    """Join the three string parts with underscores.

    This repeats the identical definition from the Geojson Data section.
    """
    key_parts = [col1, col2, col3]
    return '_'.join(key_parts)

# county_town_village key used to match polygons with values in the choropleth.
dfTaipeiIncome['district'] = dfTaipeiIncome.apply(
    lambda row: getSubdistrictTuple(row.COUNTYNAME, row.TOWNNAME, row.VILLNAME),
    axis=1,
)

In [None]:
dfTaipeiIncome

### Draw District Total Income Map

In [None]:
# Fresh map for the income layer (rebinds the notebook-level `m`).
m = folium.Map(location=[25.048651234895390, 121.54614479464894], zoom_start=11.5)

dfTemp = dfTaipeiIncome[['district','geometry']]

# NOTE(review): the layer name and legend say "Total" income, but the column
# being plotted is "AVG" -- confirm which one is intended.
income_layer = folium.Choropleth(
    geo_data=dfTemp,
    name="Total Income",
    data=dfTaipeiIncome,
    columns=["district", "AVG"],
    key_on="feature.properties.district",
    fill_color="Reds",
    fill_opacity=0.8,
    line_opacity=0.2,
    legend_name="Total household income",
)
income_layer.add_to(m)

folium.LayerControl().add_to(m)

m

### Draw District Total Income Map + House Price

In [None]:
# Overlay the transactions on the income map, coloured by total price.
colormap = cm.LinearColormap(colors=['azure','lightcyan','lightblue','deepskyblue','dodgerblue','blue'],vmin=5000000,vmax=80000000)

for _, row in df_price.iterrows():
    # HTML popup with one field per line. Previously the fields, and the two
    # coordinates, were concatenated without any separator.
    pop_info = (
        f"Address: {row['address']}<br>"
        f"Total Cost (NTD): {row['Total Cost(NTD)']}<br>"
        f"Lat/Lon: {row['latitude']}, {row['longitude']}"
    )
    folium.CircleMarker([row['latitude'], row['longitude']],
                        radius=2, color=colormap(row['Total Cost(NTD)']), opacity=0.4, popup=pop_info).add_to(m)
m.add_child(colormap)

In [None]:
# Overlay the transactions on the income map, coloured by cost per square meter.
colormap = cm.LinearColormap(colors=['azure','lightcyan','lightblue','deepskyblue','dodgerblue','blue'],vmin=100000,vmax=400000)
for _, row in df_price_m2.iterrows():
    # The popup used to call this value "Total Cost (NTD)" although it is the
    # per-square-meter price, and it ran all fields together.
    pop_info = (
        f"Address: {row['address']}<br>"
        f"Cost per Square Meter (NTD): {row['Cost per Sqaure Meter']}<br>"
        f"Lat/Lon: {row['latitude']}, {row['longitude']}"
    )
    folium.CircleMarker([row['latitude'], row['longitude']],
                        radius=2, color=colormap(row['Cost per Sqaure Meter']), opacity=0.4, popup=pop_info).add_to(m)
m.add_child(colormap)
display(m)

### Income Map on Subdistricts

In [None]:
# Static choropleth of the TTL_INCOME column, saved to a PNG.
fig, ax = plt.subplots(figsize=(10, 10))
total_income_legend = {
    'label': "Total Income in Taipei Sub-district (in Thousands NTD$)",
    'orientation': "vertical",
}
b = dfTaipeiIncome.plot(column='TTL_INCOME', ax=ax, legend=True,
                        legend_kwds=total_income_legend, cmap='OrRd')
plt.savefig('total_income_village.png')

In [None]:
# Static choropleth of the AVG column, saved to a PNG.
fig, ax = plt.subplots(1, 1,figsize=(10,10))
b = dfTaipeiIncome.plot(column='AVG',
              ax=ax,
              legend=True,
              legend_kwds={
                # Label typo fixed ("Incoame" -> "Income").
                'label': "Average Income in Taipei Sub-district (in Thousands NTD$)",
                'orientation': "vertical"
              },
              cmap='OrRd')
plt.savefig('average_income.png')

In [None]:
# Static choropleth of the MEDIAN column, saved to a PNG.
fig, ax = plt.subplots(1, 1,figsize=(10,10))
b = dfTaipeiIncome.plot(column='MEDIAN',
              ax=ax,
              legend=True,
              legend_kwds={
                # The legend was copied from the AVG plot and still said
                # "Average"; this figure shows the MEDIAN column.
                'label': "Median Income in Taipei Sub-district (in Thousands NTD$)",
                'orientation': "vertical"
              },
              cmap='OrRd')
plt.savefig('median_income.png')

In [None]:
dfTaipeiIncome

## Education

### Data Cleaning and Filter

In [None]:
dfEdu = pd.read_csv('education.csv')
# Skip the first data row.
# NOTE(review): presumably a description row under the header -- confirm.
# NOTE(review): reindex() without arguments leaves the index unchanged;
# reset_index(drop=True) may have been intended.
dfEdu = dfEdu.iloc[1:].reindex()

# Keep the rows whose site_id mentions Taipei City, then split site_id by
# position: first three characters -> county, remainder -> town.
mentions_taipei = dfEdu.site_id.str.contains('臺北市')
dfTaipeiEdu = dfEdu[mentions_taipei].copy()
dfTaipeiEdu['COUNTYNAME'] = dfTaipeiEdu['site_id'].str[:3]
dfTaipeiEdu['TOWNNAME'] = dfTaipeiEdu['site_id'].str[3:]
dfTaipeiEdu = dfTaipeiEdu.join(dfName, on='TOWNNAME')

In [None]:
dfTaipeiEdu

In [None]:
# Cast the male/female graduate counts of these nine education levels to int.
int_levels = (
    "doctor", "master", "university", "juniorcollege_2ys", "senior",
    "seniorvocational", "junior", "juniorvocational", "primary",
)
int_columns = [f"edu_{level}_graduated_{sex}" for level in int_levels for sex in ("m", "f")]
dfTaipeiEdu = dfTaipeiEdu.astype(dict.fromkeys(int_columns, int))

In [None]:
# Total graduates (male + female) per degree group.
# The long raw-column `edu_list` that used to open this cell was never read
# before being overwritten in the next cell, so it has been removed. The seven
# hand-written sums are now driven by this table:
#   new column -> education levels whose *_graduated_m / *_graduated_f
#   columns are added together.
# NOTE(review): the 5-year junior college columns are not part of 'edu_cc'
# (unchanged from before) -- confirm that this is intended.
degree_groups = {
    'edu_doctor': ['doctor'],
    'edu_master': ['master'],
    'edu_undergrad': ['university'],
    'edu_cc': ['juniorcollege_2ys'],
    'edu_senior': ['senior', 'seniorvocational'],
    'edu_junior': ['junior', 'juniorvocational'],
    'edu_primary': ['primary'],
}
for total_col, levels in degree_groups.items():
    source_cols = [f'edu_{level}_graduated_{sex}' for level in levels for sex in ('m', 'f')]
    # Built-in sum keeps the plain `+` semantics of the original expressions
    # (a missing value in any part makes the total missing).
    dfTaipeiEdu[total_col] = sum(dfTaipeiEdu[col] for col in source_cols)

In [None]:
edu_list = ['edu_age_15up_total','edu_doctor','edu_master',
           'edu_undergrad','edu_cc','edu_senior','edu_junior','edu_primary']

In [None]:
dfTaipeiEdu

In [None]:
# Keep only the location keys and the aggregated degree totals.
location_cols = ['COUNTYNAME', 'TOWNNAME', 'village']
degree_cols = ['edu_age_15up_total', 'edu_doctor', 'edu_master', 'edu_undergrad',
               'edu_cc', 'edu_senior', 'edu_junior', 'edu_primary']
dfTempEdu = dfTaipeiEdu[location_cols + degree_cols]

In [None]:
dfTempEdu

In [None]:
# Right-join the Taipei education rows onto the village polygons (every polygon
# is kept), wrap the result as a GeoDataFrame and add the choropleth join key.
taipei_edu = dfTempEdu[dfTempEdu['COUNTYNAME'] == '臺北市']
dfTaipeiEducation = gpd.GeoDataFrame(
    taipei_edu.merge(
        dfTaipeiDistrict,
        how='right',
        left_on=['COUNTYNAME', 'TOWNNAME', 'village'],
        right_on=['COUNTYNAME', 'TOWNNAME', 'VILLNAME'],
    )
)
dfTaipeiEducation['district'] = dfTaipeiEducation.apply(
    lambda row: getSubdistrictTuple(row.COUNTYNAME, row.TOWNNAME, row.VILLNAME),
    axis=1,
)

In [None]:
dfTaipeiEducation

### Education Map and House Prices of Doctorate and Masters

In [None]:
# Fresh map for the doctorate / master layers (rebinds the notebook-level `m`).
m = folium.Map(location=[25.048651234895390, 121.54614479464894], zoom_start=11.5)

dfTemp = dfTaipeiEducation[['district','geometry']]

# (layer name, value column, legend text) of each choropleth. The two
# copy-pasted calls differed only in these values.
degree_layers = [
    ("Doctor", "edu_doctor", "Number of Doctorate Degree"),
    ("Masters", "edu_master", "Number of Master Degree"),
]
for layer_name, degree_col, legend_text in degree_layers:
    folium.Choropleth(
        geo_data=dfTemp,
        name=layer_name,
        data=dfTaipeiEducation,
        columns=["district", degree_col],
        key_on="feature.properties.district",
        fill_color="Reds",
        fill_opacity=0.8,
        line_opacity=0.2,
        legend_name=legend_text,
    ).add_to(m)

# NOTE(review): the Stamen tile sets may no longer be available as built-in
# names in recent folium releases -- confirm against the installed version.
folium.TileLayer('Stamen Terrain').add_to(m)
folium.TileLayer('Stamen Toner').add_to(m)

# Transaction markers coloured by cost per square meter.
colormap = cm.LinearColormap(colors=['azure','lightcyan','lightblue','deepskyblue','dodgerblue','blue'],vmin=100000,vmax=400000)
for _, row in df_price_m2.iterrows():
    # The popup used to call this value "Total Cost (NTD)" although it is the
    # per-square-meter price, and it ran all fields together.
    pop_info = (
        f"Address: {row['address']}<br>"
        f"Cost per Square Meter (NTD): {row['Cost per Sqaure Meter']}<br>"
        f"Lat/Lon: {row['latitude']}, {row['longitude']}"
    )
    folium.CircleMarker([row['latitude'], row['longitude']],
                        radius=2, color=colormap(row['Cost per Sqaure Meter']), opacity=0.4, popup=pop_info).add_to(m)
m.add_child(colormap)
folium.LayerControl().add_to(m)
m

### Education Map and House Prices of Undergraduate

In [None]:
# Fresh map for the undergraduate layer (rebinds the notebook-level `m`).
m = folium.Map(location=[25.048651234895390, 121.54614479464894], zoom_start=11.5)

dfTemp = dfTaipeiEducation[['district','geometry']]

folium.Choropleth(
    geo_data=dfTemp,
    name="Undergrad",
    data=dfTaipeiEducation,
    columns=["district", "edu_undergrad"],
    key_on="feature.properties.district",
    fill_color="Reds",
    fill_opacity=0.8,
    line_opacity=0.2,
    legend_name="Number of Undergrad Degree",
).add_to(m)

# The per-square-meter colour scale is defined here explicitly. The cell used
# to rely on whatever `colormap` the previously executed cell had left behind,
# which is a total-price scale in several other cells.
colormap = cm.LinearColormap(colors=['azure','lightcyan','lightblue','deepskyblue','dodgerblue','blue'],vmin=100000,vmax=400000)
for _, row in df_price_m2.iterrows():
    # The popup used to call this value "Total Cost (NTD)" although it is the
    # per-square-meter price, and it ran all fields together.
    pop_info = (
        f"Address: {row['address']}<br>"
        f"Cost per Square Meter (NTD): {row['Cost per Sqaure Meter']}<br>"
        f"Lat/Lon: {row['latitude']}, {row['longitude']}"
    )
    folium.CircleMarker([row['latitude'], row['longitude']],
                        radius=2, color=colormap(row['Cost per Sqaure Meter']), opacity=0.4, popup=pop_info).add_to(m)
m.add_child(colormap)

# The disabled "CC/Associates" (edu_cc) and "Senior High" (edu_senior)
# choropleths were removed; they would use the same arguments as the layer
# above with a different value column.

# NOTE(review): the Stamen tile sets may no longer be available as built-in
# names in recent folium releases -- confirm against the installed version.
folium.TileLayer('Stamen Terrain').add_to(m)
folium.TileLayer('Stamen Toner').add_to(m)

folium.LayerControl().add_to(m)

m

## Calculating Average House Price in District

In [None]:
# Mean total price per urban district.
# The price column is selected before aggregating: on pandas 2.x,
# groupby().mean() over the whole frame raises on the text columns instead of
# silently dropping them. The stray bare `house_data` expression, which was
# not the last line and therefore displayed nothing, was removed.
house_average = house_data.groupby('Urban District')[['Total Cost(NTD)']].mean()

In [None]:
# Express the district averages in thousands (the column keeps its name).
# Not idempotent: every re-run divides by 1000 again.
average_in_thousands = house_average['Total Cost(NTD)'] / 1000
house_average = house_average.assign(**{'Total Cost(NTD)': average_in_thousands})

In [None]:
house_average.sort_values(by='Total Cost(NTD)', ascending=False)

In [None]:
# Display the district averages. The doubled name `house_averagehouse_average`
# was a paste slip and raised NameError.
house_average