In [97]:
import os

import branca
import folium
import geohash
import geopandas as gpd
import numpy as np
import pandas as pd
from shapely.geometry import Point, Polygon
from sqlalchemy import create_engine


In [98]:
# The connection URL (including the password) is read from the environment so
# that credentials are never stored in the notebook, e.g.
#   GIS_DB_URL="postgresql://<user>:<password>@localhost/blackhead"
gis_db_url = os.environ.get("GIS_DB_URL")
if not gis_db_url:
    raise RuntimeError(
        "Set the GIS_DB_URL environment variable to the PostgreSQL connection URL."
    )
gis_db = create_engine(gis_db_url)


In [99]:
# Rentals created on or after 2022-09-02, restricted to the three listing kinds
# named in the query and to a coarse lon 110-130 / lat 20-30 bounding box.
# The query has no placeholders, so it is a plain string rather than an f-string.
gis_query = """
    select f1.id, f1.title, f1.kind, f1.price, f1.area, f1.lon, f1.lat
    from rental.rentals_detail f1
    where f1.create_at >= '2022-09-02'
    and f1.lon > 110
    and f1.lon < 130
    and f1.lat > 20
    and f1.lat < 30
    and f1.kind in ('獨立套房', '分租套房', '雅房')
"""
df_rentals = pd.read_sql(gis_query, con=gis_db)
df_rentals.head()


Unnamed: 0,id,title,kind,price,area,lon,lat
0,13125063,西藏路上超大套房1人舒適2人剛好,獨立套房,12000,8.0,121.493489,25.028762
1,13123293,飯店管理&拎包入住（琢豐）,獨立套房,118800,31.6,121.533704,25.053658
2,13126952,飯店客房月租，24小時櫃台飯店式管理,分租套房,16000,8.0,121.505484,25.045717
3,13167799,Alife士林全新電梯陽台套房,獨立套房,25800,14.6,121.533142,25.097358
4,13169922,近林口街福德街松山路口1樓免爬高福德國小,雅房,10000,7.0,121.579754,25.038764


In [100]:
# Distinct listing kinds present in the loaded rentals.
df_rentals['kind'].unique()

array(['獨立套房', '分租套房', '雅房'], dtype=object)

In [101]:
# Number of rental rows loaded.
df_rentals.shape[0]


8475

In [102]:
# Initial map center passed as `location=` to folium.Map, as [latitude, longitude].
show_center = [25.0462014984023, 121.51754184910867]


In [103]:
# Village boundary polygons, limited to Taipei City and New Taipei City.
# Built as one chain so the cell is idempotent (no in-place rename).
gdf_boundary = (
    gpd.read_file('./data/202208/VILLAGE_MOI_1110613.shp', encoding='utf-8')
    .rename(columns={'VILLCODE': 'villcode', 'COUNTYNAME': 'countyname',
                     'TOWNNAME': 'townname', 'VILLNAME': 'villname'})
    [['villcode', 'countyname', 'townname', 'villname', 'geometry']]
    .query('countyname in ["臺北市", "新北市"]')
    .reset_index(drop=True)
    # Declare the source CRS explicitly (overriding whatever the file carries)
    # instead of assigning to `.crs`, then reproject to WGS84 lon/lat.
    .set_crs('epsg:3824', allow_override=True)
    .to_crs('epsg:4326')
)

# Build the point geometry for every rental in one vectorized call rather than
# constructing a Point per row through iterrows().
geom = gpd.points_from_xy(df_rentals['lon'], df_rentals['lat'])
df_rentals = gpd.GeoDataFrame(df_rentals, crs='epsg:4326', geometry=geom)


In [24]:
# df_rentals['price_per_unit'] = df_rentals['price'] / df_rentals['area']


In [104]:
# Assign every rental to the village polygon that contains it, then compute the
# median price and the number of listings per village.
# `predicate=` replaces the deprecated `op=` keyword of gpd.sjoin
# (requires geopandas >= 0.10).
village_keys = ['villcode', 'countyname', 'townname', 'villname']
df_result = (
    gpd.sjoin(df_rentals, gdf_boundary, predicate='within')
    .groupby(village_keys)
    .agg(price_median=('price', 'median'), cnt=('id', 'count'))
    .reset_index()
)
# Attach the polygons again; the join keys are spelled out so the merge does not
# silently depend on whichever column names the two frames happen to share.
df_result = gdf_boundary.merge(df_result, on=village_keys).reset_index(drop=True)


In [29]:
# df_result = gpd.sjoin(df_rentals, gdf_boundary, op='within').groupby(
#     ['villcode', 'countyname', 'townname', 'villname'])['price'].median().reset_index(name='price_median')
# df_result = pd.merge(gdf_boundary, df_result).reset_index(drop=True)


In [105]:
# Keep an unfiltered snapshot so the count filter can be re-applied from scratch.
df_result_bk = df_result.copy(deep=True)

In [106]:
# Keep only villages with at least 5 listings.
df_result = df_result_bk.loc[df_result_bk['cnt'] >= 5].reset_index(drop=True)


In [114]:
# Ten villages with the highest median price (among those with >= 5 listings).
df_result.query('cnt>=5').sort_values('price_median', ascending=False).iloc[:10]


Unnamed: 0,villcode,countyname,townname,villname,geometry,price_median,cnt,price_median_limit,id,color
399,63000020019,臺北市,信義區,富台里,"POLYGON ((121.57199 25.04283, 121.57199 25.042...",33950.0,18,30000.0,399,#800026ff
245,63000020001,臺北市,信義區,西村里,"POLYGON ((121.56841 25.03904, 121.56836 25.036...",33000.0,6,30000.0,245,#800026ff
253,63000030002,臺北市,大安區,仁慈里,"POLYGON ((121.54436 25.03793, 121.54517 25.037...",30000.0,16,30000.0,253,#800026ff
429,63000020009,臺北市,信義區,安康里,"POLYGON ((121.57663 25.03908, 121.57663 25.039...",29000.0,59,29000.0,429,#930026ff
560,63000040037,臺北市,中山區,埤頭里,"POLYGON ((121.54389 25.04654, 121.54387 25.045...",28000.0,12,28000.0,560,#a60026ff
557,63000090005,臺北市,南港區,東新里,"POLYGON ((121.60534 25.05786, 121.60534 25.057...",28000.0,8,28000.0,557,#a60026ff
280,63000020004,臺北市,信義區,中興里,"POLYGON ((121.55958 25.03301, 121.55956 25.032...",27750.0,18,27750.0,280,#aa0026ff
71,63000070008,臺北市,萬華區,福音里,"POLYGON ((121.50318 25.03987, 121.50322 25.039...",25000.0,15,25000.0,71,#d20e20ff
354,63000040010,臺北市,中山區,恆安里,"POLYGON ((121.52426 25.06490, 121.52539 25.064...",25000.0,29,25000.0,354,#d20e20ff
521,63000060012,臺北市,大同區,南芳里,"POLYGON ((121.50665 25.06363, 121.50929 25.063...",24999.5,10,24999.5,521,#d20e20ff


In [108]:
# Clamp the median price to [low, 30000] so outliers do not stretch the color scale.
low = 4000
df_result['price_median_limit'] = df_result['price_median'].clip(lower=low, upper=30000)


In [109]:
# Build the id -> value lookup and the color scale used to style the villages.
variable = 'price_median_limit'
df_result['id'] = df_result.index
color = df_result.set_index('id')[variable]
# Upper bound of the scale is the largest (clamped) value present.
high = color.max()
colorscale = branca.colormap.linear.YlOrRd_09.scale(low, high)
df_result['color'] = color.map(lambda value: colorscale(value))


In [110]:
def style_function(feature):
    """Return the style dict for one GeoJSON feature of a price layer.

    The feature's ``id`` is converted to ``int`` and looked up in the
    module-level ``color`` mapping; the value found is turned into a fill
    color by the module-level ``colorscale``. Both names are resolved when
    the function is called, so it follows whatever they are bound to at
    render time.

    Features without an entry in ``color`` are filled with a neutral grey.
    """
    # Hex colors need the leading '#'; a bare 'a2a2a2' is not a valid CSS color.
    grey = '#a2a2a2'
    value = color.get(int(feature['id']), None)
    return {
        'fillOpacity': 0.5,
        'weight': 1,
        'color': grey,
        'fillColor': grey if value is None else colorscale(value),
    }


In [111]:
# Village-level choropleth of the median rental price.
# Named switches replace the former `if 1 == 0` / `if 1 == 1` toggles.
SHOW_POINTS = False    # one circle per rental listing
SHOW_VILLAGES = True   # village polygons colored by median price

mm = folium.Map(location=show_center, zoom_start=13)

if SHOW_POINTS:
    fg1 = folium.FeatureGroup(name='points', show=True)
    for point_lat, point_lon in zip(df_rentals['lat'], df_rentals['lon']):
        folium.Circle(location=[point_lat, point_lon],
                      color='#000000',  # circle color
                      radius=35,  # circle radius
                      fill=True,  # fill the interior
                      fill_opacity=0.7  # fill opacity
                      ).add_to(fg1)
    mm.add_child(fg1)

if SHOW_VILLAGES:
    # Columns shown in the tooltip; the same names are used as labels.
    village_fields = ['countyname', 'townname', 'villname', 'price_median', 'cnt']
    fg2 = folium.FeatureGroup(name='villages', show=True)
    folium.GeoJson(
        df_result[['geometry'] + village_fields],
        name='price_median',
        style_function=style_function,
        # Thicker outline and stronger fill while hovering.
        highlight_function=lambda x: {
            'weight': 3,
            'fillOpacity': 0.75
        },
        smooth_factor=2.0,
        tooltip=folium.features.GeoJsonTooltip(fields=village_fields,
                                               aliases=list(village_fields),
                                               labels=True,
                                               sticky=True,
                                               toLocaleString=True)
    ).add_to(fg2)
    mm.add_child(fg2)

# Color legend and layer toggle.
colorscale.add_to(mm)
folium.LayerControl().add_to(mm)


<folium.map.LayerControl at 0x2450c915d88>

In [112]:
# Display the village map as the cell output.
mm

In [113]:
# Create the output directory first so saving also works on a fresh checkout.
os.makedirs('./html', exist_ok=True)
mm.save('./html/rentals_by_village.html')

In [64]:
# Encode each rental's point as a precision-6 geohash (latitude first, then
# longitude). Iterating the geometry column directly avoids building a full
# row Series per listing, as iterrows() does.
df_rentals['geohash'] = [
    geohash.encode(point.y, point.x, precision=6)
    for point in df_rentals['geometry']
]


In [65]:
# Median price and listing count per geohash cell.
df_goehash = (
    df_rentals
    .groupby('geohash')
    .agg(
        price_median=pd.NamedAgg(column='price', aggfunc='median'),
        cnt=pd.NamedAgg(column='id', aggfunc='count'),
    )
    .reset_index()
)


In [66]:
# Turn every geohash code into the rectangle of its bounding box
# (corner order: SW, SE, NE, NW) and attach it as the geometry column.
geohashs = []
for code in df_goehash['geohash']:
    bounds = geohash.bbox(code)
    west, south = bounds['w'], bounds['s']
    east, north = bounds['e'], bounds['n']
    corners = [(west, south), (east, south), (east, north), (west, north)]
    geohashs.append(Polygon(corners))
geom = gpd.GeoSeries(geohashs)
df_goehash = gpd.GeoDataFrame(df_goehash, crs='epsg:4326', geometry=geom)


In [67]:
# Keep an unfiltered snapshot so the count filter can be re-applied from scratch.
df_goehash_bk = df_goehash.copy(deep=True)

In [68]:
# Keep only geohash cells with at least 5 listings.
df_goehash = df_goehash_bk.loc[df_goehash_bk['cnt'] >= 5].reset_index(drop=True)

In [69]:
# Clamp the median price to [low, 30000] so outliers do not stretch the color scale.
low = 4000
df_goehash['price_median_limit'] = df_goehash['price_median'].clip(lower=low, upper=30000)


In [71]:
# Build the id -> value lookup and the color scale for the geohash price layer.
variable = 'price_median_limit'
df_goehash['id'] = df_goehash.index
color = df_goehash.set_index('id')[variable]
# Upper bound of the scale is the largest (clamped) value present.
high = color.max()
colorscale = branca.colormap.linear.YlOrRd_09.scale(low, high)
df_goehash['color'] = color.map(lambda value: colorscale(value))


In [89]:
# Separate copy for the listing-count layer, with the count clamped to [low, 100].
low = 5
df_goehash_cnt = df_goehash.copy()
df_goehash_cnt['cnt_limit'] = df_goehash_cnt['cnt'].clip(lower=low, upper=100)


In [91]:
# Build the id -> value lookup and the color scale for the listing-count layer.
variable = 'cnt_limit'
df_goehash_cnt['id'] = df_goehash_cnt.index
color2 = df_goehash_cnt.set_index('id')[variable]
# Upper bound of the scale is the largest (clamped) count present.
high = color2.max()
colorscale2 = branca.colormap.linear.YlGnBu_09.scale(low, high)
df_goehash_cnt['color'] = color2.map(lambda value: colorscale2(value))

In [92]:
def style_function2(feature):
    """Return the style dict for one GeoJSON feature of the count layer.

    The feature's ``id`` is converted to ``int`` and looked up in the
    module-level ``color2`` mapping; the value found is turned into a fill
    color by the module-level ``colorscale2``. Both names are resolved when
    the function is called.

    Features without an entry in ``color2`` are filled with a neutral grey.
    """
    # Hex colors need the leading '#'; a bare 'a2a2a2' is not a valid CSS color.
    grey = '#a2a2a2'
    value = color2.get(int(feature['id']), None)
    return {
        'fillOpacity': 0.5,
        'weight': 1,
        'color': grey,
        'fillColor': grey if value is None else colorscale2(value),
    }

In [93]:
# variable = 'cnt_limit'
# df_goehash['id'] = df_goehash.index
# color = df_goehash.set_index('id')[variable]
# high = df_goehash[variable].max()
# colorscale = branca.colormap.linear.YlOrRd_09.scale(low, high)
# df_goehash['color'] = color.apply(lambda x: colorscale(x))


In [94]:
# Geohash-level map with two layers: median price and listing count.
# Named switches replace the former `if 1 == 0` / `if 1 == 1` toggles.
SHOW_POINTS = False   # one circle per rental listing
SHOW_PRICE = True     # geohash cells colored by median price
SHOW_COUNT = True     # geohash cells colored by number of listings


def _add_geohash_layer(target_map, group_name, layer_name, frame, style):
    """Add one geohash GeoJson layer, wrapped in its own FeatureGroup, to the map.

    `frame` must provide the 'geometry', 'geohash', 'price_median' and 'cnt'
    columns; `style` is the style function applied to each feature.
    Returns the FeatureGroup that was added.
    """
    fields = ['geohash', 'price_median', 'cnt']
    group = folium.FeatureGroup(name=group_name, show=True)
    folium.GeoJson(
        frame[['geometry'] + fields],
        name=layer_name,
        style_function=style,
        # Thicker outline and stronger fill while hovering.
        highlight_function=lambda x: {
            'weight': 3,
            'fillOpacity': 0.75
        },
        smooth_factor=2.0,
        tooltip=folium.features.GeoJsonTooltip(fields=fields,
                                               aliases=list(fields),
                                               labels=True,
                                               sticky=True,
                                               toLocaleString=True)
    ).add_to(group)
    target_map.add_child(group)
    return group


mm = folium.Map(location=show_center, zoom_start=13)

if SHOW_POINTS:
    fg1 = folium.FeatureGroup(name='points', show=True)
    for point_lat, point_lon in zip(df_rentals['lat'], df_rentals['lon']):
        folium.Circle(location=[point_lat, point_lon],
                      color='#000000',  # circle color
                      radius=35,  # circle radius
                      fill=True,  # fill the interior
                      fill_opacity=0.7  # fill opacity
                      ).add_to(fg1)
    mm.add_child(fg1)

if SHOW_PRICE:
    fg2 = _add_geohash_layer(mm, 'geohash', 'price_median', df_goehash, style_function)

if SHOW_COUNT:
    fg3 = _add_geohash_layer(mm, 'cnt', 'cnt', df_goehash_cnt, style_function2)

# Only the price color scale is added as a legend; the count layer has none.
colorscale.add_to(mm)
folium.LayerControl().add_to(mm)


<folium.map.LayerControl at 0x2450c193d88>

In [95]:
# Display the geohash map as the cell output.
mm

In [96]:
# Create the output directory first so saving also works on a fresh checkout.
os.makedirs('./html', exist_ok=True)
mm.save('./html/rentals_price_cnt.html')