In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd
from geopandas.tools import sjoin
import time
import pytz
import shapely
from shapely.geometry import Point, Polygon
from shapely import LineString
from shapely import wkb
from shapely.wkt import loads
from datetime import datetime, timedelta
from pyproj import Proj
import pyproj
from tqdm import tqdm
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.patches import Polygon
import xml.etree.ElementTree as ET
import dask_geopandas as dgpd
import dask.dataframe as dd
from dask.diagnostics import ProgressBar
from function import *

## Load Dataset

In [2]:
# Config
# Project root folder. The default is the original author's local directory;
# set the SINKHOLE_ROOT_PATH environment variable to run the notebook on
# another machine without editing this cell.
import os

ROOT_PATH = os.environ.get(
    "SINKHOLE_ROOT_PATH",
    r"D:\Chu's Document!\02 Project\06 道路塌陷防治專案(天坑)",
)

In [3]:
# Hexagon grid: opened through dask-geopandas, then computed into a regular
# GeoDataFrame in one chained call.
hex_raw_path = ROOT_PATH + r"\03 Data\Model_building\Hex_TP_Road_5m_intersects.gpkg"
hex_raw = dgpd.read_file(hex_raw_path, chunksize=1_000_000).compute()

# Time-series table
time_data_raw_path = ROOT_PATH + r"\03 Data\Model_building\time_series_data.csv"
time_data_raw = pd.read_csv(time_data_raw_path)

# Loading time noted by the author: 4min 50s

In [4]:
# Work on copies so the raw frames loaded earlier stay untouched.
# NOTE(review): the name `hex` shadows Python's built-in hex() for the rest of
# the notebook.
hex = hex_raw.copy()
time_data = time_data_raw.copy()

## Create DataFrame

### Parameter Setting

In [9]:
# Config
# District name compared against the hexagon "TNAME" column by the filter cell
# below; an empty string (or None) keeps every district.
filter_district = ""

### Filter Hexagon by District

In [10]:
# No district requested -> keep the full grid; otherwise keep only the
# hexagons whose TNAME equals the requested district.
if filter_district is None or filter_district == "":
    hex_filter = hex
else:
    hex_filter = hex[hex["TNAME"] == filter_district]

hex_filter.shape

(1826272, 4)

### Spatial data preprocessing
This section processes the datasets that contain only spatial information:
- soil_liquefaction
- road_properties
- pipe_count
- flood_area

#### Soil_liquefaction

In [11]:
# Soil liquefaction zones
soil_liquid_raw_path = ROOT_PATH + r"\01 QGIS\00 Processed Data\Geopackage\土壤液化潛勢_dissolved_class.gpkg"
soil_liquid_raw = gpd.read_file(soil_liquid_raw_path)
# Working copy; the layer as read from disk stays available in soil_liquid_raw.
soil_liquid = soil_liquid_raw.copy()

In [12]:
# Reproject to EPSG:3826, the CRS the other layers in this notebook are
# projected to before spatial joins.
soil_liquid = soil_liquid.to_crs(epsg=3826)

In [13]:
# process_soil_liquid is not defined in this notebook (presumably it comes from
# the star-imported `function` module — verify); its join logic is not visible
# here. The recorded output shows fewer rows and one extra column afterwards.
hex_filter = process_soil_liquid(hex_filter, soil_liquid)
hex_filter.shape #14m50s

(1397685, 5)

In [None]:
# Checkpoint the hexagons to a GeoPackage.
# NOTE(review): the road-properties section later writes to this same path, so
# this file is overwritten when the notebook runs top to bottom.
path = ROOT_PATH + r"\03 Data\Model_building\Hex_for model_TP_01.gpkg"
hex_filter.to_file(path, driver="GPKG") #4m52s

#### Road Properties

In [14]:
# Roads wider than 8 m
above_8m_road_raw_path = ROOT_PATH + r"\03 Data\Raw\臺北市寬度超過8公尺道路GIS檔\Road.shp"
above_8m_road_raw = gpd.read_file(above_8m_road_raw_path)
above_8m_road = above_8m_road_raw.copy()

# Roads narrower than 8 m
under_8m_road_raw_path = ROOT_PATH + r"\01 QGIS\00 Processed Data\Geopackage\TP_Road_less_than_8m.gpkg"
under_8m_road_raw = gpd.read_file(under_8m_road_raw_path)
under_8m_road = under_8m_road_raw.copy()

# Quick sanity check of the (rows, columns) of both layers
print(f'above_8m_road：{above_8m_road.shape}, under_8m_road：{under_8m_road.shape}')

above_8m_road：(13419, 25), under_8m_road：(27233, 24)


In [15]:
# Attach road attributes to the hexagons. road_properties_process is not
# defined in this notebook, so its internals are not visible here.
hex_filter = road_properties_process(above_8m_road, under_8m_road, hex_filter)
# Keep a single row per hexagon id. Reassigning (instead of inplace=True)
# avoids mutating an object that other names may still reference and keeps the
# cell safe to re-run.
hex_filter = hex_filter.drop_duplicates(subset=["id"])
print(hex_filter.shape)

(1397685, 8)


In [16]:
# Checkpoint the hexagons (now including the road attributes) to a GeoPackage;
# the next section reloads the grid from this file.
path = ROOT_PATH + r"\03 Data\Model_building\Hex_for model_TP_01.gpkg"
hex_filter.to_file(path, driver="GPKG")

#### Count pipes in hex

In [17]:
# Reload the checkpointed hexagons through dask-geopandas and compute them
# into a regular GeoDataFrame in one step.
path = ROOT_PATH + r"\03 Data\Model_building\Hex_for model_TP_01.gpkg"
hex_filter = dgpd.read_file(path, chunksize=1_000_000).compute()

In [18]:
# Load the pipe layers one at a time and add one count column per layer.
# calculate_case_count is not defined in this notebook; each call receives the
# result of the previous one, so the count columns accumulate on the same grid.
PIPE_DIR = ROOT_PATH + r"\03 Data\Processed\管線資料讀取"


def _read_pipe_layer(file_name):
    """Read one pipe GeoPackage via dask-geopandas and return the computed frame."""
    return dgpd.read_file(PIPE_DIR + "\\" + file_name, chunksize=1000000).compute()


# Sewage pipes
sp_pipe_raw = _read_pipe_layer("sp_pipe(汙水管).gpkg")
hex_5m_rd_sample_sp = calculate_case_count(hex_filter, sp_pipe_raw, count_column='sp_count')

# Stormwater pipes
rp_pipe_raw = _read_pipe_layer("rp_pipe(雨水管).gpkg")
hex_5m_rd_sample_rp = calculate_case_count(hex_5m_rd_sample_sp, rp_pipe_raw, count_column='rp_count')

# Stormwater side ditches (read directly with geopandas)
rp_ditch_raw = gpd.read_file(PIPE_DIR + "\\" + "rp_ditch(雨水側溝).gpkg")
hex_5m_rd_sample_rd = calculate_case_count(hex_5m_rd_sample_rp, rp_ditch_raw, count_column='rd_count')

# Tap-water pipes, stored in four files
wp_pipe_raw_01 = _read_pipe_layer("cw_pipe(自來水管)_01.gpkg")
hex_5m_rd_sample_wp = calculate_case_count(hex_5m_rd_sample_rd, wp_pipe_raw_01, count_column='wp_01_count')

wp_pipe_raw_02 = _read_pipe_layer("cw_pipe(自來水管)_02.gpkg")
hex_5m_rd_sample_wp = calculate_case_count(hex_5m_rd_sample_wp, wp_pipe_raw_02, count_column='wp_02_count')

wp_pipe_raw_03 = _read_pipe_layer("cw_pipe(自來水管)_03.gpkg")
hex_5m_rd_sample_wp = calculate_case_count(hex_5m_rd_sample_wp, wp_pipe_raw_03, count_column='wp_03_count')

wp_pipe_raw_04 = _read_pipe_layer("cw_pipe(自來水管)_04.gpkg")
hex_5m_rd_sample_wp = calculate_case_count(hex_5m_rd_sample_wp, wp_pipe_raw_04, count_column='wp_04_count')

# Connecting pipes
cn_pipe_raw = _read_pipe_layer("cn_pipe(連接管).gpkg")
hex_5m_rd_sample_cn = calculate_case_count(hex_5m_rd_sample_wp, cn_pipe_raw, count_column='cn_count')

In [19]:
# summarize_pipe_counts is not defined in this notebook. Judging by the
# recorded output, the result carries a single wp_count column and a
# pipe_count column — confirm against the helper's source.
hex_5m_rd_sample_cn = summarize_pipe_counts(hex_5m_rd_sample_cn)
hex_5m_rd_sample_cn.head()

Unnamed: 0,id,centroid,TNAME,soil_liquid_class,width,road_name,road_id,geometry,sp_count,rp_count,rd_count,cn_count,wp_count,pipe_count
0,1594752,POINT (296728.0221868334 2778844.4942),北投區,1,7.67212,知行路,知行路-05,"POLYGON ((296725.135 2778844.494, 296726.579 2...",0,0,0,0,0,0
1,1600243,POINT (296732.3523138523 2778846.9941999996),北投區,1,7.67212,知行路,知行路-05,"POLYGON ((296729.466 2778846.994, 296730.909 2...",0,0,0,0,0,0
2,1600244,POINT (296732.3523138523 2778841.9942000005),北投區,1,7.67212,知行路,知行路-05,"POLYGON ((296729.466 2778841.994, 296730.909 2...",0,0,0,0,0,0
3,1600245,POINT (296732.3523138523 2778836.9941999996),北投區,1,7.67212,知行路,知行路-05,"POLYGON ((296729.466 2778836.994, 296730.909 2...",0,0,0,0,0,0
4,1605736,POINT (296736.68244087126 2778844.4942),北投區,1,7.67212,知行路,知行路-05,"POLYGON ((296733.796 2778844.494, 296735.239 2...",0,0,0,0,1,1


In [20]:
# From here on `hex` is the working grid: a copy of the hexagons with the pipe
# count columns. NOTE(review): the name shadows Python's built-in hex().
hex = hex_5m_rd_sample_cn.copy()

#### Pavement information

##### Park Area

In [21]:
# Urban-planning land-use zoning
raw_urban_plan_path = ROOT_PATH + r"\03 Data\Raw\都市計畫使用分區\臺北市使用分區細計圖(對外)-面.shp"
urban_plan_raw = gpd.read_file(raw_urban_plan_path, encoding='big5')
# Declare the coordinates as EPSG:3826 without transforming them, as the
# original `.crs = ...` assignment did. set_crs(allow_override=True) is the
# supported way to do this when the file already carries CRS metadata; the
# former to_crs(epsg=3826) right after it was a no-op and is dropped.
urban_plan = urban_plan_raw.copy().set_crs(epsg=3826, allow_override=True)

In [22]:
# Discard zoning polygons that have no land-use class
urban_plan = urban_plan[urban_plan['使用分區'].notna()]

# Remaining null counts per column
print(urban_plan.isnull().sum())

編號              0
分區簡稱            0
使用分區            0
分區說明        15396
原屬分區        15192
geometry        0
dtype: int64


In [23]:
# Keep the zoning polygons whose land-use class contains "公園" (park)
mask = urban_plan['使用分區'].str.contains('公園')
urban_park = urban_plan.loc[mask]
print(urban_park.shape, urban_plan.shape)

(849, 6) (15509, 6)


In [24]:
def calculate_pavement_area(gdf_1, gdf_2, column='area'):
    """
    Tag each polygon of ``gdf_1`` with the area of a ``gdf_2`` feature it intersects.

    Both inputs are reprojected to EPSG:3826, the area of every ``gdf_2`` geometry is
    computed, and the result is left-joined onto ``gdf_1`` with the ``intersects``
    predicate.

    Note: the stored value is the area of the whole matched ``gdf_2`` feature, NOT
    the area of its overlap with the ``gdf_1`` polygon. If the overlap area is what
    is wanted, an overlay/intersection step is required instead of a spatial join.

    Parameters:
    gdf_1 : GeoDataFrame
        The GeoDataFrame containing the polygons. Must have an 'id' column, which is
        used to keep one row per polygon.
    gdf_2 : GeoDataFrame
        The GeoDataFrame whose feature areas are attached to the polygons.
    column : str, default 'area'
        The name of the new column that stores the attached area.
    Returns:
    GeoDataFrame
        The ``gdf_1`` rows (one per 'id', index reset) with the extra ``column``.
        Polygons without any match get 0; when several features match, only the
        first joined row is kept.
    """
    # Set crs
    gdf_1 = gdf_1.to_crs(epsg=3826)
    gdf_2 = gdf_2.to_crs(epsg=3826)

    # Carry only the geometry and its area into the join. Leaving the other
    # gdf_2 attributes out means a name shared with gdf_1 (e.g. 'id') can no
    # longer trigger the _left/_right suffixing that breaks the later lookups.
    geom_col = gdf_2.geometry.name
    gdf_2_area = gdf_2[[geom_col]].copy()
    gdf_2_area[column] = gdf_2_area[geom_col].area

    # Spatial join
    temp = gpd.sjoin(gdf_1, gdf_2_area, how='left', predicate='intersects')
    temp = temp.drop(columns=['index_right'])
    temp[column] = temp[column].fillna(0)
    # A polygon that intersects several features appears once per match;
    # keep the first row for each id.
    temp = temp.drop_duplicates(subset=["id"])
    temp = temp.reset_index(drop=True)

    return temp

In [25]:
# Add the park_area column from the park zoning polygons. The value is the
# area of the matched park feature, as computed by calculate_pavement_area.
hex = calculate_pavement_area(hex, urban_park, column='park_area')
hex.head()

Unnamed: 0,id,centroid,TNAME,soil_liquid_class,width,road_name,road_id,geometry,sp_count,rp_count,rd_count,cn_count,wp_count,pipe_count,park_area
0,1594752,POINT (296728.0221868334 2778844.4942),北投區,1,7.67212,知行路,知行路-05,"POLYGON ((296725.135 2778844.494, 296726.579 2...",0,0,0,0,0,0,0.0
1,1600243,POINT (296732.3523138523 2778846.9941999996),北投區,1,7.67212,知行路,知行路-05,"POLYGON ((296729.466 2778846.994, 296730.909 2...",0,0,0,0,0,0,0.0
2,1600244,POINT (296732.3523138523 2778841.9942000005),北投區,1,7.67212,知行路,知行路-05,"POLYGON ((296729.466 2778841.994, 296730.909 2...",0,0,0,0,0,0,0.0
3,1600245,POINT (296732.3523138523 2778836.9941999996),北投區,1,7.67212,知行路,知行路-05,"POLYGON ((296729.466 2778836.994, 296730.909 2...",0,0,0,0,0,0,0.0
4,1605736,POINT (296736.68244087126 2778844.4942),北投區,1,7.67212,知行路,知行路-05,"POLYGON ((296733.796 2778844.494, 296735.239 2...",0,0,0,0,1,1,0.0


##### Greenery: school greening, riverside highland, birds conservation, agriculture area

In [26]:
# Each green-space layer is read as-is (kept in *_raw) and a reprojected
# EPSG:3826 copy is used for the joins.

# Green space: school greening
raw_school_greening_path = ROOT_PATH + r"\03 Data\Raw\鋪面\公園綠地_校園綠化.geojson"
school_greening_raw = gpd.read_file(raw_school_greening_path)
school_greening = school_greening_raw.copy().to_crs(epsg=3826)

# Green space: riverside highland
raw_riverside_highland_path = ROOT_PATH + r"\03 Data\Raw\鋪面\公園綠地_河濱高灘地.geojson"
riverside_highland_raw = gpd.read_file(raw_riverside_highland_path)
riverside_highland = riverside_highland_raw.copy().to_crs(epsg=3826)

# Green space: birds conservation
raw_birds_conservation_path = ROOT_PATH + r"\03 Data\Raw\鋪面\都市綠地_野雁保護區.geojson"
birds_conservation_raw = gpd.read_file(raw_birds_conservation_path)
birds_conservation = birds_conservation_raw.copy().to_crs(epsg=3826)

# Green space: agriculture area
raw_agriculture_area_path = ROOT_PATH + r"\03 Data\Raw\鋪面\都市綠地_都市計畫保護區、農業區.geojson"
agriculture_area_raw = gpd.read_file(raw_agriculture_area_path)
agriculture_area = agriculture_area_raw.copy().to_crs(epsg=3826)

In [27]:
# Create green space area columns.
# Every layer gets a zero-distance buffer first (presumably to repair invalid
# geometries — confirm), then its feature areas are joined onto the hexagons.
green_layers = [
    (school_greening, 'school_greening_area'),
    (riverside_highland, 'riverside_highland_area'),
    (birds_conservation, 'birds_conservation_area'),
    (agriculture_area, 'agriculture_area'),
]
for green_layer, area_column in green_layers:
    green_layer['geometry'] = green_layer['geometry'].buffer(0)
    hex = calculate_pavement_area(hex, green_layer, column=area_column)

hex.head()

Unnamed: 0,id,centroid,TNAME,soil_liquid_class,width,road_name,road_id,geometry,sp_count,rp_count,rd_count,cn_count,wp_count,pipe_count,park_area,school_greening_area,riverside_highland_area,birds_conservation_area,agriculture_area
0,1594752,POINT (296728.0221868334 2778844.4942),北投區,1,7.67212,知行路,知行路-05,"POLYGON ((296725.135 2778844.494, 296726.579 2...",0,0,0,0,0,0,0.0,0.0,63142.812738,0.0,0.0
1,1600243,POINT (296732.3523138523 2778846.9941999996),北投區,1,7.67212,知行路,知行路-05,"POLYGON ((296729.466 2778846.994, 296730.909 2...",0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0
2,1600244,POINT (296732.3523138523 2778841.9942000005),北投區,1,7.67212,知行路,知行路-05,"POLYGON ((296729.466 2778841.994, 296730.909 2...",0,0,0,0,0,0,0.0,0.0,63142.812738,0.0,0.0
3,1600245,POINT (296732.3523138523 2778836.9941999996),北投區,1,7.67212,知行路,知行路-05,"POLYGON ((296729.466 2778836.994, 296730.909 2...",0,0,0,0,0,0,0.0,0.0,63142.812738,0.0,0.0
4,1605736,POINT (296736.68244087126 2778844.4942),北投區,1,7.67212,知行路,知行路-05,"POLYGON ((296733.796 2778844.494, 296735.239 2...",0,0,0,0,1,1,0.0,0.0,0.0,0.0,0.0


##### Permeable pavement: PAC, sidewalk, parkinglot, park, school

In [28]:
# Permeable pavement PAC
pavement_pac_raw_path = ROOT_PATH + r"\03 Data\Raw\鋪面\透水鋪面_PAC鋪面.geojson"
pavement_pac_raw = gpd.read_file(pavement_pac_raw_path)
pavement_pac = pavement_pac_raw.copy().to_crs(epsg=3826)
# Replace the PAC layer by the >8 m road features that intersect it (inner
# join keeps the road geometry), then drop the listed join columns.
# NOTE(review): above_8m_road is joined as loaded; confirm it is already in
# EPSG:3826 like pavement_pac.
pac_overlayed_area = gpd.sjoin(above_8m_road, pavement_pac, how='inner', predicate='intersects')
drop_col = ['id', '項次', '名稱', '面積', '體積', '圖形', 'index_right']
pavement_pac = pac_overlayed_area.drop(columns=drop_col)

# Permeable pavement sidewalk
pavement_sidewalk_raw_path = ROOT_PATH + r"\03 Data\Raw\鋪面\透水鋪面_人行道透水鋪面.geojson"
pavement_sidewalk_raw = gpd.read_file(pavement_sidewalk_raw_path)
pavement_sidewalk = pavement_sidewalk_raw.copy().to_crs(epsg=3826)

# Permeable pavement parkinglot
pavement_parkinglot_raw_path = ROOT_PATH + r"\03 Data\Raw\鋪面\透水鋪面_停車場透水鋪面.geojson"
pavement_parkinglot_raw = gpd.read_file(pavement_parkinglot_raw_path)
pavement_parkinglot = pavement_parkinglot_raw.copy().to_crs(epsg=3826)

# Permeable pavement park
pavement_park_raw_path = ROOT_PATH + r"\03 Data\Raw\鋪面\透水鋪面_公園透水鋪面.geojson"
pavement_park_raw = gpd.read_file(pavement_park_raw_path)
pavement_park = pavement_park_raw.copy().to_crs(epsg=3826)

# Permeable pavement school
pavement_school_raw_path = ROOT_PATH + r"\03 Data\Raw\鋪面\透水鋪面_學校透水鋪面.geojson"
pavement_school_raw = gpd.read_file(pavement_school_raw_path)
pavement_school = pavement_school_raw.copy().to_crs(epsg=3826)

In [29]:
# Create permeable pavement area columns.
# Same recipe for every layer: zero-distance buffer on the geometry, then join
# the feature areas onto the hexagons under the given column name.
permeable_layers = [
    (pavement_sidewalk, 'pavement_sidewalk_area'),
    (pavement_parkinglot, 'pavement_parkinglot_area'),
    (pavement_park, 'pavement_park_area'),
    (pavement_school, 'pavement_school_area'),
    (pavement_pac, 'pavement_pac_area'),
]
for permeable_layer, area_column in permeable_layers:
    permeable_layer['geometry'] = permeable_layer['geometry'].buffer(0)
    hex = calculate_pavement_area(hex, permeable_layer, column=area_column)

hex.head()

Unnamed: 0,id,centroid,TNAME,soil_liquid_class,width,road_name,road_id,geometry,sp_count,rp_count,...,park_area,school_greening_area,riverside_highland_area,birds_conservation_area,agriculture_area,pavement_sidewalk_area,pavement_parkinglot_area,pavement_park_area,pavement_school_area,pavement_pac_area
0,1594752,POINT (296728.0221868334 2778844.4942),北投區,1,7.67212,知行路,知行路-05,"POLYGON ((296725.135 2778844.494, 296726.579 2...",0,0,...,0.0,0.0,63142.812738,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1600243,POINT (296732.3523138523 2778846.9941999996),北投區,1,7.67212,知行路,知行路-05,"POLYGON ((296729.466 2778846.994, 296730.909 2...",0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1600244,POINT (296732.3523138523 2778841.9942000005),北投區,1,7.67212,知行路,知行路-05,"POLYGON ((296729.466 2778841.994, 296730.909 2...",0,0,...,0.0,0.0,63142.812738,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1600245,POINT (296732.3523138523 2778836.9941999996),北投區,1,7.67212,知行路,知行路-05,"POLYGON ((296729.466 2778836.994, 296730.909 2...",0,0,...,0.0,0.0,63142.812738,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1605736,POINT (296736.68244087126 2778844.4942),北投區,1,7.67212,知行路,知行路-05,"POLYGON ((296733.796 2778844.494, 296735.239 2...",0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### Building volume and floor count

In [30]:
# Taipei building
raw_tp_building_path = ROOT_PATH + r"\03 Data\Raw\容積與地下結構\tp_building_height.csv"
# The '出入口高程' column is forced to str when parsing the CSV.
tp_building_raw = pd.read_csv(raw_tp_building_path, dtype={'出入口高程': str})
tp_building = tp_building_raw.copy()

In [31]:
# Parse every hex-encoded WKB string into a Shapely geometry. Rows that cannot
# be parsed are reported and left with a missing geometry.
geometries = []
for idx, wkb_str in enumerate(tp_building['wkb_geometry']):
    try:
        geometries.append(wkb.loads(bytes.fromhex(wkb_str)))
    except Exception as e:
        # Report the failing row and keep going with a null geometry
        print(f"Error converting WKB at index {idx}: {e}")
        geometries.append(None)

# Assign the whole column in one go, aligned row by row with the frame's own
# index. The previous per-row `.at[idx, ...]` writes relied on the index being
# 0..n-1 and were slow on a table of this size.
tp_building['geometry'] = gpd.GeoSeries(geometries, index=tp_building.index)

# Create the GeoDataFrame; the coordinates are declared as EPSG:4326 here, so
# no separate set_crs call is needed afterwards.
tp_building = gpd.GeoDataFrame(tp_building, geometry='geometry', crs='epsg:4326')

# Reproject to EPSG:3826 (important!)
tp_building = tp_building.to_crs(epsg=3826)

# Reduce columns
drop_col = [
    '圖層', '地形碼', '線形碼', '圖例碼', '出入口高程', '樓層註記', '測製日期', '修測日期', 
    'wkb_geometry', '1_bud_high', '_ctime', '_mtime', 'ogc_fid', '1_top_high', '1_ent_heig'
]
tp_building_clean = tp_building.drop(columns=drop_col).copy()

# Calculate area and volume.
# 'volume' is the footprint area multiplied by the '1_floor' value.
tp_building_clean['area'] = tp_building_clean['geometry'].area
tp_building_clean['volume'] = tp_building_clean['area'] * tp_building_clean['1_floor']
tp_building_clean.head()

Error converting WKB at index 100150: IllegalArgumentException: Invalid number of points in LinearRing found 2 - must be 0 or >= 3


Unnamed: 0,no,屋頂高程,1_floortyp,1_floor,1_type,geometry,area,volume
0,62,7.08,2M,2.0,M,"MULTIPOLYGON (((295750.395 2777584.157, 295736...",142.70797,285.415941
1,709,48.34,2R,2.0,R,"MULTIPOLYGON (((296789.595 2779465.005, 296787...",50.124408,100.248816
2,710,74.05,6R,6.0,R,"MULTIPOLYGON (((296794.126 2779459.656, 296795...",74.277659,445.665957
3,745,10.49,1R,1.0,R,"MULTIPOLYGON (((296800.000 2778908.549, 296800...",21.276537,21.276537
4,1022,77.22,9R,9.0,R,"MULTIPOLYGON (((296729.195 2779948.047, 296733...",32.000574,288.00517


In [32]:
# Spatial join: attach floor count, volume and footprint area of the buildings
# each hexagon intersects; all other building attributes are dropped.
keep_col = ["1_floor", "volume", "area", "geometry"]
drop_col = [col for col in tp_building_clean.columns if col not in keep_col]
temp = (
    gpd.sjoin(hex, tp_building_clean, how='left', predicate='intersects')
    .drop(columns=drop_col)
    .drop(columns=['index_right'])
)

# Hexagons without any building get zeros
fill_col = ["1_floor", "volume", "area"]
temp[fill_col] = temp[fill_col].fillna(0)

# One row per hexagon: after the descending sort, the building with the
# highest '1_floor' comes first for each id and is the one kept.
temp = (
    temp.sort_values(by=['id', '1_floor'], ascending=False)
    .drop_duplicates(subset=["id"])
    .reset_index(drop=True)
)

# Rename column
rename_col = {
    "1_floor": "building_floor",
    "volume": "building_volume",
    "area": "building_area"
}
hex = temp.rename(columns=rename_col)
hex.head()

Unnamed: 0,id,centroid,TNAME,soil_liquid_class,width,road_name,road_id,geometry,sp_count,rp_count,...,birds_conservation_area,agriculture_area,pavement_sidewalk_area,pavement_parkinglot_area,pavement_park_area,pavement_school_area,pavement_pac_area,building_floor,building_area,building_volume
0,22114579,POINT (312905.37672952673 2770269.4942),南港區,0,8.0,舊莊街,A0039948,"POLYGON ((312902.490 2770269.494, 312903.933 2...",0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,22114578,POINT (312905.37672952673 2770274.4942),南港區,0,8.0,舊莊街,A0039948,"POLYGON ((312902.490 2770274.494, 312903.933 2...",0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,22114574,POINT (312905.37672952673 2770294.4942),南港區,0,6.0,無名,A0041167,"POLYGON ((312902.490 2770294.494, 312903.933 2...",0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,22109086,POINT (312901.0466025078 2770271.9942000005),南港區,0,8.0,舊莊街,A0039948,"POLYGON ((312898.160 2770271.994, 312899.603 2...",0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,22109085,POINT (312901.0466025078 2770276.9941999996),南港區,0,8.0,舊莊街,A0039948,"POLYGON ((312898.160 2770276.994, 312899.603 2...",0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### Building underground floor count

In [33]:
# Load
raw_tp_build_under_path = ROOT_PATH + r"\01 QGIS\00 Processed Data\Geopackage\tp_building_underground_floor.gpkg"
tp_build_under_raw = gpd.read_file(raw_tp_build_under_path)

# Work on a copy reprojected to EPSG:3826 (important!)
tp_build_under = tp_build_under_raw.copy().to_crs(epsg=3826)
tp_build_under.head()

Unnamed: 0,no,屋頂高程,1_floortyp,1_floor,1_type,area,volume,地上層數,地下層數,geometry
0,62,7.08,2M,2.0,M,142.70797,285.415941,,0.0,"MULTIPOLYGON (((295750.395 2777584.157, 295736..."
1,709,48.34,2R,2.0,R,50.124408,100.248816,,0.0,"MULTIPOLYGON (((296789.595 2779465.005, 296787..."
2,710,74.05,6R,6.0,R,74.277659,445.665957,,1.0,"MULTIPOLYGON (((296794.126 2779459.656, 296795..."
3,745,10.49,1R,1.0,R,21.276537,21.276537,,0.0,"MULTIPOLYGON (((296800.000 2778908.549, 296800..."
4,1022,77.22,9R,9.0,R,32.000574,288.00517,,2.0,"MULTIPOLYGON (((296729.195 2779948.047, 296733..."


In [34]:
# Spatial join: attach the underground floor count ('地下層數') of the
# buildings each hexagon intersects; all other building attributes are dropped.
keep_col = ["地下層數", "geometry"]
drop_col = [col for col in tp_build_under.columns.tolist() if col not in keep_col]
temp = gpd.sjoin(hex, tp_build_under, how='left', predicate='intersects')
temp = temp.drop(columns=drop_col)
temp = temp.drop(columns=['index_right'])

# Fill missing value: hexagons without any building get 0
temp["地下層數"] = temp["地下層數"].fillna(0)
temp = temp.rename(columns={"地下層數": "underground_floor"})

# Drop duplicates by id and floors.
# Sorting on the floor count as well makes the kept row deterministic: for a
# hexagon touching several buildings, the one with the most underground floors
# is retained (same rule as the building_floor join). Sorting by id alone left
# the choice among duplicates arbitrary.
temp = temp.sort_values(by=['id', 'underground_floor'], ascending=False)
temp = temp.drop_duplicates(subset=["id"])
hex = temp.reset_index(drop=True)

hex.head()

Unnamed: 0,id,centroid,TNAME,soil_liquid_class,width,road_name,road_id,geometry,sp_count,rp_count,...,agriculture_area,pavement_sidewalk_area,pavement_parkinglot_area,pavement_park_area,pavement_school_area,pavement_pac_area,building_floor,building_area,building_volume,underground_floor
0,22114579,POINT (312905.37672952673 2770269.4942),南港區,0,8.0,舊莊街,A0039948,"POLYGON ((312902.490 2770269.494, 312903.933 2...",0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,22114578,POINT (312905.37672952673 2770274.4942),南港區,0,8.0,舊莊街,A0039948,"POLYGON ((312902.490 2770274.494, 312903.933 2...",0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,22114574,POINT (312905.37672952673 2770294.4942),南港區,0,6.0,無名,A0041167,"POLYGON ((312902.490 2770294.494, 312903.933 2...",0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,22109086,POINT (312901.0466025078 2770271.9942000005),南港區,0,8.0,舊莊街,A0039948,"POLYGON ((312898.160 2770271.994, 312899.603 2...",0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,22109085,POINT (312901.0466025078 2770276.9941999996),南港區,0,8.0,舊莊街,A0039948,"POLYGON ((312898.160 2770276.994, 312899.603 2...",0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### Underground structure exist: Taiwan Railway, Taiwan Highspeed Rail, Taipei Metro.

- 參考《捷運隧道主體地下結構3D資料庫建置》圖10　潛盾隧道標準剖面圖，標準捷運軌道半徑為280cm，直徑則為560cm。若以去回雙向並預留1m做為緩衝，捷運線應向外buffer 1220cm作為地下結構之範圍。目前取整數以1300cm作為直徑。
- 有與地下構造物交集的網格為1，無則為0
- 地下道：由於僅具有出口點位，不清楚中間連接關係，故先不納入
- 地下街：由於僅具有出口點位，不清楚中間連接關係，故先不納入

In [35]:
def _wkt_frame_to_gdf(frame):
    """Copy ``frame``, parse its WKT 'geometry' column and wrap it as an EPSG:3826 GeoDataFrame."""
    parsed = frame.copy()
    parsed['geometry'] = parsed['geometry'].apply(loads)
    return gpd.GeoDataFrame(parsed, geometry='geometry', crs='epsg:3826')


# Load: high-speed rail
hsr_route_raw_path = ROOT_PATH + r"\03 Data\Processed\地下構造物\Taiwan_HighSpeedRail_route_TPpart.csv"
hsr_route_raw = pd.read_csv(hsr_route_raw_path)
hsr_route = _wkt_frame_to_gdf(hsr_route_raw)

# Load: Taiwan Railway
tr_route_raw_path = ROOT_PATH + r"\03 Data\Processed\地下構造物\Taiwan_Railway_route_TPpart.csv"
tr_route_raw = pd.read_csv(tr_route_raw_path)
tr_route = _wkt_frame_to_gdf(tr_route_raw)

# Load: Taipei Metro
trtc_route_raw_path = ROOT_PATH + r"\03 Data\Processed\地下構造物\TP_MRT_route.csv"
trtc_route_raw = pd.read_csv(trtc_route_raw_path)
trtc_route = _wkt_frame_to_gdf(trtc_route_raw)

# Create route buffer (distance in CRS units).
# NOTE(review): buffer(13) extends 13 units on each side of the route, while
# the notes above this cell describe 1300 cm as a diameter — confirm which
# width is intended.
ROUTE_BUFFER_DISTANCE = 13
for route in (hsr_route, tr_route, trtc_route):
    route['geometry'] = route['geometry'].buffer(ROUTE_BUFFER_DISTANCE)

In [36]:
# Spatial join of the hexagons with the buffered Taipei Metro routes
keep_col = ["geometry"]
drop_col = [col for col in trtc_route.columns if col not in keep_col]
temp_trtc = gpd.sjoin(hex, trtc_route, how='left', predicate='intersects') #1m 27s

# Flag: 1 when the left join found a route for the hexagon, 0 otherwise
temp_trtc = temp_trtc.assign(underground_mrt=temp_trtc['index_right'].notna().astype(int))

# Remove the route attributes and the join index, then keep one row per hexagon
temp_trtc = temp_trtc.drop(columns=drop_col + ['index_right']).drop_duplicates(subset=["id"])
hex = temp_trtc.copy()
print(hex['underground_mrt'].value_counts())
hex.head()

underground_mrt
0    1312933
1      84752
Name: count, dtype: int64


Unnamed: 0,id,centroid,TNAME,soil_liquid_class,width,road_name,road_id,geometry,sp_count,rp_count,...,pavement_sidewalk_area,pavement_parkinglot_area,pavement_park_area,pavement_school_area,pavement_pac_area,building_floor,building_area,building_volume,underground_floor,underground_mrt
0,22114579,POINT (312905.37672952673 2770269.4942),南港區,0,8.0,舊莊街,A0039948,"POLYGON ((312902.490 2770269.494, 312903.933 2...",0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
1,22114578,POINT (312905.37672952673 2770274.4942),南港區,0,8.0,舊莊街,A0039948,"POLYGON ((312902.490 2770274.494, 312903.933 2...",0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
2,22114574,POINT (312905.37672952673 2770294.4942),南港區,0,6.0,無名,A0041167,"POLYGON ((312902.490 2770294.494, 312903.933 2...",0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
3,22109086,POINT (312901.0466025078 2770271.9942000005),南港區,0,8.0,舊莊街,A0039948,"POLYGON ((312898.160 2770271.994, 312899.603 2...",0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
4,22109085,POINT (312901.0466025078 2770276.9941999996),南港區,0,8.0,舊莊街,A0039948,"POLYGON ((312898.160 2770276.994, 312899.603 2...",0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0


In [37]:
# Spatial join of the hexagons with the buffered high-speed rail routes
keep_col = ["geometry"]
drop_col = [col for col in hsr_route.columns if col not in keep_col]
temp_hsr = gpd.sjoin(hex, hsr_route, how='left', predicate='intersects') #55s

# Flag: 1 when the left join found a route for the hexagon, 0 otherwise
temp_hsr = temp_hsr.assign(underground_hsr=temp_hsr['index_right'].notna().astype(int))

# Remove the route attributes and the join index, then keep one row per hexagon
temp_hsr = temp_hsr.drop(columns=drop_col + ['index_right']).drop_duplicates(subset=["id"])
hex = temp_hsr.copy()
print(hex['underground_hsr'].value_counts())
hex.head()

underground_hsr
0    1381320
1      16365
Name: count, dtype: int64


Unnamed: 0,id,centroid,TNAME,soil_liquid_class,width,road_name,road_id,geometry,sp_count,rp_count,...,pavement_parkinglot_area,pavement_park_area,pavement_school_area,pavement_pac_area,building_floor,building_area,building_volume,underground_floor,underground_mrt,underground_hsr
0,22114579,POINT (312905.37672952673 2770269.4942),南港區,0,8.0,舊莊街,A0039948,"POLYGON ((312902.490 2770269.494, 312903.933 2...",0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
1,22114578,POINT (312905.37672952673 2770274.4942),南港區,0,8.0,舊莊街,A0039948,"POLYGON ((312902.490 2770274.494, 312903.933 2...",0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
2,22114574,POINT (312905.37672952673 2770294.4942),南港區,0,6.0,無名,A0041167,"POLYGON ((312902.490 2770294.494, 312903.933 2...",0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
3,22109086,POINT (312901.0466025078 2770271.9942000005),南港區,0,8.0,舊莊街,A0039948,"POLYGON ((312898.160 2770271.994, 312899.603 2...",0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
4,22109085,POINT (312901.0466025078 2770276.9941999996),南港區,0,8.0,舊莊街,A0039948,"POLYGON ((312898.160 2770276.994, 312899.603 2...",0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0


In [38]:
# Spatial join of the hexagons with the buffered Taiwan Railway routes
keep_col = ["geometry"]
drop_col = [col for col in tr_route.columns if col not in keep_col]
temp_tr = gpd.sjoin(hex, tr_route, how='left', predicate='intersects') #15s

# Flag: 1 when the left join found a route for the hexagon, 0 otherwise
temp_tr = temp_tr.assign(underground_tr=temp_tr['index_right'].notna().astype(int))

# Remove the route attributes and the join index, then keep one row per hexagon
temp_tr = temp_tr.drop(columns=drop_col + ['index_right']).drop_duplicates(subset=["id"])
hex = temp_tr.copy()
print(hex['underground_tr'].value_counts())
hex.head()

underground_tr
0    1382417
1      15268
Name: count, dtype: int64


Unnamed: 0,id,centroid,TNAME,soil_liquid_class,width,road_name,road_id,geometry,sp_count,rp_count,...,pavement_park_area,pavement_school_area,pavement_pac_area,building_floor,building_area,building_volume,underground_floor,underground_mrt,underground_hsr,underground_tr
0,22114579,POINT (312905.37672952673 2770269.4942),南港區,0,8.0,舊莊街,A0039948,"POLYGON ((312902.490 2770269.494, 312903.933 2...",0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0
1,22114578,POINT (312905.37672952673 2770274.4942),南港區,0,8.0,舊莊街,A0039948,"POLYGON ((312902.490 2770274.494, 312903.933 2...",0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0
2,22114574,POINT (312905.37672952673 2770294.4942),南港區,0,6.0,無名,A0041167,"POLYGON ((312902.490 2770294.494, 312903.933 2...",0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0
3,22109086,POINT (312901.0466025078 2770271.9942000005),南港區,0,8.0,舊莊街,A0039948,"POLYGON ((312898.160 2770271.994, 312899.603 2...",0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0
4,22109085,POINT (312901.0466025078 2770276.9941999996),南港區,0,8.0,舊莊街,A0039948,"POLYGON ((312898.160 2770276.994, 312899.603 2...",0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0


### Output

In [39]:
# Both outputs are written under ROOT_PATH (instead of a second hard-coded
# copy of the absolute project path) so they follow the configured root.

# Export to GeoPackage
path_gpkg = ROOT_PATH + r"\03 Data\Model_building\training_data_TP_temp.gpkg"
hex.to_file(path_gpkg, driver="GPKG")

# Export to csv (row index not written)
path_csv = ROOT_PATH + r"\03 Data\Model_building\training_data_TP_temp.csv"
hex.to_csv(path_csv, index=False)

In [3]:
# Re-open the exported CSV to check its columns.
path_csv = ROOT_PATH + r"\03 Data\Model_building\training_data_TP_temp.csv"
# dd.read_csv is lazy and the column names are available without computing.
# The former bare `data.compute()` loaded the whole file into memory and then
# discarded the result, so it has been removed; `data` is still the dask frame.
data = dd.read_csv(path_csv)
data.columns

Index(['id', 'centroid', 'TNAME', 'soil_liquid_class', 'width', 'road_name',
       'road_id', 'geometry', 'sp_count', 'rp_count', 'rd_count', 'cn_count',
       'wp_count', 'pipe_count', 'park_area', 'school_greening_area',
       'riverside_highland_area', 'birds_conservation_area',
       'agriculture_area', 'pavement_sidewalk_area',
       'pavement_parkinglot_area', 'pavement_park_area',
       'pavement_school_area', 'pavement_pac_area', 'building_floor',
       'building_area', 'building_volume', 'underground_floor',
       'underground_mrt', 'underground_hsr', 'underground_tr'],
      dtype='object')

## Batch Processing

In [None]:
# Load date list
date_list = pd.read_csv(ROOT_PATH + r"\03 Data\Model_building\TP_Datelist\Training_2021_week_list.csv")

# Setting datelist
date_start = pd.to_datetime(date_list["date"][0]).date()
time_window = 7
time_step = 1

time_pred = date_start + timedelta(days=time_window)
mask = date_list["date"] >= str(time_pred)
date_list = date_list[mask].reset_index(drop=True)

# Batch Run
for i in tqdm(range(len(date_list["date"]))):
    time_pred = date_list["date"][i]
    time_pred = pd.to_datetime(time_pred).date()
    # time_start = time_pred - timedelta(days=time_window)
    # time_end = time_pred - timedelta(days=1)
    %run Training_data_TP.py $time_window $time_pred #2hr45s

In [None]:
# Load date list
date_list = pd.read_csv(ROOT_PATH + r"\03 Data\Model_building\TP_Datelist\Training_2022_week_list.csv")

# Setting datelist
date_start = pd.to_datetime(date_list["date"][28]).date()
time_window = 7
time_step = 1

time_pred = date_start + timedelta(days=time_window)
mask = date_list["date"] >= str(time_pred)
date_list = date_list[mask].reset_index(drop=True)

# Batch Run
for i in tqdm(range(len(date_list["date"]))):
    time_pred = date_list["date"][i]
    time_pred = pd.to_datetime(time_pred).date()
    # time_start = time_pred - timedelta(days=time_window)
    # time_end = time_pred - timedelta(days=1)
    %run Training_data_TP_temp.py $time_window $time_pred