In [1]:
import pandas as pd
import geopandas as gpd
from geopandas.tools import sjoin
import time
import pytz
import shapely
from shapely.geometry import Point, Polygon
from shapely import LineString
from shapely.wkt import loads
from datetime import datetime, timedelta
from pyproj import Proj
import pyproj
from tqdm import tqdm
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.patches import Polygon
import xml.etree.ElementTree as ET
import dask_geopandas as dgpd
import dask.dataframe as dd
from dask.diagnostics import ProgressBar
from function import *

## Load Dataset
- 在原始資料中有註明資料時間區間，但實際年度資料筆數會多一筆(各月會多12筆)，其內容為歷史統計之最高或最低值，在清理時會排除此筆資料。
- 經觀察推測為各年度資料的第一筆(ex. station_data[0]['StationObsStatistics']['Annual'][0])以及各月資料的前12筆(ex.station_data[0]['StationObsStatistics']['Monthly'][0])

In [58]:
ROOT_PATH = r"D:\Chu's Document!\02 Project\06 道路塌陷防治專案(天坑)"
path = ROOT_PATH + r"\03 Data\Raw\潮汐\C-B0052-001_潮位統計-臺灣各地歷史潮位觀測逐年月統計.json"
tide_data = pd.read_json(path)
tide_data.head()

Unnamed: 0,cwaopendata
@xmlns,urn:cwa:gov:tw:cwacommon:0.1
Dataid,C-B0052-001
DatasetName,海象氣候統計
Identifier,157c1e98-a3ef-494f-b9ab-3439313d3112
MsgType,Issue


## Data Preprocess

In [59]:
# Data Overview
station_data = tide_data['cwaopendata']['Resources']['Resource']['Data']['SeaSurfaceObs']['Location']
station_data

[{'Station': {'StationID': '11006',
   'StationName': '淡海潮位站',
   'StationNameEN': 'Danhai',
   'StationLatitude': '25.18',
   'StationLongitude': '121.41',
   'StationAttribute': '潮位站',
   'Description': '相對臺灣高程基準TWVD2001基隆海平面;Relative to the TWVD2001 MSL',
   'County': {'CountyName': '新北市', 'CountyNameEN': 'New Taipei City'},
   'Town': {'TownName': '淡水區', 'TownNameEN': 'Tamsui Dist'}},
  'StationObsStatistics': {'StartYear': '2017',
   'EndYear': '2022',
   'Annual': [{'HighestHighWaterLevel': '2.086',
     'HighestAstronomicalTide': '1.978',
     'MeanHighWaterLevel': '1.369',
     'MeanTideLevel': '0.128',
     'MeanLowWaterLevel': '-1.150',
     'LowestAstronomicalTide': '-1.922',
     'LowestLowWaterLevel': '-1.985',
     'MeanTidalRange': '2.519',
     'MaxAstronomicalTidalRange': '3.900',
     'MeanHighWaterOfSpringTide': '1.870',
     'MeanLowWaterOfSpringTide': '-1.738'},
    {'HighestHighWaterLevel': '1.959',
     'HighestAstronomicalTide': '1.904',
     'MeanHighWaterLevel

In [110]:

num_list = []
for i in range(len(station_data)):
    a = len(station_data[i]['StationObsStatistics']['DataYear'])
    num_list.append(a)

print(num_list)

[7, 20, 20, 20, 20, 19, 20, 20, 20, 16, 20, 20, 17, 20, 20, 20, 9, 5, 20, 6, 20, 20, 20, 20, 17, 20, 20, 20, 20, 20, 20, 20, 20, 12, 19, 20, 19]


In [118]:
station_data[16]['StationObsStatistics']['DataYear']

['2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015']

In [115]:
# Data Extraction
station_num = len(station_data)
tide_record = []

for station in range(1, station_num):
    data_year_list = station_data[station]['StationObsStatistics']['DataYear']
    month_num = len(station_data[station]['StationObsStatistics']['Monthly'])

    for month in range(12, month_num):
        year_index = (month // 12) - 1
        temp = {
            'StationID': station_data[station]['Station']['StationID'],
            'StationName': station_data[station]['Station']['StationName'],
            'StationNameEN': station_data[station]['Station']['StationNameEN'],
            'StationLatitude': station_data[station]['Station']['StationLatitude'],
            'StationLongitude': station_data[station]['Station']['StationLongitude'],
            'StationAttribute': station_data[station]['Station']['StationAttribute'],
            'Description': station_data[station]['Station']['Description'],
            'County': station_data[station]['Station']['County']['CountyName'],
            'Town': station_data[station]['Station']['Town']['TownName'],
            'DataYear': data_year_list[year_index],
            'DataMonth': station_data[station]['StationObsStatistics']['Monthly'][month]['DataMonth'],
            'HighestHighWaterLevel': station_data[station]['StationObsStatistics']['Monthly'][month]['HighestHighWaterLevel'],
            'HighestAstronomicalTide': station_data[station]['StationObsStatistics']['Monthly'][month]['HighestAstronomicalTide'],
            'MeanHighWaterLevel': station_data[station]['StationObsStatistics']['Monthly'][month]['MeanHighWaterLevel'],
            'MeanTideLevel': station_data[station]['StationObsStatistics']['Monthly'][month]['MeanTideLevel'],
            'MeanLowWaterLevel': station_data[station]['StationObsStatistics']['Monthly'][month]['MeanLowWaterLevel'],
            'LowestAstronomicalTide': station_data[station]['StationObsStatistics']['Monthly'][month]['LowestAstronomicalTide'],
            'LowestLowWaterLevel': station_data[station]['StationObsStatistics']['Monthly'][month]['LowestLowWaterLevel'],
            'MeanTidalRange': station_data[station]['StationObsStatistics']['Monthly'][month]['MeanTidalRange'],
            'MaxAstronomicalTidalRange': station_data[station]['StationObsStatistics']['Monthly'][month]['MaxAstronomicalTidalRange'],
            'MeanHighWaterOfSpringTide': station_data[station]['StationObsStatistics']['Monthly'][month]['MeanHighWaterOfSpringTide'],
            'MeanLowWaterOfSpringTide': station_data[station]['StationObsStatistics']['Monthly'][month]['MeanLowWaterOfSpringTide'],
        }
        tide_record.append(temp)

tide_record = pd.DataFrame(tide_record)
tide_record.head()

Unnamed: 0,StationID,StationName,StationNameEN,StationLatitude,StationLongitude,StationAttribute,Description,County,Town,DataYear,...,HighestAstronomicalTide,MeanHighWaterLevel,MeanTideLevel,MeanLowWaterLevel,LowestAstronomicalTide,LowestLowWaterLevel,MeanTidalRange,MaxAstronomicalTidalRange,MeanHighWaterOfSpringTide,MeanLowWaterOfSpringTide
0,1102,淡水潮位站,Tamsui,25.18,121.42,潮位站,相對臺灣高程基準TWVD2001基隆海平面;Relative to the TWVD2001...,新北市,淡水區,2003,...,1.291,1.388,0.255,-0.791,-1.065,-1.259,2.179,2.356,1.24,-0.98
1,1102,淡水潮位站,Tamsui,25.18,121.42,潮位站,相對臺灣高程基準TWVD2001基隆海平面;Relative to the TWVD2001...,新北市,淡水區,2003,...,1.347,1.425,0.276,-0.885,-1.002,-1.289,2.31,2.349,1.271,-0.931
2,1102,淡水潮位站,Tamsui,25.18,121.42,潮位站,相對臺灣高程基準TWVD2001基隆海平面;Relative to the TWVD2001...,新北市,淡水區,2003,...,1.543,1.389,0.194,-0.999,-0.977,-1.282,2.388,2.52,1.404,-0.914
3,1102,淡水潮位站,Tamsui,25.18,121.42,潮位站,相對臺灣高程基準TWVD2001基隆海平面;Relative to the TWVD2001...,新北市,淡水區,2003,...,1.677,1.427,0.251,-0.865,-1.016,-1.285,2.292,2.693,1.502,-0.901
4,1102,淡水潮位站,Tamsui,25.18,121.42,潮位站,相對臺灣高程基準TWVD2001基隆海平面;Relative to the TWVD2001...,新北市,淡水區,2003,...,1.668,1.467,0.297,-0.835,-0.993,-1.234,2.302,2.661,1.523,-0.885


In [116]:
tide_record.shape

(7908, 22)

## Save

In [122]:
tide_record.to_csv(ROOT_PATH + r"\03 Data\Processed\潮汐\臺灣各地歷史潮位觀測逐年月統計.csv", index=False)
tide_record.to_excel(ROOT_PATH + r"\03 Data\Processed\潮汐\臺灣各地歷史潮位觀測逐年月統計.xlsx", index=False)