In [None]:
# -*- coding: utf-8 -*-
"""
Created on 01/14, 2023
@author: WillyF

"""

# https://pandas.pydata.org/docs/reference/api/pandas.json_normalize.html
# https://opendata.cwb.gov.tw/dataset/forecast/F-C0032-001
# https://opendata.cwb.gov.tw/dist/opendata-swagger.html?urls.primaryName=openAPI#/%E9%A0%90%E5%A0%B1/get_v1_rest_datastore_F_C0032_001
# https://pandas.pydata.org/docs/reference/api/pandas.pivot_table.html
# https://stackoverflow.com/questions/31306741/unmelt-pandas-dataframe
# https://medium.com/%E6%95%B8%E6%93%9A%E4%B8%8D%E6%AD%A2-not-only-data/pandas-%E5%BF%AB%E9%80%9F%E7%9E%AD%E8%A7%A3-pivot-table-%E8%88%87%E6%87%89%E7%94%A8-21e4c37b9216
# https://docs.python.org/3/library/stdtypes.html#str.isdigit
# https://datagy.io/python-string-to-date/
# https://www.itsolutionstuff.com/post/how-to-check-if-today-is-wednesday-or-not-in-pythonexample.html

In [None]:
import pandas as pd
# import urllib
import urllib.request
import json
import numpy as np
# from glob import glob
import os
import warnings
from datetime import datetime
warnings.filterwarnings("ignore")
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 1000)

In [None]:
# Load the city/county -> region lookup table; later cells left-join it onto
# the forecast rows via its 'City/County' column.
TW_Region = pd.read_csv(
    filepath_or_buffer="https://github.com/LeBronWilly/TW_Weather_FCST/raw/main/TW_Region.csv",
    encoding="utf8",
)
TW_Region

In [None]:
# Download dataset F-C0032-001 from the CWB open-data REST API and parse the JSON body.
#
# The API key is read from the CWB_API_KEY environment variable when it is set;
# the previously hard-coded key is kept only as a fallback so the notebook still
# runs unchanged. NOTE(review): credentials should not live in the notebook —
# consider removing the fallback once the environment variable is configured.
api_key_WF = os.environ.get("CWB_API_KEY", "CWB-4FB338DD-B0B6-49EC-BDD4-8293D48B8071")
data_source = (
    "https://opendata.cwb.gov.tw/api/v1/rest/datastore/F-C0032-001?Authorization="
    + api_key_WF
    + "&format=JSON&locationName=&elementName=&sort=time"
)
# Use the response as a context manager so the HTTP connection is always closed,
# even if reading or JSON decoding fails.
with urllib.request.urlopen(data_source) as json_url:
    data = json.loads(json_url.read())
data

In [None]:
# Flatten the nested F-C0032-001 payload into one row per
# (location, weather element, time slot) and tidy the text columns.
data_df = pd.json_normalize(
    data["records"],
    record_path=["location", "weatherElement", "time"],
    meta=[
        ["location", "locationName"],
        ["location", "weatherElement", "elementName"],
        "datasetDescription",
    ],
)
# json_normalize yields dotted paths (e.g. "location.locationName"); keep only the leaf name.
data_df.columns = [x.split(".")[-1] for x in data_df.columns]

# NOTE: every replacement below is assigned back to the column explicitly.
# `df["col"].replace(..., inplace=True)` operates on a column selection and is
# chained assignment, which does not update the frame under pandas Copy-on-Write.
unit_labels = {"百分比": "%", "C": "°C"}
data_df["parameterUnit"] = data_df["parameterUnit"].replace(unit_labels).fillna("")

element_labels = {
    "Wx": "Weather Forcast",
    "PoP": "Probability of Precipitation",
    "MinT": "Min Temperature",
    "MaxT": "Max Temperature",
    "CI": "Comfort Index",
}
data_df["elementName"] = data_df["elementName"].replace(element_labels)

# Display value = raw value immediately followed by its (possibly empty) unit.
data_df["Parameter"] = data_df["parameterName"] + data_df["parameterUnit"]

# Use "/" as the date separator and drop the last three characters of the timestamp.
for time_col in ("startTime", "endTime"):
    data_df[time_col] = data_df[time_col].str.replace("-", "/", regex=False).str[:-3]

# Attach the region of each location; validate guards against duplicate
# 'City/County' rows in the lookup table multiplying forecast rows.
data_df = data_df.merge(TW_Region, how="left", left_on='locationName', right_on='City/County', validate="many_to_one")
# Normalise the character 臺 to 台 only after the merge, which joins on the original spelling.
data_df["locationName"] = data_df["locationName"].replace("臺", "台", regex=True)

data_df

In [None]:
# Reshape the long forecast table to one row per (time slot, region, location)
# with one column per weather element.
#
# aggfunc="first" is an explicit reducer that yields a single value per cell;
# the earlier identity lambda returned its input group unchanged rather than
# reducing it. Passing `values` as a scalar keeps the column index flat, so the
# element columns carry their own names from `elementName` instead of being
# relabelled by position.
data_df_pivot = (
    pd.pivot_table(
        data_df,
        index=["startTime", "endTime", "Region", "locationName"],
        columns="elementName",
        values="Parameter",
        aggfunc="first",
    )
    .reset_index()
    .rename_axis(columns=None)
)
data_df_pivot = data_df_pivot.sort_values(by=["locationName", "startTime", "endTime"]).reset_index(drop=True)

# Human-readable composites: "start ~ end" and "max ~ min" temperature.
data_df_pivot["Period"] = data_df_pivot["startTime"].str.cat(data_df_pivot["endTime"], sep=" ~ ")
data_df_pivot["Temperature"] = data_df_pivot["Max Temperature"].str.cat(data_df_pivot["Min Temperature"], sep=" ~ ")

# errors="raise" makes a missing source column fail loudly instead of being skipped.
data_df_pivot = data_df_pivot.rename(
    columns={
        "locationName": "Location",
        "Probability of Precipitation": "PoP",
        "Weather Forcast": "Weather FCST",
    },
    errors="raise",
)

data_df_pivot

In [None]:
# Download dataset F-D0047-091 from the CWB open-data REST API and parse the JSON body.
#
# The API key is read from the CWB_API_KEY environment variable when it is set;
# the previously hard-coded key is kept only as a fallback so the notebook still
# runs unchanged. NOTE(review): credentials should not live in the notebook —
# consider removing the fallback once the environment variable is configured.
api_key_WF = os.environ.get("CWB_API_KEY", "CWB-4FB338DD-B0B6-49EC-BDD4-8293D48B8071")
data_source = "https://opendata.cwb.gov.tw/api/v1/rest/datastore/F-D0047-091?Authorization="+api_key_WF+"&sort=time"
# Use the response as a context manager so the HTTP connection is always closed,
# even if reading or JSON decoding fails.
with urllib.request.urlopen(data_source) as json_url:
    data = json.loads(json_url.read())
data

In [156]:
# Flatten the nested F-D0047-091 payload into one row per
# (location, weather element, time slot, element value) and tidy it for display.
days_name = ["Mon.","Tue.","Wed.","Thu.","Fri.","Sat.","Sun."]


def _format_forecast_time(raw_time):
    """Return the timestamp as 'YYYY/MM/DD HH:MM (Ddd.)'.

    Dashes become slashes, the last three characters are dropped, and the
    abbreviated weekday name (looked up in ``days_name``) is appended.
    """
    stamp = raw_time.replace("-", "/")[:-3]
    weekday = datetime.strptime(stamp, '%Y/%m/%d %H:%M').weekday()
    return stamp + " (" + days_name[weekday] + ")"


data_df = pd.json_normalize(
    data["records"],
    meta=[
        ['locations', 'location', 'locationName'],
        ['locations', 'location', 'lon'],
        ['locations', 'location', 'lat'],
        ['locations', "location", "weatherElement", "elementName"],
        ['locations', "location", "weatherElement", "description"],
        ['locations', "location", "weatherElement", 'time', "startTime"],
        ['locations', "location", "weatherElement", 'time', "endTime"],
    ],
    record_path=['locations', "location", "weatherElement", 'time', "elementValue"],
)
# Columns are relabelled by position: the two record fields first, then the meta fields in the order above.
data_df.columns = ["Value","Unit","Location","Long","Lat","Element_EN","Element","StartTime","EndTime"]
data_df = data_df[["StartTime","EndTime","Location","Element","Element_EN","Value","Unit","Long","Lat"]]
# Drop the rows whose unit is "自定義 Wx 單位".
data_df = data_df[data_df["Unit"]!="自定義 Wx 單位"].reset_index(drop=True)

# NOTE: every replacement below is assigned back to the column explicitly.
# `df["col"].replace(..., inplace=True)` operates on a column selection and is
# chained assignment, which does not update the frame under pandas Copy-on-Write.

# First pass: translate or blank out unit labels. "曝曬級數" and "蒲福風級" are
# deliberately left alone here because the relabelling below keys off them.
# ("自定義 Wx 單位" needs no entry: those rows were removed by the filter above.)
data_df["Unit"] = data_df["Unit"].replace({
    "百分比": "%",
    "8方位": "",
    "紫外線指數": "",
    "攝氏度": "°C",
    "公尺/秒": "m/s",
    "自定義 CI 文字": "",
    "自定義 Wx 文字": "",
})

# Some elements carry two kinds of value under one name; split them into
# separate element names so that each becomes its own column when pivoted.
data_df.loc[(data_df["Element"].str.contains("紫外線指數")) & (data_df["Unit"].str.contains("曝曬級數")),
            "Element"] = "曝曬級數"
data_df.loc[(data_df["Element"].str.contains("最大風速")) & (data_df["Unit"].str.contains("蒲福風級")),
            "Element"] = "蒲福風級"
# For the comfort indices, the non-numeric values are the textual descriptions.
data_df.loc[(data_df["Element"].str.contains("最大舒適度指數")) & (~data_df["Value"].str.isdigit()),
            "Element"] = "最大舒適度"
data_df.loc[(data_df["Element"].str.contains("最小舒適度指數")) & (~data_df["Value"].str.isdigit()),
            "Element"] = "最小舒適度"

# Second pass: blank out the remaining unit labels now that the relabelling is done.
data_df["Unit"] = data_df["Unit"].replace({
    "曝曬級數": "",
    "蒲福風級": "",
    "NA": "",
    "NA ": "",
    " ": "",
})
data_df["Value"] = data_df["Value"].replace("<= 1", "≤1")

# Display value = raw value immediately followed by its (possibly empty) unit;
# a blank value with a percent unit (" %") is shown as "-".
data_df["Parameter"] = data_df["Value"] + data_df["Unit"]
data_df["Parameter"] = data_df["Parameter"].replace(" %", "-")

data_df["StartTime"] = data_df["StartTime"].apply(_format_forecast_time)
data_df["EndTime"] = data_df["EndTime"].apply(_format_forecast_time)

# Attach the region of each location; validate guards against duplicate
# 'City/County' rows in the lookup table multiplying forecast rows.
data_df = data_df.merge(TW_Region, how="left", left_on='Location', right_on='City/County', validate="many_to_one")
# Normalise the character 臺 to 台 only after the merge, which joins on the original spelling.
data_df["Location"] = data_df["Location"].replace("臺", "台", regex=True)
data_df["Long"] = data_df["Long"].astype(float)
data_df["Lat"] = data_df["Lat"].astype(float)
data_df

Unnamed: 0,StartTime,EndTime,Location,Element,Element_EN,Value,Unit,Long,Lat,Parameter,City/County,Region
0,2023/04/15 12:00 (Sat.),2023/04/15 18:00 (Sat.),新竹縣,12小時降雨機率,PoP12h,40,%,120.995698,24.841245,40%,新竹縣,北部地區
1,2023/04/15 18:00 (Sat.),2023/04/16 06:00 (Sun.),新竹縣,12小時降雨機率,PoP12h,10,%,120.995698,24.841245,10%,新竹縣,北部地區
2,2023/04/16 06:00 (Sun.),2023/04/16 18:00 (Sun.),新竹縣,12小時降雨機率,PoP12h,0,%,120.995698,24.841245,0%,新竹縣,北部地區
3,2023/04/16 18:00 (Sun.),2023/04/17 06:00 (Mon.),新竹縣,12小時降雨機率,PoP12h,0,%,120.995698,24.841245,0%,新竹縣,北部地區
4,2023/04/17 06:00 (Mon.),2023/04/17 18:00 (Mon.),新竹縣,12小時降雨機率,PoP12h,0,%,120.995698,24.841245,0%,新竹縣,北部地區
...,...,...,...,...,...,...,...,...,...,...,...,...
5539,2023/04/19 18:00 (Wed.),2023/04/20 06:00 (Thu.),新竹市,平均露點溫度,Td,21,°C,120.962110,24.818109,21°C,新竹市,北部地區
5540,2023/04/20 06:00 (Thu.),2023/04/20 18:00 (Thu.),新竹市,平均露點溫度,Td,21,°C,120.962110,24.818109,21°C,新竹市,北部地區
5541,2023/04/20 18:00 (Thu.),2023/04/21 06:00 (Fri.),新竹市,平均露點溫度,Td,20,°C,120.962110,24.818109,20°C,新竹市,北部地區
5542,2023/04/21 06:00 (Fri.),2023/04/21 18:00 (Fri.),新竹市,平均露點溫度,Td,19,°C,120.962110,24.818109,19°C,新竹市,北部地區


In [160]:
# Reshape the long forecast table to one row per (time slot, region, location)
# with one column per weather element.
#
# aggfunc="first" is an explicit reducer that yields a single value per cell;
# the earlier identity lambda returned its input group unchanged rather than
# reducing it.
data_df_pivot = pd.pivot_table(
    data_df,
    index=["StartTime", "EndTime", "Region", "Location"],
    columns=["Element"],
    values=["Parameter"],
    aggfunc="first",
).reset_index()

# Columns are relabelled by position: the four index columns first, then the
# element columns in the order pivot_table emits them.
# NOTE(review): this depends on the exact set of element names returned by the
# API — a length mismatch raises here, but a reordering would not. Confirm
# against the feed if it changes.
data_df_pivot.columns = [
    "StartTime", "EndTime", "Region", "Location",
    "12hr PoP", "Weather FCST", "Weather Desc", "AvgT",
    "AvgRH(平均相對濕度)", "AvgDPT(平均露點溫度)", "EL(曝曬級數)", "MinT", "MinAT", "MaxC", "MaxCI",
    "MaxWS", "MinC", "MinCI", "MaxT", "MaxAT", "UVI", "BWS(蒲福風級)", "WD",
]
data_df_pivot = data_df_pivot.sort_values(by=["Region", "Location", "StartTime", "EndTime"]).reset_index(drop=True)

# Human-readable composites: "start ~ end" and "min ~ max" ranges.
data_df_pivot["Period"] = data_df_pivot["StartTime"].str.cat(data_df_pivot["EndTime"], sep=" ~ ")
for combined, low, high in (
    ("T", "MinT", "MaxT"),
    ("AT", "MinAT", "MaxAT"),
    ("C", "MinC", "MaxC"),
    ("CI", "MinCI", "MaxCI"),
):
    data_df_pivot[combined] = data_df_pivot[low].str.cat(data_df_pivot[high], sep=" ~ ")

# Keep only the presentation columns, in display order.
data_df_pivot = data_df_pivot[[
    "Period", "Region", "Location", "Weather FCST", "12hr PoP", "T", "AvgT", "AT",
    "AvgRH(平均相對濕度)", "AvgDPT(平均露點溫度)", "UVI", "EL(曝曬級數)",
    "MaxWS", "BWS(蒲福風級)", "WD", "CI", "C", "Weather Desc",
]]
data_df_pivot.head(23)

Unnamed: 0,Period,Region,Location,Weather FCST,12hr PoP,T,AvgT,AT,AvgRH(平均相對濕度),AvgDPT(平均露點溫度),UVI,EL(曝曬級數),MaxWS,BWS(蒲福風級),WD,CI,C,Weather Desc
0,2023/04/15 12:00 (Sat.) ~ 2023/04/15 18:00 (Sat.),中部地區,南投縣,陰短暫陣雨,30%,26°C ~ 29°C,28°C,29°C ~ 33°C,79%,22°C,5.0,中量級,3m/s,2,西北風,25 ~ 26,舒適 ~ 舒適,陰短暫陣雨。降雨機率 30%。溫度攝氏26至29度。舒適。西北風 風速2級(每秒3公尺)。相...
1,2023/04/15 18:00 (Sat.) ~ 2023/04/16 06:00 (Sun.),中部地區,南投縣,多雲,10%,20°C ~ 26°C,22°C,23°C ~ 30°C,94%,21°C,,,1m/s,≤1,偏東風,20 ~ 25,舒適 ~ 舒適,多雲。降雨機率 10%。溫度攝氏20至26度。舒適。偏東風 風速<= 1級(每秒1公尺)。相...
2,2023/04/16 06:00 (Sun.) ~ 2023/04/16 18:00 (Sun.),中部地區,南投縣,晴時多雲,0%,20°C ~ 30°C,26°C,23°C ~ 33°C,69%,20°C,8.0,過量級,2m/s,≤1,西北風,20 ~ 26,舒適 ~ 舒適,晴時多雲。降雨機率 0%。溫度攝氏20至30度。舒適。西北風 風速<= 1級(每秒2公尺)。...
3,2023/04/16 18:00 (Sun.) ~ 2023/04/17 06:00 (Mon.),中部地區,南投縣,晴時多雲,0%,20°C ~ 27°C,22°C,22°C ~ 29°C,80%,19°C,,,1m/s,≤1,東南風,20 ~ 23,舒適 ~ 舒適,晴時多雲。降雨機率 0%。溫度攝氏20至27度。舒適。東南風 風速<= 1級(每秒1公尺)。...
4,2023/04/17 06:00 (Mon.) ~ 2023/04/17 18:00 (Mon.),中部地區,南投縣,晴時多雲,0%,20°C ~ 28°C,25°C,22°C ~ 31°C,68%,18°C,7.0,高量級,2m/s,2,西北風,20 ~ 25,舒適 ~ 舒適,晴時多雲。降雨機率 0%。溫度攝氏20至28度。舒適。西北風 風速2級(每秒2公尺)。相對濕...
5,2023/04/17 18:00 (Mon.) ~ 2023/04/18 06:00 (Tue.),中部地區,南投縣,晴時多雲,0%,21°C ~ 26°C,22°C,23°C ~ 27°C,87%,20°C,,,1m/s,≤1,偏東風,20 ~ 23,舒適 ~ 舒適,晴時多雲。降雨機率 0%。溫度攝氏21至26度。舒適。偏東風 風速<= 1級(每秒1公尺)。...
6,2023/04/18 06:00 (Tue.) ~ 2023/04/18 18:00 (Tue.),中部地區,南投縣,晴時多雲,-,21°C ~ 29°C,26°C,23°C ~ 31°C,70%,19°C,6.0,高量級,2m/s,2,偏西風,20 ~ 25,舒適 ~ 舒適,晴時多雲。溫度攝氏21至29度。舒適。偏西風 風速2級(每秒2公尺)。相對濕度70%。
7,2023/04/18 18:00 (Tue.) ~ 2023/04/19 06:00 (Wed.),中部地區,南投縣,多雲短暫陣雨或雷雨,-,22°C ~ 27°C,24°C,26°C ~ 29°C,78%,20°C,,,1m/s,≤1,偏東風,22 ~ 24,舒適 ~ 舒適,多雲短暫陣雨或雷雨。溫度攝氏22至27度。舒適。偏東風 風速<= 1級(每秒1公尺)。相對濕...
8,2023/04/19 06:00 (Wed.) ~ 2023/04/19 18:00 (Wed.),中部地區,南投縣,多雲短暫陣雨或雷雨,-,22°C ~ 30°C,27°C,26°C ~ 33°C,77%,22°C,6.0,高量級,2m/s,2,東北風,22 ~ 27,舒適 ~ 悶熱,多雲短暫陣雨或雷雨。溫度攝氏22至30度。舒適至悶熱。東北風 風速2級(每秒2公尺)。相對濕...
9,2023/04/19 18:00 (Wed.) ~ 2023/04/20 06:00 (Thu.),中部地區,南投縣,陰短暫陣雨或雷雨,-,22°C ~ 27°C,24°C,25°C ~ 31°C,90%,22°C,,,2m/s,≤1,西北風,22 ~ 26,舒適 ~ 舒適,陰短暫陣雨或雷雨。溫度攝氏22至27度。舒適。西北風 風速<= 1級(每秒2公尺)。相對濕度...


In [None]:
# Write the long-format forecast table to an Excel workbook in the working directory.
data_df.to_excel(excel_writer="data_df.xlsx")

In [None]:
# Print the distinct values of four columns of data_df, one set per line.
# (Original note attached to the "Unit" column: Beaufort Wind Scale.)
print(
    *(set(data_df[column]) for column in ("Element", "Element_EN", "Unit", "StartTime")),
    sep="\n",
)

In [None]:
# Print a summary of data_df via DataFrame.info().
data_df.info()

In [None]:
# Print a summary of data_df via DataFrame.info().
# NOTE(review): this cell repeats the previous one verbatim — consider removing it.
data_df.info()

In [None]:
# List the distinct locations in the pivoted table, sorted.
# Both pivot cells leave this column named "Location" (the first renames
# "locationName" to it), so the old "locationName" lookup raised KeyError.
sorted(set(data_df_pivot["Location"]))

In [None]:
# List the distinct locations in the pivoted table, sorted.
# Both pivot cells leave this column named "Location" (the first renames
# "locationName" to it), so the old "locationName" lookup raised KeyError.
sorted(set(data_df_pivot["Location"]))