In [None]:
# -*- coding: utf-8 -*-
"""
Created on 01/14, 2023
@author: WillyF

"""

# https://pandas.pydata.org/docs/reference/api/pandas.json_normalize.html
# https://opendata.cwb.gov.tw/dataset/forecast/F-C0032-001
# https://opendata.cwb.gov.tw/dist/opendata-swagger.html?urls.primaryName=openAPI#/%E9%A0%90%E5%A0%B1/get_v1_rest_datastore_F_C0032_001
# https://pandas.pydata.org/docs/reference/api/pandas.pivot_table.html
# https://stackoverflow.com/questions/31306741/unmelt-pandas-dataframe
# https://medium.com/%E6%95%B8%E6%93%9A%E4%B8%8D%E6%AD%A2-not-only-data/pandas-%E5%BF%AB%E9%80%9F%E7%9E%AD%E8%A7%A3-pivot-table-%E8%88%87%E6%87%89%E7%94%A8-21e4c37b9216



In [None]:
import pandas as pd
# import urllib
import urllib.request
import json
import numpy as np
# from glob import glob
import os
import warnings
# Notebook-wide configuration.
# Silence every warning category for the rest of the session. Note that this
# also hides deprecation/future warnings emitted by pandas.
warnings.simplefilter("ignore")
# Let wide/long frames render without truncation (up to 500 columns, 1000 rows).
pd.options.display.max_columns = 500
pd.options.display.max_rows = 1000

In [None]:
# Load the region lookup table published in the project's GitHub repository.
# The forecast-cleaning cell left-joins this table on its `City/County` column.
region_csv_url = "https://github.com/LeBronWilly/TW_Weather_FCST/raw/main/TW_Region.csv"
TW_Region = pd.read_csv(region_csv_url, encoding="utf8")
TW_Region

In [None]:
# Download the F-C0032-001 forecast dataset from the CWB open-data REST API
# and decode the JSON payload into `data`.
#
# NOTE(review): the authorization key was hard-coded in the URL. It can now be
# overridden through the CWB_API_KEY environment variable; the original literal
# is kept only as a backward-compatible fallback and should be removed once the
# key is provisioned through the environment.
cwb_api_key = os.environ.get("CWB_API_KEY", "CWB-4FB338DD-B0B6-49EC-BDD4-8293D48B8071")
data_source = (
    "https://opendata.cwb.gov.tw/api/v1/rest/datastore/F-C0032-001"
    f"?Authorization={cwb_api_key}&format=JSON&locationName=&elementName=&sort=time"
)
# Use the response as a context manager so the HTTP connection is always
# closed, even if reading or decoding fails.
with urllib.request.urlopen(data_source) as json_url:
    data = json.load(json_url)
data

In [None]:
# Flatten the nested forecast JSON into one row per
# (location, weather element, time window) and clean it for display.
data_df = pd.json_normalize(
    data["records"],
    record_path=["location", "weatherElement", "time"],
    meta=[
        ["location", "locationName"],
        ["location", "weatherElement", "elementName"],
        "datasetDescription",
    ],
)
# json_normalize emits dotted paths (e.g. "parameter.parameterName");
# keep only the last path component as the column name.
data_df.columns = [label.split(".")[-1] for label in data_df.columns]

# Assign the results back instead of calling `.replace(..., inplace=True)` on a
# selected column: that is chained assignment, which does not update the frame
# once pandas Copy-on-Write is active (and the warning about it is suppressed
# by the notebook-wide warning filter).
data_df["parameterUnit"] = (
    data_df["parameterUnit"]
    .replace({"百分比": "%", "C": "°C"})
    .fillna("")  # elements without a unit get an empty suffix
)
# The label spellings below are referenced verbatim by the pivot cell.
data_df["elementName"] = data_df["elementName"].replace(
    {
        "Wx": "Weather Forcast",
        "PoP": "Probability of Precipitation",
        "MinT": "Min Temperature",
        "MaxT": "Max Temperature",
        "CI": "Comfort Index",
    }
)
# Human-readable value, e.g. value + unit suffix.
data_df["Parameter"] = data_df["parameterName"] + data_df["parameterUnit"]

# Reformat timestamps: dashes become slashes and the last three characters
# are dropped (vectorised string ops instead of a row-wise apply).
for time_col in ("startTime", "endTime"):
    data_df[time_col] = data_df[time_col].str.replace("-", "/", regex=False).str[:-3]

# Attach the region lookup; the join must happen before the 臺 -> 台 rewrite
# below because it matches on the original location spelling.
data_df = data_df.merge(
    TW_Region,
    how="left",
    left_on="locationName",
    right_on="City/County",
    validate="many_to_one",
)
data_df["locationName"] = data_df["locationName"].str.replace("臺", "台", regex=False)

data_df

In [None]:
# Reshape the long forecast table to one row per (time window, region, location)
# with one column per weather element.
#
# `values`/`columns` are passed as scalars so the result has flat column labels
# taken from `elementName`; this replaces the previous positional relabelling,
# which silently depended on exactly five elements sorting alphabetically.
# `aggfunc="first"` picks the single value of each cell explicitly instead of
# relying on an identity lambda being accepted as an aggregation.
data_df_pivot = (
    pd.pivot_table(
        data_df,
        index=["startTime", "endTime", "Region", "locationName"],
        columns="elementName",
        values="Parameter",
        aggfunc="first",
    )
    .rename_axis(columns=None)  # drop the leftover "elementName" axis label
    .reset_index()
    .sort_values(by=["locationName", "startTime", "endTime"])
    .reset_index(drop=True)
)

# Combined display columns: "<start> ~ <end>" and "<max> ~ <min>".
data_df_pivot["Period"] = data_df_pivot["startTime"].str.cat(data_df_pivot["endTime"], sep=" ~ ")
data_df_pivot["Temperature"] = data_df_pivot["Max Temperature"].str.cat(
    data_df_pivot["Min Temperature"], sep=" ~ "
)
# errors="raise" makes a missing element column fail loudly here.
data_df_pivot = data_df_pivot.rename(
    columns={
        "locationName": "Location",
        "Probability of Precipitation": "PoP",
        "Weather Forcast": "Weather FCST",
    },
    errors="raise",
)

data_df_pivot

In [56]:
# Download the F-D0047-091 forecast dataset from the CWB open-data REST API
# and decode the JSON payload into `data` (this overwrites the earlier payload).
#
# NOTE(review): the authorization key was hard-coded in the URL. It can now be
# overridden through the CWB_API_KEY environment variable; the original literal
# is kept only as a backward-compatible fallback and should be removed once the
# key is provisioned through the environment.
cwb_api_key = os.environ.get("CWB_API_KEY", "CWB-4FB338DD-B0B6-49EC-BDD4-8293D48B8071")
data_source = (
    "https://opendata.cwb.gov.tw/api/v1/rest/datastore/F-D0047-091"
    f"?Authorization={cwb_api_key}&sort=time"
)
# Use the response as a context manager so the HTTP connection is always
# closed, even if reading or decoding fails.
with urllib.request.urlopen(data_source) as json_url:
    data = json.load(json_url)
data

{'success': 'true',
 'result': {'resource_id': 'F-D0047-091',
  'fields': [{'id': 'contentDescription', 'type': 'String'},
   {'id': 'datasetDescription', 'type': 'String'},
   {'id': 'locationsName', 'type': 'String'},
   {'id': 'dataid', 'type': 'String'},
   {'id': 'locationName', 'type': 'String'},
   {'id': 'geocode', 'type': 'Double'},
   {'id': 'lat', 'type': 'Double'},
   {'id': 'lon', 'type': 'Double'},
   {'id': 'elementName', 'type': 'String'},
   {'id': 'description', 'type': 'String'},
   {'id': 'startTime', 'type': 'Timestamp'},
   {'id': 'endTime', 'type': 'Timestamp'},
   {'id': 'dataTime', 'type': 'Timestamp'},
   {'id': 'value', 'type': 'String'},
   {'id': 'measures', 'type': 'String'}]},
 'records': {'locations': [{'datasetDescription': '臺灣各縣市鄉鎮未來1週逐12小時天氣預報',
    'locationsName': '台灣',
    'dataid': 'D0047-091',
    'location': [{'locationName': '新竹縣',
      'geocode': '10004000',
      'lat': '24.841245',
      'lon': '120.995698',
      'weatherElement': [{'eleme

In [57]:
# Flatten the F-D0047-091 payload into one row per element value, carrying the
# location, element and time-window fields along as metadata columns.
data_df = pd.json_normalize(
    data["records"],
    record_path=["locations", "location", "weatherElement", "time", "elementValue"],
    meta=[
        ["locations", "location", "locationName"],
        ["locations", "location", "lon"],
        ["locations", "location", "lat"],
        ["locations", "location", "weatherElement", "elementName"],
        ["locations", "location", "weatherElement", "description"],
        ["locations", "location", "weatherElement", "time", "startTime"],
        ["locations", "location", "weatherElement", "time", "endTime"],
    ],
)

# Rename by source label rather than by position, so a change in the key order
# of the API records cannot silently mislabel columns. Meta columns are named
# by json_normalize as the dot-joined path. The dict order is the final
# column order of the frame.
column_labels = {
    "locations.location.weatherElement.time.startTime": "StartTime",
    "locations.location.weatherElement.time.endTime": "EndTime",
    "locations.location.locationName": "Location",
    "locations.location.weatherElement.description": "Element",
    "locations.location.weatherElement.elementName": "Element_EN",
    "value": "Value",
    "measures": "Unit",
    "locations.location.lon": "Long",
    "locations.location.lat": "Lat",
}
# errors="raise" turns a missing source column into an immediate KeyError.
data_df = data_df.rename(columns=column_labels, errors="raise")[list(column_labels.values())]
data_df

Unnamed: 0,StartTime,EndTime,Location,Element,Element_EN,Value,Unit,Long,Lat
0,2023-04-15 12:00:00,2023-04-15 18:00:00,新竹縣,12小時降雨機率,PoP12h,40,百分比,120.995698,24.841245
1,2023-04-15 18:00:00,2023-04-16 06:00:00,新竹縣,12小時降雨機率,PoP12h,10,百分比,120.995698,24.841245
2,2023-04-16 06:00:00,2023-04-16 18:00:00,新竹縣,12小時降雨機率,PoP12h,0,百分比,120.995698,24.841245
3,2023-04-16 18:00:00,2023-04-17 06:00:00,新竹縣,12小時降雨機率,PoP12h,0,百分比,120.995698,24.841245
4,2023-04-17 06:00:00,2023-04-17 18:00:00,新竹縣,12小時降雨機率,PoP12h,0,百分比,120.995698,24.841245
...,...,...,...,...,...,...,...,...,...
5847,2023-04-19 18:00:00,2023-04-20 06:00:00,新竹市,平均露點溫度,Td,21,攝氏度,120.96211,24.818109
5848,2023-04-20 06:00:00,2023-04-20 18:00:00,新竹市,平均露點溫度,Td,21,攝氏度,120.96211,24.818109
5849,2023-04-20 18:00:00,2023-04-21 06:00:00,新竹市,平均露點溫度,Td,20,攝氏度,120.96211,24.818109
5850,2023-04-21 06:00:00,2023-04-21 18:00:00,新竹市,平均露點溫度,Td,19,攝氏度,120.96211,24.818109


In [58]:
# Print the distinct values of each categorical column, one set per line.
print(
    *(set(data_df[name]) for name in ("Element", "Element_EN", "Unit", "StartTime")),
    sep="\n",
)

{'最小舒適度指數', '最高體感溫度', '平均溫度', '天氣現象', '紫外線指數', '風向', '平均相對濕度', '最大舒適度指數', '天氣預報綜合描述', '平均露點溫度', '最低溫度', '最高溫度', '12小時降雨機率', '最大風速', '最低體感溫度'}
{'UVI', 'RH', 'Wx', 'WS', 'MaxAT', 'PoP12h', 'WD', 'MaxCI', 'MinAT', 'MinCI', 'WeatherDescription', 'MinT', 'Td', 'T', 'MaxT'}
{'紫外線指數', '攝氏度', 'NA', '自定義 CI 文字', '公尺/秒', '自定義 Wx 單位', '曝曬級數', '8方位', '自定義 Wx 文字', 'NA ', '蒲福風級', '百分比'}
{'2023-04-21 18:00:00', '2023-04-19 18:00:00', '2023-04-20 06:00:00', '2023-04-17 06:00:00', '2023-04-15 12:00:00', '2023-04-16 18:00:00', '2023-04-18 06:00:00', '2023-04-18 18:00:00', '2023-04-15 18:00:00', '2023-04-17 18:00:00', '2023-04-20 18:00:00', '2023-04-16 06:00:00', '2023-04-19 06:00:00', '2023-04-21 06:00:00'}


In [None]:
# Print a structural summary of the normalized forecast frame
# (columns, non-null counts, dtypes, memory usage).
data_df.info()

In [None]:
# Sorted list of the distinct locations in the pivoted forecast.
# The pivot cell renames `locationName` to `Location`, so the old column name
# raises KeyError on a fresh Restart & Run All.
sorted(set(data_df_pivot["Location"]))

In [None]:
# Sorted list of the distinct locations in the pivoted forecast
# (this cell repeats the check from the cell before it).
# The pivot cell renames `locationName` to `Location`, so the old column name
# raises KeyError on a fresh Restart & Run All.
sorted(set(data_df_pivot["Location"]))