# Temperature Dataset Downloader

Python Version: 3.13.5

In [18]:
from typing import Literal

def getBaseUrl(dataType: Literal["CLMTEMP", "CLMMAXT", "CLMMINT"],
           rformat: Literal["csv", "json"] = "csv",
           lang: str = "en") -> str:
    return f"https://data.weather.gov.hk/weatherAPI/opendata/opendata.php?dataType={dataType}&lang={lang}&rformat={rformat}"

In [19]:
# period for data collection
START_YEAR  = 2014
END_YEAR    = 2025

## Daily Mean Temperature

In [20]:
from pydantic import BaseModel
from typing import Union, Literal

class MeanTemperatureDataPoint(BaseModel):
    year            :   int
    month           :   int
    day             :   int
    station         :   str
    value           :   Union[float, Literal["***",]]
    completeness    :   Literal["C", "#", '']

In [21]:
MEAN_TEMP_STATIONS = [
"CCH", # Cheung Chau
"CWB", # Clear Water Bay
"HKA", # Hong Kong International Airport
"HKO", # Hong Kong Observatory
"HKP", # Hong Kong Park
"HKS", # Wong Chuk Hang
"HPV", # Happy Valley
"JKB", # Tseung Kwan O
"KLT", # Kowloon City
"KP" , # King's Park
"KSC", # Kau Sai Chau
"KTG", # Kwun Tong
"LFS", # Lau Fau Shan
"NGP", # Ngong Ping
"PEN", # Peng Chau
"PLC", # Tai Mei Tuk
"SE1", # Kai Tak Runway Park
"SEK", # Shek Kong
"SHA", # Sha Tin
"SKG", # Sai Kung
"SKW", # Shau Kei Wan
"SSH", # Sheung Shui
"SSP", # Sham Shui Po
"STY", # Stanley
"TC" , # Tate's Cairn
"TKL", # Ta Kwu Ling
"TMS", # Tai Mo Shan
"TPO", # Tai Po (Conservation Studies Centre)
"TU1", # Tuen Mun Children and Juvenile Home
"TW" , # Tsuen Wan Shing Mun Valley
"TWN", # Tsuen Wan
"TY1", # New Tsing Yi Station
"TYW", # Pak Tam Chung (Tsak Yue Wu)
"VP1", # The Peak
"WGL", # Waglan Island
"WLP", # Wetland Park
"WTS", # Wong Tai Sin
"YCT", # Tai Po (Yuan Chau Tsai Park)
"YLP", # Yuen Long Park
]

In [22]:
import requests
def getDailyMeanTempJSON(year: int, station: str) -> list[MeanTemperatureDataPoint]:
    url = f"{getBaseUrl("CLMTEMP", "json")}&year={year}&station={station}"
    response = requests.get(url)
    if (response.status_code != 200):
        raise Exception(f"DMT - unexpected status code ({response.status_code}) for y=({year}), s=({station}). Response Text: {response.text}")
    try:
        raw_data =  response.json()
    except Exception as e:
        raise e
    # data = list[['年/Year','月/Month','日/Day','數值/Value', '數據完整性/data Completeness']]
    # 'legend': ['*** 沒有數據/unavailable','# 數據不完整/data incomplete','C 數據完整/data Complete']

    datapoint_list: list[MeanTemperatureDataPoint] = []

    for row in raw_data["data"]:
        datapoint = MeanTemperatureDataPoint(station=station,
                                             year        = row[0],
                                             month       = row[1],
                                             day         = row[2],
                                             value       = row[3],
                                             completeness= row[4])
        datapoint_list.append(datapoint)
    return datapoint_list

In [23]:
mean_datapoint_list: list[MeanTemperatureDataPoint] = []
for station in MEAN_TEMP_STATIONS:
    for year in range(START_YEAR, END_YEAR + 1):
        try:
            mean_datapoint_list.extend(getDailyMeanTempJSON(year, station))
        except Exception as e:
            print(f"Mean Temperature Download Error: could not load y=({year}) station=({station}) e=({e})")



Mean Temperature Download Error: could not load y=(2014) station=(CWB) e=(Expecting value: line 1 column 1 (char 0))
Mean Temperature Download Error: could not load y=(2015) station=(CWB) e=(Expecting value: line 1 column 1 (char 0))
Mean Temperature Download Error: could not load y=(2016) station=(CWB) e=(Expecting value: line 1 column 1 (char 0))
Mean Temperature Download Error: could not load y=(2017) station=(CWB) e=(Expecting value: line 1 column 1 (char 0))
Mean Temperature Download Error: could not load y=(2023) station=(TPO) e=(Expecting value: line 1 column 1 (char 0))
Mean Temperature Download Error: could not load y=(2024) station=(TPO) e=(Expecting value: line 1 column 1 (char 0))
Mean Temperature Download Error: could not load y=(2025) station=(TPO) e=(Expecting value: line 1 column 1 (char 0))
Mean Temperature Download Error: could not load y=(2014) station=(YCT) e=(Expecting value: line 1 column 1 (char 0))
Mean Temperature Download Error: could not load y=(2015) station

In [24]:
def save_datapoint_list(datapoint_list: list[BaseModel], absFilePath: str):
    with open(absFilePath, mode="w") as f:
        for datapoint in datapoint_list:
            f.write(datapoint.model_dump_json())
            f.write("\n")
        f.close()

In [25]:
import os

saveFilePath = os.path.join(os.getcwd(), "files", "meanTemperature.jsonl")

save_datapoint_list(mean_datapoint_list, saveFilePath)

## Daily Max Temperature

In [26]:
from pydantic import BaseModel
from typing import Union, Literal

class MaxTemperatureDataPoint(BaseModel):
    year            :   int
    month           :   int
    day             :   int
    station         :   str
    value           :   Union[float, Literal["***",]]
    completeness    :   Literal["C", "#", '']

In [27]:
MAX_TEMP_STATIONS = [
"CCH",   # Cheung Chau
"CWB",   # Clear Water Bay
"HKA",   # Hong Kong International Airport
"HKO",   # Hong Kong Observatory
"HKP",   # Hong Kong Park
"HKS",   # Wong Chuk Hang
"HPV",   # Happy Valley
"JKB",   # Tseung Kwan O
"KLT",   # Kowloon City
"KP" ,   # King's Park
"KSC",   # Kau Sai Chau
"KTG",   # Kwun Tong
"LFS",   # Lau Fau Shan
"NGP",   # Ngong Ping
"PEN",   # Peng Chau
"PLC",   # Tai Mei Tuk
"SE1",   # Kai Tak Runway Park
"SEK",   # Shek Kong
"SHA",   # Sha Tin
"SKG",   # Sai Kung
"SKW",   # Shau Kei Wan
"SSH",   # Sheung Shui
"SSP",   # Sham Shui Po
"STY",   # Stanley
"TC" ,   # Tate's Cairn
"TKL",   # Ta Kwu Ling
"TMS",   # Tai Mo Shan
"TPO",   # Tai Po (Conservation Studies Centre)
"TU1",   # Tuen Mun Children and Juvenile Home
"TW" ,   # Tsuen Wan Shing Mun Valley
"TWN",   # Tsuen Wan
"TY1",   # New Tsing Yi Station
"TYW",   # Pak Tam Chung (Tsak Yue Wu)
"VP1",   # The Peak
"WGL",   # Waglan Island
"WLP",   # Wetland Park
"WTS",   # Wong Tai Sin
"YCT",   # Tai Po (Yuan Chau Tsai Park)
"YLP",   # Yuen Long Park
]

In [None]:
import requests
def getDailyMaxTempJSON(year: int, station: str) -> list[MaxTemperatureDataPoint]:
    url = f"{getBaseUrl("CLMMAXT", "json")}&year={year}&station={station}"
    response = requests.get(url)
    if (response.status_code != 200):
        raise Exception(f"DailyMaxTemp - unexpected status code ({response.status_code}) for y=({year}), s=({station}). Response Text: {response.text}")
    try:
        raw_data =  response.json()
    except Exception as e:
        raise e
    # data = list[['年/Year','月/Month','日/Day','數值/Value', '數據完整性/data Completeness']]
    # 'legend': ['*** 沒有數據/unavailable','# 數據不完整/data incomplete','C 數據完整/data Complete']

    datapoint_list: list[MaxTemperatureDataPoint] = []

    for row in raw_data["data"]:
        datapoint = MaxTemperatureDataPoint(station=station,
                                             year        = row[0],
                                             month       = row[1],
                                             day         = row[2],
                                             value       = row[3],
                                             completeness= row[4])
        datapoint_list.append(datapoint)
    return datapoint_list

In [30]:
max_datapoint_list: list[MaxTemperatureDataPoint] = []
for station in MAX_TEMP_STATIONS:
    for year in range(START_YEAR, END_YEAR + 1):
        try:
            max_datapoint_list.extend(getDailyMaxTempJSON(year, station))
        except Exception as e:
            print(f"Max Temperature Download Error: could not load y=({year}) station=({station}) e=({e})")

Max Temperature Download Error: could not load y=(2014) station=(CWB) e=(Expecting value: line 1 column 1 (char 0))
Max Temperature Download Error: could not load y=(2015) station=(CWB) e=(Expecting value: line 1 column 1 (char 0))
Max Temperature Download Error: could not load y=(2016) station=(CWB) e=(Expecting value: line 1 column 1 (char 0))
Max Temperature Download Error: could not load y=(2017) station=(CWB) e=(Expecting value: line 1 column 1 (char 0))
Max Temperature Download Error: could not load y=(2023) station=(TPO) e=(Expecting value: line 1 column 1 (char 0))
Max Temperature Download Error: could not load y=(2024) station=(TPO) e=(Expecting value: line 1 column 1 (char 0))
Max Temperature Download Error: could not load y=(2025) station=(TPO) e=(Expecting value: line 1 column 1 (char 0))
Max Temperature Download Error: could not load y=(2014) station=(YCT) e=(Expecting value: line 1 column 1 (char 0))
Max Temperature Download Error: could not load y=(2015) station=(YCT) e=

In [31]:
import os

saveFilePath = os.path.join(os.getcwd(), "files", "maxTemperature.jsonl")
save_datapoint_list(max_datapoint_list, saveFilePath)

## Daily Min Temperature

In [32]:
from pydantic import BaseModel
from typing import Union, Literal

class MinTemperatureDataPoint(BaseModel):
    year            :   int
    month           :   int
    day             :   int
    station         :   str
    value           :   Union[float, Literal["***",]]
    completeness    :   Literal["C", "#", '']

In [33]:
MIN_TEMP_STATIONS = [
"CCH",   # Cheung Chau
"CWB",   # Clear Water Bay
"HKA",   # Hong Kong International Airport
"HKO",   # Hong Kong Observatory
"HKP",   # Hong Kong Park
"HKS",   # Wong Chuk Hang
"HPV",   # Happy Valley
"JKB",   # Tseung Kwan O
"KLT",   # Kowloon City
"KP" ,   # King's Park
"KSC",   # Kau Sai Chau
"KTG",   # Kwun Tong
"LFS",   # Lau Fau Shan
"NGP",   # Ngong Ping
"PEN",   # Peng Chau
"PLC",   # Tai Mei Tuk
"SE1",   # Kai Tak Runway Park
"SEK",   # Shek Kong
"SHA",   # Sha Tin
"SKG",   # Sai Kung
"SKW",   # Shau Kei Wan
"SSH",   # Sheung Shui
"SSP",   # Sham Shui Po
"STY",   # Stanley
"TC" ,   # Tate's Cairn
"TKL",   # Ta Kwu Ling
"TMS",   # Tai Mo Shan
"TPO",   # Tai Po (Conservation Studies Centre)
"TU1",   # Tuen Mun Children and Juvenile Home
"TW" ,   # Tsuen Wan Shing Mun Valley
"TWN",   # Tsuen Wan
"TY1",   # New Tsing Yi Station
"TYW",   # Pak Tam Chung (Tsak Yue Wu)
"VP1",   # The Peak
"WGL",   # Waglan Island
"WLP",   # Wetland Park
"WTS",   # Wong Tai Sin
"YCT",   # Tai Po (Yuan Chau Tsai Park)
"YLP",   # Yuen Long Park
]

In [34]:
import requests
def getDailyMinTempJSON(year: int, station: str) -> list[MinTemperatureDataPoint]:
    url = f"{getBaseUrl("CLMMINT", "json")}&year={year}&station={station}"
    response = requests.get(url)
    if (response.status_code != 200):
        raise Exception(f"DMT - unexpected status code ({response.status_code}) for y=({year}), s=({station}). Response Text: {response.text}")
    try:
        raw_data =  response.json()
    except Exception as e:
        raise e
    # data = list[['年/Year','月/Month','日/Day','數值/Value', '數據完整性/data Completeness']]
    # 'legend': ['*** 沒有數據/unavailable','# 數據不完整/data incomplete','C 數據完整/data Complete']

    datapoint_list: list[MinTemperatureDataPoint] = []

    for row in raw_data["data"]:
        datapoint = MinTemperatureDataPoint(station=station,
                                             year        = row[0],
                                             month       = row[1],
                                             day         = row[2],
                                             value       = row[3],
                                             completeness= row[4])
        datapoint_list.append(datapoint)
    return datapoint_list

In [35]:
min_datapoint_list: list[MinTemperatureDataPoint] = []
for station in MIN_TEMP_STATIONS:
    for year in range(START_YEAR, END_YEAR + 1):
        try:
            min_datapoint_list.extend(getDailyMinTempJSON(year, station))
        except Exception as e:
            print(f"Min Temperature Download Error: could not load y=({year}) station=({station}) e=({e})")

Min Temperature Download Error: could not load y=(2014) station=(CWB) e=(Expecting value: line 1 column 1 (char 0))
Min Temperature Download Error: could not load y=(2015) station=(CWB) e=(Expecting value: line 1 column 1 (char 0))
Min Temperature Download Error: could not load y=(2016) station=(CWB) e=(Expecting value: line 1 column 1 (char 0))
Min Temperature Download Error: could not load y=(2017) station=(CWB) e=(Expecting value: line 1 column 1 (char 0))
Min Temperature Download Error: could not load y=(2023) station=(TPO) e=(Expecting value: line 1 column 1 (char 0))
Min Temperature Download Error: could not load y=(2024) station=(TPO) e=(Expecting value: line 1 column 1 (char 0))
Min Temperature Download Error: could not load y=(2025) station=(TPO) e=(Expecting value: line 1 column 1 (char 0))
Min Temperature Download Error: could not load y=(2014) station=(YCT) e=(Expecting value: line 1 column 1 (char 0))
Min Temperature Download Error: could not load y=(2015) station=(YCT) e=

In [36]:
import os

saveFilePath = os.path.join(os.getcwd(), "files", "minTemperature.jsonl")
save_datapoint_list(min_datapoint_list, saveFilePath)