# Mean, Max, Min Temperature Combiner

Python Version: 3.13.5

In [14]:
import os

# All three input datasets live in a "files" directory under the current
# working directory.
_FILES_DIR = os.path.join(os.getcwd(), "files")

MAX_TEMP_FILE_PATH = os.path.join(_FILES_DIR, "maxTemperature.jsonl")
MIN_TEMP_FILE_PATH = os.path.join(_FILES_DIR, "minTemperature.jsonl")
MEAN_TEMP_FILE_PATH = os.path.join(_FILES_DIR, "meanTemperature.jsonl")

In [15]:
from pydantic import BaseModel
from typing import Union, Literal

class MeanTemperatureDataPoint(BaseModel):
    """One mean-temperature record for a station on a given date.

    ``value`` is either a float or the literal string ``"***"`` (presumably
    a marker for a missing reading -- confirm against the data source).
    ``completeness`` must be one of ``"C"``, ``"#"`` or the empty string.
    """

    year: int
    month: int
    day: int
    station: str
    value: Union[float, Literal["***"]]
    completeness: Literal["C", "#", ""]

    def key(self) -> str:
        """Return the ``station/year/month/day`` identifier of this record."""
        parts = (self.station, self.year, self.month, self.day)
        return "/".join(str(part) for part in parts)

class MaxTemperatureDataPoint(BaseModel):
    """One maximum-temperature record for a station on a given date.

    ``value`` is either a float or the literal string ``"***"`` (presumably
    a marker for a missing reading -- confirm against the data source).
    ``completeness`` must be one of ``"C"``, ``"#"`` or the empty string.
    """

    year: int
    month: int
    day: int
    station: str
    value: Union[float, Literal["***"]]
    completeness: Literal["C", "#", ""]

    def key(self) -> str:
        """Return the ``station/year/month/day`` identifier of this record."""
        parts = (self.station, self.year, self.month, self.day)
        return "/".join(str(part) for part in parts)

class MinTemperatureDataPoint(BaseModel):
    """One minimum-temperature record for a station on a given date.

    ``value`` is either a float or the literal string ``"***"`` (presumably
    a marker for a missing reading -- confirm against the data source).
    ``completeness`` must be one of ``"C"``, ``"#"`` or the empty string.
    """

    year: int
    month: int
    day: int
    station: str
    value: Union[float, Literal["***"]]
    completeness: Literal["C", "#", ""]

    def key(self) -> str:
        """Return the ``station/year/month/day`` identifier of this record."""
        parts = (self.station, self.year, self.month, self.day)
        return "/".join(str(part) for part in parts)


class CombinedTemperatureDataPoint(BaseModel):
    """Min, max and mean temperature for one station on one date.

    Each temperature field is either a float or the literal string ``"***"``;
    each ``*Completeness`` field must be ``"C"``, ``"#"`` or the empty string.
    """

    # Identity of the record.
    year: int
    month: int
    day: int
    station: str

    # Minimum temperature and its completeness flag.
    minTemp: Union[float, Literal["***"]]
    minTempCompleteness: Literal["C", "#", ""]

    # Maximum temperature and its completeness flag.
    maxTemp: Union[float, Literal["***"]]
    maxTempCompleteness: Literal["C", "#", ""]

    # Mean temperature and its completeness flag.
    meanTemp: Union[float, Literal["***"]]
    meanTempCompleteness: Literal["C", "#", ""]

In [16]:
import json

def load_json_list_from_jsonl(absFilePath: str)-> list[dict]:
    """Read a JSON Lines file and return its records in file order.

    Args:
        absFilePath: Path of the ``.jsonl`` file to read.

    Returns:
        One decoded JSON value per non-blank line of the file.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    dataset: list[dict] = []
    with open(absFilePath, mode="r") as f:
        # Iterate the file object directly so the whole file is not held in
        # memory as a list of lines first.
        for line in f:
            # Strip whitespace instead of slicing off the last character:
            # the final line may lack a trailing newline, in which case
            # slicing would remove the closing brace of the JSON object.
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. an extra newline at end of file).
                continue
            dataset.append(json.loads(line))
    return dataset

def load_daily_mean_temperature_data_points(absFilePath: str) -> list[MeanTemperatureDataPoint]:
    """Load the JSONL file at ``absFilePath`` as mean-temperature models.

    Every decoded record is expanded as keyword arguments to
    ``MeanTemperatureDataPoint``; the result keeps the file's record order.
    """
    return [
        MeanTemperatureDataPoint(**record)
        for record in load_json_list_from_jsonl(absFilePath)
    ]

def load_daily_min_temperature_data_points(absFilePath: str) -> list[MinTemperatureDataPoint]:
    """Load the JSONL file at ``absFilePath`` as minimum-temperature models.

    Every decoded record is expanded as keyword arguments to
    ``MinTemperatureDataPoint``; the result keeps the file's record order.
    """
    return [
        MinTemperatureDataPoint(**record)
        for record in load_json_list_from_jsonl(absFilePath)
    ]

def load_daily_max_temperature_data_points(absFilePath: str) -> list[MaxTemperatureDataPoint]:
    """Load the JSONL file at ``absFilePath`` as maximum-temperature models.

    Every decoded record is expanded as keyword arguments to
    ``MaxTemperatureDataPoint``; the result keeps the file's record order.
    """
    return [
        MaxTemperatureDataPoint(**record)
        for record in load_json_list_from_jsonl(absFilePath)
    ]

In [17]:
# Read the three source datasets (mean first, then min, then max).
mean_datapoint_list, min_datapoint_list, max_datapoint_list = (
    load_daily_mean_temperature_data_points(MEAN_TEMP_FILE_PATH),
    load_daily_min_temperature_data_points(MIN_TEMP_FILE_PATH),
    load_daily_max_temperature_data_points(MAX_TEMP_FILE_PATH),
)

In [18]:
# Index the mean records by their station/date key. A later duplicate
# replaces the earlier entry after a warning is printed.
mean_datapoint_dict: dict[str, MeanTemperatureDataPoint] = {}
for mean_datapoint in mean_datapoint_list:
    mean_key = mean_datapoint.key()
    if mean_key in mean_datapoint_dict:
        print(f"Warning: Duplicate Mean Datapoint ({mean_key})")
    mean_datapoint_dict[mean_key] = mean_datapoint

In [19]:
# Index the min records by their station/date key. A later duplicate
# replaces the earlier entry after a warning is printed.
min_datapoint_dict: dict[str, MinTemperatureDataPoint] = {}
for min_datapoint in min_datapoint_list:
    min_key = min_datapoint.key()
    if min_key in min_datapoint_dict:
        print(f"Warning: Duplicate Min Datapoint ({min_key})")
    min_datapoint_dict[min_key] = min_datapoint

In [20]:
# Build one combined record per max-temperature record, looking up the
# matching min and mean records by station/date key.
# NOTE: only keys present in the max dataset are emitted; records that exist
# solely in the min or mean datasets do not appear in the output.
combined_datapoint_list: list[CombinedTemperatureDataPoint] = []

# Keys already seen in the max dataset, so duplicates are reported the same
# way they are for the mean and min datasets.
seen_max_keys: set[str] = set()

for max_datapoint in max_datapoint_list:
    # Compute the lookup key once per record instead of on every use.
    max_key = max_datapoint.key()
    if max_key in seen_max_keys:
        # A duplicate max record is still emitted; it is only reported.
        print(f"Warning: Duplicate Max Datapoint ({max_key})")
    seen_max_keys.add(max_key)

    maxTemp = max_datapoint.value
    maxTempCompleteness = max_datapoint.completeness

    matched_min = min_datapoint_dict.get(max_key)
    if matched_min is None:
        print(f"Warning: Min Datapoint Not Found ({max_key})")
        # Fallback values used when no min record shares this key.
        minTemp, minTempCompleteness = "***", "#"
    else:
        minTemp = matched_min.value
        minTempCompleteness = matched_min.completeness

    matched_mean = mean_datapoint_dict.get(max_key)
    if matched_mean is None:
        print(f"Warning: Mean Datapoint Not Found ({max_key})")
        # Fallback values used when no mean record shares this key.
        meanTemp, meanTempCompleteness = "***", "#"
    else:
        meanTemp = matched_mean.value
        meanTempCompleteness = matched_mean.completeness

    combined_datapoint_list.append(
        CombinedTemperatureDataPoint(
            year=max_datapoint.year,
            month=max_datapoint.month,
            day=max_datapoint.day,
            station=max_datapoint.station,
            minTemp=minTemp,
            minTempCompleteness=minTempCompleteness,
            maxTemp=maxTemp,
            maxTempCompleteness=maxTempCompleteness,
            meanTemp=meanTemp,
            meanTempCompleteness=meanTempCompleteness,
        )
    )

In [21]:
def save_datapoint_list(datapoint_list: list[BaseModel], absFilePath: str) -> None:
    """Write the given models to ``absFilePath`` in JSON Lines format.

    The file is opened in write mode, so any existing content is replaced.
    Each model is serialised with ``model_dump_json()`` and followed by a
    newline.

    Args:
        datapoint_list: Models to serialise, written in list order.
        absFilePath: Path of the output file.

    Raises:
        OSError: If the file cannot be opened or written.
    """
    # The ``with`` block closes the file on exit, so no explicit close() is
    # needed.
    with open(absFilePath, mode="w") as f:
        f.writelines(
            f"{datapoint.model_dump_json()}\n" for datapoint in datapoint_list
        )

In [22]:
import os

# Write the combined records to the "files" directory under the current
# working directory.
saveFilePath = os.path.join(os.getcwd(), "files", "combinedTemperature.jsonl")
save_datapoint_list(
    datapoint_list=combined_datapoint_list,
    absFilePath=saveFilePath,
)