In [1]:
import pandas as pd
from itertools import product
import cartopy.crs as ccrs
import xarray as xr
import numpy as np

def extract_wind_speed(x):
    if isinstance(x, str):
        if float(x.split(",")[3]) == 9999:
            return np.nan
        else:
            return float(x.split(",")[3]) / 10
    else:
        return np.nan


def extract_uv(row):
    x = row.WND
    if isinstance(x, str):
        if float(x.split(",")[3]) == 9999:
            return np.nan, np.nan
        elif (float(x.split(",")[0]) == 999) & (x.split(",")[2] == "C"):
            return 0.0, 0.0
        else:
            speed = float(x.split(",")[3]) / 10
            angle = float(x.split(",")[0])
            u = (
                -np.sin(np.radians(angle)) * speed
            )  # negative because we get "the angle, measured in a clockwise direction, between true north and the direction from which the wind is blowing."
            v = -np.cos(np.radians(angle)) * speed
            return u, v
    else:
        return np.nan, np.nan


def extract_mm(x):
    """
    Example:
        01,0000,9,5 -> time of measurement, 10^-5 m, quality flag, source of data ( different data catalog)

    quality flag - always valid when data present in csv
    source of data - different data catalog

    Returns:
        total precip in mm over the previous hour

    """
    if isinstance(x, str):
        if float(x.split(",")[1]) == 9999.0:
            return np.nan
        else:
            return float(x.split(",")[1]) / 10
    else:
        return np.nan


def lonlat_to_xy(projection, longitude, latitude):
    x, y = projection.transform_point(longitude, latitude, ccrs.PlateCarree())
    xmin = -2697520.142522
    x_dist = 3000.0
    ymin = -1587306.152557
    y_dist = 3000.0
    x_reg = 834
    y_reg = 353

    x = ((x - xmin) / x_dist) - x_reg
    y = ((y - ymin) / y_dist) - y_reg
    return x, y


def latlon_to_xy_wrapper(row):
    x, y = lonlat_to_xy(projection, row["LONGITUDE"], row["LATITUDE"])
    return x, y

import numpy as np

def extract_tmp(x):
    """
    Decode ISD TMP field formatted as '±TTTT,Q'
    Example: '+0022,1' -> 2.2°C
    """
    if not isinstance(x, str) or "," not in x:
        return np.nan

    val_str, qc_str = x.split(",")

    # Parse numeric
    try:
        val = int(val_str)
        qc_flag = int(qc_str)
    except ValueError:
        return np.nan
    # Missing indicator (9999 or -9999)
    if abs(val) >= 9999:
        return np.nan

    # Accept only QC flags {0, 1} → 0=good, 1=suspect
    if qc_flag not in [0, 1]:
        return np.nan

    return val / 10.0


In [8]:
import glob
from pathlib import Path

isd_path = "/beegfs/muduchuru/data/isd/"
type = "daily"
year = "1999"
files = list(Path(isd_path).joinpath(str(type), str(year)).glob("*.csv"))


In [9]:
projection = ccrs.LambertConformal(
    central_longitude=262.5,
    central_latitude=38.5,
    standard_parallels=(38.5, 38.5),
    globe=ccrs.Globe(semimajor_axis=6371229, semiminor_axis=6371229),
)
year = 2017
yafter = year + 1

# import ISD data in csv format
stations = pd.read_csv(files[0])
stations["DATE"] = pd.to_datetime(stations.DATE)
stations["tp"] = stations.AA1.map(extract_mm)
stations[["u10", "v10"]] = stations.apply(extract_uv, axis=1, result_type="expand")
stations["temp_C"] = stations.TMP.map(extract_tmp)
stations["dew_C"] = stations.DEW.map(extract_tmp)  # similar format



AttributeError: 'DataFrame' object has no attribute 'AA1'

In [12]:
stations.iloc[0]

STATION                     17140099999
DATE                1999-01-01 00:00:00
LATITUDE                      39.816667
LONGITUDE                          34.8
ELEVATION                        1298.0
NAME                         YOZGAT, TU
TEMP                               30.5
TEMP_ATTRIBUTES                       6
DEWP                               22.8
DEWP_ATTRIBUTES                       6
SLP                              9999.9
SLP_ATTRIBUTES                        0
STP                               879.9
STP_ATTRIBUTES                        6
VISIB                               9.3
VISIB_ATTRIBUTES                      6
WDSP                                5.7
WDSP_ATTRIBUTES                       6
MXSPD                               8.9
GUST                              999.9
MAX                                36.0
MAX_ATTRIBUTES                        *
MIN                                25.9
MIN_ATTRIBUTES                         
PRCP                                0.0


In [7]:
stations[["DATE","LATITUDE","LONGITUDE","temp_C","dew_C","tp","u10","v10"]].dropna()

Unnamed: 0,DATE,LATITUDE,LONGITUDE,temp_C,dew_C,tp,u10,v10
0,1999-01-01 00:00:00,39.816667,34.8,-1.2,-2.9,0.0,-1.818653,-1.050000
1,1999-01-01 06:00:00,39.816667,34.8,-3.4,-5.3,0.0,-3.550704,-2.050000
3,1999-01-01 12:00:00,39.816667,34.8,2.2,-4.3,0.0,-3.983717,-2.300000
5,1999-01-01 18:00:00,39.816667,34.8,-2.6,-7.2,0.0,-2.913047,-1.060262
6,1999-01-02 00:00:00,39.816667,34.8,-6.1,-9.2,0.0,-0.984808,-0.173648
...,...,...,...,...,...,...,...,...
2560,1999-12-30 18:00:00,39.816667,34.8,5.0,3.8,0.0,-2.068096,-0.364661
2561,1999-12-31 00:00:00,39.816667,34.8,2.2,1.1,0.0,-1.299038,-0.750000
2562,1999-12-31 06:00:00,39.816667,34.8,2.0,0.7,0.0,-1.973355,-0.718242
2563,1999-12-31 12:00:00,39.816667,34.8,11.8,-3.3,0.0,1.231273,3.382893
