In [1]:
import glob
import gzip
import os
import shutil
import sys
import time
import warnings
import zipfile
from datetime import date

import cdsapi
import h5py
import numpy
import numpy as np
import pandas as pd
import urllib3
import xarray

# Silence every Python warning for the rest of the session (blanket "ignore" filter).
warnings.filterwarnings("ignore")
import pycountry

# Extend the import search path with ../cds-backend/code/ relative to the current
# working directory. NOTE(review): presumably this is where the cds_eua4 module
# imported further down lives - confirm. The path depends on where the kernel
# was started.
sys.path.append(os.getcwd() + "/../cds-backend/code/")
import copy
import glob
import json

import cds_eua4 as eua
import numba
import pandas
import psutil
import requests
from numba import njit


def find_nearest(array, value):
    """Return the element of ``array`` that is closest to ``value``.

    Parameters
    ----------
    array : array-like
        Candidate values; converted with ``np.asarray``. Any dimensionality
        is accepted.
    value : scalar
        Target value the candidates are compared against.

    Returns
    -------
    scalar
        The element with the smallest absolute difference to ``value``.
        On ties the first such element (in flattened order) is returned.

    Raises
    ------
    ValueError
        If ``array`` is empty (raised by ``argmin``).
    """
    array = np.asarray(array)
    idx = np.abs(array - value).argmin()
    # argmin() without an axis returns an index into the *flattened* array,
    # so the lookup must go through .flat as well. Indexing the original
    # array directly only worked for 1-D input.
    return array.flat[idx]


def datetime_to_seconds(dates, ref="1900-01-01T00:00:00"):
    """Convert datetime64 values into integer seconds elapsed since ``ref``.

    ``dates`` is differenced against ``np.datetime64(ref)``, expressed in
    units of one second and cast to ``np.int64`` (the cast drops any
    fractional part). ``ref`` defaults to 1900-01-01 00:00:00.
    """
    epoch = np.datetime64(ref)
    one_second = np.timedelta64(1, "s")
    elapsed = (dates - epoch) / one_second
    return elapsed.astype(np.int64)


def seconds_to_datetime(seconds, ref="1900-01-01"):
    """Turn second offsets counted from ``ref`` into pandas datetimes.

    ``seconds`` is first converted with ``np.asarray`` and then handed to
    ``pd.to_datetime`` with ``unit="s"`` and ``origin=ref``.
    """
    offsets = np.asarray(seconds)
    return pd.to_datetime(offsets, origin=ref, unit="s")


import matplotlib
import matplotlib.pylab as plt
import matplotlib.pyplot as maplt

# Notebook-wide plot defaults: large canvas and large text.
matplotlib.rcParams["figure.figsize"] = (20, 10)
font = {
    # "normal" is a font weight/style value, not a family name. Using it as
    # the family makes matplotlib log "findfont: Font family ['normal'] not
    # found" and fall back to the default font. "sans-serif" selects that
    # same default family explicitly, without the messages.
    "family": "sans-serif",
    # 'weight' : 'bold',
    # Single source of truth for the font size. The earlier font.size=20
    # setting was dead, because this rc() call always overwrote it with 22.
    "size": 22,
}
matplotlib.rc("font", **font)

In [3]:
# Collect the station files to inspect. glob returns matches in arbitrary,
# filesystem-dependent order; sorting makes the order (and therefore which
# stations any early-terminating loop over this list sees) reproducible.
stations = sorted(
    glob.glob(
        "/mnt/users/scratch/leo/scratch/converted_v11/long/*11035*.nc"  # 0-20000-0-01107*
    )
)
stations[:10]

['/mnt/users/scratch/leo/scratch/converted_v11/long/0-20001-0-11035_CEUAS_merged_v1.nc']

In [4]:
# Directory in which each station file is opened (the same location the
# `stations` list is globbed from).
TARGET_DIR = "/mnt/users/scratch/leo/scratch/converted_v11/long/"
# Stop scanning once this many affected stations have been collected.
MAX_BAD_STATIONS = 9

# Scan the station files for observed_variable == 117 rows whose z_coordinate
# is missing, display the offending rows and remember the file.
bad_stats = []
for stat in stations:
    # Same file name, resolved inside TARGET_DIR. The path is built directly
    # instead of glob(...)[0]: re-globbing a literal path is redundant, raises
    # an opaque IndexError when the file is absent, and would misinterpret
    # glob metacharacters in a file name.
    target_station = os.path.join(TARGET_DIR, os.path.basename(stat))

    with eua.CDMDataset(target_station) as file:
        df = file.to_dataframe(
            groups=["observations_table"],
            variables=[
                "date_time",
                "z_coordinate",
                "z_coordinate_type",
                "z_coordinate_method",
                "observed_variable",
                "observation_value",
            ],
        )
    # NOTE(review): code 117 is presumably geopotential, judging by the
    # df_geo name - confirm against the variable table.
    df_geo = df[df.observed_variable == 117]
    df_nan = df_geo[df_geo.z_coordinate.isna()]
    if df_nan.empty:
        continue

    print(stat)
    print(target_station)
    display(df_nan)
    print()
    bad_stats.append(target_station)
    if len(bad_stats) >= MAX_BAD_STATIONS:
        break

In [5]:
# Display df_geo as left behind by the station loop: the observed_variable == 117
# rows of the last station that was processed.
df_geo

Unnamed: 0,date_time,z_coordinate,z_coordinate_type,z_coordinate_method,observed_variable,observation_value
51544986,1908-10-28 12:54:00,101205.007812,1,-2147483648,117,98.066498
51544987,1908-10-29 10:54:00,101205.007812,1,-2147483648,117,98.066498
51544988,1908-10-30 09:37:00,98723.343750,1,-2147483648,117,2147.656250
51544989,1908-12-03 07:19:00,77072.539062,1,-2147483648,117,22064.962891
51544990,1908-12-03 07:19:00,82005.585938,1,-2147483648,117,17161.636719
...,...,...,...,...,...,...
64850592,2022-12-31 11:30:23,99440.000000,1,-2147483648,117,2314.369385
64850593,2022-12-31 11:30:23,99470.000000,1,-2147483648,117,2294.756104
64850594,2022-12-31 11:30:23,99530.000000,1,-2147483648,117,2235.916260
64850595,2022-12-31 11:30:23,99540.000000,1,-2147483648,117,2226.109619


In [15]:
def checksorting(zc):
    """Return True when ``zc`` is in non-decreasing order.

    Empty and single-element inputs count as sorted. A NaN anywhere makes
    the comparison False, so such input is reported as unsorted.
    """
    return np.all(zc[:-1] <= zc[1:])


# For every distinct date_time, check that the z_coordinate values of the
# observed_variable == 117 rows are stored in non-decreasing order and display
# the subsets that are not.
sub_df = df[df.observed_variable == 117]
# One group per distinct date_time. The previous loop walked over every *row's*
# date_time and re-filtered the whole frame each time, which is quadratic in the
# number of rows and displayed the same unsorted subset once per row it
# contains. groupby keeps the original row order inside each group.
# test_df / test_zc stay bound after the loop, as before, for later inspection.
for _, test_df in sub_df.groupby("date_time", sort=False):
    test_zc = test_df.z_coordinate
    if not checksorting(np.array(test_zc)):
        display(test_df)
    

In [11]:
# Display test_df, i.e. the last per-date_time subset assigned by the
# sorting-check loop.
test_df

Unnamed: 0,date_time,z_coordinate,z_coordinate_type,z_coordinate_method,observed_variable,observation_value
64848064,2022-12-31 11:30:23,380.0,1,-2147483648,117,364807.375000
64848065,2022-12-31 11:30:23,390.0,1,-2147483648,117,363454.062500
64848066,2022-12-31 11:30:23,400.0,1,-2147483648,117,361698.687500
64848067,2022-12-31 11:30:23,410.0,1,-2147483648,117,360002.125000
64848068,2022-12-31 11:30:23,420.0,1,-2147483648,117,358364.406250
...,...,...,...,...,...,...
64850592,2022-12-31 11:30:23,99440.0,1,-2147483648,117,2314.369385
64850593,2022-12-31 11:30:23,99470.0,1,-2147483648,117,2294.756104
64850594,2022-12-31 11:30:23,99530.0,1,-2147483648,117,2235.916260
64850595,2022-12-31 11:30:23,99540.0,1,-2147483648,117,2226.109619


##  0-20000-0-01107*

In [31]:
# Rows of df recorded at 1925-04-14 09:00:00 with observed_variable code 117.
# NOTE(review): the output below differs from the 11035 station loaded earlier,
# so df was presumably re-loaded for another station first - confirm.
df[(df.date_time == "1925-04-14 09:00:00") & (df.observed_variable == 117)]

Unnamed: 0,date_time,z_coordinate,z_coordinate_type,z_coordinate_method,observed_variable,observation_value
158950,1925-04-14 09:00:00,1469.387695,0,-2147483648,117,14409.771484
158951,1925-04-14 09:00:00,2607.142822,0,-2147483648,117,25567.337891
158952,1925-04-14 09:00:00,270.408173,0,-2147483648,117,2651.798096
158953,1925-04-14 09:00:00,765.306152,0,-2147483648,117,7505.089355


In [32]:
# Rows of df recorded at 1925-04-14 09:00:00 with observed_variable code 140.
df[(df.date_time == "1925-04-14 09:00:00") & (df.observed_variable == 140)]

Unnamed: 0,date_time,z_coordinate,z_coordinate_type,z_coordinate_method,observed_variable,observation_value
324293,1925-04-14 09:00:00,1469.387695,0,-2147483648,140,6.8944
324294,1925-04-14 09:00:00,2607.142822,0,-2147483648,140,9.396926
324295,1925-04-14 09:00:00,270.408173,0,-2147483648,140,8.86327
324296,1925-04-14 09:00:00,765.306152,0,-2147483648,140,7.878462


In [33]:
# Display the observations frame currently bound to df.
df

Unnamed: 0,date_time,z_coordinate,z_coordinate_type,z_coordinate_method,observed_variable,observation_value
0,1906-09-05 13:33:00,10750.0,0,-2147483648,106,2.200000e+01
1,1906-09-05 13:33:00,11450.0,0,-2147483648,106,3.500000e+02
2,1906-09-05 13:33:00,11900.0,0,-2147483648,106,2.720000e+02
3,1906-09-05 13:33:00,12300.0,0,-2147483648,106,3.360000e+02
4,1906-09-05 13:33:00,124.0,0,-2147483648,106,0.000000e+00
...,...,...,...,...,...,...
402548,1956-12-31 03:00:00,2100.0,0,-2147483648,140,-2.673109e+00
402549,1956-12-31 03:00:00,300.0,0,-2147483648,140,-9.444492e-07
402550,1956-12-31 03:00:00,3000.0,0,-2147483648,140,8.669847e+00
402551,1956-12-31 03:00:00,600.0,0,-2147483648,140,-6.746066e-07
