# Test GDSR and GPCC data reader classes

In [1]:
# Print the installed rainfallqc version so the outputs recorded below can be
# tied to a specific release of the package.
import rainfallqc
print(rainfallqc.__version__)

0.0.7


In [2]:
from abc import ABC, abstractmethod
import datetime
import glob
import os
import zipfile

import pandas as pd
import polars as pl
import numpy as np

from rainfallqc.utils import neighbourhood_utils, data_readers

## Testing

In [4]:
# Build one network reader per data source, each pointed at its local data
# directory. The GPCC reader is additionally given the "tw" time resolution,
# which matches the `tw_<station_id>.zip` file names it later reports.
gdsr_reader = data_readers.GDSRNetworkReader(
    path_to_gdsr_dir="../../data/GDSR/",
)
gpcc_reader = data_readers.GPCCNetworkReader(
    path_to_gpcc_dir="../../data/GPCC/",
    time_res="tw",
)

In [5]:
# Display the station metadata table exposed by the GDSR reader: one row per
# station, including a `path` column that points at the station's .txt file.
gdsr_reader.metadata

station_id,country,original_station_number,original_station_name,path_to_original_data,latitude,longitude,start_datetime,end_datetime,elevation,number_of_records,percent_missing_data,original_timestep,new_timestep,original_units,new_units,time_zone,daylight_saving_info,no_data_value,resolution,other,path
str,str,str,str,str,f64,f64,datetime[μs],datetime[μs],str,str,str,str,str,str,str,str,str,str,str,str,str
"""DE_00310""","""Germany""","""00310""","""NA""","""B:/INTENSE data/Original data/…",51.0662,8.5373,2006-01-01 00:00:00,2010-12-31 23:00:00,"""590m""","""43824""","""0.00""","""1hr""","""1hr""","""mm""","""mm""","""CET""","""NA""","""-999""","""0.10""","""""","""../../data/GDSR/DE_00310.txt"""
"""DE_00389""","""Germany""","""00389""","""NA""","""B:/INTENSE data/Original data/…",51.0148,8.4318,2009-11-01 00:00:00,2010-12-31 23:00:00,"""436m""","""10224""","""0.00""","""1hr""","""1hr""","""mm""","""mm""","""CET""","""NA""","""-999""","""0.10""","""""","""../../data/GDSR/DE_00389.txt"""
"""DE_00390""","""Germany""","""00390""","""NA""","""B:/INTENSE data/Original data/…",50.9837,8.3679,2006-01-01 00:00:00,2010-12-31 23:00:00,"""610m""","""43824""","""0.00""","""1hr""","""1hr""","""mm""","""mm""","""CET""","""NA""","""-999""","""0.10""","""""","""../../data/GDSR/DE_00390.txt"""
"""DE_01300""","""Germany""","""01300""","""NA""","""B:/INTENSE data/Original data/…",51.254,8.1565,2006-01-01 00:00:00,2010-12-31 23:00:00,"""351m""","""43824""","""0.00""","""1hr""","""1hr""","""mm""","""mm""","""CET""","""NA""","""-999""","""0.10""","""""","""../../data/GDSR/DE_01300.txt"""
"""DE_02483""","""Germany""","""02483""","""NA""","""B:/INTENSE data/Original data/…",51.1803,8.4891,2006-01-01 00:00:00,2010-12-31 23:00:00,"""839m""","""43824""","""0.00""","""1hr""","""1hr""","""mm""","""mm""","""CET""","""NA""","""-999""","""0.10""","""""","""../../data/GDSR/DE_02483.txt"""
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""DE_03215""","""Germany""","""03215""","""NA""","""B:/INTENSE data/Original data/…",51.1681,8.7129,2007-06-01 00:00:00,2010-12-31 23:00:00,"""380m""","""31440""","""0.00""","""1hr""","""1hr""","""mm""","""mm""","""CET""","""NA""","""-999""","""0.10""","""""","""../../data/GDSR/DE_03215.txt"""
"""DE_04313""","""Germany""","""04313""","""NA""","""B:/INTENSE data/Original data/…",51.4966,8.4342,2006-01-01 00:00:00,2010-12-31 23:00:00,"""361m""","""43824""","""0.00""","""1hr""","""1hr""","""mm""","""mm""","""CET""","""NA""","""-999""","""0.10""","""""","""../../data/GDSR/DE_04313.txt"""
"""DE_04488""","""Germany""","""04488""","""NA""","""B:/INTENSE data/Original data/…",51.2129,8.2672,2006-08-01 00:00:00,2010-12-31 23:00:00,"""443m""","""38736""","""0.00""","""1hr""","""1hr""","""mm""","""mm""","""CET""","""NA""","""-999""","""0.10""","""""","""../../data/GDSR/DE_04488.txt"""
"""DE_06264""","""Germany""","""06264""","""NA""","""B:/INTENSE data/Original data/…",51.4143,8.6498,2006-01-01 00:00:00,2010-12-31 23:00:00,"""457m""","""43824""","""0.00""","""1hr""","""1hr""","""mm""","""mm""","""CET""","""NA""","""-999""","""0.10""","""""","""../../data/GDSR/DE_06264.txt"""


In [6]:
# Display the station metadata table exposed by the GPCC reader: one row per
# station, including a `path` column that points at the station's .zip archive.
gpcc_reader.metadata

station_id,latitude,longitude,start_datetime,end_datetime,time_step,country,location,path
str,f64,f64,datetime[μs],datetime[μs],str,str,str,str
"""6303""",51.2915,8.5982,2002-12-02 07:00:00,2018-12-31 07:00:00,"""daily""","""DEU""","""Willingen/Hochsauerland""","""../../data/GPCC/tw_6303.zip"""
"""1283""",51.0148,8.2824,1941-01-02 07:00:00,2018-12-31 07:00:00,"""daily""","""DEU""","""Erndtebruck-Birkelbach""","""../../data/GPCC/tw_1283.zip"""
"""2483""",51.1803,8.4891,1955-01-02 07:00:00,2018-12-31 07:00:00,"""daily""","""DEU""","""Kahler Asten""","""../../data/GPCC/tw_2483.zip"""
"""310""",51.0662,8.5375,1951-01-02 07:00:00,2018-12-31 07:00:00,"""daily""","""DEU""","""Battenberg-Hof Karlsburg""","""../../data/GPCC/tw_310.zip"""
"""3215""",51.1683,8.7125,1931-01-02 07:00:00,2018-12-31 07:00:00,"""daily""","""DEU""","""Medebach-Berge""","""../../data/GPCC/tw_3215.zip"""
"""3264""",51.3385,8.2709,1941-01-02 07:00:00,2018-12-31 07:00:00,"""daily""","""DEU""","""Meschede""","""../../data/GPCC/tw_3264.zip"""
"""3798""",51.2827,8.4797,1950-01-02 07:00:00,2018-12-31 07:00:00,"""daily""","""DEU""","""Olsberg-Brunskappel""","""../../data/GPCC/tw_3798.zip"""
"""480""",50.9275,8.4935,1931-01-02 07:00:00,2018-12-31 07:00:00,"""daily""","""DEU""","""Biedenkopf-Wallau""","""../../data/GPCC/tw_480.zip"""
"""5360""",51.4319,8.2665,1941-01-02 07:00:00,2018-12-31 07:00:00,"""daily""","""DEU""","""Warstein-Hirschberg""","""../../data/GPCC/tw_5360.zip"""
"""5610""",51.1968,8.5268,1970-01-02 07:00:00,2008-12-31 07:00:00,"""daily""","""DEU""","""Winterberg""","""../../data/GPCC/tw_5610.zip"""


In [8]:
# Display the reader's station_id -> archive path mapping (string ids as keys).
gpcc_reader.data_paths

{'6303': '../../data/GPCC/tw_6303.zip',
 '1283': '../../data/GPCC/tw_1283.zip',
 '2483': '../../data/GPCC/tw_2483.zip',
 '310': '../../data/GPCC/tw_310.zip',
 '3215': '../../data/GPCC/tw_3215.zip',
 '3264': '../../data/GPCC/tw_3264.zip',
 '3798': '../../data/GPCC/tw_3798.zip',
 '480': '../../data/GPCC/tw_480.zip',
 '5360': '../../data/GPCC/tw_5360.zip',
 '5610': '../../data/GPCC/tw_5610.zip'}

In [9]:
# Look up the GPCC neighbours of station "310". Going by the parameter names,
# the search is limited by distance, by a maximum neighbour count and by a
# minimum number of overlapping days — confirm the exact semantics against the
# rainfallqc documentation.
result = gpcc_reader.get_nearest_overlapping_neighbours_to_target(
    target_id="310",
    distance_threshold=30,  # units are not visible here (presumably km) — TODO confirm
    n_closest=3,
    min_overlap_days=1000,
)

In [10]:
# Materialise the neighbour lookup as a plain list so the station ids are
# shown in the order the lookup yields them.
list(result)

['2483', '5610', '480']

In [12]:
# Create a separate GPCC reader (same directory and time resolution as
# `gpcc_reader`) and load three station archives into a single table with one
# `rain_mm_*` column per station, ordered by timestamp.
gpcc_obj = data_readers.GPCCNetworkReader(
    path_to_gpcc_dir="../../data/GPCC/",
    time_res="tw",
)
res = gpcc_obj.load_network_data(
    data_paths=[
        "../../data/GPCC/tw_310.zip",
        "../../data/GPCC/tw_1283.zip",
        "../../data/GPCC/tw_6303.zip",
    ],
    rain_col="rain_mm",
).sort("time")
res


Data has a inconsistent time step. Data has following time steps: ['1d', '2d', '7d']
Attempting to resample into daily
Data has a inconsistent time step. Data has following time steps: ['1d', '731d', '1462d']
Attempting to resample into daily


time,rain_mm_tw_310,rain_mm_tw_1283,rain_mm_tw_6303
datetime[μs],f64,f64,f64
1941-01-01 07:00:00,,17.9,
1941-01-02 07:00:00,,0.0,
1941-01-03 07:00:00,,0.5,
1941-01-04 07:00:00,,0.0,
1941-01-05 07:00:00,,0.0,
…,…,…,…
2018-12-27 07:00:00,0.0,0.0,0.0
2018-12-28 07:00:00,0.2,0.5,1.0
2018-12-29 07:00:00,6.9,4.4,10.5
2018-12-30 07:00:00,0.9,1.8,4.7


In [13]:
# Resolve the neighbour station ids in `result` to their archive paths via the
# metadata table.
# NOTE(review): `filter` keeps the metadata row order, so the paths do not come
# back in the same order as `list(result)`. If the lookup order is meaningful
# (e.g. nearest first — confirm), look the paths up per id instead.
(
    gpcc_obj.metadata
    .filter(pl.col("station_id").is_in(result))
    .get_column("path")
    .to_list()
)

['../../data/GPCC/tw_2483.zip',
 '../../data/GPCC/tw_480.zip',
 '../../data/GPCC/tw_5610.zip']