# Merge Metadata

This script can be used to build the final Metadata file.

There are several notes that are important:

* Some of the location files have been processed externally and the workflow needs to be explained here
* We should note done all CRS transformations applied for reference

In [2]:
import os
import json
import pandas as pd
import numpy as np
from tqdm import tqdm
from pyproj.transformer import Transformer

from camelsp import Bundesland, Station, util

As an example: The `Bundesland` context manager can load the metadata for the given Bundesland only from the full metadata table. If this table does not yet exist, it is created from the NUTSID mapping table. Check out for Saarland:

In [2]:
with Bundesland('DEC') as bl:
    dec_meta = bl.metadata

dec_meta

Unnamed: 0,camels_id,provider_id,camels_path,nuts_lvl2,federal_state,gauge_name,waterbody_name,gauge_elevation,area,x,y,lon,lat,q_count,w_count,q_w_pearson,q_w_spearman
876,DEC10000,1271120,./DEC/DEC10000/DEC10000_data.csv,DEC,Saarland,Schieferstollen,Wadrill,344.48,44.2,4095083.0,2947311.0,6.874568,49.595319,22707.0,22707.0,0.92504,0.982478
877,DEC10010,1122120,./DEC/DEC10010/DEC10010_data.csv,DEC,Saarland,Geislautern,Rossel,184.38,203.0,4090150.0,2907085.0,6.829978,49.232039,21611.0,21611.0,0.92744,0.897286
878,DEC10020,1251120,./DEC/DEC10020/DEC10020_data.csv,DEC,Saarland,Gonnesweiler,Bos,372.4,12.5,4110659.0,2943239.0,7.09203,49.564482,6909.0,6909.0,0.924174,0.997792
879,DEC10030,1102220,./DEC/DEC10030/DEC10030_data.csv,DEC,Saarland,Niedaltdorf,Nied,183.38,1337.0,4073444.0,2920102.0,6.592731,49.342264,18689.0,18689.0,0.977621,0.95021
880,DEC10040,1051110,./DEC/DEC10040/DEC10040_data.csv,DEC,Saarland,Blieskastel,Blies,213.4,1716.0,4121641.0,2906316.0,7.262403,49.236502,24319.0,24319.0,0.976258,0.990419
881,DEC10050,1062220,./DEC/DEC10050/DEC10050_data.csv,DEC,Saarland,Reinheim,Blies,202.0,1798.0,4115310.0,2895236.0,7.181266,49.13478,23802.0,23802.0,0.95715,0.888071
882,DEC10060,1113120,./DEC/DEC10060/DEC10060_data.csv,DEC,Saarland,Überherrn,Bist,195.5,120.4,4080580.0,2909471.0,6.697274,49.249695,20303.0,20303.0,0.836683,0.77462
883,DEC10070,1641120,./DEC/DEC10070/DEC10070_data.csv,DEC,Saarland,Ihn,Ihner Bach,199.32,44.51,4074238.0,2918235.0,6.604804,49.325827,2802.0,2802.0,0.916261,0.998477
884,DEC10080,1401120,./DEC/DEC10080/DEC10080_data.csv,DEC,Saarland,Nonnweiler II,Altbach (Talsperre),454.86,16.2,4104442.0,2951172.0,7.001765,49.633516,14671.0,14671.0,0.893714,0.995034
885,DEC10090,1381120,./DEC/DEC10090/DEC10090_data.csv,DEC,Saarland,Weiler,Kohlenbrucher Bach,181.83,9.87,4072663.0,2933279.0,6.573683,49.460292,17593.0,17593.0,0.9554,0.95525


## Generate basic metadata

This step will produce one metadata file containing all processed data, which can be used as NUTS lookup and as a basis to add more specific metadata.
The first step also loads the Location files and merges everything

### Add Pegelname, Gewässername and elevation

add the above fields to the merged metadata, if we have the information from the Landesämter.

In [3]:
# lookup dictionary for column names in raw_metadata
_META_DICT = {
    'DE1': {'provider_id': 'Messstellennummer', 'gauge_name': 'Standort', 'waterbody_name': 'Gewässer', 'gauge_elevation': 'Pegelnullpunkt (PNP) in m'},
    'DE2': {'provider_id': 'Stationsnummer', 'gauge_name': 'Stationsname', 'waterbody_name': 'Gewässer (Name|Nummer)', 'gauge_elevation': 'PNP'},
    'DE4': {'provider_id': 'Messstellennummer', 'gauge_name': 'Pegelname', 'waterbody_name': 'Gewaesser', 'gauge_elevation': 'PNP_Höhe'},
    'DE7': {'provider_id': 'Messstellen Nr.', 'gauge_name': 'Pegelname', 'waterbody_name': 'Gewässer', 'gauge_elevation': 'Höhe              [m ü. NN]'},
    'DE8': {'provider_id': 'pegelkennzahl', 'gauge_name': 'bezeichnung', 'waterbody_name': 'gewaesser', 'gauge_elevation': 'pnp'},
    'DE9': {'provider_id': 'MESSSTELLE_NR', 'gauge_name': 'LANGNAME', 'waterbody_name': np.nan, 'gauge_elevation': np.nan},
    'DEA': {'provider_id': 'Stationsnummer', 'gauge_name': 'Station', 'waterbody_name': 'Gewässer', 'gauge_elevation': 'NULLPUNKT'},
    'DEB': {'provider_id': 'Nummer', 'gauge_name': 'Stationsname', 'waterbody_name': 'Gewässer', 'gauge_elevation': 'PNP'},
    'DEC': {'provider_id': 'MSTNR', 'gauge_name': 'Pegelname_', 'waterbody_name': 'Gewässer', 'gauge_elevation': 'PNP'},
    'DED': {'provider_id': 'Pegelkennziffer', 'gauge_name': 'Pegelname', 'waterbody_name': 'Gewaesser', 'gauge_elevation': np.nan},
    'DEE': {'provider_id': 'SANR', 'gauge_name': 'SNAME', 'waterbody_name': 'SWATER', 'gauge_elevation': np.nan},
    'DEF': {'provider_id': 'id', 'gauge_name': 'gauge', 'waterbody_name': 'river', 'gauge_elevation': np.nan},
    'DEG': {'provider_id': 'Pegelnr', 'gauge_name': 'Pegelname', 'waterbody_name': 'Gewässer', 'gauge_elevation': 'PNP'},
}

for NUTS in tqdm(util._NUTS_LVL2_NAMES.keys()):
    with Bundesland(NUTS) as bl:
        try:
            # read raw metadata for bl
            p = os.path.join(bl.base_path, 'raw_metadata', f"{bl.NUTS}_raw_metadata.csv")
            df = pd.read_csv(p)

            # get relevant metadata columns from lookup dict
            cols = list(_META_DICT[NUTS].values())
            # drop nan from cols
            cols = [col for col in cols if not pd.isna(col)]

            # select relevant columns
            df = df[cols]
            
            # rename columns
            df = df.rename(columns=dict((v,k) for k,v in _META_DICT[NUTS].items()))

            # make provider_id a string
            df['provider_id'] = df['provider_id'].astype(str)

            # transform gauge_elevation to float
            if 'gauge_elevation' in df.columns:
                # make sure that gauge_elevation is a string
                df['gauge_elevation'] = df['gauge_elevation'].astype(str)
                
                # replace comma with dot
                df['gauge_elevation'] = df['gauge_elevation'].str.replace(',', '.')

                # remove all non numeric characters
                df['gauge_elevation'] = df['gauge_elevation'].str.extract('([\d.]+)').astype(float)

                # transform to float
                df['gauge_elevation'] = df['gauge_elevation'].astype(float)
        except FileNotFoundError:
            continue
        # update metadata
        bl.update_metadata(df, id_column='provider_id')

util.get_metadata()

100%|██████████| 16/16 [00:00<00:00, 23.63it/s]


Unnamed: 0,provider_id,camels_id,camels_path,nuts_lvl2,federal_state,gauge_name,waterbody_name,gauge_elevation,area,x,y,lon,lat,q_count,w_count,q_w_pearson,q_w_spearman
0,573000,DEG10000,./DEG/DEG10000/DEG10000_data.csv,DEG,Thüringen,Ammern,Unstrut,210.243,182.70,4.352221e+06,3.124617e+06,10.446993,51.231727,29646.0,29646.0,0.969240,0.976895
1,447000,DEG10010,./DEG/DEG10010/DEG10010_data.csv,DEG,Thüringen,Arenshausen,Leine,196.288,275.00,4.318941e+06,3.140875e+06,9.970428,51.378709,22707.0,22707.0,0.709148,0.685209
2,574200,DEG10020,./DEG/DEG10020/DEG10020_data.csv,DEG,Thüringen,Arnstadt,Gera,293.577,174.70,4.386764e+06,3.077926e+06,10.933022,50.809106,35490.0,35490.0,0.958767,0.962387
3,576500,DEG10030,./DEG/DEG10030/DEG10030_data.csv,DEG,Thüringen,Berga,Weiße Elster,218.995,1383.00,4.473276e+06,3.073272e+06,12.157989,50.750857,12845.0,12845.0,0.502141,0.553354
4,570210,DEG10040,./DEG/DEG10040/DEG10040_data.csv,DEG,Thüringen,Blankenstein-Rosenthal,Saale,410.517,1013.00,4.442190e+06,3.033884e+06,11.704738,50.404273,21246.0,21246.0,0.940139,0.977699
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2901,5934145,DE912890,./DE9/DE912890/DE912890_data.csv,DE9,Niedersachsen,Jeetzel UW,,,,,,,,0.0,18689.0,,
2902,5985101,DE912900,./DE9/DE912900/DE912900_data.csv,DE9,Niedersachsen,Bremervörde,,,,,,,,0.0,9923.0,,
2903,5986107,DE912910,./DE9/DE912910/DE912910_data.csv,DE9,Niedersachsen,Hollen,,,,,,,,0.0,12845.0,,
2904,9286164,DE912920,./DE9/DE912920/DE912920_data.csv,DE9,Niedersachsen,Laar,,,1749.57,4.100203e+06,3.283170e+06,6.739143,52.613090,0.0,19450.0,,


### Add location

In [4]:
for NUTS in tqdm(util._NUTS_LVL2_NAMES.keys()):
    with Bundesland(NUTS) as bl:
        try:
            p = os.path.join(bl.base_path, 'locations', f'{bl.NUTS}_Locations.csv')
            # read in 
            df = pd.read_csv(p, dtype={'ID': str})
            df.columns = ['provider_id', 'area', 'x', 'y']
            #df.drop_duplicates(inplace=True)
        except FileNotFoundError:
            continue
        
        # update by simply setting the new metadata to the property setter
        # in this case, the joining column needs to be 'camels_id' or 'provider_id'
        #bl.metadata = df

        # or use the function if you prefer
        bl.update_metadata(df, id_column='provider_id')

metadata = util.get_metadata()
metadata[metadata['nuts_lvl2'] == 'DE9']

100%|██████████| 16/16 [00:00<00:00, 29.02it/s]


Unnamed: 0,provider_id,camels_id,camels_path,nuts_lvl2,federal_state,gauge_name,waterbody_name,gauge_elevation,area,x,y,lon,lat,q_count,w_count,q_w_pearson,q_w_spearman
2612,3183101,DE910000,./DE9/DE910000/DE910000_data.csv,DE9,Niedersachsen,Sudendorf,,,121.560123,4.183280e+06,3.217135e+06,7.993218,52.048457,12845.0,12845.0,,
2613,3346103,DE910010,./DE9/DE910010/DE910010_data.csv,DE9,Niedersachsen,Schwege,,,47.371025,4.178458e+06,3.218442e+06,7.922396,52.058975,12845.0,12845.0,,
2614,3437108,DE910020,./DE9/DE910020/DE910020_data.csv,DE9,Niedersachsen,Beesten,,,407.147698,4.150721e+06,3.261657e+06,7.496412,52.439334,5966.0,5966.0,,
2615,3445100,DE910030,./DE9/DE910030/DE910030_data.csv,DE9,Niedersachsen,Spelle,,,,inf,inf,,,6789.0,6789.0,,
2616,3449100,DE910040,./DE9/DE910040/DE910040_data.csv,DE9,Niedersachsen,Spelle,,,149.735031,4.155177e+06,3.254250e+06,7.565579,52.374162,4597.0,4597.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2901,5934145,DE912890,./DE9/DE912890/DE912890_data.csv,DE9,Niedersachsen,Jeetzel UW,,,,inf,inf,,,0.0,18689.0,,
2902,5985101,DE912900,./DE9/DE912900/DE912900_data.csv,DE9,Niedersachsen,Bremervörde,,,,inf,inf,,,0.0,9923.0,,
2903,5986107,DE912910,./DE9/DE912910/DE912910_data.csv,DE9,Niedersachsen,Hollen,,,,inf,inf,,,0.0,12845.0,,
2904,9286164,DE912920,./DE9/DE912920/DE912920_data.csv,DE9,Niedersachsen,Laar,,,1749.573210,4.100147e+06,3.283012e+06,6.739143,52.613090,0.0,19450.0,,


## Add WGS84 coordinates

In [5]:
# create a transformer
transformer = Transformer.from_crs("EPSG:3035", "EPSG:4326", always_xy=True)

# transform
lon, lat = transformer.transform(metadata.x.values, metadata.y.values)

# add back
updates = pd.DataFrame({'camels_id': metadata.camels_id, 'lon': lon, 'lat': lat})
util.update_metadata(new_metadata=updates)

metadata = util.get_metadata()
metadata[metadata['nuts_lvl2'] == 'DE9']

Unnamed: 0,camels_id,provider_id,camels_path,nuts_lvl2,federal_state,gauge_name,waterbody_name,gauge_elevation,area,x,y,lon,lat,q_count,w_count,q_w_pearson,q_w_spearman
2612,DE910000,3183101,./DE9/DE910000/DE910000_data.csv,DE9,Niedersachsen,Sudendorf,,,121.560123,4.183280e+06,3.217135e+06,7.992302,52.047079,12845.0,12845.0,,
2613,DE910010,3346103,./DE9/DE910010/DE910010_data.csv,DE9,Niedersachsen,Schwege,,,47.371025,4.178458e+06,3.218442e+06,7.921498,52.057597,12845.0,12845.0,,
2614,DE910020,3437108,./DE9/DE910020/DE910020_data.csv,DE9,Niedersachsen,Beesten,,,407.147698,4.150721e+06,3.261657e+06,7.495577,52.437915,5966.0,5966.0,,
2615,DE910030,3445100,./DE9/DE910030/DE910030_data.csv,DE9,Niedersachsen,Spelle,,,,inf,inf,inf,inf,6789.0,6789.0,,
2616,DE910040,3449100,./DE9/DE910040/DE910040_data.csv,DE9,Niedersachsen,Spelle,,,149.735031,4.155177e+06,3.254250e+06,7.564736,52.372753,4597.0,4597.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2901,DE912890,5934145,./DE9/DE912890/DE912890_data.csv,DE9,Niedersachsen,Jeetzel UW,,,,inf,inf,inf,inf,0.0,18689.0,,
2902,DE912900,5985101,./DE9/DE912900/DE912900_data.csv,DE9,Niedersachsen,Bremervörde,,,,inf,inf,inf,inf,0.0,9923.0,,
2903,DE912910,5986107,./DE9/DE912910/DE912910_data.csv,DE9,Niedersachsen,Hollen,,,,inf,inf,inf,inf,0.0,12845.0,,
2904,DE912920,9286164,./DE9/DE912920/DE912920_data.csv,DE9,Niedersachsen,Laar,,,1749.573210,4.100147e+06,3.283012e+06,6.738424,52.611652,0.0,19450.0,,


## Count existing data

Go for each file and count the available data. Add anything that makes it necessary to read every single data file into the for-loop below.

Checking columns:

* `'q'` 
* `'w'`

In [6]:
for NUTS in util._NUTS_LVL2_NAMES.keys():
    # empty container for this BL
    count_q = []
    count_w = []
    
    # process this federal state
    with Bundesland(NUTS) as bl:
        # get meta
        meta = bl.metadata

        # go for each id
        for camels_id in tqdm(meta.camels_id.values, desc=NUTS):
            # load the data
            try:
                df = bl.get_data(camels_id)
            except FileNotFoundError:
                count_q.append(0)
                count_w.append(0)
                continue
    
            # check q
            if 'q' in df.columns.values:
                count_q.append((~df.q.isna()).count())
            else:
                count_q.append(0)

            # check w
            if 'w' in df.columns.values:
                count_w.append((~df.w.isna()).count())
            else:
                count_w.append(0)

        # build the new metadata
        counts = pd.DataFrame({'camels_id': meta.camels_id.values, 'q_count': np.asarray(count_q, dtype=int), 'w_count': np.asarray(count_w, dtype=int)})

        # add to metadata
        bl.update_metadata(counts)

metadata = util.get_metadata()
metadata[metadata['nuts_lvl2'] == 'DE9']

DE1:   0%|          | 0/252 [00:00<?, ?it/s]

DE1: 100%|██████████| 252/252 [00:06<00:00, 41.17it/s]
DE2: 100%|██████████| 535/535 [00:17<00:00, 30.59it/s]
DE3: 0it [00:00, ?it/s]
DE4: 100%|██████████| 233/233 [00:06<00:00, 38.66it/s]
DE5: 0it [00:00, ?it/s]
DE6: 0it [00:00, ?it/s]
DE7: 100%|██████████| 97/97 [00:02<00:00, 38.03it/s]
DE8: 100%|██████████| 230/230 [00:03<00:00, 73.95it/s]
DE9: 100%|██████████| 294/294 [00:05<00:00, 57.41it/s]
DEA: 100%|██████████| 219/219 [00:04<00:00, 49.75it/s]
DEB: 100%|██████████| 124/124 [00:02<00:00, 53.67it/s]
DEC: 100%|██████████| 46/46 [00:00<00:00, 58.88it/s]
DED: 100%|██████████| 178/178 [00:02<00:00, 65.59it/s]
DEE: 100%|██████████| 126/126 [00:02<00:00, 46.20it/s]
DEF: 100%|██████████| 509/509 [00:06<00:00, 81.22it/s]
DEG: 100%|██████████| 63/63 [00:01<00:00, 32.38it/s]


Unnamed: 0,camels_id,provider_id,camels_path,nuts_lvl2,federal_state,gauge_name,waterbody_name,gauge_elevation,area,x,y,lon,lat,q_count,w_count,q_w_pearson,q_w_spearman
2612,DE910000,3183101,./DE9/DE910000/DE910000_data.csv,DE9,Niedersachsen,Sudendorf,,,121.560123,4.183280e+06,3.217135e+06,7.992302,52.047079,12845.0,12845.0,,
2613,DE910010,3346103,./DE9/DE910010/DE910010_data.csv,DE9,Niedersachsen,Schwege,,,47.371025,4.178458e+06,3.218442e+06,7.921498,52.057597,12845.0,12845.0,,
2614,DE910020,3437108,./DE9/DE910020/DE910020_data.csv,DE9,Niedersachsen,Beesten,,,407.147698,4.150721e+06,3.261657e+06,7.495577,52.437915,5966.0,5966.0,,
2615,DE910030,3445100,./DE9/DE910030/DE910030_data.csv,DE9,Niedersachsen,Spelle,,,,inf,inf,inf,inf,6789.0,6789.0,,
2616,DE910040,3449100,./DE9/DE910040/DE910040_data.csv,DE9,Niedersachsen,Spelle,,,149.735031,4.155177e+06,3.254250e+06,7.564736,52.372753,4597.0,4597.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2901,DE912890,5934145,./DE9/DE912890/DE912890_data.csv,DE9,Niedersachsen,Jeetzel UW,,,,inf,inf,inf,inf,0.0,18689.0,,
2902,DE912900,5985101,./DE9/DE912900/DE912900_data.csv,DE9,Niedersachsen,Bremervörde,,,,inf,inf,inf,inf,0.0,9923.0,,
2903,DE912910,5986107,./DE9/DE912910/DE912910_data.csv,DE9,Niedersachsen,Hollen,,,,inf,inf,inf,inf,0.0,12845.0,,
2904,DE912920,9286164,./DE9/DE912920/DE912920_data.csv,DE9,Niedersachsen,Laar,,,1749.573210,4.100147e+06,3.283012e+06,6.738424,52.611652,0.0,19450.0,,


## Add extent of available Q and W data in years

In [13]:
# get metadata
meta = util.get_metadata()

# get camels_ids
camels_ids = meta['camels_id'].values

for id in tqdm(camels_ids):
    # init Station
    s = Station(id)

    # get the data
    df = s.get_data()

    # get extent of date index of q and w
    if 'q' in df.columns:
        q_start, q_end = df['q'].dropna().index.min(), df['q'].dropna().index.max()
        
        # compute extent in years
        q_extent = (q_end - q_start).days / 365
        
        # add to metadata
        meta.loc[meta.camels_id == id, 'q_extent_years'] = q_extent
    else:
        meta.loc[meta.camels_id == id, 'q_extent_years'] = np.nan
    
    if 'w' in df.columns:
        w_start, w_end = df['w'].dropna().index.min(), df['w'].dropna().index.max()
        
        # compute extent in years
        w_extent = (w_end - w_start).days / 365
        
        # add to metadata
        meta.loc[meta.camels_id == id, 'w_extent_years'] = w_extent
    else:
        meta.loc[meta.camels_id == id, 'w_extent_years'] = np.nan

# save metadata
meta.to_csv(os.path.join(util.get_output_path(), 'metadata', 'metadata.csv'), index=False)

util.get_metadata().head()

  0%|          | 0/2870 [00:00<?, ?it/s]

100%|██████████| 2870/2870 [01:13<00:00, 39.30it/s]


Unnamed: 0,camels_id,provider_id,camels_path,nuts_lvl2,federal_state,gauge_name,waterbody_name,gauge_elevation,area,x,y,lon,lat,q_count,w_count,q_w_pearson,q_w_spearman,q_extent_years,w_extent_years
0,DEG10000,573000,./DEG/DEG10000/DEG10000_data.csv,DEG,Thüringen,Ammern,Unstrut,210.243,182.7,4352221.0,3124617.0,10.446993,51.231727,29646.0,29646.0,0.96924,0.976895,81.219178,32.186301
1,DEG10010,447000,./DEG/DEG10010/DEG10010_data.csv,DEG,Thüringen,Arenshausen,Leine,196.288,275.0,4318941.0,3140875.0,9.970428,51.378709,22707.0,22707.0,0.709148,0.685209,62.208219,59.876712
2,DEG10020,574200,./DEG/DEG10020/DEG10020_data.csv,DEG,Thüringen,Arnstadt,Gera,293.577,174.7,4386764.0,3077926.0,10.933022,50.809106,35490.0,35490.0,0.958767,0.962387,97.230137,32.186301
3,DEG10030,576500,./DEG/DEG10030/DEG10030_data.csv,DEG,Thüringen,Berga,Weiße Elster,218.995,1383.0,4473276.0,3073272.0,12.157989,50.750857,12845.0,12845.0,0.502141,0.553354,31.186301,35.189041
4,DEG10040,570210,./DEG/DEG10040/DEG10040_data.csv,DEG,Thüringen,Blankenstein-Rosenthal,Saale,410.517,1013.0,4442190.0,3033884.0,11.704738,50.404273,21246.0,21246.0,0.940139,0.977699,58.205479,52.032877


In [7]:
from glob import glob

# empty_data = []
# empty_files = []

for NUTS in util._NUTS_LVL2_NAMES.keys():

    bl = Bundesland(NUTS)

    print(NUTS)

#     # empty data
#     for id in metadata['camels_id'].values:
#         try:
#             df = bl.get_data(id)
#             if len(df) == 0:
#                 empty_data.append(id)
#         except FileNotFoundError:
#             empty_files.append(id)

    print(f"{len(bl.metadata[(bl.metadata['q_count'] == 0) & (bl.metadata['w_count'] == 0)])}")

    # empty folders
    all_folders = glob(f"{bl.base_path}/{NUTS}/*")

    print("Empty folders:")
    print(f"{len([folder for folder in all_folders if len(os.listdir(folder)) == 0])}\n")

DE1
0
Empty folders:
0

DE2
0
Empty folders:
0

DE3
0
Empty folders:
0

DE4
0
Empty folders:
0

DE5
0
Empty folders:
0

DE6
0
Empty folders:
0

DE7
0
Empty folders:
0

DE8
0
Empty folders:
0

DE9
0
Empty folders:
0

DEA
0
Empty folders:
0

DEB
0
Empty folders:
0

DEC
0
Empty folders:
0

DED
0
Empty folders:
0

DEE
0
Empty folders:
0

DEF
0
Empty folders:
0

DEG
0
Empty folders:
0



In [32]:
# metadata where lon or lat is inf
ids_meta = metadata[(metadata['lon'] == np.inf) | (metadata['lat'] == np.inf)]
ids_meta

Unnamed: 0,camels_id,provider_id,camels_path,nuts_lvl2,federal_state,gauge_name,waterbody_name,gauge_elevation,area,x,y,lon,lat,q_count,w_count,q_w_pearson,q_w_spearman
69,DEF10060,114300,./DEF/DEF10060/DEF10060_data.csv,DEF,Schleswig-Holstein,Ahrensburg,Aue,,-999.0,inf,inf,inf,inf,699.0,699.0,0.849881,0.974858
481,DEF14180,114254,./DEF/DEF14180/DEF14180_data.csv,DEF,Schleswig-Holstein,Sörup,Bönstrupau,,-999.0,inf,inf,inf,inf,577.0,577.0,0.91527,0.941727
2615,DE910030,3445100,./DE9/DE910030/DE910030_data.csv,DE9,Niedersachsen,Spelle,,,,inf,inf,inf,inf,6789.0,6789.0,,
2618,DE910060,3547104,./DE9/DE910060/DE910060_data.csv,DE9,Niedersachsen,Lingen Parkstraáe,,,,inf,inf,inf,inf,12298.0,12298.0,,
2622,DE910100,3613185,./DE9/DE910100/DE910100_data.csv,DE9,Niedersachsen,Schimm,,,,inf,inf,inf,inf,1430.0,1430.0,,
2640,DE910280,3658105,./DE9/DE910280/DE910280_data.csv,DE9,Niedersachsen,Lodbergen,,,,inf,inf,inf,inf,11233.0,11233.0,,
2667,DE910550,3881114,./DE9/DE910550/DE910550_data.csv,DE9,Niedersachsen,Thülsfeld,,,,inf,inf,inf,inf,4809.0,4809.0,,
2686,DE910740,4661185,./DE9/DE910740/DE910740_data.csv,DE9,Niedersachsen,Gesmold,,,,inf,inf,inf,inf,1461.0,1461.0,,
2688,DE910760,4665103,./DE9/DE910760/DE910760_data.csv,DE9,Niedersachsen,Bruchmühlen,,,,inf,inf,inf,inf,8462.0,8462.0,,
2705,DE910930,4821120,./DE9/DE910930/DE910930_data.csv,DE9,Niedersachsen,Probsteiburg,,,,inf,inf,inf,inf,7366.0,7366.0,,
