# Comparison between "Comprehensive upper-air observation network from 1905 to present" and "Insitu IGRA radiosoundings baseline network"

**Contains modified Copernicus Climate Change Service Information 2020**
under [License](https://apps.ecmwf.int/datasets/licences/copernicus/)

*Copernicus Climate Change Service (C3S) - Upper Air Data Service (2020)*

The purpose of this IPython Notebook is to compare the two data sets named above and to identify the differences between them.

    Author: U. Voggenberger
    Date: 02.2021
    Contact: ulrich.voggenberger [at] univie.ac.at
    License: C3S, 2021


In [1]:
import pandas
import numpy as np
import sys, zipfile, os, time
import matplotlib.pyplot as plt
import glob
import datetime
import urllib3
import cdsapi
import xarray
import shutil
import warnings
warnings.filterwarnings('ignore')

In [2]:
import matplotlib.pylab as pylab

# Notebook-wide matplotlib defaults: large figures and large fonts for
# legends, axis labels and titles; tick labels stay at medium size.
params = {
    'legend.fontsize': 'x-large',
    'figure.figsize': (16, 10),
    'axes.labelsize': 20,
    'axes.titlesize': 24,
    'xtick.labelsize': 'medium',
    'ytick.labelsize': 'medium',
}
pylab.rcParams.update(params)

In [3]:
# Quick slicing check: everything after the first element of the list.
a = [1, 2, 3]
a[1:]

[2, 3]

In [8]:
def request(rqdict, source):
    """Run a CDS retrieval and return the downloaded NetCDF data as one DataFrame.

    The request is downloaded to ``download.zip`` in the current working
    directory, unpacked into ``./test/``, and every ``*.nc`` file found there
    is converted to a DataFrame. The frames are concatenated in sorted
    file-name order. Both the archive and the extraction directory are removed
    again before returning, also when an error occurs.

    Parameters
    ----------
    rqdict : dict
        Request dictionary handed unchanged to ``cdsapi.Client().retrieve``.
    source : str
        Name of the CDS resource to retrieve from.

    Returns
    -------
    pandas.DataFrame
        Row-wise concatenation of ``Dataset.to_dataframe()`` of every
        extracted NetCDF file; the original per-file indices are kept.

    Raises
    ------
    AssertionError
        If the size of the downloaded archive differs from the size
        reported by the CDS result object.
    FileNotFoundError
        If the archive does not contain any ``*.nc`` file.
    """
    zip_path = 'download.zip'
    extract_dir = './test/'
    # Variables dropped from non-IGRA files before conversion to a DataFrame.
    to_be_removed = ['obs', 'string14', 'trajectory', 'trajectory_index', 'trajectory_label']

    t0 = time.time()
    try:
        c = cdsapi.Client()
        r = c.retrieve(source, rqdict)
        r.download(target=zip_path)
        assert os.stat(zip_path).st_size == r.content_length, "Downloaded file is incomplete"

        # Leftovers of an earlier, aborted call must not be mixed into this result.
        shutil.rmtree(extract_dir, ignore_errors=True)
        with zipfile.ZipFile(zip_path) as z:
            z.extractall(path=extract_dir)
        print('Request took: ' + str(time.time() - t0) + ' seconds')

        # Sorted so the row order does not depend on the file system's listing order.
        files = sorted(glob.glob(os.path.join(extract_dir, '*.nc')))
        if not files:
            raise FileNotFoundError('No NetCDF files found in downloaded archive')

        # The first file decides how all files of this request are treated:
        # files whose name starts with 'IGRA' are converted as they are.
        is_igra = os.path.basename(files[0]).startswith('IGRA')

        frames = []
        for path in files:
            # The context manager closes the file handle before the
            # extraction directory is deleted below.
            with xarray.open_dataset(path) as opened:
                ds = opened if is_igra else opened.drop_vars(to_be_removed)
                frames.append(ds.to_dataframe())

        # One concat instead of repeated DataFrame.append (removed in pandas 2.0).
        data = pandas.concat(frames)
    finally:
        if os.path.exists(zip_path):
            os.remove(zip_path)
        if os.path.isdir(extract_dir):
            try:
                shutil.rmtree(extract_dir)
            except OSError:
                print('Error while deleting directory')

    return data

---

# Comparing available stations for IGRA (and its harmonized version) and Comprehensive upper-air observation network (CUON)

---

## Loading Data:

Loading the data also shows whether there is any difference in retrieval performance.

In [5]:
# CDS resource names of the two data sets under comparison; passed as the
# `source` argument of request().
cuon = 'insitu-comprehensive-upper-air-observation-network'  # CUON
igra = 'insitu-observations-igra-baseline-network'  # IGRA and IGRA_H

# CUON
## Requesting the global data and checking for different stations


In [9]:
# Temperature from CUON for 2010-01-01 to 2010-01-02, with no station filter.
da_c = request(
    dict(variable='temperature', date='20100101-20100102'),
    cuon,
)

2021-02-03 10:53:39,527 INFO Welcome to the CDS
2021-02-03 10:53:39,529 INFO Sending request to https://sis-dev.climate.copernicus.eu/api/v2/resources/insitu-comprehensive-upper-air-observation-network
2021-02-03 10:53:39,807 INFO Request is completed
2021-02-03 10:53:39,808 INFO Downloading http://136.156.132.176/cache-compute-0000/cache/data1/adaptor.comprehensive_upper_air.retrieve-1612345881.1689107-26950-17-4c455f03-1e90-46e7-bed8-4be5bd43dbb3.zip to download.zip (18.1M)
2021-02-03 10:53:41,202 INFO Download rate 13M/s    


Request took: 2.186800479888916 seconds


In [13]:
# One row per distinct (lat, lon) pair: the first record of each location is kept.
da_c.drop_duplicates(subset=['lat', 'lon'])

Unnamed: 0_level_0,lat,lon,plev,ta,time
obs,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,47.450001,-111.379997,960.0,213.300003,2010-01-01 00:00:00
0,-17.950001,122.230003,720.0,231.699997,2009-12-31 23:00:00
0,3.200000,113.029999,1000.0,231.500000,2010-01-01 00:00:00
0,27.570000,110.000000,1000.0,222.899994,2010-01-01 11:00:00
0,-29.780001,-57.029999,3690.0,214.300003,2010-01-01 00:00:00
...,...,...,...,...,...
0,26.750000,83.370003,15400.0,205.500000,2010-01-01 01:00:00
0,26.450001,49.820000,760.0,227.300003,2010-01-01 10:00:00
0,28.616699,121.416702,2000.0,221.050003,2010-01-01 00:00:00
0,35.430000,133.330002,15000.0,219.899994,2010-01-01 00:00:00


# IGRA
## Requesting the global data and checking for different stations

In [14]:
# Air temperature from the IGRA source for the same two days.
da_i = request(
    dict(
        source='IGRA',
        variable=['air_temperature'],
        period='2010-01-01/2010-01-02',
    ),
    igra,
)

2021-02-03 11:14:31,936 INFO Welcome to the CDS
2021-02-03 11:14:31,937 INFO Sending request to https://sis-dev.climate.copernicus.eu/api/v2/resources/insitu-observations-igra-baseline-network
2021-02-03 11:14:32,026 INFO Request is completed
2021-02-03 11:14:32,027 INFO Downloading http://136.156.132.176/cache-compute-0002/cache/data1/adaptor.insitu_reference.retrieve_test-1612344114.8875098-28002-15-1a712893-643c-4a69-b19a-b7176ad87587.zip to download.zip (430.5K)
2021-02-03 11:14:32,274 INFO Download rate 1.7M/s 


Request took: 0.6208710670471191 seconds


In [15]:
# Show the frame returned by the IGRA request as this cell's output.
da_i

Unnamed: 0_level_0,station_name,report_timestamp,report_id,longitude,latitude,air_pressure,air_temperature
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,PKM00041640,2010-01-02,1117137,74.333298,31.549999,17300.0,208.649994
1,PKM00041640,2010-01-02,1117137,74.333298,31.549999,6760.0,205.850006
2,ZZV0000UFTA,2010-01-02,384115,-144.899994,81.400002,30000.0,219.649994
3,ZZV0000UFTA,2010-01-02,384115,-144.899994,81.400002,100700.0,235.449997
4,PKM00041640,2010-01-02,1117137,74.333298,31.549999,3580.0,214.250000
...,...,...,...,...,...,...,...
50554,RQM00078526,2010-01-02,32556971,-65.991898,18.431700,,
50555,RQM00078526,2010-01-02,32556971,-65.991898,18.431700,,
50556,RQM00078526,2010-01-02,32556971,-65.991898,18.431700,,
50557,RQM00078526,2010-01-02,32556971,-65.991898,18.431700,,


In [16]:
# One row per distinct (latitude, longitude) pair: the first record of each location is kept.
da_i.drop_duplicates(subset=['latitude', 'longitude'])

Unnamed: 0_level_0,station_name,report_timestamp,report_id,longitude,latitude,air_pressure,air_temperature
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,PKM00041640,2010-01-02,1117137,74.333298,31.549999,17300.0,208.649994
2,ZZV0000UFTA,2010-01-02,384115,-144.899994,81.400002,30000.0,219.649994
59,ZZV00ASEU04,2010-01-02,959572,-35.099998,40.000000,20000.0,225.850006
151,ZZV00ASEU02,2010-01-02,1033949,-50.099998,48.099998,70000.0,267.450012
298,ZZV000ASFR1,2010-01-02,1158543,-14.100000,42.400002,92500.0,279.350006
...,...,...,...,...,...,...,...
49801,USM00072201,2010-01-02,32322886,-81.788597,24.553101,58500.0,271.549988
49872,USM00072250,2010-01-02,32408578,-97.419197,25.916700,,
50134,USM00072240,2010-01-02,32453812,-93.216103,30.125299,40752.0,246.449997
50207,USM00072210,2010-01-02,32512135,-82.400597,27.705299,10665.0,203.649994


# IGRA Harmonized
## Requesting the global data and checking for different stations

In [17]:
# Air temperature from the harmonized IGRA source (IGRA_H) for the same two days.
da_ih = request(
    dict(
        source='IGRA_H',
        variable=['air_temperature'],
        period='2010-01-01/2010-01-02',
    ),
    igra,
)

2021-02-03 11:14:55,740 INFO Welcome to the CDS
2021-02-03 11:14:55,742 INFO Sending request to https://sis-dev.climate.copernicus.eu/api/v2/resources/insitu-observations-igra-baseline-network
2021-02-03 11:14:55,810 INFO Request is completed
2021-02-03 11:14:55,811 INFO Downloading http://136.156.132.176/cache-compute-0002/cache/data2/adaptor.insitu_reference.retrieve_test-1612344274.9767025-27778-16-d56c5ace-367d-454b-a9f5-6763dead8d1d.zip to download.zip (338.4K)
2021-02-03 11:14:55,997 INFO Download rate 1.8M/s 


Request took: 0.31320905685424805 seconds


In [18]:
# Show the frame returned by the IGRA_H request as this cell's output.
da_ih

Unnamed: 0_level_0,station_name,report_timestamp,actual_time,report_id,longitude,latitude,height_of_station_above_sea_level,air_pressure,air_temperature
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0,SFM00068816,2010-01-02,2010-01-02 00:00:00+00,13061350,18.600000,-33.970001,46.000000,87600,
1,SFM00068816,2010-01-02,2010-01-02 00:00:00+00,13061350,18.600000,-33.970001,46.000000,78200,
2,SFM00068816,2010-01-02,2010-01-02 00:00:00+00,13061350,18.600000,-33.970001,46.000000,52300,
3,SFM00068816,2010-01-02,2010-01-02 00:00:00+00,13061350,18.600000,-33.970001,46.000000,20000,
4,SFM00068816,2010-01-02,2010-01-02 00:00:00+00,13061350,18.600000,-33.970001,46.000000,3870,
...,...,...,...,...,...,...,...,...,...
21642,ZZV0000UFTA,2010-01-02,2010-01-01 23:31:00+00,17931,-998.888977,-98.888802,-998.799988,53300,
21643,ZZV0000UFTA,2010-01-02,2010-01-01 23:31:00+00,17931,-998.888977,-98.888802,-998.799988,97100,
21644,ZZV0000UFTA,2010-01-02,2010-01-01 23:31:00+00,17931,-998.888977,-98.888802,-998.799988,41900,
21645,ZZV0000UFTA,2010-01-02,2010-01-01 23:31:00+00,17931,-998.888977,-98.888802,-998.799988,50000,


In [19]:
# One row per distinct (latitude, longitude) pair: the first record of each location is kept.
da_ih.drop_duplicates(subset=['latitude', 'longitude'])

Unnamed: 0_level_0,station_name,report_timestamp,actual_time,report_id,longitude,latitude,height_of_station_above_sea_level,air_pressure,air_temperature
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0,SFM00068816,2010-01-02,2010-01-02 00:00:00+00,13061350,18.600000,-33.970001,46.000000,87600,
60,SFM00068994,2010-01-02,2010-01-02 00:00:00+00,13037665,37.866901,-46.883099,24.000000,45600,
99,INM00043279,2010-01-02,2010-01-01 23:04:00+00,13003436,80.183296,13.000000,13.700000,89400,
148,INM00043371,2010-01-02,2010-01-01 23:03:00+00,12965742,76.949997,8.483300,59.900002,100000,
196,INM00043003,2010-01-02,2010-01-02 00:30:00+00,12925104,72.849998,19.116699,14.200000,40000,
...,...,...,...,...,...,...,...,...,...
21338,INM00043041,2010-01-02,2010-01-02 00:00:00+00,259089,82.033302,19.083300,554.000000,72100,284.750000
21364,INM00042701,2010-01-02,2010-01-02 00:00:00+00,243501,85.316704,23.316700,646.000000,61100,277.149994
21401,INM00043353,2010-01-02,2010-01-01 23:26:00+00,238129,76.233299,9.933300,1.000000,50000,
21422,INM00042874,2010-01-02,2010-01-02 00:00:00+00,192464,81.650002,21.233299,296.000000,96300,285.950012
