# risicolive_QC interactive notebook

In [1]:
import risicolive_QC as qc
import pandas as pd

---
## **Settings**
All DEFAULT settings are stored in the **config.json** file:
1. *INFO*: contains information on time step, unique identifier for the single station (*KEY_STATION*) and variables;
2. *TEST*: contains information for tests.
    - *WINDOW*: sliding window
    - *VARS_CHECK*: variables checked for the complete_test
    - *VARS_CONS*: variables checked for the consistency_test
    - *RANGES*: ranges checked for the range_test
    - *STEPS*: steps checked for the step_test
    - *VARIATIONS*: variations checked for the time_persistence_test. These variations are checked only within a specific range of values

This information can be accessed through `qc.DEFAULT`. The user can also modify these settings, following the DEFAULT structure (see the examples below).

In [2]:
# Display the INFO section of the default configuration:
# time step, station key and per-variable metadata
qc.DEFAULT['INFO']

{'DT': '10min',
 'KEY_STATION': 'station_id',
 'VARS': {'t': {'info': 'temperature', 'um': 'C'},
  'h': {'info': 'humidity', 'um': '.'},
  'p': {'info': 'precipitation', 'um': 'mm/dt'},
  'ws': {'info': 'wind speed', 'um': 'm/s'},
  'wd': {'info': 'wind direction', 'um': 'deg'}}}

In [3]:
# Display the TEST section of the default configuration:
# window plus the variables, ranges, steps and variations used by the tests
qc.DEFAULT['TEST']

{'WINDOW': 3,
 'VARS_CHECK': ['t', 'h', 'p', 'ws', 'wd'],
 'VARS_CONS': ['ws', 'wd'],
 'RANGES': {'t': [-30, 50],
  'h': [0, 100],
  'p': [0, 400],
  'ws': [0, 75],
  'wd': [0, 360]},
 'STEPS': {'t': 2, 'h': 10, 'ws': 10},
 'VARIATIONS': {'t': [0.01, -30, 50],
  'h': [0.01, 0, 95],
  'ws': [0.01, 0.1, 75]}}

---
## **Example**

In [4]:
## Load the sample data set: every RISICO Live station, one single hour of records
df_allStations = pd.read_csv(filepath_or_buffer='./test/test.csv')
# show the loaded table as the cell output
df_allStations

Unnamed: 0,station_id,date,t,h,p,ws,wd
0,1,2017-06-01 00:00:00+00:00,16.4000,63.0000,0.0,1.20000,157.0
1,1,2017-06-01 00:10:00+00:00,16.6000,59.0000,0.0,1.50000,149.0
2,1,2017-06-01 00:20:00+00:00,16.6000,59.0000,0.0,0.80000,88.0
3,1,2017-06-01 00:30:00+00:00,16.9000,59.0000,0.0,1.60000,358.0
4,1,2017-06-01 00:40:00+00:00,17.0000,59.0000,0.0,0.60000,27.0
...,...,...,...,...,...,...,...
3679,614,2017-06-01 00:10:00+00:00,20.7833,86.3333,,1.16667,31.0
3680,614,2017-06-01 00:20:00+00:00,20.7667,86.6667,,1.33333,31.0
3681,614,2017-06-01 00:30:00+00:00,20.7500,87.0000,,1.50000,31.0
3682,614,2017-06-01 00:40:00+00:00,20.7333,87.3333,,1.66667,31.0


In [7]:
## Single-station test with the DEFAULT configuration
# keep only the rows of station 46, selected through the configured station key column
df_station = df_allStations.loc[
    df_allStations[qc.DEFAULT['INFO']['KEY_STATION']] == 46
]
# run the quality-control tests on that station alone
df_station_checked = qc.check_stations(df_station)
df_station_checked

Unnamed: 0,station_id,date,t,h,p,ws,wd,QC,QC_label
270,46,2017-06-01 00:00:00+00:00,10.9,,0.0,0.0,0.0,,none
271,46,2017-06-01 00:10:00+00:00,10.9333,,,0.0,0.0,,none
272,46,2017-06-01 00:20:00+00:00,10.9667,,0.0,0.0,0.0,0.0,incomplete
273,46,2017-06-01 00:30:00+00:00,11.0,,0.0,0.0,0.0,0.0,incomplete
274,46,2017-06-01 00:40:00+00:00,10.9333,,0.0,0.0,0.0,0.0,incomplete
275,46,2017-06-01 00:50:00+00:00,10.8667,,0.0,0.0,0.0,0.0,incomplete


In [8]:
## Single-station test with a user-supplied configuration
# Only some settings are given by the user (here: the variables for complete_test,
# the ranges for range_test and the variations for time_persistence_test).
# Any setting that is left out is filled in from the default configuration.
config_new = {
    'VARS_CHECK': ['t'],
    'RANGES': {'t': [0, 50]},
    'VARIATIONS': {'t': [1, -10, 50]},
}
df_station_checked = qc.check_stations(df_station, config=config_new)
df_station_checked

Unnamed: 0,station_id,date,t,h,p,ws,wd,QC,QC_label
270,46,2017-06-01 00:00:00+00:00,10.9,,0.0,0.0,0.0,,none
271,46,2017-06-01 00:10:00+00:00,10.9333,,,0.0,0.0,,none
272,46,2017-06-01 00:20:00+00:00,10.9667,,0.0,0.0,0.0,4.0,suspicious
273,46,2017-06-01 00:30:00+00:00,11.0,,0.0,0.0,0.0,4.0,suspicious
274,46,2017-06-01 00:40:00+00:00,10.9333,,0.0,0.0,0.0,4.0,suspicious
275,46,2017-06-01 00:50:00+00:00,10.8667,,0.0,0.0,0.0,4.0,suspicious


In [9]:
## Test on all stations
# When the dataframe contains more than one station, the tests are applied to each station
df_allStations_checked = qc.check_stations(df_allStations)
df_allStations_checked

Unnamed: 0,station_id,date,t,h,p,ws,wd,QC,QC_label
0,1,2017-06-01 00:00:00+00:00,16.4000,63.0000,0.0,1.20000,157.0,,none
1,1,2017-06-01 00:10:00+00:00,16.6000,59.0000,0.0,1.50000,149.0,,none
2,1,2017-06-01 00:20:00+00:00,16.6000,59.0000,0.0,0.80000,88.0,5.0,good
3,1,2017-06-01 00:30:00+00:00,16.9000,59.0000,0.0,1.60000,358.0,4.0,suspicious
4,1,2017-06-01 00:40:00+00:00,17.0000,59.0000,0.0,0.60000,27.0,4.0,suspicious
...,...,...,...,...,...,...,...,...,...
3679,614,2017-06-01 00:10:00+00:00,20.7833,86.3333,,1.16667,31.0,,none
3680,614,2017-06-01 00:20:00+00:00,20.7667,86.6667,,1.33333,31.0,0.0,incomplete
3681,614,2017-06-01 00:30:00+00:00,20.7500,87.0000,,1.50000,31.0,0.0,incomplete
3682,614,2017-06-01 00:40:00+00:00,20.7333,87.3333,,1.66667,31.0,0.0,incomplete
