# risicolive_QC interactive notebook

In [1]:
import risicolive_QC as qc
import pandas as pd
import numpy as np

---
## **Settings**
All DEFAULT settings are stored in the **config.json** file:
1. *INFO*: contains information on time step, unique identifier for the single station (*KEY_STATION*) and variables;
2. *TEST*: contains information for tests.
    - *WINDOW*: sliding window
    - *VARS_CHECK*: variables checked for the complete_test
    - *VARS_CONS*: variables checked for the consistency_test
    - *RANGES*: ranges checked for the range_test
    - *STEPS*: steps checked for the step_test
    - *VARIATIONS*: variations checked for the time_persistence_test. Each variation is checked only within a specific range of values

This information is available by default (see `qc.INFO` and `qc.DEFAULT` below). The user can also modify these settings, following the DEFAULT structure (see the examples below).

In [2]:
# Show the INFO settings: time step (DT), station identifier key (KEY_STATION)
# and the description/unit of each variable (VARS).
qc.INFO

{'DT': '10min',
 'KEY_STATION': 'station_id',
 'VARS': {'t': {'info': 'temperature', 'um': 'C'},
  'h': {'info': 'humidity', 'um': '.'},
  'p': {'info': 'precipitation', 'um': 'mm/dt'},
  'ws': {'info': 'wind speed', 'um': 'm/s'},
  'wd': {'info': 'wind direction', 'um': 'deg'}}}

In [3]:
# Show the default test settings: checked variables, ranges, steps,
# sliding window and variations.
qc.DEFAULT

{'VARS_CHECK': ['t', 'h', 'p', 'ws'],
 'VARS_CONS': ['ws', 'wd'],
 'RANGES': {'t': [-30, 50],
  'h': [0, 100],
  'p': [0, 400],
  'ws': [0, 75],
  'wd': [0, 360]},
 'STEPS': {'t': 2, 'h': 10},
 'WINDOW': 3,
 'VARIATIONS': {'t': [0.01, -30, 50], 'h': [0.01, 0, 95], 'ws': [0.01, 0, 75]}}

---
## **Example**

In [4]:
# Load the sample data for one station; the first CSV column is used as the index.
df_TEST = pd.read_csv('test/75200.csv', index_col=0)
# Display the loaded frame.
df_TEST

Unnamed: 0_level_0,p,t,h,ws,wd,sm,gsi,igsi,pheno_phase,grass_phase,moisture,intensity,ros,wind_effect
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2022-09-29 11:50:00+00:00,,21.400000,64.0,8.400001,206.0,0.535962,0.533734,0.525523,5.0,3.0,14.2233,386.973,196.168,2.74243
2022-09-29 12:00:00+00:00,,21.400000,67.0,6.700000,211.0,0.535636,0.533734,0.525523,5.0,3.0,,,,
2022-09-29 12:10:00+00:00,,21.300001,61.0,7.800000,212.0,0.535309,0.533734,0.525523,5.0,3.0,,,,
2022-09-29 12:20:00+00:00,,21.300001,63.0,8.900001,211.0,0.534983,0.533734,0.525523,5.0,3.0,,,,
2022-09-29 12:30:00+00:00,,21.600000,65.0,7.600000,209.0,0.534657,0.533734,0.525523,5.0,3.0,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-09-30 11:10:00+00:00,,,,,,0.514102,0.533734,0.525523,5.0,3.0,,,,
2022-09-30 11:20:00+00:00,,,,,,0.514102,0.533734,0.525523,5.0,3.0,,,,
2022-09-30 11:30:00+00:00,,,,,,0.514102,0.533734,0.525523,5.0,3.0,,,,
2022-09-30 11:40:00+00:00,,,,,,0.514102,0.533734,0.525523,5.0,3.0,,,,


In [10]:
# Test on a single station with the DEFAULT configuration
# (no `settings` argument is passed to quality_check).
df_check = qc.quality_check(df_TEST)
# Display the result (QC and QC_LABEL columns).
df_check

Unnamed: 0_level_0,QC,QC_LABEL
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-09-29 11:50:00+00:00,2,INCOMPLETE
2022-09-29 12:00:00+00:00,10,INCOMPLETE
2022-09-29 12:10:00+00:00,26,INCOMPLETE
2022-09-29 12:20:00+00:00,26,INCOMPLETE
2022-09-29 12:30:00+00:00,26,INCOMPLETE
...,...,...
2022-09-30 11:10:00+00:00,2,INCOMPLETE
2022-09-30 11:20:00+00:00,2,INCOMPLETE
2022-09-30 11:30:00+00:00,2,INCOMPLETE
2022-09-30 11:40:00+00:00,2,INCOMPLETE


In [9]:
# Test on a single station with a CUSTOM configuration.
# Only some settings are given by the user here (the variables checked by
# complete_test, the ranges checked by range_test and the variations);
# any setting that is not specified is taken from the default configuration.
settings_new = {
    'VARS_CHECK': ['t'],
    'RANGES': {'t': [0, 50]},
    'VARIATIONS': {'t': [1, -10, 50]},
}
df_check_new = qc.quality_check(df_TEST, settings=settings_new)
df_check_new

Unnamed: 0_level_0,QC,QC_LABEL
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-09-29 11:50:00+00:00,7,INCOMPLETE
2022-09-29 12:00:00+00:00,15,INCOMPLETE
2022-09-29 12:10:00+00:00,15,INCOMPLETE
2022-09-29 12:20:00+00:00,15,INCOMPLETE
2022-09-29 12:30:00+00:00,15,INCOMPLETE
...,...,...
2022-09-30 11:10:00+00:00,2,INCOMPLETE
2022-09-30 11:20:00+00:00,2,INCOMPLETE
2022-09-30 11:30:00+00:00,2,INCOMPLETE
2022-09-30 11:40:00+00:00,2,INCOMPLETE
