In [1]:
import glob
import pandas as pd
import numpy as np
from src.constants import ROOT_DIR
from src.data.utils import Location
import cartopy.crs as ccrs
import matplotlib.pyplot as plt

Project directory (ROOT_DIR): '/home/pereza/git/esowc/aq-biascorrection'


This notebook reports the information contained in the generated prediction-validation CSV files.

In [2]:
# Variables offered by the prompt below; anything else has no results directory.
valid_variables = ('no2', 'o3', 'pm25')

# Ask which variable to report on. `input` already returns a string, so no cast
# is needed; the answer is normalised so that e.g. " NO2 " behaves like "no2".
variable = input('Let me know the variable to report the results (no2, o3, pm25): ').strip().lower()
if variable not in valid_variables:
    # Fail here instead of building a path to a directory that does not exist,
    # which would make every later cell silently operate on empty file lists.
    raise ValueError(
        f"Unknown variable '{variable}'; expected one of {valid_variables}."
    )

# Directory expected to hold the result tables (CSV files) for the chosen variable.
dir_to_tables = ROOT_DIR / 'reports' / 'tables' / variable / 'results' / 'SetOfRuns'

Let me know the variable to report the results (no2, o3, pm25): no2


In [3]:
# Display the resolved results directory so the path can be checked visually.
dir_to_tables

PosixPath('/home/pereza/git/esowc/aq-biascorrection/reports/tables/no2/results/SetOfRuns')

In [4]:
# Collect every CSV table in the results directory in a deterministic order.
csv_paths = sorted(dir_to_tables.glob("*.csv"))
csv_files = [str(path) for path in csv_paths]

# Split into train and test tables by looking at the FILE NAME only. Matching
# against the full path would misclassify every file as soon as any parent
# directory contains the word 'train' or 'test'.
# Paths are kept as strings and sorted so both lists follow the same ordering.
train_files = sorted(str(path) for path in csv_paths if 'train' in path.name)
test_files = sorted(str(path) for path in csv_paths if 'test' in path.name)

In [5]:
# Preview the first train file to check the naming pattern of the tables.
train_files[:1]

['/home/pereza/git/esowc/aq-biascorrection/reports/tables/no2/results/SetOfRuns/train_metrics-for-setofruns_at001_vienna_austria.csv']

In [6]:
# Preview the first test file to check the naming pattern of the tables.
test_files[:1]

['/home/pereza/git/esowc/aq-biascorrection/reports/tables/no2/results/SetOfRuns/test_metrics-for-setofruns_at001_vienna_austria.csv']

In [29]:
def _station_id_from_file(csv_file):
    """Extract the upper-cased station id from a metrics CSV path.

    The id is taken as the third underscore-separated token counting from the
    end of the path once the '.csv' extension is dropped, e.g.
    '..._at001_vienna_austria.csv' -> 'AT001'.
    NOTE(review): this assumes the city and country tokens contain no
    underscores - confirm against the file naming convention.
    """
    return csv_file.split('.csv')[0].split('_')[-3].upper()


# `zip` stops at the shorter list and pairs purely by position, so a station
# missing one of its two tables would shift every following pair. Check first.
if len(train_files) != len(test_files):
    raise ValueError(
        f"Found {len(train_files)} train files but {len(test_files)} test files; "
        "each station needs exactly one of each."
    )

point_values_list = []
for train_file, test_file in zip(train_files, test_files):
    # Identify the station and make sure both files describe the same one
    station_id = _station_id_from_file(train_file)
    test_station_id = _station_id_from_file(test_file)
    if station_id != test_station_id:
        raise ValueError(
            f"Train/test files are not aligned: '{train_file}' is paired "
            f"with '{test_file}'."
        )
    station_obj = Location.get_location_by_id(station_id)

    # Read the train and test data; the first two columns form a two-level index
    train_data = pd.read_csv(train_file, index_col=[0, 1])
    test_data = pd.read_csv(test_file, index_col=[0, 1])

    # Study the mean value of all runs: select the rows whose first index level
    # is 'Mean' and turn them into a {column: {second index level: value}} dict
    train_data_mean = train_data.loc['Mean', :].to_dict()
    test_data_mean = test_data.loc['Mean', :].to_dict()

    point_values_list.append(
        {
            "station": station_obj,
            "values_train": train_data_mean,
            # Number of runs. Presumably every run contributes two rows and the
            # 'Mean' entry contributes two more, hence `// 2 - 1` - TODO confirm.
            "len_train": len(train_data) // 2 - 1,
            "values_test": test_data_mean,
            "len_test": len(test_data) // 2 - 1
        }
    )

In [30]:
# Print, for every station, the percentage change of each train metric when the
# correction is added to the CAMS forecast.
for station in point_values_list:
    print(f"Checking the metrics for {station['station'].city} ({station['station'].country})")
    for metric, values in station['values_train'].items():
        cams = values['CAMS Forecast']
        corrected = values['CAMS Forecast + Correction']
        if metric in ['NMAE', 'RMSE', 'De-Biased NMAE']:
            # Plain relative change with respect to the CAMS forecast value
            percentage = 100 * (corrected - cams) / cams
        elif metric == 'BIAS':
            # The bias is signed: compare magnitudes so that moving towards
            # zero shows up as a negative change
            percentage = 100 * (np.abs(corrected) - np.abs(cams)) / np.abs(cams)
        else:
            # Any other metric (e.g. 'Pearson Correlation'): signed change
            # relative to the magnitude of the CAMS value. This branch replaces
            # an empty `elif :`, which was a syntax error.
            percentage = 100 * (corrected - cams) / np.abs(cams)
        print(f"{metric}: {percentage} %")
    print('\n')

Checking the train metrics for Vienna (Austria) using 347 runs
NMAE: from 0.556 to 0.508
BIAS: from 1.021 to 5.313
RMSE: from 13.056 to 11.691
De-Biased NMAE: from 0.489 to 0.39
Pearson Correlation: from 0.4 to 0.594


Checking the train metrics for Salzburg (Austria) using 363 runs
NMAE: from 0.624 to 0.362
BIAS: from -9.472 to -1.432
RMSE: from 15.113 to 9.296
De-Biased NMAE: from 0.431 to 0.304
Pearson Correlation: from 0.103 to 0.473


Checking the train metrics for Adelaide (Australia) using 452 runs
NMAE: from 0.657 to 0.539
BIAS: from -7.199 to -2.705
RMSE: from 13.959 to 11.121
De-Biased NMAE: from 0.557 to 0.474
Pearson Correlation: from 0.086 to 0.376


Checking the train metrics for Sydney (Australia) using 623 runs
NMAE: from 1.919 to 1.332
BIAS: from 9.132 to 3.312
RMSE: from 19.715 to 15.741
De-Biased NMAE: from 1.168 to 0.901
Pearson Correlation: from 0.343 to 0.43


Checking the train metrics for Melbourne (Australia) using 535 runs
NMAE: from 0.515 to 0.525
BIAS: from 

In [33]:
def _percentage_change(metric, cams, prediction):
    """Return the percentage change from `cams` to `prediction`, rounded to 2 decimals.

    - 'NMAE', 'RMSE', 'De-Biased NMAE': relative change with respect to `cams`.
    - 'BIAS': relative change of the absolute value, so that a bias moving
      towards zero is reported as a negative change.
    - anything else (e.g. 'Pearson Correlation'): signed change relative to
      the absolute value of `cams`.

    Returns NaN when `cams` is zero, where a relative change is undefined.
    """
    if cams == 0:
        return float('nan')
    if metric in ['NMAE', 'RMSE', 'De-Biased NMAE']:
        return round(100 * (prediction - cams) / cams, 2)
    if metric == 'BIAS':
        return round(100 * (abs(prediction) - abs(cams)) / abs(cams), 2)
    return round(100 * (prediction - cams) / abs(cams), 2)


def _print_metrics(station, split):
    """Print the mean metrics of one station for `split` ('train' or 'test')."""
    location = station['station']
    n_runs = station[f'len_{split}']
    print(f" - Checking the {split} metrics for "
          f"{location.city} ({location.country}) using {n_runs} runs:")
    for metric, values in station[f'values_{split}'].items():
        # The percentage is computed from the rounded values so that it is
        # consistent with the numbers shown on the same line
        cams = round(values['CAMS Forecast'], 3)
        prediction = round(values['CAMS Forecast + Correction'], 3)
        percentage = _percentage_change(metric, cams, prediction)
        print(f"{metric}: from {cams} to {prediction} ({percentage} %)")


# Report train and then test metrics for every station
for station in point_values_list:
    for split in ('train', 'test'):
        _print_metrics(station, split)
    print('\n')

 - Checking the train metrics for Vienna (Austria) using 347 runs:
NMAE: from 0.556 to 0.508 (-8.63 %)
BIAS: from 1.021 to 5.313 (420.37 %)
RMSE: from 13.056 to 11.691 (-10.45 %)
De-Biased NMAE: from 0.489 to 0.39 (-20.25 %)
Pearson Correlation: from 0.4 to 0.594 (48.5 %)
 - Checking the test metrics for Vienna (Austria) using 87 runs:
NMAE: from 0.578 to 0.548 (-5.19 %)
BIAS: from 1.613 to 5.935 (267.95 %)
RMSE: from 13.495 to 12.661 (-6.18 %)
De-Biased NMAE: from 0.504 to 0.411 (-18.45 %)
Pearson Correlation: from 0.407 to 0.579 (42.26 %)


 - Checking the train metrics for Salzburg (Austria) using 363 runs:
NMAE: from 0.624 to 0.362 (-41.99 %)
BIAS: from -9.472 to -1.432 (-84.88 %)
RMSE: from 15.113 to 9.296 (-38.49 %)
De-Biased NMAE: from 0.431 to 0.304 (-29.47 %)
Pearson Correlation: from 0.103 to 0.473 (359.22 %)
 - Checking the test metrics for Salzburg (Austria) using 91 runs:
NMAE: from 0.627 to 0.386 (-38.44 %)
BIAS: from -9.38 to -1.706 (-81.81 %)
RMSE: from 15.391 to 10.121