In [46]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
#import seaborn as sns
from csv import writer

In [47]:
def style_diag(data):
    '''Build a CSS mask that highlights the main diagonal of ``data``.

    Adapted from CJR, https://stackoverflow.com/a/56916315

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose index and columns define the layout of the mask.
        It does not have to be square.

    Returns
    -------
    pandas.DataFrame
        Frame with the same index and columns as ``data`` holding
        'background-color: yellow' at every position (i, i) and an empty
        string everywhere else.
        NOTE(review): presumably meant for ``Styler.apply(style_diag, axis=None)``
        -- no call site is visible here, confirm against usage.
    '''
    n_rows, n_cols = len(data.index), len(data.columns)
    css = np.full((n_rows, n_cols), '', dtype=object)

    # Select the diagonal with paired NumPy integer arrays, which is always
    # element-wise. `.iloc[range(n), range(n)]` is a row x column cross-product
    # selection, so assigning through it can colour the whole square block
    # depending on how pandas stores the frame.
    diag = np.arange(min(n_rows, n_cols))
    css[diag, diag] = 'background-color: yellow'

    return pd.DataFrame(css, index=data.index, columns=data.columns)

# Metric Analysis: Landcover Prediction (CSV)

In [48]:
# Run configuration and creation of the (wide-format) yearly metrics CSV.
modeltype = 'multiyear'  # singleyear | multiyear
fold_config = '12345'

mimer_metric_summary = '/mimer/NOBACKUP/groups/globalpoverty1/albin_and_albin/metric_summary/'
country_metrics_csv_path = mimer_metric_summary + f'landcover_prediction_esri_{modeltype}_{fold_config}_yearly.csv'

# Header row: 'year' followed by one group of 8 columns per metric. Only the
# first column of each group carries the metric name, the other 7 stay blank.
headers = ['year']
for metric_name in ('recall', 'precision', 'iou', 'f1'):
    headers += [metric_name] + [''] * 7

# A second header row with the class names is intentionally NOT written:
# sub_headers = [''] + 4*['c1:Water', 'c2:Tree', 'c3:Flooded Vegetation',
#                'c4:Crops', 'c5:Bare Ground', 'c6:Rangeland',
#                'c7:Rural', 'c8:Urban']

# Mode 'w' truncates the file, so re-running this cell starts a fresh table.
# newline='' hands line-ending control to the csv module (see csv docs).
# The `with` block closes the file; no explicit close() is needed.
with open(country_metrics_csv_path, 'w', newline='') as f:
    writer_object = writer(f)
    writer_object.writerow(headers)

In [49]:
# Loop through the confusion matrices of each year in the evaluated time
# series 2018-2022, derive per-class recall / precision / IoU / F1 from the raw
# counts and append one row per year to the metrics CSV.
if modeltype == 'singleyear':
    mimer_path = '/mimer/NOBACKUP/groups/globalpoverty1/albin_and_albin/raw_count_confusion_matrices/single_year_model/'
    model_prefix = 'confusion_matrix_esri_urban_rural'
elif modeltype == 'multiyear':
    mimer_path = '/mimer/NOBACKUP/groups/globalpoverty1/albin_and_albin/raw_count_confusion_matrices/multi_year_model/'
    model_prefix = 'confusion_matrix_LSTM_esri_urban_rural'
else:
    # Fail loudly instead of reporting every year as "missing".
    raise ValueError(f"unknown modeltype {modeltype!r}; expected 'singleyear' or 'multiyear'")

for year_index in range(5):
    year = 2018 + year_index
    model_path = f'{model_prefix}_{fold_config}_2018to2022_{year}.csv'

    # dataframe containing raw count confusion matrix
    try:
        df_raw_count = pd.read_csv(mimer_path + model_path, index_col=0)
    except FileNotFoundError:
        # Only a genuinely absent file is skipped; any other error (bad
        # permissions, malformed CSV, typos in this cell) should surface.
        print(f'confusion matrix of {year} missing')
        continue
    class_names = df_raw_count.columns

    # extract recall/precision by normalization of the conf-matrix by row/col
    df_recall = df_raw_count.div(df_raw_count.sum(axis=1), axis=0)
    recall_scores = df_recall.to_numpy().diagonal()

    df_precision = df_raw_count.div(df_raw_count.sum(axis=0), axis=1)
    precision_scores = df_precision.to_numpy().diagonal()

    # per-class true positives, false positives and false negatives:
    #   tp = diagonal entry, fp = column total - tp, fn = row total - tp
    raw_counts = df_raw_count.to_numpy()
    tp = raw_counts.diagonal().astype(float)
    fp = raw_counts.sum(axis=0) - tp
    fn = raw_counts.sum(axis=1) - tp

    # iou = tp / (tp + fp + fn); classes with a zero denominator stay NaN
    iou_denom = tp + fp + fn
    iou_scores = np.full(len(class_names), np.nan)
    iou_valid = iou_denom != 0
    iou_scores[iou_valid] = tp[iou_valid] / iou_denom[iou_valid]

    # f1 = tp / (tp + 0.5 * (fp + fn)); classes with a zero denominator stay NaN
    f1_denom = tp + 0.5 * (fp + fn)
    f1_scores = np.full(len(class_names), np.nan)
    f1_valid = f1_denom != 0
    f1_scores[f1_valid] = tp[f1_valid] / f1_denom[f1_valid]

    country_metrics_csv_row = [year] + recall_scores.tolist() + precision_scores.tolist() + iou_scores.tolist() + f1_scores.tolist()

    # write results to csv (append mode; the `with` block closes the file)
    with open(country_metrics_csv_path, 'a', newline='') as f_object:
        writer_object = writer(f_object)
        writer_object.writerow(country_metrics_csv_row)
        


In [50]:
# Two-level column layout of the wide metrics table:
# outer level = metric, inner level = land-cover class.
headers = ['recall', 'precision', 'iou', 'f1']
sub_headers = [
    'c1:Water', 'c2:Tree', 'c3:Flooded Vegetation', 'c4:Crops',
    'c5:Bare Ground', 'c6:Rangeland', 'c7:Rural', 'c8:Urban',
]
cols = pd.MultiIndex.from_product([headers, sub_headers])

# Read the metrics CSV back with the year as index and replace its
# single-row header by the (metric, class) MultiIndex.
# Note: despite its name, df_raw_count holds the metric table from here on.
df_raw_count = pd.read_csv(country_metrics_csv_path, index_col=0)
df_raw_count.columns = cols
df_raw_count

Unnamed: 0_level_0,recall,recall,recall,recall,recall,recall,recall,recall,precision,precision,...,iou,iou,f1,f1,f1,f1,f1,f1,f1,f1
Unnamed: 0_level_1,c1:Water,c2:Tree,c3:Flooded Vegetation,c4:Crops,c5:Bare Ground,c6:Rangeland,c7:Rural,c8:Urban,c1:Water,c2:Tree,...,c7:Rural,c8:Urban,c1:Water,c2:Tree,c3:Flooded Vegetation,c4:Crops,c5:Bare Ground,c6:Rangeland,c7:Rural,c8:Urban
year,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2018,0.946985,0.922055,0.918272,0.683428,0.772129,0.792441,0.714946,0.610351,0.551354,0.759783,...,0.098701,0.362628,0.696937,0.833091,0.294797,0.623977,0.833871,0.840542,0.179669,0.532248
2019,0.930281,0.882391,0.795205,0.783052,0.867646,0.777972,0.725494,0.583115,0.556418,0.809874,...,0.111329,0.391937,0.696341,0.844579,0.359159,0.643462,0.856194,0.836181,0.200353,0.563154
2020,0.953776,0.927871,0.932549,0.76631,0.881948,0.769559,0.747071,0.612928,0.576651,0.78739,...,0.121043,0.389004,0.718749,0.851878,0.359099,0.666602,0.868447,0.842412,0.215947,0.560119
2021,0.936823,0.931934,0.924107,0.731377,0.832921,0.762107,0.706238,0.598801,0.572808,0.746481,...,0.138108,0.393834,0.710928,0.828962,0.331228,0.660151,0.848051,0.83108,0.242698,0.565109
2022,0.952574,0.931504,0.937088,0.744288,0.843637,0.798685,0.696555,0.625161,0.595753,0.781189,...,0.141524,0.385122,0.733048,0.84975,0.34695,0.684935,0.86452,0.855287,0.247956,0.556084


In [51]:
# Per-class recall of every year, ordered by ascending recall of the rural class
df_raw_count['recall'].sort_values(by='c7:Rural')

Unnamed: 0_level_0,c1:Water,c2:Tree,c3:Flooded Vegetation,c4:Crops,c5:Bare Ground,c6:Rangeland,c7:Rural,c8:Urban
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2022,0.952574,0.931504,0.937088,0.744288,0.843637,0.798685,0.696555,0.625161
2021,0.936823,0.931934,0.924107,0.731377,0.832921,0.762107,0.706238,0.598801
2018,0.946985,0.922055,0.918272,0.683428,0.772129,0.792441,0.714946,0.610351
2019,0.930281,0.882391,0.795205,0.783052,0.867646,0.777972,0.725494,0.583115
2020,0.953776,0.927871,0.932549,0.76631,0.881948,0.769559,0.747071,0.612928


In [52]:
# Per-class precision for every year (inner column level of the 'precision' group)
df_raw_count['precision']

Unnamed: 0_level_0,c1:Water,c2:Tree,c3:Flooded Vegetation,c4:Crops,c5:Bare Ground,c6:Rangeland,c7:Rural,c8:Urban
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2018,0.551354,0.759783,0.175582,0.574041,0.906345,0.89486,0.102745,0.471866
2019,0.556418,0.809874,0.231963,0.546111,0.845041,0.903805,0.116225,0.544514
2020,0.576651,0.78739,0.222362,0.589853,0.855353,0.930501,0.126215,0.515689
2021,0.572808,0.746481,0.201775,0.601567,0.863742,0.913779,0.146526,0.535006
2022,0.595753,0.781189,0.212885,0.634349,0.886464,0.920524,0.150823,0.500753


In [53]:
# Per-class IoU for every year (inner column level of the 'iou' group)
df_raw_count['iou']

Unnamed: 0_level_0,c1:Water,c2:Tree,c3:Flooded Vegetation,c4:Crops,c5:Bare Ground,c6:Rangeland,c7:Rural,c8:Urban
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2018,0.534845,0.713929,0.172881,0.453464,0.715076,0.724944,0.098701,0.362628
2019,0.534144,0.73097,0.218887,0.474342,0.748549,0.71848,0.111329,0.391937
2020,0.560974,0.741975,0.218842,0.499927,0.767482,0.727731,0.121043,0.389004
2021,0.551504,0.707887,0.198486,0.492706,0.736188,0.71098,0.138108,0.393834
2022,0.578591,0.738752,0.209885,0.520837,0.76137,0.747163,0.141524,0.385122


In [54]:
# Per-class F1 score for every year (inner column level of the 'f1' group)
df_raw_count['f1']

Unnamed: 0_level_0,c1:Water,c2:Tree,c3:Flooded Vegetation,c4:Crops,c5:Bare Ground,c6:Rangeland,c7:Rural,c8:Urban
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2018,0.696937,0.833091,0.294797,0.623977,0.833871,0.840542,0.179669,0.532248
2019,0.696341,0.844579,0.359159,0.643462,0.856194,0.836181,0.200353,0.563154
2020,0.718749,0.851878,0.359099,0.666602,0.868447,0.842412,0.215947,0.560119
2021,0.710928,0.828962,0.331228,0.660151,0.848051,0.83108,0.242698,0.565109
2022,0.733048,0.84975,0.34695,0.684935,0.86452,0.855287,0.247956,0.556084


### Transform into long format

In [55]:
# Nested mapping of the wide table: {year: {(metric, class): value}}
data = df_raw_count.to_dict(orient='index')

# Long format: a list with one record per (year, metric, class) combination.
# The key order below fixes the column order of the resulting table.
data_long_format = [
    {
        'year': str(year_key),
        'modeltype': modeltype,
        'metric': metric_name,
        'class': class_name,
        'value': metric_value,
    }
    for year_key, values_by_indicator in data.items()
    for (metric_name, class_name), metric_value in values_by_indicator.items()
]
    


In [56]:
#data_long_format

In [57]:
# One row per record of data_long_format; columns follow the record keys.
dataframe_longformat = pd.DataFrame(data_long_format)
# To look at a single metric instead of the first rows:
#   dataframe_longformat[dataframe_longformat['metric'] == 'recall']
dataframe_longformat.head()

Unnamed: 0,year,modeltype,metric,class,value
0,2018,multiyear,recall,c1:Water,0.946985
1,2018,multiyear,recall,c2:Tree,0.922055
2,2018,multiyear,recall,c3:Flooded Vegetation,0.918272
3,2018,multiyear,recall,c4:Crops,0.683428
4,2018,multiyear,recall,c5:Bare Ground,0.772129


In [58]:
# The long-format table is stored in the metric summary directory,
# under a file name ending in "_yearly_longformat.csv".
longformat_filename = f'landcover_prediction_esri_{modeltype}_{fold_config}_yearly_longformat.csv'
country_metrics_csv_path_longformat = mimer_metric_summary + longformat_filename

# Persist without the positional index so the file has exactly the record columns.
dataframe_longformat.to_csv(country_metrics_csv_path_longformat, index=False)

In [59]:
# Round-trip check: reload the long-format CSV and show the rows of 2018.
# The year is compared as an integer here although it was written as a string.
df_test = pd.read_csv(country_metrics_csv_path_longformat)
is_2018 = df_test['year'] == 2018
df_test.loc[is_2018]

Unnamed: 0,year,modeltype,metric,class,value
0,2018,multiyear,recall,c1:Water,0.946985
1,2018,multiyear,recall,c2:Tree,0.922055
2,2018,multiyear,recall,c3:Flooded Vegetation,0.918272
3,2018,multiyear,recall,c4:Crops,0.683428
4,2018,multiyear,recall,c5:Bare Ground,0.772129
5,2018,multiyear,recall,c6:Rangeland,0.792441
6,2018,multiyear,recall,c7:Rural,0.714946
7,2018,multiyear,recall,c8:Urban,0.610351
8,2018,multiyear,precision,c1:Water,0.551354
9,2018,multiyear,precision,c2:Tree,0.759783
