In [1]:
import os
import sys
import pandas as pd
import numpy as np
import sklearn.metrics as skl
sys.path.append('../')
import utils.csv as csv
import utils.shp as shp
import utils.plot as plot

In [2]:
# Input/output locations.
# PATH holds the per-model CSV files; an earlier layout without the 'map'
# level was ../outputs/csv/multi_label/04.
PATH = os.path.join(*('..', 'outputs', 'csv', 'map', 'multi_label', '04'))
SHP_PATH = 'D:\\Deutschland\\FUB\\master_thesis\\data\\Reference_data\\validation\\aoi_polygons.shp'
LABEL_PATH = 'D:\\Deutschland\\FUB\\master_thesis\\data\\ref\\validation\\multi_aoi.csv'

# Model names and run id; file names are built as `<model>_<uid>_<suffix>.csv`.
models = [
    'transformer',
    'lstm',
    'bi-lstm',
]
uid = '7ml20'

# Column labels (one per class) and row labels of the per-class score table.
cols = [
    'Spruce',
    'Silver Fir',
    'Douglas Fir',
    'Pine',
    'Oak',
    'Beech',
    'Sycamore',
]
indexes = ['Precision', 'Recall', 'F1']

Metrics

In [7]:
# Overall scores of every model, keyed by model name.
acc, hamming, precision, recall, f1 = {}, {}, {}, {}, {}
for model in models:
    # Reference and predicted label tables of this model.
    ref = csv.load(os.path.join(PATH, f'{model}_{uid}_ref.csv'), 'id')
    pred = csv.load(os.path.join(PATH, f'{model}_{uid}_pred.csv'), 'id')

    # General scores. For multilabel input scikit-learn's accuracy_score is the
    # exact-match accuracy; `hamming` stores 1 - Hamming loss.
    acc[model] = skl.accuracy_score(ref, pred)
    for store, scorer in (
        (precision, skl.precision_score),
        (recall, skl.recall_score),
        (f1, skl.f1_score),
    ):
        store[model] = scorer(ref, pred, average='weighted')
    hamming[model] = 1 - skl.hamming_loss(ref, pred)

    # Per-class precision, recall and F1 as a 3 x len(cols) table, rounded to
    # two decimals. NOTE(review): scikit-learn matches columns by position, so
    # this assumes the CSV columns follow the order of `cols` - confirm.
    per_class = [
        scorer(ref, pred, average=None)
        for scorer in (skl.precision_score, skl.recall_score, skl.f1_score)
    ]
    df = pd.DataFrame(
        per_class,
        index=pd.Index(indexes, name='index'),
        columns=cols,
    ).astype('float').round(2)

    # Target of the per-class table; the export itself is currently disabled:
    # csv.export(df, out_path, True)
    out_path = os.path.join(PATH, f'{model}_{uid}_matrix.csv')

for title, scores in (
    ('Accuracy', acc),
    ('Hamming', hamming),
    ('Precision', precision),
    ('Recall', recall),
    ('F1', f1),
):
    print(title, scores)

Accuracy {'transformer': 0.49780827139317707, 'lstm': 0.46807699637888317, 'bi-lstm': 0.45683247570040025}
Hamming {'transformer': 0.8967301042772741, 'lstm': 0.8895423235045876, 'bi-lstm': 0.8859484331182444}
Precision {'transformer': 0.7957914575166565, 'lstm': 0.7982306117526757, 'bi-lstm': 0.769247863374583}
Recall {'transformer': 0.7594366847512409, 'lstm': 0.7091076994112894, 'bi-lstm': 0.741775366501212}
F1 {'transformer': 0.7730407309637375, 'lstm': 0.7475855005156916, 'bi-lstm': 0.7474731285460074}


Maps for the areas of interest (AOI)

In [3]:
# For every model: join its predictions onto the AOI polygons, count per
# polygon how many class labels agree with the reference, export that table
# and draw one symbol map per study area.
aoi = shp.load_shp_file(SHP_PATH)
labels = csv.load(LABEL_PATH, 'id')

# Display name of each study area -> its value in the 'Location' column.
area_locations = {
    'Hardtwald': 'Hardtwald_pine_beech_redoak',
    'Schoenbuch': 'schoenbuch_beech_oak_mixture',
    'Schwarzwald': 'Schwarzwald_spruce_silverfir_douglasfir',
}

for model in models:
    # load the predictions and attach them to the polygons with a matching id
    pred_path = os.path.join(PATH, f'{model}_{uid}_pred.csv')
    pred = csv.load(pred_path, 'id')
    gdf = pd.merge(aoi, pred, on='id', how='inner').set_index('id')

    # compare: number of label columns whose prediction equals the reference.
    # The prediction columns are selected by name (the columns of `labels`)
    # instead of by a fixed positional offset, so the count does not depend on
    # how many attribute columns the shapefile carries. `==` is kept strict on
    # purpose: pandas raises if ids or columns are not identically labelled.
    gdf['sum'] = (labels == gdf[labels.columns]).sum(axis=1)

    # export
    out_path = os.path.join(PATH, f'{model}_{uid}_map.csv')
    csv.export(gdf, out_path, True)

    # draw one map per study area; a separate name keeps the merged `gdf`
    # from being overwritten by the per-area subset
    for area, location in area_locations.items():
        area_gdf = gdf[gdf['Location'] == location]
        plot.draw_symbol_map(area_gdf, area, f'{model}_{uid}')


import file D:\Deutschland\FUB\master_thesis\data\Reference_data\validation\aoi_polygons.shp
export file ..\outputs\csv\map\multi_label\04\transformer_7ml20_map.csv
export file ..\outputs\csv\map\multi_label\04\lstm_7ml20_map.csv
export file ..\outputs\csv\map\multi_label\04\bi-lstm_7ml20_map.csv


<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

<Figure size 720x576 with 0 Axes>

In [4]:
# Per study area and model: share of polygons with every class label correct
# (loc_0_wrong) and with exactly one class label wrong (loc_1_wrong), read
# from the `<model>_<uid>_map.csv` tables and their 'sum' column.
n_classes = len(cols)  # a fully correct polygon has 'sum' == number of classes
loc_0_wrong = {'Hardtwald':{}, 'Schwarzwald':{}, 'schoenbuch':{}}
loc_1_wrong = {'Hardtwald':{}, 'Schwarzwald':{}, 'schoenbuch':{}}
for model in models:
    # Local names on purpose: reusing `pred`/`df` here would overwrite the
    # prediction table and score table other cells work with.
    map_path = os.path.join(PATH, f'{model}_{uid}_map.csv')
    map_df = csv.load(map_path, 'id')
    for key, area_df in map_df.groupby('Location'):
        # the area key is the part of the Location value before the first '_'
        loc = key.split('_')[0]
        total = len(area_df)
        n_correct = area_df['sum']
        # setdefault: a location prefix that is not pre-listed gets its own
        # entry instead of raising KeyError.
        # int(...) / total keeps plain Python floats in the printed dicts.
        loc_0_wrong.setdefault(loc, {})[model] = int((n_correct == n_classes).sum()) / total
        loc_1_wrong.setdefault(loc, {})[model] = int((n_correct == n_classes - 1).sum()) / total
print(loc_0_wrong)
print(loc_1_wrong)

{'Hardtwald': {'transformer': 0.7755102040816326, 'lstm': 0.673469387755102, 'bi-lstm': 0.7346938775510204}, 'Schwarzwald': {'transformer': 0.25925925925925924, 'lstm': 0.18518518518518517, 'bi-lstm': 0.16666666666666666}, 'schoenbuch': {'transformer': 0.4835164835164835, 'lstm': 0.4945054945054945, 'bi-lstm': 0.4175824175824176}}
{'Hardtwald': {'transformer': 0.20408163265306123, 'lstm': 0.24489795918367346, 'bi-lstm': 0.1836734693877551}, 'Schwarzwald': {'transformer': 0.2962962962962963, 'lstm': 0.48148148148148145, 'bi-lstm': 0.2962962962962963}, 'schoenbuch': {'transformer': 0.3956043956043956, 'lstm': 0.27472527472527475, 'bi-lstm': 0.37362637362637363}}


Test: per-sample agreement between predictions and reference labels

In [9]:
# Per-sample agreement between prediction and reference for one model,
# joined with the class table and exported as `<model>_<uid>_count.csv`.
# The inputs are loaded here explicitly so the cell does not depend on
# whichever `pred`/`ref`/`model` an earlier loop happened to leave behind.
model = models[-1]  # model to inspect; change to check another one
ref = csv.load(os.path.join(PATH, f'{model}_{uid}_ref.csv'), 'id')
pred = csv.load(os.path.join(PATH, f'{model}_{uid}_pred.csv'), 'id')

class_path = 'D:\\Deutschland\\FUB\\master_thesis\\data\\ref\\all\\7main_classes.csv'
classes = csv.load(class_path, 'id')

# True where the predicted label equals the reference label; 'sum' counts the
# matching labels per sample, and rows are ordered from most to fewest matches.
res = pred == ref
res['sum'] = res.sum(axis=1)
res = res.sort_values(by='sum', ascending=False)

output = pd.merge(res, classes, on='id', how='inner')
out_path = os.path.join(PATH, f'{model}_{uid}_count.csv')
csv.export(output, out_path, True)

export file ..\outputs\csv\multi_label\04\transformer_7ml_count.csv
