In [21]:
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib
import pandas as pd
import numpy as np 
from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics import confusion_matrix
import matplotlib.cm as cm
import itertools as its

## This notebook builds a confusion matrix for every pair of metrics' cluster allocations and saves each one to disk as a PNG heatmap

In [None]:
def compute_cm(it1, it2, out_dir='E:/thesis_images/cms/'):
    """Plot the confusion matrix of two cluster allocations and save it as a PNG.

    Parameters
    ----------
    it1, it2 : sequence
        ``(name, labels)`` pairs. ``name`` is a string used in the title, the
        axis labels and the file name; ``labels`` holds one cluster id per
        sample. The ids are assumed to be non-negative integers, because the
        matrix is built over ``0 .. highest id``.
    out_dir : str, optional
        Directory the image is written to, as ``<name1>_<name2>.png``.

    Returns
    -------
    None
        The function is called for its side effect (the file on disk).

    Notes
    -----
    NOTE(review): a later cell defines ``compute_cm`` again; when both cells
    are run, that later definition replaces this one.
    """
    import os  # local import so this cell does not depend on an edit to the import cell

    n1, lab1 = it1[0], it1[1]
    n2, lab2 = it2[0], it2[1]

    # Cover every cluster id that occurs in either allocation (0 .. highest id).
    n_labels = max(max(lab1), max(lab2)) + 1
    labels = list(range(n_labels))

    # Rows follow it1's clusters, columns follow it2's clusters.
    counts = confusion_matrix(lab1, lab2, labels=labels)
    cm_df = pd.DataFrame(counts)

    # Drop cluster ids that are unused by one of the two allocations
    # (all-zero rows / columns) so the heatmap only shows real clusters.
    nonzero = cm_df != 0
    cm_df = cm_df.loc[nonzero.any(axis=1), nonzero.any(axis=0)]

    title_tot = ('Heatmap of confusion matrix ' + n1 + ' & ' + n2).title()
    filename = os.path.join(out_dir, n1 + '_' + n2 + '.png')

    sns.set(font_scale=1.4)  # for label size

    # The figure must exist BEFORE the heatmap is drawn; creating it afterwards
    # makes savefig() write a blank canvas and leaves the real plot open.
    fig, ax = plt.subplots(figsize=(14, 14))
    try:
        sns.heatmap(cm_df, annot=True, annot_kws={"size": 16}, fmt='d',
                    cbar_kws={'label': 'Number of maps assigned to cluster', 'shrink': .9},
                    ax=ax)
        ax.set_title(title_tot, fontsize=14)
        ax.set_ylabel(n1 + ' Clusters')
        ax.set_xlabel(n2 + ' Clusters')
        ax.set_aspect('equal')
        fig.savefig(filename, format='png', bbox_inches="tight")
    finally:
        # Always release the figure, even when drawing or saving fails.
        plt.close(fig)

In [44]:
def compute_cm(it1, it2, out_dir='E:/thesis_images/cms/'):
    """Plot the confusion matrix of two cluster allocations and save it as a PNG.

    Parameters
    ----------
    it1, it2 : sequence
        ``(name, labels)`` pairs. ``name`` is a string used in the title, the
        axis labels and the file name; ``labels`` holds one cluster id per
        sample. The ids are assumed to be non-negative integers, because the
        matrix is built over ``0 .. highest id``.
    out_dir : str, optional
        Directory the image is written to, as ``<name1>_<name2>.png``.

    Returns
    -------
    None
        The function is called for its side effect (the file on disk).
    """
    import os  # local import so this cell does not depend on an edit to the import cell

    n1, lab1 = it1[0], it1[1]
    n2, lab2 = it2[0], it2[1]

    # Cover every cluster id that occurs in either allocation (0 .. highest id).
    n_labels = max(max(lab1), max(lab2)) + 1
    labels = list(range(n_labels))

    # Rows follow it1's clusters, columns follow it2's clusters.
    counts = confusion_matrix(lab1, lab2, labels=labels)
    cm_df = pd.DataFrame(counts)

    # Drop cluster ids that are unused by one of the two allocations
    # (all-zero rows / columns) so the heatmap only shows real clusters.
    nonzero = cm_df != 0
    cm_df = cm_df.loc[nonzero.any(axis=1), nonzero.any(axis=0)]

    title_tot = ('Heatmap of confusion matrix ' + n1 + ' & ' + n2).title()
    filename = os.path.join(out_dir, n1 + '_' + n2 + '.png')

    sns.set(font_scale=1.4)  # for label size

    # Work on an explicit figure/axes pair instead of pyplot's "current figure"
    # so nothing else in the kernel can be drawn on, saved or closed by mistake.
    fig, ax = plt.subplots(figsize=(10, 10))
    try:
        sns.heatmap(cm_df, annot=True, annot_kws={"size": 16}, fmt='d',
                    cbar_kws={'label': 'Number of maps assigned to cluster', 'shrink': .9},
                    ax=ax)
        ax.set_title(title_tot)
        ax.set_ylabel(n1 + ' Clusters')
        ax.set_xlabel(n2 + ' Clusters')
        ax.set_aspect('equal')
        fig.savefig(filename, format='png', bbox_inches="tight")
    finally:
        # Always release the figure, even when drawing or saving fails;
        # this function is called once per metric pair in a loop.
        plt.close(fig)

In [12]:
# Row labels shared by the distance matrices: 'map0' ... 'map1999'.
map_list = ['map' + str(i) for i in range(2000)]


def _read_indexed(path):
    """Read a CSV into a DataFrame and label its rows with ``map_list``."""
    frame = pd.read_csv(path)
    frame.index = map_list
    return frame


def _zero_diagonal(frame):
    """Return a copy of ``frame`` whose main diagonal is set to 0.

    Writing through ``frame.values`` is avoided on purpose: pandas may hand
    back a copy (the write is then lost) or a read-only array (the write
    raises), depending on dtypes and on the Copy-on-Write setting.
    """
    values = frame.values.copy()
    np.fill_diagonal(values, 0)
    return pd.DataFrame(values, index=frame.index, columns=frame.columns)


### Load Results ###
# Class 4 Residential
dist_tca4 = _read_indexed('C:/LUMOS/MCK/Output_DFs/TCA4_df.csv')
dist_pland4 = _read_indexed('C:/LUMOS/MCK/Output_DFs/PLAND4_df.csv')

# Class 14 Corn
dist_tca14 = _read_indexed('C:/LUMOS/MCK/Output_DFs/TCA14_df.csv')
dist_pland14 = _read_indexed('C:/LUMOS/MCK/Output_DFs/PLAND14_df.csv')

# Class 22 Nature
dist_tca22 = _read_indexed('E:/thesis_data/Output_DFs/Total Class Area[class 22]_df.csv')
dist_pland22 = _read_indexed('E:/thesis_data/Output_DFs/PLAND[class 22]_df.csv')

# OQD_22
# Note: unlike the other frames, no map labels are attached to this one.
qd22_df = _zero_diagonal(pd.read_csv('E:/thesis_data/Output_DFs/quantitydifferencecategorical_22_df.csv'))

# Non categorical metrics
# Shannon
dist_shan = _read_indexed('E:/thesis_data/Output_DFs/shannon_df.csv')

# Simpsons
dist_simp = _read_indexed('E:/thesis_data/Output_DFs/simp_df.csv')

# Kappa (this file carries its own index column); 1 - kappa turns agreement into a distance
kappa_df = pd.read_csv('E:/thesis_data/Output_DFs/kappa.csv', index_col='Unnamed: 0')
dist_kappa = 1 - kappa_df

# OA; 1 - accuracy turns agreement into a distance
oa_df = pd.read_csv('E:/thesis_data/Output_DFs/overallaccuracy_df.csv')
dist_oa = 1 - oa_df
dist_oa.index = map_list

# OAD
oad_df = _zero_diagonal(_read_indexed('E:/thesis_data/Output_DFs/overallallocationdifference_df.csv'))

# OD
od_df = _zero_diagonal(_read_indexed('E:/thesis_data/Output_DFs/overalldifference_df.csv'))

# OQD
oqd_df = _zero_diagonal(_read_indexed('E:/thesis_data/Output_DFs/overallquantitydifference_df.csv'))

In [15]:
"""Create the clusters for all metrics and assign labels"""
def _fit_complete_linkage(n_clusters, distances):
    """Fit complete-linkage agglomerative clustering on a precomputed distance matrix.

    scikit-learn renamed the ``affinity`` argument to ``metric`` (deprecated in
    1.2, removed in 1.4). The new keyword is tried first; releases that do not
    know it reject it with ``TypeError`` and get the old keyword instead.

    Returns the fitted ``AgglomerativeClustering`` estimator.
    """
    try:
        model = AgglomerativeClustering(n_clusters=n_clusters, metric='precomputed', linkage='complete')
    except TypeError:
        # Older scikit-learn: the constructor only accepts ``affinity``.
        model = AgglomerativeClustering(n_clusters=n_clusters, affinity='precomputed', linkage='complete')
    return model.fit(distances)


# One fitted model per metric; the cluster count is chosen per metric.
kappaclusters = _fit_complete_linkage(7, dist_kappa)
shannonclusters = _fit_complete_linkage(4, dist_shan)
simpsonclusters = _fit_complete_linkage(5, dist_simp)
oaclusters = _fit_complete_linkage(7, dist_oa)
odclusters = _fit_complete_linkage(7, od_df)
oadclusters = _fit_complete_linkage(6, oad_df)
oqdclusters = _fit_complete_linkage(4, oqd_df)
qd22clusters = _fit_complete_linkage(3, qd22_df)
tcacluster4 = _fit_complete_linkage(3, dist_tca4)
tcacluster14 = _fit_complete_linkage(5, dist_tca14)
tcacluster22 = _fit_complete_linkage(3, dist_tca22)
plandcluster4 = _fit_complete_linkage(3, dist_pland4)
plandcluster14 = _fit_complete_linkage(5, dist_pland14)
plandcluster22 = _fit_complete_linkage(3, dist_pland22)

# Cluster id assigned to each map, one array per metric.
kappa = kappaclusters.labels_
shan = shannonclusters.labels_
simp = simpsonclusters.labels_
oa = oaclusters.labels_
od = odclusters.labels_
oad = oadclusters.labels_
oqd = oqdclusters.labels_
qd22 = qd22clusters.labels_
tca4 = tcacluster4.labels_
tca14 = tcacluster14.labels_
tca22 = tcacluster22.labels_
pland4 = plandcluster4.labels_
pland14 = plandcluster14.labels_
pland22 = plandcluster22.labels_

In [45]:
# (display name, cluster labels) for every clustered metric.
metrics = [
    ('Kappa', kappa),
    ('Shannon', shan),
    ('Simpson', simp),
    ('Overall Accuracy', oa),
    ('Total Difference', od),
    ('Total Allocation Difference', oad),
    ('Total Quantity Difference', oqd),
    ('Quantity Difference [Nature]', qd22),
    ('Total Class Area [Residential]', tca4),
    ('Total Class Area [Corn]', tca14),
    ('Total Class Area [Nature]', tca22),
    ('Percentage Landscape [Residential]', pland4),
    ('Percentage Landscape [Corn]', pland14),
    ('Percentage Landscape [Nature]', pland22),
]

# Every unordered pair of metrics, in list order.
iterator = its.combinations(metrics, 2)

# compute_cm is called purely for its side effect: one image per pair.
for first_metric, second_metric in iterator:
    compute_cm(first_metric, second_metric)