In [110]:
import itertools as its
import os

import matplotlib.cm as cm
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics import confusion_matrix

## This notebook builds a confusion matrix for every pair of metric-based cluster assignments and saves each one to disk as a PNG image

In [138]:
def compute_cm(it1, it2, out_dir='E:/thesis_images/cms/'):
    """Plot the confusion matrix between two cluster assignments and save it as a PNG.

    Parameters
    ----------
    it1, it2 : tuple
        ``(name, labels)`` pairs. ``name`` is a string used for the axis label,
        the plot title and the file name. ``labels`` is a non-empty sequence of
        non-negative integer cluster ids; both sequences are handed to
        ``sklearn.metrics.confusion_matrix``.
    out_dir : str, optional
        Directory the image is written to. It is created when it does not exist.
        Defaults to the location that was previously hard-coded.

    Returns
    -------
    None
        The only result is the file ``<out_dir>/<name1>_<name2>.png``.

    Raises
    ------
    ValueError
        If either label sequence is empty (raised by ``max``).
    """
    name1, labels1 = it1[0], it1[1]
    name2, labels2 = it2[0], it2[1]

    # Both axes share one label range, 0..highest cluster id found in either
    # assignment, so the matrix is square even when the cluster counts differ.
    highest_cluster = max(max(labels1), max(labels2))
    labels = list(range(highest_cluster + 1))

    # Rows correspond to it1's clusters, columns to it2's.
    # Named `matrix` (not `cm`) to avoid shadowing the `matplotlib.cm` import.
    matrix = confusion_matrix(labels1, labels2, labels=labels)

    title_met = name1 + '&' + name2
    os.makedirs(out_dir, exist_ok=True)
    filename = os.path.join(out_dir, name1 + '_' + name2 + '.png')

    fig = plt.figure(figsize=(14, 14))
    try:
        sns.set(font_scale=1.4)  # for label size
        ax = sns.heatmap(matrix, annot=True, annot_kws={"size": 16}, fmt='d',
                         cbar_kws={'label': 'Cluster assignment between metrics'})
        ax.set_title('Heatmap of confusion matrix ' + title_met)
        ax.set_ylabel(name1)
        ax.set_xlabel(name2)
        fig.savefig(filename, format='png')
    finally:
        # Always release the figure, even if plotting or saving failed, so a
        # long run over many metric pairs does not accumulate open figures.
        plt.close(fig)

In [111]:
# --- Configuration ---------------------------------------------------------
N_MAPS = 2000  # every frame that receives `map_list` as index must have exactly this many rows
LUMOS_DIR = 'C:/LUMOS/MCK/Output_DFs/'
THESIS_DIR = 'E:/thesis_data/Output_DFs/'

# Row labels shared by the frames below: 'map0', 'map1', ..., 'map1999'.
map_list = ['map' + str(i) for i in range(N_MAPS)]


def _load_frame(path, index=None, **read_csv_kwargs):
    """Read a CSV file into a DataFrame and optionally replace its row index.

    Parameters
    ----------
    path : str
        Location of the CSV file.
    index : sequence, optional
        New row labels; must match the number of rows, otherwise pandas raises.
    **read_csv_kwargs
        Forwarded unchanged to ``pandas.read_csv``.
    """
    frame = pd.read_csv(path, **read_csv_kwargs)
    if index is not None:
        frame.index = index
    return frame


def _zero_diagonal(frame):
    """Return a copy of ``frame`` whose main diagonal is set to 0.

    Writing through ``frame.values`` only works while pandas hands out a
    writable view: it silently does nothing for mixed-dtype frames and fails
    when Copy-on-Write is enabled. Filling an explicit copy and rebuilding the
    frame avoids both problems.
    """
    values = frame.to_numpy(copy=True)
    np.fill_diagonal(values, 0)
    return pd.DataFrame(values, index=frame.index, columns=frame.columns)


### Load Results ###
# Class 4 Industrial
dist_tca4 = _load_frame(LUMOS_DIR + 'TCA4_df.csv', index=map_list)
dist_pland4 = _load_frame(LUMOS_DIR + 'PLAND4_df.csv', index=map_list)

# Class 14 Potatoes
dist_tca14 = _load_frame(LUMOS_DIR + 'TCA14_df.csv', index=map_list)
dist_pland14 = _load_frame(LUMOS_DIR + 'PLAND14_df.csv', index=map_list)

# Class 22 Nature
dist_tca22 = _load_frame(THESIS_DIR + 'Total Class Area[class 22]_df.csv', index=map_list)
dist_pland22 = _load_frame(THESIS_DIR + 'PLAND[class 22]_df.csv', index=map_list)

# OQD_22 (keeps the default integer row index, as before)
qd22_df = _zero_diagonal(_load_frame(THESIS_DIR + 'quantitydifferencecategorical_22_df.csv'))

# Non categorical metrics
# Shannon
dist_shan = _load_frame(THESIS_DIR + 'shannon_df.csv', index=map_list)

# Simpsons
dist_simp = _load_frame(THESIS_DIR + 'simp_df.csv', index=map_list)

# Kappa: this file carries its own row labels in the 'Unnamed: 0' column.
kappa_df = _load_frame(THESIS_DIR + 'kappa.csv', index_col='Unnamed: 0')
dist_kappa = 1 - kappa_df  # presumably turns agreement into a dissimilarity - confirm

# OA
oa_df = _load_frame(THESIS_DIR + 'overallaccuracy_df.csv')
dist_oa = 1 - oa_df  # presumably turns accuracy into a dissimilarity - confirm
dist_oa.index = map_list

# OAD
oad_df = _zero_diagonal(_load_frame(THESIS_DIR + 'overallallocationdifference_df.csv', index=map_list))

# OD
od_df = _zero_diagonal(_load_frame(THESIS_DIR + 'overalldifference_df.csv', index=map_list))

# OQD
oqd_df = _zero_diagonal(_load_frame(THESIS_DIR + 'overallquantitydifference_df.csv', index=map_list))

In [112]:
"""Create the clusters for all metrics and assign labels"""
# scikit-learn renamed AgglomerativeClustering's `affinity` keyword to `metric`
# (`affinity` was deprecated in 1.2 and removed in 1.4). Use whichever keyword
# the installed version accepts so this cell runs on old and new releases.
_DISTANCE_KWARG = 'metric' if 'metric' in AgglomerativeClustering().get_params() else 'affinity'


def _fit_complete_linkage(distances, n_clusters):
    """Fit complete-linkage agglomerative clustering on a precomputed matrix.

    Parameters
    ----------
    distances : array-like
        Passed to ``fit`` as a precomputed pairwise matrix.
    n_clusters : int
        Number of clusters to cut the hierarchy into.

    Returns
    -------
    AgglomerativeClustering
        The fitted estimator; cluster ids are available as ``labels_``.
    """
    model = AgglomerativeClustering(n_clusters=n_clusters, linkage='complete',
                                    **{_DISTANCE_KWARG: 'precomputed'})
    return model.fit(distances)


# One fitted model per metric; the cluster count is chosen per metric.
kappaclusters = _fit_complete_linkage(dist_kappa, n_clusters=7)
shannonclusters = _fit_complete_linkage(dist_shan, n_clusters=4)
simpsonclusters = _fit_complete_linkage(dist_simp, n_clusters=5)
oaclusters = _fit_complete_linkage(dist_oa, n_clusters=7)
odclusters = _fit_complete_linkage(od_df, n_clusters=7)
oadclusters = _fit_complete_linkage(oad_df, n_clusters=6)
oqdclusters = _fit_complete_linkage(oqd_df, n_clusters=4)
qd22clusters = _fit_complete_linkage(qd22_df, n_clusters=3)
tcacluster4 = _fit_complete_linkage(dist_tca4, n_clusters=3)
tcacluster14 = _fit_complete_linkage(dist_tca14, n_clusters=5)
tcacluster22 = _fit_complete_linkage(dist_tca22, n_clusters=3)
plandcluster4 = _fit_complete_linkage(dist_pland4, n_clusters=3)
plandcluster14 = _fit_complete_linkage(dist_pland14, n_clusters=5)
plandcluster22 = _fit_complete_linkage(dist_pland22, n_clusters=3)

# Cluster id assigned to each row of the corresponding matrix.
kappa = kappaclusters.labels_
shan = shannonclusters.labels_
simp = simpsonclusters.labels_
oa = oaclusters.labels_
od = odclusters.labels_
oad = oadclusters.labels_
oqd = oqdclusters.labels_
qd22 = qd22clusters.labels_
tca4 = tcacluster4.labels_
tca14 = tcacluster14.labels_
tca22 = tcacluster22.labels_
pland4 = plandcluster4.labels_
pland14 = plandcluster14.labels_
pland22 = plandcluster22.labels_

In [139]:
# (short name, cluster labels) for every metric; the short name ends up in the
# axis labels, the plot title and the output file name.
metrics = [
    ('kappa', kappa),
    ('shan', shan),
    ('simp', simp),
    ('oa', oa),
    ('od', od),
    ('oad', oad),
    ('oqd', oqd),
    ('qd22', qd22),
    ('tca4', tca4),
    ('tca14', tca14),
    ('tca22', tca22),
    ('pland4', pland4),
    ('pland14', pland14),
    ('pland22', pland22),
]

# Every unordered pair of metrics, each pair visited exactly once.
iterator = its.combinations(metrics, 2)

# compute_cm is called purely for its side effect (writing an image), so a
# plain loop states the intent more directly than draining a starmap.
for first, second in iterator:
    compute_cm(first, second)