In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Directory holding one accuracy-score CSV per clustering model.
directory = '../../7. Results/Accuracy csv scores'

# attributes_per_file maps CSV file name -> DataFrame with that file's contents.
# file_names keeps the same file names as a list so they can be addressed by position.
attributes_per_file = {}
file_names = []

# os.listdir() returns entries in arbitrary, platform-dependent order, while the manual
# labelling cell further down addresses the models by position (file_names[0], ...).
# Sorting case-insensitively makes the order deterministic and reproduces the order that
# the labelling comments assume (GS_GaussianMixture14, GS_GaussianMixture7, GS_kMeans14,
# GS_kMeans7, GS_SAM, PC_...).
for filename in sorted(os.listdir(directory), key=str.lower):
    if filename.endswith('.csv'):
        file_names.append(filename)
        file_path = os.path.join(directory, filename)
        df = pd.read_csv(file_path)
        attributes_per_file[filename] = df

Show a bar plot per model: the cluster distribution of all points classified in a specific building part.

In [None]:
# One grouped bar chart per model: for every building part, the share (in %) of each cluster,
# normalised so that the bars of one building part add up to 100.
for file_name, df in attributes_per_file.items():
    # The 'TOTAL' summary row is not a building part and must not be plotted.
    df_filtered = df[df['Building Part'] != 'TOTAL']
    pivot_df = df_filtered.pivot_table(index='Building Part', columns='Cluster', values='Percentage', fill_value=0)
    # Row-normalise to percentages; a building part whose values sum to 0 would yield NaN
    # (0/0), so those rows are shown as 0 % instead.
    pivot_df = (pivot_df.div(pivot_df.sum(axis=1), axis=0) * 100).fillna(0)

    fig, ax = plt.subplots(figsize=(14, 8))
    num_clusters = len(pivot_df.columns)
    num_building_parts = len(pivot_df.index)
    # Each building part occupies 0.8 of its unit-wide slot, shared equally by all clusters.
    bar_width = 0.8 / num_clusters
    positions = np.arange(num_building_parts)
    # plt.cm.get_cmap was deprecated and later removed from matplotlib; plt.get_cmap is the
    # supported equivalent and still accepts the number of colours to resample to.
    cmap = plt.get_cmap('tab20', num_clusters)

    for i, cluster in enumerate(pivot_df.columns):
        bar_centres = positions + i * bar_width
        ax.bar(bar_centres, pivot_df[cluster], width=bar_width, label=f'Cluster {cluster}', color=cmap(i))
        # Write the percentage just above every bar, rotated so neighbouring labels do not overlap.
        for x_centre, yval in zip(bar_centres, pivot_df[cluster]):
            ax.text(x_centre, yval + 0.5, f'{yval:.1f}%', ha='center', va='bottom', rotation=90)

    ax.set_xlabel('Building Part')
    ax.set_ylabel('Percentage')
    ax.set_title(f'Distribution per cluster for every building part, model: {file_name}')
    # Bars are centre-aligned, so the middle of a group lies at (num_clusters - 1) / 2 bar widths
    # from the first bar's centre.
    ax.set_xticks(positions + bar_width * (num_clusters - 1) / 2)
    ax.set_xticklabels(pivot_df.index, rotation=45)
    ax.legend(title='Cluster', bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.tight_layout()
    plt.show()

Show a bar plot per model: the building-part distribution of all points classified in a specific cluster.

In [None]:
# One grouped bar chart per model showing the raw 'Percentage' values from the CSV:
# building parts along the x-axis, one bar per cluster inside every building part.
for file_name, df in attributes_per_file.items():
    # The 'TOTAL' summary row is not a building part and must not be plotted.
    df_filtered = df[df['Building Part'] != 'TOTAL']
    pivot_df = df_filtered.pivot_table(index='Building Part', columns='Cluster', values='Percentage', fill_value=0)

    fig, ax = plt.subplots(figsize=(18, 8))
    num_clusters = len(pivot_df.columns)
    num_building_parts = len(pivot_df.index)
    # bar_width must be computed here for *this* model. Previously it was left over from the
    # last iteration of the preceding cell, which fails on a fresh kernel and gives wrong
    # widths for models with a different number of clusters.
    bar_width = 0.8 / num_clusters
    positions = np.arange(num_building_parts)
    # plt.cm.get_cmap was deprecated and later removed from matplotlib; plt.get_cmap is the
    # supported equivalent and still accepts the number of colours to resample to.
    cmap = plt.get_cmap('tab20', num_clusters)

    for i, cluster in enumerate(pivot_df.columns):
        bars = ax.bar(positions + i * bar_width, pivot_df[cluster], width=bar_width, label=f'Cluster {cluster}', color=cmap(i))
        for bar in bars:
            yval = bar.get_height()
            # Empty bars get their label placed closer to the axis than non-empty ones.
            label_offset = 0.1 if yval == 0 else 0.5
            ax.text(bar.get_x() + bar.get_width() / 2, yval + label_offset, f'{yval:.1f}%', ha='center', va='bottom', rotation=90)

    ax.set_xlabel('Building Part')
    ax.set_ylabel('Percentage')
    ax.set_title(f'Distribution per cluster in every building part, model: {file_name}')
    # Bars are centre-aligned, so the middle of a group lies at (num_clusters - 1) / 2 bar widths
    # from the first bar's centre.
    ax.set_xticks(positions + bar_width * (num_clusters - 1) / 2)
    ax.set_xticklabels(pivot_df.index, rotation=45)
    # Dashed separators in the gap between neighbouring building-part groups.
    for i in range(num_building_parts - 1):
        ax.axvline(x=positions[i] + (bar_width * num_clusters), color='gray', linestyle='--', linewidth=0.7)
    ax.legend(title='Cluster', bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.tight_layout()
    plt.show()

In [None]:
# TODO: define, for every cluster in every model, which building part it must be. Do this
# based on the scores between every cluster and every building part plotted above.
# Example: for the Gaussian mixture model with 7 clusters, cluster 2 is manually labelled
# below as stairs.
#
# Each entry is (index into file_names, number of the first cluster, building part of every
# consecutive cluster). The model named in the trailing comment is the file expected at that
# index; this relies on file_names being in that order.
MANUAL_LABELS = [
    (0, 0, [  # GS_GaussianMixture14
        'wall', 'Remaining', 'stairs', 'Remaining', 'door', 'wall', 'window',
        'roof', 'Remaining', 'Remaining', 'Remaining', 'stairs', 'canopy', 'Remaining',
    ]),
    (1, 0, [  # GS_GaussianMixture7
        'canopy', 'Remaining', 'stairs', 'door', 'Remaining', 'roof', 'wall',
    ]),
    (2, 0, [  # GS_kMeans14
        'door', 'Remaining', 'Remaining', 'Remaining', 'canopy', 'roof', 'Remaining',
        'door', 'Remaining', 'window', 'canopy', 'wall', 'stairs', 'wall',
    ]),
    (3, 0, [  # GS_kMeans7
        'stairs', 'Remaining', 'door', 'canopy', 'roof', 'window', 'wall',
    ]),
    (4, 1, [  # GS_SAM.csv (cluster numbering starts at 1)
        'Remaining', 'Remaining', 'stairs', 'roof', 'wall',
        'Remaining', 'Remaining', 'roof', 'window', 'wall',
    ]),
    (5, 0, [  # PC_GaussianMixture14.csv
        'wall', 'Remaining', 'Remaining', 'roof', 'Remaining', 'Remaining', 'wall',
        'stairs', 'window', 'door', 'door', 'roof', 'canopy', 'Remaining',
    ]),
    (6, 0, [  # PC_GaussianMixture7.csv
        'door', 'Remaining', 'canopy', 'stairs', 'window', 'wall', 'roof',
    ]),
    (7, 0, [  # PC_kMeans14.csv
        'roof', 'wall', 'wall', 'door', 'wall', 'canopy', 'Remaining',
        'stairs', 'roof', 'window', 'Remaining', 'Remaining', 'Remaining', 'wall',
    ]),
    (8, 0, [  # PC_kMeans7.csv
        'stairs', 'roof', 'window', 'door', 'canopy', 'wall', 'Remaining',
    ]),
    (9, 1, [  # PC_SAM.csv (cluster numbering starts at 1)
        # clusters 1-10
        'Remaining', 'Remaining', 'window', 'wall', 'roof', 'wall', 'window', 'wall', 'wall', 'stairs',
        # clusters 11-20
        'door', 'Remaining', 'door', 'door', 'Remaining', 'stairs', 'Remaining', 'window', 'canopy', 'Remaining',
        # clusters 21-30
        'window', 'Remaining', 'Remaining', 'Remaining', 'canopy', 'stairs', 'wall', 'window', 'door', 'roof',
        # clusters 31-40
        'wall', 'window', 'window', 'canopy', 'window', 'door', 'Remaining', 'Remaining', 'wall', 'stairs',
        # clusters 41-50
        'door', 'window', 'window', 'roof', 'stairs', 'Remaining', 'stairs', 'stairs', 'Remaining', 'door',
        # clusters 51-60
        'stairs', 'stairs', 'door', 'roof', 'canopy', 'door', 'window', 'wall', 'stairs', 'window',
        # cluster 61
        'Remaining',
    ]),
]

# Fail with a clear message instead of a bare "list index out of range" when fewer result
# files were loaded than models are labelled here.
highest_file_idx = max(file_idx for file_idx, _, _ in MANUAL_LABELS)
if len(file_names) <= highest_file_idx:
    raise IndexError(
        f'Manual labels refer to file_names[{highest_file_idx}] but only '
        f'{len(file_names)} CSV files were loaded.'
    )

# Build all rows first and create the DataFrame once; growing a frame row by row with .loc
# is quadratic and required hand-maintained row offsets.
label_records = [
    (file_names[file_idx], f'Cluster {first_cluster + offset}', building_part)
    for file_idx, first_cluster, building_parts in MANUAL_LABELS
    for offset, building_part in enumerate(building_parts)
]
labeled = pd.DataFrame(label_records, columns=['file_name', 'cluster-number', 'building_part'])

In [None]:
# Persist the manual cluster -> building-part labels; the row index is left out of the file.
labeled_csv_path = '../../7. Results/labeled.csv'
labeled.to_csv(labeled_csv_path, index=False)