In [None]:
import os
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Read the label table from the shared results folder and preview its first
# rows. Later cells read its 'file_name', 'cluster-number' and 'building_part'
# columns.
labeled_csv_path = '../../7. Results/labeled.csv'
labeled = pd.read_csv(labeled_csv_path)
labeled.head()

In [None]:
# Load every attribute table (one CSV per clustering result) found in the
# working directory, keyed by its file name. 'labeled.csv' is skipped because
# it holds the labels, not per-cluster attributes.
directory = './'

# os.listdir() returns entries in arbitrary, platform-dependent order, while
# later cells pair file_names[i] with models[i] by position. Sorting makes that
# pairing reproducible. The sort is case-insensitive because the `models` list
# is ordered that way (GuassianMixture < kMeans < SAM).
# NOTE(review): assumes the CSV files are named after the entries of `models`
# -- confirm against the actual directory content.
file_names = sorted(
    (
        filename
        for filename in os.listdir(directory)
        if filename.endswith('.csv') and filename != 'labeled.csv'
    ),
    key=str.lower,
)

# file name -> DataFrame with the contents of that CSV.
attributes_per_file = {
    filename: pd.read_csv(os.path.join(directory, filename))
    for filename in file_names
}

In [None]:
# Building-part categories; each one is a column in every result table.
building_parts = ['canopy', 'door', 'roof', 'stairs', 'wall', 'window']
# 'Building Part' / 'building_part' values that are skipped when counting.
non_part_rows = ('Remaining', 'TOTAL')


def _count_points(labeled_rows, attributes_df):
    """Count the points behind the per-building-part scores of one file.

    Parameters
    ----------
    labeled_rows : pd.DataFrame
        Rows of the label table that belong to one file. The columns
        'cluster-number' and 'building_part' are read.
    attributes_df : pd.DataFrame
        Attribute table of the same file. The columns 'Cluster',
        'Building Part' and 'Points' are read.

    Returns
    -------
    pd.DataFrame
        Index 0..2, columns 'label' followed by the building parts:
        row 0 - points of a labeled cluster that carry the cluster's label,
        row 1 - all points of the clusters labeled as that building part,
        row 2 - all points of that building part in the file.

    Raises
    ------
    KeyError
        If a building part outside `building_parts` is encountered.
    """
    correct = dict.fromkeys(building_parts, 0)
    classified = dict.fromkeys(building_parts, 0)
    total = dict.fromkeys(building_parts, 0)

    # Row 2: every point of a building part, regardless of its cluster.
    for part, points in zip(attributes_df['Building Part'], attributes_df['Points']):
        if part not in non_part_rows:
            total[part] += points

    # Rows 0 and 1: walk over the labeled clusters of this file.
    for cluster, part in zip(labeled_rows['cluster-number'], labeled_rows['building_part']):
        if part in non_part_rows:
            continue
        in_cluster = attributes_df[attributes_df['Cluster'] == cluster]
        row_part = in_cluster['Building Part']
        # .sum() collapses the lookup to a scalar: 0 when no row matches,
        # instead of adding an empty or multi-element array to a single cell.
        correct[part] += in_cluster.loc[row_part == part, 'Points'].sum()
        classified[part] += in_cluster.loc[row_part == 'TOTAL', 'Points'].sum()

    counts = pd.DataFrame([correct, classified, total], index=[0, 1, 2], columns=building_parts)
    # The text column is inserted separately so that the numeric columns keep
    # a numeric dtype and no string is written into an integer column.
    counts.insert(0, 'label', [
        'classified correct in building part',
        'all classified as building part',
        'all building part points',
    ])
    return counts


def _percentage(part_count, whole_count):
    """Return part_count / whole_count in percent (1 decimal), NaN if whole_count is 0."""
    if whole_count == 0:
        return np.nan
    return round(part_count / whole_count * 100, 1)


def _percentage_table(counts):
    """Turn the count table of `_count_points` into the percentage table.

    Returns a DataFrame with index 0..2 and columns 'label' + building parts:
    row 0 - the row-0 counts copied unchanged,
    row 1 - row 0 / row 1 of `counts` in percent (precision),
    row 2 - row 0 / row 2 of `counts` in percent (recall).
    Percentages with a zero denominator are NaN.
    """
    correct = [counts.loc[0, part] for part in building_parts]
    classified = [counts.loc[1, part] for part in building_parts]
    total = [counts.loc[2, part] for part in building_parts]

    table = pd.DataFrame(
        [
            correct,
            [_percentage(c, n) for c, n in zip(correct, classified)],
            [_percentage(c, n) for c, n in zip(correct, total)],
        ],
        index=[0, 1, 2],
        columns=building_parts,
    )
    # NOTE(review): the row-0 label talks about points that *must* be
    # classified, but the stored value is the correctly classified count
    # (row 0 of the count table) -- confirm which one is intended.
    table.insert(0, 'label', [
        'points that must be classified as building part',
        'classified correct from all points that are classified in building part (%)',
        'classified correct from all building part points (%)',
    ])
    return table


# file name -> count table / percentage table for that file.
all_accuracy_df = {}
all_percentage_df = {}

for file_name in file_names:
    accuracy_df = _count_points(
        labeled[labeled['file_name'] == file_name],
        attributes_per_file[file_name],
    )
    all_accuracy_df[file_name] = accuracy_df
    all_percentage_df[file_name] = _percentage_table(accuracy_df)

In [None]:
# Display names of the evaluated models. Later cells label the results of
# file_names[i] with models[i], so the order here has to match the order of
# file_names.
models = [
    'GaussianSplatting_GuassianMixture_n14',
    'GaussianSplatting_GuassianMixture_n7',
    'GaussianSplatting_kMeans_n14',
    'GaussianSplatting_kMeans_n7',
    'GaussianSplatting_SAM',
    'GeoslamPointCloud_GuassianMixture_n14',
    'GeoslamPointCloud_GuassianMixture_n7',
    'GeoslamPointCloud_kMeans_n14',
    'GeoslamPointCloud_kMeans_n7',
    'GeoslamPointCloud_SAM',
]

In [None]:
# Build one precision table and one recall table: a row per model and a column
# per building part, with values in percent.
building_parts = ['canopy', 'door', 'roof', 'stairs', 'wall', 'window']

# Results are labeled by position (file_names[i] -> models[i]); report a
# mismatch explicitly instead of failing with an IndexError mid-loop.
if len(file_names) > len(models):
    raise ValueError(
        f'{len(file_names)} result files found but only {len(models)} model names are defined'
    )

model_names = []
precision_rows = []
recall_rows = []
for model, file_name in zip(models, file_names):
    # Read the scores straight from the per-file percentage tables, so the
    # cell does not depend on an intermediate frame that is not defined in
    # this notebook. Row 1 is "correct / all points classified as the part"
    # (precision); row 2 is "correct / all points of the part" (recall).
    percentages = all_percentage_df[file_name]
    model_names.append(model)
    precision_rows.append(percentages.loc[1, building_parts].astype(float).tolist())
    recall_rows.append(percentages.loc[2, building_parts].astype(float).tolist())

model_index = pd.Index(model_names, name='model')

# Undefined scores (zero denominator -> NaN) are displayed as 0.
df_precision = pd.DataFrame(precision_rows, index=model_index, columns=building_parts).fillna(0)
df_recall = pd.DataFrame(recall_rows, index=model_index, columns=building_parts).fillna(0)

In [None]:
# Precision heatmap: one row per model, one column per building part, colored
# on a fixed 0-100 scale so the figure is comparable with the recall heatmap.
fig_precision, ax_precision = plt.subplots(figsize=(10, 6))
heatmap = sns.heatmap(
    df_precision,
    annot=True,
    cmap='RdYlGn',
    vmin=0,
    vmax=100,
    linewidths=.5,
    linecolor='gray',
    cbar=True,
    ax=ax_precision,
)
heatmap.set_title('Label Quality Precision results (%)')

# Show the colorbar ticks as percentages.
colorbar = heatmap.collections[0].colorbar
colorbar.set_ticks([0, 25, 50, 75, 100])
colorbar.set_ticklabels(['0%', '25%', '50%', '75%', '100%'])
plt.show()

In [None]:
# Recall heatmap: one row per model, one column per building part, colored on
# a fixed 0-100 scale so the figure is comparable with the precision heatmap.
fig_recall, ax_recall = plt.subplots(figsize=(10, 6))
heatmap = sns.heatmap(
    df_recall,
    annot=True,
    cmap='RdYlGn',
    vmin=0,
    vmax=100,
    linewidths=.5,
    linecolor='gray',
    cbar=True,
    ax=ax_recall,
)
heatmap.set_title('Label Quality Recall results (%)')

# Show the colorbar ticks as percentages.
colorbar = heatmap.collections[0].colorbar
colorbar.set_ticks([0, 25, 50, 75, 100])
colorbar.set_ticklabels(['0%', '25%', '50%', '75%', '100%'])
plt.show()

In [None]:
# Summarise every model by the mean and standard deviation of its precision
# and recall over all building parts.
# The table is built in one go from the rows of df_precision, so its length
# always equals the number of scored models and it gets a regular 0..n-1
# index. The first column keeps the name 'file_name' for the plotting cell,
# although its values are the index labels of df_precision (model names).
overall_columns = ['file_name', 'precision_mean', 'precision_std', 'recall_mean', 'recall_std']
overall_records = [
    {
        'file_name': model_name,
        'precision_mean': df_precision.loc[model_name].mean(),
        'precision_std': df_precision.loc[model_name].std(),
        'recall_mean': df_recall.loc[model_name].mean(),
        'recall_std': df_recall.loc[model_name].std(),
    }
    for model_name in df_precision.index
]
overall_df = pd.DataFrame(overall_records, columns=overall_columns)

In [None]:
# Short tick labels for the bar chart. Both data sources map to the same
# abbreviation, so each label appears twice on the axis.
abbreviations = {
    "GaussianSplatting_GuassianMixture_n14": "GMix_n14",
    "GaussianSplatting_GuassianMixture_n7": "GMix_n7",
    "GaussianSplatting_kMeans_n14": "kMeans_n14",
    "GaussianSplatting_kMeans_n7": "kMeans_n7",
    "GaussianSplatting_SAM": "SAM",
    "GeoslamPointCloud_GuassianMixture_n14": "GMix_n14",
    "GeoslamPointCloud_GuassianMixture_n7": "GMix_n7",
    "GeoslamPointCloud_kMeans_n14": "kMeans_n14",
    "GeoslamPointCloud_kMeans_n7": "kMeans_n7",
    "GeoslamPointCloud_SAM": "SAM",
}

# Work on a copy of the summary table with the names abbreviated.
data = overall_df.assign(file_name=overall_df['file_name'].replace(abbreviations))

fig, ax = plt.subplots(figsize=(10, 5))

# One group per model; the two bars of a group sit left and right of x.
x = np.arange(len(data))
width = 0.35

# (horizontal offset, mean column, std column, legend label, bar color)
bar_specs = [
    (-width / 2, 'precision_mean', 'precision_std', 'Precision', 'blue'),
    (width / 2, 'recall_mean', 'recall_std', 'Recall', 'orange'),
]
for offset, mean_col, std_col, legend_label, bar_color in bar_specs:
    # Standard deviations are drawn as error bars on top of the means.
    ax.bar(
        x + offset,
        data[mean_col],
        yerr=data[std_col],
        width=width,
        label=legend_label,
        color=bar_color,
        capsize=5,
    )

# Titles, ticks and legend.
ax.set(
    ylabel='Mean Precision and Recall',
    title='Average Precision and Recall over all building parts with Standard Deviations',
)
ax.set_xticks(x)
ax.set_xticklabels(data['file_name'], rotation=45, ha='right', fontsize=12)
ax.legend()
ax.set_facecolor("none")

# tight_layout keeps the rotated tick labels inside the figure; the PNG is
# written with a transparent background.
plt.tight_layout()
plt.savefig("plot_with_transparent_background.png", transparent=True, dpi=300)
plt.show()