In [4]:
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import sys 

In [5]:
# Convention quoted from the GERBIL wiki:
# https://github.com/dice-group/gerbil/wiki/Precision,-Recall-and-F1-measure
# In some rare cases, calculating precision or recall can cause a division by 0.
# For precision, this happens when the annotator returns no results, so both the true and the
# false positives are 0. GERBIL defines that if the true positives, false positives and false
# negatives are all 0, then precision, recall and F1-measure are 1. This can occur when the gold
# standard contains a document without any annotations and the annotator (correctly) returns none.
# If the true positives are 0 and one of the other two counters is larger than 0, precision,
# recall and F1-measure are 0.
# NOTE: the helper functions below return 0 whenever a denominator is 0, so the all-zero case
# yields 0 here rather than the 1 that GERBIL defines.

def calculate_precision(TP,FP):
    """
    Precision: the share of predicted positives that are correct.

    Args:
    TP (int): number of true positives
    FP (int): number of false positives

    Returns:
    float: TP / (TP + FP), or 0 when TP + FP is not greater than 0
    """
    predicted_positives = TP + FP
    # Guard against an empty prediction set (division by zero).
    if not predicted_positives > 0:
        return 0
    return TP / predicted_positives
    
def calculate_recall(TP,FN):
    """
    Recall: the share of actual positives that were found.

    Args:
    TP (int): number of true positives
    FN (int): number of false negatives

    Returns:
    float: TP / (TP + FN), or 0 when TP + FN is not greater than 0
    """
    actual_positives = TP + FN
    # Guard against an empty ground truth (division by zero).
    if not actual_positives > 0:
        return 0
    return TP / actual_positives
    
def calculate_F1(precision, recall):
    """
    F1 score: the harmonic mean of precision and recall.

    Args:
    precision (float): precision score
    recall (float): recall score

    Returns:
    float: 2 * (precision * recall) / (precision + recall), or 0 when both
    inputs are 0. The value is returned unrounded.
    """
    if precision != 0 or recall != 0:
        half_f1 = (precision * recall) / (precision + recall)
        return 2 * half_f1
    # Both scores are 0: the harmonic mean is undefined, report 0.
    return 0
    

def get_TP_FP_FN(gt_boxes, pred_boxes, iou_threshold):
    """
    Counts true positives (TP), false positives (FP) and false negatives (FN) given
    the ground truth bounding boxes and the predicted bounding boxes for a single class.

    Each predicted box is compared with every ground truth box and paired with the one
    giving the highest IoU. The prediction is a TP when that IoU reaches iou_threshold and
    the ground truth box has not already been claimed by an earlier prediction; otherwise
    it is an FP. Ground truth boxes that no prediction claimed are FNs. Predictions are
    processed in list order.

    Parameters:
    gt_boxes (list): List of ground truth bounding boxes in the format [x_min, y_min, x_max, y_max].
    pred_boxes (list): List of predicted bounding boxes in the same format as gt_boxes.
    iou_threshold (float): Intersection over Union (IoU) threshold used to determine true positives and false positives.

    Returns:
    Tuple (TP, FP, FN). True negatives are not computed here.
    """
    TP = 0
    FP = 0
    gt_used = set()

    for pred_box in pred_boxes:
        best_iou = 0
        best_gt_idx = None

        for gt_idx, gt_box in enumerate(gt_boxes):
            iou = calculate_iou(pred_box, gt_box)

            if iou > best_iou:
                best_iou = iou
                best_gt_idx = gt_idx

        # best_gt_idx stays None when the prediction overlaps no ground truth box.
        # Such a prediction is never a match, even for a threshold of 0 or below;
        # without this check None would be recorded as a "used" ground truth box.
        is_match = (
            best_gt_idx is not None
            and best_iou >= iou_threshold
            and best_gt_idx not in gt_used
        )
        if is_match:
            TP += 1
            gt_used.add(best_gt_idx)
        else:
            FP += 1

    # Every ground truth box that no prediction claimed is a miss.
    FN = len(gt_boxes) - len(gt_used)

    return TP, FP, FN

# https://pyimagesearch.com/2016/11/07/intersection-over-union-iou-for-object-detection/
def calculate_iou(box1, box2):
    """
    Intersection over union (IoU) of two axis-aligned bounding boxes.

    Parameters:
    box1 (list): Coordinates [x_min, y_min, x_max, y_max] of the first bounding box.
    box2 (list): Coordinates [x_min, y_min, x_max, y_max] of the second bounding box.

    Returns:
    The intersection area divided by the union area, or 0 when the union area is 0.
    """
    # Corners of the overlapping rectangle (if the boxes overlap at all).
    left = max(box1[0], box2[0])
    top = max(box1[1], box2[1])
    right = min(box1[2], box2[2])
    bottom = min(box1[3], box2[3])

    # A negative extent means no overlap along that axis; clamp it to 0.
    overlap_width = max(0, right - left)
    overlap_height = max(0, bottom - top)
    intersection = overlap_width * overlap_height

    first_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
    second_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
    union = first_area + second_area - intersection

    if union == 0:
        return 0
    return intersection / union
        
def calculate_Precision_Recall_F1(TP, FP, FN):
    """
    Derives precision, recall and F1 from raw detection counts.

    Args:
    TP (int): number of true positives
    FP (int): number of false positives
    FN (int): number of false negatives

    Returns:
    tuple: (precision, recall, F1)
    """
    precision_score = calculate_precision(TP, FP)
    recall_score = calculate_recall(TP, FN)
    return precision_score, recall_score, calculate_F1(precision_score, recall_score)

def process_row(row):
    """
    Parses the text form of a list of bounding boxes into integer boxes.

    The characters [ ] ( ) , ' are removed, the remainder is split on whitespace,
    and the tokens are consumed four at a time as (xtl, ytl, xbr, ybr). Each
    coordinate is parsed as a float and truncated to an int.

    Args:
    row (str): serialized boxes, for example "[(10.0, 20.0, 30.0, 40.0), (1, 2, 3, 4)]"

    Returns:
    list: boxes as [xtl, ytl, xbr, ybr] lists of ints. A group containing a token
    that is not a finite number is skipped, and trailing tokens that do not form
    a complete group of four are ignored.
    """
    # Strip the bracket/quote/comma characters, then split on any whitespace so
    # repeated spaces cannot produce empty tokens that shift the groups.
    tokens = row.translate(str.maketrans('', '', "[](),'")).split()

    boxes = []
    # Only walk complete groups of four; a dangling remainder is not a box.
    complete_length = len(tokens) - len(tokens) % 4
    for start in range(0, complete_length, 4):
        try:
            boxes.append([int(float(token)) for token in tokens[start:start + 4]])
        except (ValueError, OverflowError):
            # Non-numeric ("nan", text) or infinite coordinate: skip this box.
            continue
    return boxes

def get_only_validations(path):
    """
    Lists the entries of a directory with their file extension replaced by ".jpg".

    Args:
    path (str): directory to list

    Returns:
    list: one name per directory entry, in os.listdir order, built from the entry's
    name without its extension plus ".jpg"
    """
    # splitext handles extensions of any length; a fixed [:-4] slice only works
    # for three-character extensions.
    return [os.path.splitext(entry)[0] + ".jpg" for entry in os.listdir(path)]


# https://stackoverflow.com/questions/70097754/confusion-matrix-with-different-colors
def plot_confusion_matrix(resultsDF, modelName,TN):
    """
    Draws a 2x2 confusion matrix for one model and saves it as a PNG.

    The TP, FN and FP cells show the column sums of resultsDF; the TN cell shows the
    TN argument. An identity matrix is used as the heatmap data, so the diagonal cells
    (TP, TN) and the off-diagonal cells (FN, FP) receive different palette colours.

    Args:
    resultsDF (DataFrame): per-file results with 'TP', 'FP' and 'FN' columns
    modelName (str): used as the plot title and as the output file name
    TN (int): number of true negatives to display

    Side effects:
    Writes './ConfusionMatrixes/<modelName>.png', creating the directory if needed.
    """
    cell_counts = [[resultsDF['TP'].sum(), resultsDF['FN'].sum()],
                   [resultsDF['FP'].sum(), TN]]

    fig, ax = plt.subplots(figsize=(12, 8))
    try:
        sns.heatmap(np.eye(2), annot=cell_counts, fmt='g', annot_kws={'size': 50},
                    cmap=sns.color_palette(['tomato', 'palegreen'], as_cmap=True), cbar=False,
                    yticklabels=['True', 'False'], xticklabels=['True', 'False'], ax=ax)
        ax.xaxis.tick_top()
        ax.xaxis.set_label_position('top')
        ax.tick_params(labelsize=20, length=0)

        ax.set_title(modelName, size=24, pad=20)
        ax.set_xlabel('Predicted Values', size=20)
        ax.set_ylabel('Actual Values', size=20)

        additional_texts = ['(True Positive)', '(False Negative)', '(False Positive)', '(True Negative)']
        # Snapshot the annotation texts first: ax.text() below adds new texts to the axes.
        for text_elt, additional_text in zip(list(ax.texts), additional_texts):
            ax.text(*text_elt.get_position(), '\n' + additional_text, color=text_elt.get_color(),
                    ha='center', va='top', size=26)
        plt.tight_layout()

        # Do not rely on another cell having created the output directory.
        os.makedirs('./ConfusionMatrixes', exist_ok=True)
        fig.savefig('./ConfusionMatrixes/' + modelName + '.png')
    finally:
        # Always release the figure, even if drawing or saving fails.
        plt.close(fig)
    

In [3]:
# Create the output directories; exist_ok makes the cell safe to re-run and avoids
# the gap between checking for a directory and creating it.
for output_dir in ("./TotalResults", "./ConfusionMatrixes"):
    os.makedirs(output_dir, exist_ok=True)
# -------------------------------Preparations done-------------------------------------------- 

In [7]:
# Evaluates every prediction CSV in ./ProcessedAnnotations against the ground truth in
# processedCVAT.csv, saves one confusion matrix per model, and writes two summary CSVs
# (all files / validation files only) to ./TotalResults.

IoUthreshold = 0.5
# NOTE(review): machine-specific absolute path; adjust it to the local copy of the validation files.
validationFilesDir = "C:/Users/David/Skola-PC/OnlyVal/files"

groundTruthCSV = pd.read_csv('processedCVAT.csv')
groundTruths = pd.DataFrame(groundTruthCSV, columns=['FileName','Boxes'])

# Raw 'Boxes' text per file name, built once instead of scanning the frame for every
# prediction. If a FileName occurs more than once, the last row wins.
groundTruthByFile = dict(zip(groundTruths['FileName'], groundTruths['Boxes']))
# Ground truth rows whose 'Boxes' text is exactly two characters long
# (presumably the empty list "[]" - confirm against the CSV format).
emptyGroundTruths = groundTruths[groundTruths['Boxes'].str.len() == 2]

# Filter used to report metrics for the validation-set images only.
onlyValidations = get_only_validations(validationFilesDir)

overallResults = []
validationResults = []

# Go through every CSV with predictions, one model per file.
for path in os.listdir('./ProcessedAnnotations'):
    modelName = os.path.splitext(path)[0]
    predictedCSV = pd.read_csv(f'./ProcessedAnnotations/{path}')
    predicteds = pd.DataFrame(predictedCSV, columns=['FileName','Boxes'])
    results = []

    for _, predicted in predicteds.iterrows():
        fileName = predicted['FileName']

        # Parse the predictions whether or not the file has a ground truth row, so that
        # predictions for a file missing from the ground truth are counted as false positives.
        predictedBoxes = process_row(predicted['Boxes'])
        groundTruthRaw = groundTruthByFile.get(fileName)
        groundTruthBoxes = process_row(groundTruthRaw) if groundTruthRaw is not None else []

        TP, FP, FN = get_TP_FP_FN(groundTruthBoxes, predictedBoxes, IoUthreshold)
        precision, recall, F1 = calculate_Precision_Recall_F1(TP, FP, FN)
        results.append((fileName, precision, recall, F1, TP, FP, FN))

    # True negatives for the whole set: the loop above only sees files present in the
    # prediction CSV, so count the empty ground truth rows whose file is absent from it.
    TN = int((~emptyGroundTruths['FileName'].isin(predicteds['FileName'])).sum())

    resultsDF = pd.DataFrame(data = results, columns = ('FileName', 'Precision', 'Recall','F1','TP','FP','FN'))
    # Mean of the per-file precision and recall values.
    print(round(resultsDF['Precision'].mean(),3), round(resultsDF['Recall'].mean(),3))

    plot_confusion_matrix(resultsDF, modelName,TN)

    validationOnly = resultsDF[resultsDF['FileName'].isin(onlyValidations)]

    # Precision, recall and F1 computed from the summed counts of the validation files.
    validationTP, validationFP, validationFN = (validationOnly[column].sum() for column in ('TP', 'FP', 'FN'))
    validationPrecision, validationRecall, validationF1 = calculate_Precision_Recall_F1(validationTP, validationFP, validationFN)
    validationResults.append((modelName, round(validationPrecision,3), round(validationRecall,3), round(validationF1,3), validationTP, validationFP, validationFN))

    # The same metrics computed from the summed counts of all files.
    overallTP, overallFP, overallFN = (resultsDF[column].sum() for column in ('TP', 'FP', 'FN'))
    overallPrecision, overallRecall, overallF1 = calculate_Precision_Recall_F1(overallTP, overallFP, overallFN)
    overallResults.append((modelName, round(overallPrecision,3), round(overallRecall,3), round(overallF1,3), TN, overallTP, overallFP, overallFN))

overallDF = pd.DataFrame(data = overallResults, columns = ('ModelName', 'Precision', 'Recall','F1','TN','TP','FP','FN'))
overallDF.to_csv('./TotalResults/TotalResultsConfidenceWithAdjustedAnd.csv')

validationDF = pd.DataFrame(data = validationResults, columns = ('ModelName', 'Precision', 'Recall','F1','TP','FP','FN'))
validationDF.to_csv('./TotalResults/ValidationResultsWithCustomWithAdjustedAnd.csv')

# -------------------------------Everything saved-------------------------------------------- 

0.384 0.47
0.421 0.454
0.441 0.442
0.453 0.43
0.461 0.419
0.467 0.408
0.469 0.397
0.471 0.385
0.471 0.373
0.47 0.359
0.461 0.339
0.449 0.32
0.436 0.299
0.417 0.276
0.386 0.245
0.336 0.204
0.252 0.147
0.127 0.077
0.01 0.009
0.963 0.608
0.94 0.666
0.905 0.617
0.919 0.632
0.916 0.587
0.929 0.57
0.944 0.591
0.937 0.561
0.95 0.586
0.922 0.736
0.961 0.727
0.933 0.705
0.939 0.706
0.925 0.628
0.912 0.651
0.922 0.659
0.911 0.672
0.947 0.684
0.932 0.705
0.936 0.721
0.937 0.724
0.772 0.712
0.769 0.636
0.795 0.66
0.778 0.638
0.783 0.693
0.796 0.665
0.757 0.637
0.777 0.687
0.739 0.622
0.747 0.621
0.721 0.667
0.642 0.722
0.608 0.698
0.74 0.74
0.602 0.69
0.608 0.702
0.615 0.704
0.615 0.705
0.743 0.731
0.739 0.743
0.744 0.745
0.933 0.705
0.939 0.706
0.928 0.72
0.091 0.037
0.042 0.028
0.722 0.692
0.803 0.7
0.936 0.722
0.934 0.718
0.842 0.779
0.937 0.716
0.937 0.577
0.104 0.531
0.09 0.52
0.104 0.539
0.873 0.541
0.588 0.475
0.829 0.531
0.878 0.549
0.911 0.608
0.908 0.607
0.885 0.576
0.907 0.608
0.897 0.5

In [3]:
# Spot check: F1 score for precision 0.936 and recall 0.721.
print(calculate_F1(precision=0.936, recall=0.721))

0.8145515992757997
