# Results Analysis

In [109]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import commonPaths
from collections import OrderedDict
import os

In [110]:
# Names of the YOLOv8 models whose results are analysed; they are matched
# against the result file names (without the .pt extension).
modelNames = [f"yolov8{suffix}" for suffix in ("n", "s", "m", "l", "x")]

In [111]:
def providePathsForResultsAnalysis(forTrain):
    '''
        ### providePathsForResultsAnalysis
        Collects the locations the results analysis reads from.
        The speed-results file and the mAP-results directory differ between the train and val set;
        the parameters data entry is shared by both.
        :param forTrain: if truthy, the train-set paths are returned; otherwise the val-set paths
        :return: dictionary with the keys "SPEED_RESULTS", "MAP_RESULTS_DIR" and "PARAMETERS_DATA"
    '''
    if forTrain:
        speedResultsFile = commonPaths.SPEED_RESULTS_TRAIN_FILE
        mapResultsDir = commonPaths.MAP_RESULTS_TRAIN_DIR
    else:
        speedResultsFile = commonPaths.SPEED_RESULTS_VAL_FILE
        mapResultsDir = commonPaths.MAP_RESULTS_VAL_DIR

    return {
        "SPEED_RESULTS": speedResultsFile,
        "MAP_RESULTS_DIR": mapResultsDir,
        "PARAMETERS_DATA": commonPaths.PARAMETERS_DATA,
    }

In [112]:
def readData(modelNames, isTrain=False):
    '''
      ### readData
      reads data about parameters, speed results and mAP from data directory.

      Every regular file found in the mAP results directory is read as a header-less CSV.
      The model name is taken from the file name: the text after the last "_" and before the first ".".
      Sub-directories (e.g. Jupyter's .ipynb_checkpoints) are skipped.

      :param modelNames: list with the names of models matching those in file names (without .pt)
      :param isTrain: defines if data should be returned for train set. (The data will be for val set otherwise)
      :return parametersData: DataFrame with YOLO model name and number of parameters in Millions
      :return speedResults: DataFrame with YOLO model name and time needed for making predictions in seconds.
      :return mAPResults: Dictionary where: Key=YOLO model name, Value=DataFrame with 2 columns: Category and Average Precision.
                          A model from modelNames without a matching file keeps the value None.
    '''
    paths = providePathsForResultsAnalysis(isTrain)

    parametersData = pd.read_csv(paths["PARAMETERS_DATA"], index_col=0)
    parametersData.columns = ["Parameters (M)"]

    speedResults = pd.read_csv(paths["SPEED_RESULTS"], index_col=0)
    speedResults.columns = ["Time (sec)"]

    # Pre-seed the keys so the dictionary order follows modelNames instead of
    # the arbitrary order returned by os.listdir.
    mAPResults = OrderedDict.fromkeys(modelNames)
    mapResultsDir = paths["MAP_RESULTS_DIR"]
    for fileName in os.listdir(mapResultsDir):
        # os.path.join works whether or not the directory ends with a separator.
        filePath = os.path.join(mapResultsDir, fileName)
        if not os.path.isfile(filePath):
            # Directories cannot be parsed as CSV; ignore them.
            continue
        name = fileName.split("_")[-1].split(".")[0]
        mAP = pd.read_csv(filePath, header=None)
        mAP.columns = ["Category", "Average Precision"]
        mAPResults[name] = mAP
    return parametersData, speedResults, mAPResults

In [113]:
# Load parameter counts, speed results and per-category AP for the val set.
(parametersDataVal,
 speedResultsVal,
 mAPResultsVal) = readData(modelNames, isTrain=False)

In [114]:
# Load parameter counts, speed results and per-category AP for the train set.
(parametersDataTrain,
 speedResultsTrain,
 mAPResultsTrain) = readData(modelNames, isTrain=True)

In [115]:
def calculateMAP(mAPResults):
    '''
        ### calculateMAP
        calculates mAP over all classes for each model and creates a dictionary.
        The mAP is the sum of the last column of the model's DataFrame divided by its number of rows.
        A model whose entry is None (no results were loaded for it) or whose DataFrame has no rows
        gets NaN instead of raising an error.

        :param mAPResults: Dictionary where: Key=YOLO model name, Value=DataFrame with 2 columns: Category and Average Precision
        :return: Dictionary (same key order as the input) where: Key=YOLO model name, Value=mAP averaged over all classes
    '''
    mAPs = OrderedDict()
    for name, result in mAPResults.items():
        # readData pre-seeds every model name with None, so a missing results
        # file shows up here as None; an empty frame would divide by zero.
        if result is None or result.shape[0] == 0:
            mAPs[name] = float("nan")
            continue
        mAPs[name] = result.iloc[:, -1].sum() / result.shape[0]
    return mAPs

In [116]:
# Per-model mAP (averaged over all categories) for the val and train results.
mAPsVal = calculateMAP(mAPResultsVal)
mAPsTrain = calculateMAP(mAPResultsTrain)

# TODO
* bar plots - parameters, speed, and mAP per model
* line plots - mAP vs. parameters, mAP vs. speed