In [None]:
import os
import re
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load every per-round hardware log for the selected method and stack them
# into a single frame, tagging each row with the "topN" level taken from the
# file name.
dataframes = []
directory = 'hardware_info_bert'
modelo = "bert"
metodo = "json"
# Raw f-string so that `\d` and `\.` reach the regex engine untouched (in a
# plain f-string they are invalid escape sequences); `re.escape` keeps any
# regex metacharacter in `metodo` literal.
pattern = re.compile(rf'^top(\d+)_{re.escape(metodo)}_round.*\.csv$')


# os.listdir returns entries in arbitrary order; sorting makes the row order
# of the combined frame reproducible across runs and machines.
for filename in sorted(os.listdir(directory)):
    match = pattern.match(filename)
    if not match:
        continue

    top_number = match.group(1)
    file_path = os.path.join(directory, filename)

    df = pd.read_csv(file_path)

    # Label every row with its source level, e.g. "top3".
    df['Top'] = f'top{top_number}'
    dataframes.append(df)

# pd.concat raises an unhelpful "No objects to concatenate" on an empty list;
# fail with the same exception type but say what was being searched for.
if not dataframes:
    raise ValueError(
        f"No CSV files matching {pattern.pattern!r} were found in {directory!r}"
    )

combined_df = pd.concat(dataframes, ignore_index=True)

In [None]:
# Columns of `combined_df` to visualise; one boxplot figure is drawn per column.
columns_of_interest = ['CPU', 'Memory', 'GPU', 'GPU_Memory']

# Fixed left-to-right category order on the x axis: top3 ... top10.
box_order = [f"top{level}" for level in range(3, 11)]

for column in columns_of_interest:
    plt.figure(figsize=(12, 5))
    sns.boxplot(
        data=combined_df,
        x='Top',
        y=column,
        order=box_order,
    )

    # CPU and GPU are titled as percentages; every other column as MB.
    if column in ('CPU', 'GPU'):
        title = f'Uso de {column} em % para execução do {modelo} com {metodo}'
    else:
        title = f'Uso de {column} em MB para execução do {modelo} com {metodo}'
    plt.title(title)

    plt.xlabel('Top Number')
    plt.ylabel(column)
    plt.xticks(rotation=45)
    plt.show()

In [None]:
# Average wall-clock duration per "topN" level: for every log file of the
# selected method, take the span between its first and last Timestamp row,
# then average those spans per level and draw them as a bar chart.
pattern = re.compile(r'top\d+')

# Maps "topN" -> list of per-file durations in seconds.
diferencas_por_topx = {}

# Sorted so the durations are accumulated (and summed) in a reproducible order.
for filename in sorted(os.listdir(directory)):
    if metodo not in filename:
        continue
    match = pattern.search(filename)
    if not match:
        continue

    top_number = match.group(0)

    filepath = os.path.join(directory, filename)
    df = pd.read_csv(filepath)
    df['Timestamp'] = pd.to_datetime(df['Timestamp'])
    # Duration is last row minus first row, so this relies on the rows being
    # in chronological order -- NOTE(review): confirm the logs are written
    # in time order.
    time_difference = (df['Timestamp'].iloc[-1] - df['Timestamp'].iloc[0]).total_seconds()

    diferencas_por_topx.setdefault(top_number, []).append(time_difference)


media_por_topx = {topx: sum(diferencas) / len(diferencas) for topx, diferencas in diferencas_por_topx.items()}


order = ["top3", "top4", "top5", "top6", "top7", "top8", "top9", "top10"]
# Keep only the levels that actually have data; indexing every entry of
# `order` unconditionally raised KeyError as soon as one level had no files.
media_por_topx_sorted = {k: media_por_topx[k] for k in order if k in media_por_topx}

topx = list(media_por_topx_sorted.keys())
media_tempo = list(media_por_topx_sorted.values())

# Build the bar chart
plt.figure(figsize=(10, 6))
plt.bar(topx, media_tempo, color='skyblue')

plt.xlabel('TopX')
plt.ylabel('Média de Tempo (segundos)')
plt.title(f'Média de Tempo por TopX com {modelo} em {metodo}')

# Show the chart
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
