In [None]:
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.patches as patches
import seaborn as sns
import os
import glob
import csv
from natsort import natsorted
from itertools import chain
import scipy.stats as stats
from scipy.stats import brunnermunzel
from tqdm.notebook import tqdm

In [None]:
# Directory layout, relative to the parent of the working directory.
current_dir = ".."
data_dir = f"{current_dir}/raw_csv"        # raw input CSV files are globbed from here
result_dir = f"{current_dir}/reshape_csv"  # all generated files are written here

In [None]:
# row-wise concatenation helper
def fast_concat(dfs):
    """Stack a sequence of DataFrames row-wise into one new DataFrame.

    Each column is rebuilt by chaining the values of that column from every
    frame in order, instead of calling ``pd.concat``.

    Parameters
    ----------
    dfs : sequence of pandas.DataFrame
        Frames to stack. The column set and column order are taken from the
        first frame; every other frame must contain those columns
        (a missing column raises ``KeyError``).

    Returns
    -------
    pandas.DataFrame
        All rows of all frames, in input order, with a fresh 0..n-1 index
        (the original indexes are discarded). An empty input yields an empty
        DataFrame instead of failing on ``dfs[0]``.
    """
    if len(dfs) == 0:
        return pd.DataFrame()

    col_names = dfs[0].columns
    # One independent list per column (no shared placeholder list).
    df_dict = {
        col: list(chain.from_iterable(d[col] for d in dfs))
        for col in col_names
    }

    # Re-select with col_names to pin the column order of the first frame.
    return pd.DataFrame.from_dict(df_dict)[col_names]

In [None]:
# Column names assigned to every raw CSV when it is loaded.
# Wider layout kept for reference (not used by the loader):
# colums_name = ["frame", "time", "GPU", "size", "model"]
colums_name = [
    "frame",
    "time",
]

In [None]:
# Discover the input files. glob returns paths in arbitrary (filesystem
# dependent) order, so sort them to keep the row order of the combined CSV
# and the file manifest reproducible from run to run.
csvs = sorted(glob.glob(data_dir + "/*.csv"))

# Accumulators filled while the files are loaded.
df = pd.DataFrame()
dfs = []
list_file = []
gpulist = []
# Sizes to analyse; kept as strings because they are compared with the
# string taken from the file name.
sizelist = ["640", "1280"]
# modellist = ["yolov5l", "yolov5m", "yolov5x", "yolov5s", "yolov5n" ]
modellist = []

In [None]:
# Load every raw CSV, tag its rows with the metadata encoded in the file
# name, then write the combined table, an RTX 4080 subset and a manifest of
# the loaded files.
MAX_ROWS_PER_FILE = 50000  # only the first 50,000 data rows of a file are used
RTX4080_NAME = "NVIDIA GeForce RTX 4080"

# Start from empty accumulators so that re-running this cell does not load
# the same files a second time on top of the previous run.
dfs = []
list_file = []
gpulist = []
modellist = []

# The loop variable is `csv_path`, not `csv`, so the imported `csv` module is
# not overwritten.
for csv_path in tqdm(csvs):
    # extract info from file name: "<size>_<model>_<...>_<GPU>..."
    fname = os.path.basename(csv_path)
    parts = fname.split('_')
    if len(parts) < 4:
        raise ValueError(
            "Unexpected file name (need at least 4 '_'-separated fields): " + fname
        )

    # Skip the first row of the file and read at most MAX_ROWS_PER_FILE rows.
    # Limiting via `nrows` yields an independent frame, so adding columns
    # below does not operate on a slice of a larger frame.
    dfss = pd.read_csv(
        csv_path,
        header=None,
        skiprows=1,
        names=colums_name,
        nrows=MAX_ROWS_PER_FILE,
    )
    dfss["size"] = parts[0]
    dfss["model"] = parts[1]
    dfss["GPU"] = parts[3]

    gpulist.append(str(parts[3]))
    modellist.append(str(parts[1]))
    dfs.append(dfss)
    list_file.append(fname)

if not dfs:
    raise FileNotFoundError("No CSV files to load: `csvs` is empty, check `data_dir`.")

# Make sure the output directory exists before anything is written to it.
os.makedirs(result_dir, exist_ok=True)

df = fast_concat(dfs)
csv_save_path = result_dir + "/results_data.csv"
df.to_csv(csv_save_path, index=False)

df_4080 = df[df["GPU"] == RTX4080_NAME]
print(df_4080)
csv_4080_save_path = result_dir + "/results_data_rtx4080.csv"
df_4080.to_csv(csv_4080_save_path, index=False)

# De-duplicate; sorting gives a stable order (a bare set has none).
gpulist = sorted(set(gpulist))
modellist = sorted(set(modellist))
print(gpulist)

# Manifest of the loaded files, one name per line. UTF-8 matches the default
# encoding pandas uses for the CSV files written above.
d = "\n".join(list_file)
info_path = result_dir + "/info.txt"
with open(info_path, 'w', encoding="utf-8") as f:
    f.write(d)

In [None]:
# statistical analysis
# One row of describe() statistics of "time" (count, mean, std, min,
# quartiles, max) for every GPU x size x model combination. A combination
# without data still gets a row (count 0, the rest NaN).
stat_rows = []
for gpu in tqdm(gpulist):
    f_df = df[df["GPU"] == gpu]
    for size in sizelist:
        fi_df = f_df[f_df["size"] == size]
        for model in modellist:
            filtered_df = fi_df[fi_df["model"] == model]
            # describe() has one column per numeric field; keep only "time"
            # and turn its statistics into a single row.
            selected_stats = filtered_df.describe()[['time']].transpose()
            selected_stats["GPU"] = gpu
            selected_stats["size"] = size
            selected_stats["model"] = model
            stat_rows.append(selected_stats)

# Concatenate once at the end rather than growing a frame inside the loop;
# pd.concat refuses an empty list, so fall back to an empty frame.
if stat_rows:
    statistical_df = pd.concat(stat_rows, axis=0).reset_index(drop=True)
else:
    statistical_df = pd.DataFrame()

os.makedirs(result_dir, exist_ok=True)
stats_save_path = result_dir + "/statistical_results.csv"
statistical_df.to_csv(stats_save_path, index=False)
# Only the consolidated table is printed, once.
print(statistical_df)

In [None]:
# statistical analysis reshape
# Build one row per (GPU, size) holding a "mean±std" string per model, with
# the GPUs ordered by the mean of SORT_MODEL at SORT_SIZE (ascending).
SORT_MODEL = "yolov5s"  # model whose mean decides the GPU order
SORT_SIZE = "640"       # size at which that mean is taken
# Output column order; models outside this list are not written, models in
# the list without data become empty columns.
name_list = ["GPU", "size", "yolov5n", "yolov5s", "yolov5m", "yolov5l", "yolov5x"]

rows_by_gpu = {}      # GPU -> its rows, in sizelist order
sort_key_by_gpu = {}  # GPU -> sort value, NaN when it cannot be determined
for gpu_s in tqdm(gpulist):
    s_f_df = statistical_df[statistical_df["GPU"] == gpu_s]
    rows_by_gpu[gpu_s] = []
    # Explicit default so a GPU without SORT_MODEL/SORT_SIZE data never
    # inherits the value computed for a previous GPU.
    sort_key_by_gpu[gpu_s] = float("nan")
    for size_s in sizelist:
        s_fi_df = s_f_df[s_f_df["size"] == size_s]
        row = {"GPU": gpu_s, "size": size_s}
        for model in modellist:
            s_filtered_df = s_fi_df[s_fi_df["model"] == model]
            if s_filtered_df.empty:
                # No statistics for this combination: leave the cell empty.
                continue
            # Scale by 1000 and round to one decimal
            # (presumably seconds -> milliseconds; confirm the unit of "time").
            mean = round(s_filtered_df["mean"].iloc[0] * 1000, 1)
            std = round(s_filtered_df["std"].iloc[0] * 1000, 1)
            row[model] = str(mean) + "±" + str(std)
            if model == SORT_MODEL and size_s == SORT_SIZE:
                sort_key_by_gpu[gpu_s] = mean
        rows_by_gpu[gpu_s].append(row)

# Order the GPUs by their sort value. NaN keys go last and the stable sort
# keeps ties in their original order. All rows of a GPU stay together, in
# sizelist order, for any number of sizes.
gpu_order = (
    pd.Series(sort_key_by_gpu, dtype="float64")
    .sort_values(kind="mergesort")
    .index.tolist()
)
records = [row for gpu in gpu_order for row in rows_by_gpu[gpu]]

result_df = pd.DataFrame(records).reindex(columns=name_list)

os.makedirs(result_dir, exist_ok=True)
stats_reshape_save_path = result_dir + "/statistical_reshape_results.csv"
result_df.to_csv(stats_reshape_save_path, index=False)
print(result_df)