In [None]:
%%capture
import pandas as pd
import numpy as np
import os
import yaml
from glob import glob

from sklearn.metrics import normalized_mutual_info_score as NMI

In [None]:
# Notebook-wide configuration.
EXEC_N = 3   # NOTE(review): presumably the number of executions per setting; not referenced in the visible cells
CLUST_N = 3  # number of consecutive score lines in a log that are grouped under one "exec<k>" label
# One prompts.log is expected three directory levels below the working directory.
prompt_files = glob('*/*/*/prompts.log')
# Names of the score columns, in the order the scores are collected from each log.
score_columns = ["nmi", "nmi_class", "accus"]
# Dataset-pair names; spelled to agree with the values of the dataset-name conversion
# table used later in the notebook ("webcam_amazon", previously misspelled here).
dset_names = ["amazon_dslr", "dslr_webcam", "webcam_amazon"]

In [None]:
# Build one summary row per (log file, execution): the mean and standard deviation
# of every score over the CLUST_N consecutive score lines that form one execution.
df_list = []
dfg_list = []
for prompts in prompt_files:
    with open(prompts, 'r') as f:
        logs = f.read()
    split_texts = logs.split('\n')
    # Keep only the score lines; the value is the text after the last ':'.
    nmis = [split_text.split(':')[-1] for split_text in split_texts if "nmi:" in split_text]
    nmi_classes = [split_text.split(':')[-1] for split_text in split_texts if "nmi_class:" in split_text]
    accs = [split_text.split(':')[-1] for split_text in split_texts if "domain_accuracy:" in split_text]
    # Every CLUST_N consecutive score lines share the same execution label.
    exec_nums = [f"exec{i//CLUST_N}" for i in range(len(nmis))]
    df = pd.DataFrame(np.array([exec_nums, nmis, nmi_classes, accs]).T, columns=["exec_num", *score_columns])
    df[score_columns] = df[score_columns].astype(float)

    key_columns = ['exec_num']  # key column used when joining the mean and std frames
    dfg_mean = df.groupby(key_columns).mean().reset_index()
    dfg_std = df.groupby(key_columns).std().reset_index().drop(key_columns, axis=1)
    dfg_std.columns = [f"{c}_std" for c in dfg_std.columns]
    dfg = pd.concat([dfg_mean, dfg_std], axis=1)

    # Split the path on the platform separator instead of a hard-coded backslash:
    # glob yields '/'-separated paths on POSIX, where splitting on '\\' returned a
    # single element and the [-2] lookup below raised IndexError.
    path_parts = os.path.normpath(prompts).split(os.sep)
    dfg.insert(0, 'theme', path_parts[0])
    dfg.insert(1, 'dsets', path_parts[-2][:2])  # first two characters of the log's parent directory
    dfg.insert(2, 'out_dim', path_parts[1])  # cuda_dir; converted to out_dim below
    dfg_list.append(dfg)

if not dfg_list:
    # pd.concat([]) would raise a ValueError anyway; fail with an actionable message.
    raise ValueError("No prompts.log files were found; nothing to aggregate.")
df_exec_g = pd.concat(dfg_list)
df_exec_g.reset_index(drop=True, inplace=True)

# Expand the two-letter dataset code and translate the CUDA directory name into
# the output dimension; an unknown code or directory raises KeyError.
dsets_conv_dict = {"ad": "amazon_dslr", "dw": "dslr_webcam", "wa": "webcam_amazon"}
cuda_to_dim = {"CUDA0": "4", "CUDA1": "8", "CUDA2": "16", "CUDA3": "32", "CUDA4": "64", "CUDA5": "128"}
df_exec_g['dsets'] = df_exec_g['dsets'].apply(lambda x: dsets_conv_dict[x])
df_exec_g['out_dim'] = df_exec_g['out_dim'].apply(lambda x: cuda_to_dim[x])
df_exec_g.head(3)

In [None]:
# Aggregate the per-execution scores into one row per (theme, dsets, out_dim):
# the mean of each score plus its standard deviation in a "<score>_std" column.
key_columns = ['theme', 'dsets', 'out_dim']  # key columns used when joining the mean and std frames
grouped_scores = df_exec_g[key_columns + score_columns].groupby(key_columns)
df_dim_g_mean = grouped_scores.mean().reset_index()
df_dim_g_std = grouped_scores.std().reset_index().drop(columns=key_columns).add_suffix("_std")
df_dim_g = pd.concat([df_dim_g_mean, df_dim_g_std], axis=1)

# out_dim holds strings, so rank each value by its position among the
# cuda_to_dim values rather than sorting it lexicographically.
dim_order = list(cuda_to_dim.values())
df_list = []
for task in np.unique(df_dim_g.theme):
    theme_rows = df_dim_g.loc[df_dim_g.theme == task]
    df_list.append(theme_rows.sort_values('out_dim', key=lambda col: col.map(dim_order.index)))
df_dim_g = pd.concat(df_list).reset_index(drop=True)
df_dim_g.head(3)

In [None]:
# Write both summary tables to CSV in the working directory, rounded to four
# decimals and without the index column.
for summary_frame, csv_path in ((df_exec_g, './exec_g.csv'), (df_dim_g, './dim_g.csv')):
    summary_frame.round(4).to_csv(csv_path, index=False)