In [1]:
%%capture
import pandas as pd
import numpy as np
import os
import yaml
from easydict import EasyDict
from glob import glob

import torch
import torch.nn as nn
import torch.nn.functional as F

from sklearn.metrics import normalized_mutual_info_score as NMI

from z_common import config_to_execute
from clustering import run_clustering
from dataset import get_datasets
from simclr import SimCLR
from log_functions import *

from models.baseline_encoder import Encoder
from models.alexnet_simclr import AlexSimCLR
from models.resnet_simclr import ResNetSimCLR
from loss.nt_xent import NTXentLoss
from functions import *

# Let pandas render up to 100 rows before truncating a displayed frame.
pd.options.display.max_rows = 100


In [2]:
""" CONFIG """

# Repetition counts: how many executions were run per setting, and how many
# clustering runs were logged per execution.
EXEC_REP_NUM = 3
CLUST_REP_NUM = 3

# Dataset directory under ./record and the underscore-joined domain pair.
parent = 'Office31'
TITLE = 'dslr_webcam'

# THEME is embedded in the output file names.
# Other values used previously: 'jigsaw_const_phase', 'jigsaw'.
THEME = 'none'

# Settings under comparison, in the order they are expected to appear in the logs.
augs_list = [
    "out_dim_8",
    "out_dim_16",
    "out_dim_32",
    "out_dim_64",
    "out_dim_128",
    "out_dim_256",
    # 'none',
    # 'jigsaw_const_phase',
    # 'jigsaw',
    # 'mask',
    # 'jigsaw_mask',
    # 'mask_const_phase',
    # 'jigsaw_mask_const_phase',
]

# First letter of each domain name in TITLE, e.g. 'dslr_webcam' -> 'dw'.
domain_initials = "".join(word[0] for word in TITLE.split('_'))

# Merged log file that collects the relevant lines of every prompts.log.
out_log_file = f'./record/{parent}/{domain_initials}_{THEME}_logs.txt'

In [3]:
""" cuda_dir毎に各ディレクトリのprompts.logをまとめ,2次元リストを作成  """
# Look for prompts.log files under CUDA0 .. CUDA9 and keep one list of paths per
# CUDA directory; directories without any match are dropped afterwards.
# NOTE(review): glob() returns paths in filesystem-dependent order, while later
# cells label results purely by position -- confirm the order matches augs_list.
prompts_files_t = []
for cuda_idx in range(10):
    pattern = f"./record/{parent}/CUDA{cuda_idx}/{domain_initials}__random_pseudo__{THEME}__*/prompts.log"
    prompts_files_t.append(glob(pattern))

prompts_files = [paths for paths in prompts_files_t if paths]

In [4]:
""" 各ファイルのprompts.logを1つのログファイルに書き込み """
# Merge every prompts.log into a single log file.
#
# For each CUDA directory the output contains:
#   1. the first TITLE_LINE_COUNT lines of the LAST prompts.log in that group
#      (presumably the run's configuration header -- TODO confirm), followed by
#   2. for every prompts.log in the group, a separator line and then only the
#      lines that contain one of LOG_KEYWORDS.
TITLE_LINE_COUNT = 14
LOG_KEYWORDS = ('Epoch:', 'nmi:', 'nmi_class:', 'domain_accuracy:')
RUN_SEPARATOR = '\n==========================================\n'

# Fail early with a readable message instead of an opaque concatenation error.
if not prompts_files:
    raise FileNotFoundError(
        'prompts_files is empty: no prompts.log matched the glob pattern, '
        f'so {out_log_file} cannot be built.'
    )

output_texts = []
for cuda_prompts_files in prompts_files:
    title_text = []
    run_lines = []
    for prompts in cuda_prompts_files:
        with open(prompts, 'r') as f:
            logs = f.read()
        # Re-attach the newline that split() removed so writelines() can be used.
        split_texts = [f"{ft}\n" for ft in logs.split('\n')]

        # Overwritten on every iteration: the header of the last file wins.
        title_text = split_texts[:TITLE_LINE_COUNT]

        run_lines.append(RUN_SEPARATOR)
        run_lines.extend(
            line for line in split_texts
            if any(keyword in line for keyword in LOG_KEYWORDS)
        )

    output_texts.extend(title_text)
    output_texts.extend(run_lines)

# Write the merged log; newline='\n' keeps LF line endings on every platform.
with open(out_log_file, 'w', newline='\n') as f:
    f.writelines(output_texts)

In [5]:
"""
    dft: ログから得た値(縦持ち)
    df: ログから得た値(横持ち). 各実行,各クラスタリングそれぞれの値を全て保持.
            レコード数: len(aug_ilst) * EXEC_REP_NUM * CLUST_REP_NUM
    dfg: クラスタリングの平均値をまとめた.
            レコード数: len(aug_ilst) * EXEC_REP_NUM
    dfg_avg: その平均値をまとめた
            レコード数: len(aug_ilst)
    csvに書き込んでいく.
"""
# Build the result tables from the merged log file and export them as CSV.
#
#   dft          : metric lines of the log in long format (title, result).
#   df           : wide format, one row per (setting, execution, clustering run);
#                  len(augs_list) * EXEC_REP_NUM * CLUST_REP_NUM rows.
#   dfg          : clustering runs averaged per execution;
#                  len(augs_list) * EXEC_REP_NUM rows.
#   dfg_mean_std : mean and standard deviation over executions;
#                  len(augs_list) rows.
dfs = {}
dfgs = {}
dfg_avgs = {}

# Keep only the lines that carry a metric and split them into (title, result).
with open(out_log_file, 'r') as f:
    text = f.read()
split_text = text.split('\n')
metric_markers = ('nmi:', 'nmi_class:', 'domain_accuracy:')
log_text = [line for line in split_text if any(marker in line for marker in metric_markers)]
split_logs = [line.split(':') for line in log_text]

dft = pd.DataFrame(split_logs, columns=['title', 'result'])

# The log rows are labelled purely by position, so the number of metric lines
# must match the configuration exactly.
rows_per_aug = EXEC_REP_NUM * CLUST_REP_NUM
expected_rows = len(augs_list) * rows_per_aug
for metric_title in ('nmi', 'nmi_class', 'domain_accuracy'):
    found_rows = int((dft['title'] == metric_title).sum())
    if found_rows != expected_rows:
        raise ValueError(
            f"Expected {expected_rows} '{metric_title}' lines in {out_log_file} "
            f"(len(augs_list) * EXEC_REP_NUM * CLUST_REP_NUM) but found {found_rows}."
        )

augs_rep = [aug for aug in augs_list for _ in range(rows_per_aug)]
df = pd.DataFrame(augs_rep, columns=['augs'])
# Each execution contributes CLUST_REP_NUM consecutive rows, so the execution
# index of row i within a setting is i // CLUST_REP_NUM (not i // EXEC_REP_NUM,
# which is only correct when both constants happen to be equal).
df['exec_number'] = [i // CLUST_REP_NUM for i in range(rows_per_aug)] * len(augs_list)
df['nmi_domain'] = dft[dft['title']=='nmi'].result.values
df['nmi_class'] = dft[dft['title']=='nmi_class'].result.values
df['domain_accuracy'] = dft[dft['title']=='domain_accuracy'].result.values
df[['nmi_domain', 'nmi_class', 'domain_accuracy']] = df[['nmi_domain', 'nmi_class', 'domain_accuracy']].astype('float').round(5)

# Average the clustering runs of each execution, then aggregate over executions.
dfg = df.groupby(['augs', 'exec_number']).mean().reset_index()
per_aug = dfg.drop('exec_number', axis=1).groupby('augs')
dfg_mean = per_aug.mean().reset_index()
dfg_std = per_aug.std().reset_index()
dfg_mean_std = pd.concat([dfg_mean, dfg_std.drop('augs', axis=1)], axis=1)
dfg_mean_std.columns = ['augs', 'd_nmi_mean', 'c_nmi_mean', 'd_accuracy_mean', 'd_nmi_std', 'c_nmi_std', 'd_accuracy_std']
# groupby sorts settings alphabetically; restore the order given in augs_list.
dfg_mean_std = dfg_mean_std.sort_values('augs', key=lambda s: s.map(augs_list.index))

# Write the CSV files.
df.round(4).to_csv(f'./record/{parent}/{domain_initials}_{THEME}_df.csv', header=True, index=False)
dfg.round(4).to_csv(f'./record/{parent}/{domain_initials}_{THEME}_dfg.csv', header=True, index=False)
dfg_mean_std.round(4).to_csv(f'./record/{parent}/{domain_initials}_{THEME}_dfg_avg.csv', header=True, index=False)