In [1]:
# encoding: utf-8

import numpy as np
import sklearn as sk
import sklearn.linear_model as sklm
import sklearn.metrics as skmt
import matplotlib
matplotlib.use('agg') # so that plt works in command line
import matplotlib.pyplot as plt
import scipy.io as sio
import skimage.io
import h5py
import sys
import os
import gc
import os
import psutil
import re

from optparse import OptionParser

sys.path.append('../Metric/')
sys.path.append('../../Visualization/')
sys.path.append('../../Data_Preprocessing/')
from Metric import *
from Visualization import *
from Data_Extractor import *


# The log directory is configured through an option parser. The argument list
# is passed explicitly, so parse_args() does not read sys.argv here.
cli_args = ["--dir", "./Log/Inception/"]

parser = OptionParser()
parser.add_option("--dir", dest="dir")
options, args = parser.parse_args(cli_args)

# Directory whose entries are scanned for training logs.
log_dir = options.dir

  from ._conv import register_converters as _register_converters


In [2]:
# Keys stored for each of the 'train' and 'val' sub-records, in display order.
_METRIC_KEYS = ('pos_recall', 'pos_pre', 'pos_F1', 'neg_recall',
                'avg_pre', 'bal_acc', 'AUC')

# (record key, label as it appears at the start of the log line)
_LABELLED_METRICS = (('pos_recall', 'pos_recall'),
                     ('pos_pre', 'pos_precision'),
                     ('pos_F1', 'pos_F1'),
                     ('neg_recall', 'neg_recall'))


def _summary_metrics(summary_line):
    """Return (bal_acc, AUC, avg_pre) parsed from a 'mean_cross_entropy ...' line.

    The line is split on whitespace; fields 0, 3, 6 and 9 must be the labels
    'mean_cross_entropy', 'balanced_acc', 'AUC' and 'avg_precision', and the
    values are read from fields 5, 8 and 11.

    Raises:
        AssertionError: if the labels are not at the expected positions.
    """
    fields = summary_line.split()
    # Check the layout before converting, so a malformed line is reported as a
    # layout problem instead of an unrelated float()/index error.
    assert (len(fields) > 11
            and fields[0] == 'mean_cross_entropy' and fields[3] == 'balanced_acc'
            and fields[6] == 'AUC' and fields[9] == 'avg_precision'), \
        'unexpected summary line: %r' % summary_line
    return float(fields[5]), float(fields[8]), float(fields[11])


def _labelled_metric(section, label):
    """Return the last field, as a float, of the first line in `section` containing `label`.

    Raises:
        ValueError: if no line of `section` contains `label`.
        AssertionError: if the matching line does not start with `label`.
    """
    matching = [s for s in section if label in s]
    if not matching:
        raise ValueError('metric %r not found in log section' % label)
    fields = matching[0].split()
    assert fields[0] == label
    return float(fields[-1])


record_list = []
cnt = 0
for filename in os.listdir(log_dir):
    log_path = os.path.join(log_dir, filename)
    # os.listdir also returns sub-directories, which cannot be read as logs.
    if not os.path.isfile(log_path):
        continue

    cnt += 1
    print(cnt, " : ", filename)

    # read the whole log as a list of lines; the handle is closed even if read() fails
    with open(log_path) as file:
        log = file.read().split('\n')

    # initialization: every field starts as None and is filled in below
    record = {}
    for n in ['name', 'weight', 'epoch', 'pos', 'norm_T', 'bn', 'rand', 'conv']:
        record[n] = None
    record['train'] = {key: None for key in _METRIC_KEYS}
    record['val'] = {key: None for key in _METRIC_KEYS}

    # name
    record['name'] = filename

    # settings, all decoded from the underscore-separated file name
    record['conv']   = filename.split('_')[1]
    record['weight'] = filename.find('weight') > 0
    record['bn']     = filename.find('bn') > 0
    record['pos']    = int(filename.split('_p')[-1].split('_')[0])
    record['epoch']  = int(filename.split('_e')[-1].split('_')[0])

    rand = filename.split('_r')[-1].split('_')[0]
    if rand == 'None':
        # builtin float: the np.float alias was removed from NumPy
        record['rand'] = float('NaN')
    else:
        # NOTE(review): only the first character of the seed is used, so a
        # multi-digit seed such as "12" is stored as 1 - confirm this is intended.
        record['rand'] = int(rand[0])

    # normalisation type: '_G' -> std, '_m' -> mean (searched after the first 3 chars)
    if filename[3:].find('_G') > 0:
        record['norm_T'] = 'std'
    elif filename[3:].find('_m') > 0:
        record['norm_T'] = 'mean'
    else:
        record['norm_T'] = 'None'
        # 'reg_param' is only recorded for runs without normalisation
        record['reg_param'] = 'None'

    # metric on validation set: the summary line sits right before 'finish'.
    # Logs without a 'finish' line are skipped.
    try:
        idx = log.index('finish')
    except ValueError:
        continue
    bal_acc, auc, avg_pre = _summary_metrics(log[idx-1])
    record['val'].update(bal_acc=bal_acc, AUC=auc, avg_pre=avg_pre)

    train_idx = log.index("On training set: ")
    val_idx = log.index("On CV set:")

    # the training section runs up to the CV header, the CV section to the end
    train_metric = log[train_idx:val_idx]
    val_metric = log[val_idx:]

    for split_name, section in (('train', train_metric), ('val', val_metric)):
        for key, label in _LABELLED_METRICS:
            record[split_name][key] = _labelled_metric(section, label)

    # summary metrics on the training set
    train_summary = [s for s in train_metric if 'mean_cross_entropy' in s]
    if not train_summary:
        raise ValueError('no mean_cross_entropy line in training section of %s' % filename)
    bal_acc, auc, avg_pre = _summary_metrics(train_summary[0])
    record['train'].update(bal_acc=bal_acc, AUC=auc, avg_pre=avg_pre)

    record_list.append(record)
    print(record)
    print()

1  :  Incep_3-32;1-32|3-64;1-64_G_weight_p0_e20_r0
{'name': 'Incep_3-32;1-32|3-64;1-64_G_weight_p0_e20_r0', 'weight': True, 'epoch': 20, 'pos': 0, 'norm_T': 'std', 'bn': False, 'rand': 0, 'conv': '3-32;1-32|3-64;1-64', 'train': {'pos_recall': 0.788409146113, 'pos_pre': 0.0683784431629, 'pos_F1': 0.12584260244, 'neg_recall': 0.716847668179, 'avg_pre': 0.174306009431, 'bal_acc': 0.752628407146, 'AUC': 0.842442302277}, 'val': {'pos_recall': 0.788409146113, 'pos_pre': 0.0683784431629, 'pos_F1': 0.12584260244, 'neg_recall': 0.716847668179, 'avg_pre': 0.174306009431, 'bal_acc': 0.752628407146, 'AUC': 0.842442302277}}

2  :  Incep_0_m_weight_bn_p0_e30_r0
{'name': 'Incep_0_m_weight_bn_p0_e30_r0', 'weight': True, 'epoch': 30, 'pos': 0, 'norm_T': 'mean', 'bn': True, 'rand': 0, 'conv': '0', 'train': {'pos_recall': 0.0449788874609877, 'pos_pre': 0.011282523601197329, 'pos_F1': 0.018039908695972315, 'neg_recall': 0.948946228274801, 'avg_pre': 0.012337530728726452, 'bal_acc': 0.49696255786789434, 'A

In [3]:
def _ranking_key(rec):
    """Sort key: conv setting first, then validation avg_pre, then validation AUC."""
    val_scores = rec['val']
    return rec['conv'], val_scores['avg_pre'], val_scores['AUC']


# Descending order on the whole key tuple.
sorted_record_list = sorted(record_list, key=_ranking_key, reverse=True)
sorted_record_list

[{'bn': True,
  'conv': '3-32|3-64',
  'epoch': 20,
  'name': 'Incep_3-32|3-64_m_weight_bn_p0_e20_r0',
  'norm_T': 'mean',
  'pos': 0,
  'rand': 0,
  'train': {'AUC': 0.8328926525146082,
   'avg_pre': 0.1953637374931465,
   'bal_acc': 0.7430383551650305,
   'neg_recall': 0.8785087808100153,
   'pos_F1': 0.19547065004461767,
   'pos_pre': 0.11647126633509856,
   'pos_recall': 0.6075679295200457},
  'val': {'AUC': 0.8328926525146082,
   'avg_pre': 0.1953637374931465,
   'bal_acc': 0.7430383551650305,
   'neg_recall': 0.8785087808100153,
   'pos_F1': 0.19547065004461767,
   'pos_pre': 0.11647126633509856,
   'pos_recall': 0.6075679295200457},
  'weight': True},
 {'bn': True,
  'conv': '3-32|3-64',
  'epoch': 20,
  'name': 'Incep_3-32|3-64_G_weight_bn_p0_e20_r0',
  'norm_T': 'std',
  'pos': 0,
  'rand': 0,
  'train': {'AUC': 0.8292427442653454,
   'avg_pre': 0.18715926106801128,
   'bal_acc': 0.733191632415906,
   'neg_recall': 0.851515250625532,
   'pos_F1': 0.16967049927810196,
   'pos_p

In [4]:
# Header row: training metrics on the left of '|', validation metrics on the right.
print("%-6s   %-6s   %-7s   %-9s | %-6s   %-6s   %-7s   %-9s \\\\ %s \n" % (
                                   'bl_acc', 'recall',  'avg_pre','train_AUC',  
                                   'bl_acc',   'recall','avg_pre','val_AUC',  
                                   'name'))
for r in sorted_record_list:
    train, val = r['train'], r['val']

    # Recompute balanced accuracy from the two recalls of the SAME split, to
    # cross-check the value parsed from the log summary line.
    train_bal_acc = (train['pos_recall'] + train['neg_recall'])/2
    val_bal_acc = (val['pos_recall'] + val['neg_recall'])/2

    # One '&'-separated row per run; the run name follows a '%'.
    print("%-6.3f & %-6.3f & %-7.3f & %-9.3f & %-6.3f & %-6.3f & %-7.3f & %-9.3f %% %s \n" % 
          (train['bal_acc'], train['pos_recall'], train['avg_pre'], train['AUC'], 
           val['bal_acc'],   val['pos_recall'],   val['avg_pre'],   val['AUC'],
           r['name'])
         )

    # abs() must wrap the whole difference; otherwise any recomputed value
    # smaller than the logged one passes the check regardless of the gap.
    assert abs(val_bal_acc - val['bal_acc']) < 1e-10
    assert abs(train_bal_acc - train['bal_acc']) < 1e-10

bl_acc   recall   avg_pre   train_AUC | bl_acc   recall   avg_pre   val_AUC   \\ name 

0.743  & 0.608  & 0.195   & 0.833     & 0.743  & 0.608  & 0.195   & 0.833     % Incep_3-32|3-64_m_weight_bn_p0_e20_r0 

0.733  & 0.615  & 0.187   & 0.829     & 0.733  & 0.615  & 0.187   & 0.829     % Incep_3-32|3-64_G_weight_bn_p0_e20_r0 

0.743  & 0.770  & 0.171   & 0.834     & 0.743  & 0.770  & 0.171   & 0.834     % Incep_3-32|3-64_G_weight_p0_e20_r0 

0.500  & 0.000  & 0.030   & 0.575     & 0.500  & 0.000  & 0.030   & 0.575     % Incep_3-32|3-64_m_weight_p0_e20_r0 

0.730  & 0.569  & 0.189   & 0.840     & 0.730  & 0.569  & 0.189   & 0.840     % Incep_3-32;1-32|3-64;1-64_m_weight_bn_p0_e20_r0 

0.728  & 0.576  & 0.180   & 0.836     & 0.728  & 0.576  & 0.180   & 0.836     % Incep_3-32;1-32|3-64;1-64_G_weight_bn_p0_e20_r0 

0.753  & 0.788  & 0.174   & 0.842     & 0.753  & 0.788  & 0.174   & 0.842     % Incep_3-32;1-32|3-64;1-64_G_weight_p0_e20_r0 

0.500  & 0.000  & 0.030   & 0.567     & 0.500  & 0.

In [None]:
import re
# Read the whole table file into one string; the context manager closes the
# handle even if read() raises.
with open('./norm_T   up_sample   train_acc   recall   val_acc') as file:
    table = file.read()

In [None]:
# Replace each span that starts at "- " and runs (greedily, within a line) to a
# space followed by a backslash with a single backslash; the result is displayed.
re.compile('- .* \\\\').sub('\\\\', table)