In [3]:
# encoding: utf-8

import numpy as np
import sklearn as sk
import sklearn.linear_model as sklm
import sklearn.metrics as skmt
import matplotlib
import matplotlib.pyplot as plt
import scipy.io as sio
import skimage.io
import h5py
import sys
import os
import gc
import os
import psutil
import re

from optparse import OptionParser

sys.path.append('../Metric/')
sys.path.append('../../Visualization/')
sys.path.append('../../Data_Preprocessing/')
from Metric import *
from Visualization import *
from Data_Extractor import *


# The notebook has no real command line, so the parser is fed a fixed
# argument list instead of sys.argv.
parser = OptionParser()
parser.add_option("--dir", dest="dir")
options, args = parser.parse_args(args=["--dir", "./Result/"])

# Directory given by the --dir option.
log_dir = options.dir

In [34]:
record_list = []
cnt = 0

# Road-type names; the integer ids in each result file name index into this array.
road_type = np.array(["motorway", "trunk", "primary", "secondary", "tertiary", "unclassified", "track", # 0-6
                      "residential", "service", "road", "living_street", # 7-10
                      "all_roads"]) # 11 


def _first_fields(lines, keyword):
    """Return the whitespace-split fields of the first line containing `keyword`.

    Raises:
        IndexError: if no line in `lines` contains `keyword`.
    """
    for text in lines:
        if keyword in text:
            return text.split()
    raise IndexError("no line containing %r found in log" % keyword)


def _parse_metrics(lines):
    """Extract the metrics of one log section (training or test).

    Expects a line starting with 'balanced_accuracy' whose last field is the
    value, and a line of the form 'AUC= <auc> avg_precision= ... <avg_pre>'.

    Returns:
        dict with float values under the keys 'avg_pre', 'bal_acc' and 'AUC'.
    """
    fields = _first_fields(lines, 'balanced_accuracy')
    assert fields[0] == 'balanced_accuracy'
    bal_acc = float(fields[-1])

    fields = _first_fields(lines, 'AUC')
    # Check the layout before converting, so a malformed line is reported
    # as a layout problem rather than a float() failure.
    assert fields[0] == 'AUC='
    assert fields[2] == 'avg_precision='
    return {'avg_pre': float(fields[-1]), 'bal_acc': bal_acc, 'AUC': float(fields[1])}


def _parse_result_log(path, filename):
    """Parse one '*_rst' result log into a record dict.

    Args:
        path: location of the log file on disk.
        filename: base name of the log; the part before the first '_' is a
            '-'-separated list of indices into `road_type`.

    Returns:
        dict with keys 'name', 'rd_type', 'train', 'val', 'pos_w', 'neg_w'
        and 'rd_idx' (in that insertion order).
    """
    # The context manager closes the handle even if read() raises.
    with open(path) as log_file:
        log = log_file.read().split('\n')

    # data info
    fields = _first_fields(log, 'class balance:')
    assert fields[2] == 'pos='

    # settings
    rd_idx = [int(idx) for idx in filename.split('_')[0].split('-')]

    # The training section runs up to the "On test set" marker; the section
    # after that marker is stored under 'val'.
    train_idx = log.index("On training set")
    val_idx = log.index("On test set")

    return {
        'name': filename,
        'rd_type': [road_type[idx] for idx in rd_idx],
        'train': _parse_metrics(log[train_idx:val_idx]),
        'val': _parse_metrics(log[val_idx:]),
        'pos_w': float(fields[3]),
        'neg_w': float(fields[4]),
        'rd_idx': rd_idx,
    }


for filename in os.listdir(log_dir):
    if not filename.endswith('_rst'):
        continue
    cnt += 1
    print(cnt, " : ", filename)

    # os.path.join works whether or not log_dir ends with a separator.
    record = _parse_result_log(os.path.join(log_dir, filename), filename)

    record_list.append(record)
    print(record)
    print()

1  :  7-8-9-10_rst
{'name': '7-8-9-10_rst', 'rd_type': ['residential', 'service', 'road', 'living_street'], 'train': {'avg_pre': 0.5613416780574223, 'bal_acc': 0.6385928879426754, 'AUC': 0.9165213163184444}, 'val': {'avg_pre': 0.4131386696129125, 'bal_acc': 0.610297710281164, 'AUC': 0.856047943454419}, 'pos_w': 0.039278334900389525, 'neg_w': 0.9607216650996104, 'rd_idx': [7, 8, 9, 10]}

2  :  0-1-2-3-4_rst
{'name': '0-1-2-3-4_rst', 'rd_type': ['motorway', 'trunk', 'primary', 'secondary', 'tertiary'], 'train': {'avg_pre': 0.42847042400958846, 'bal_acc': 0.5473421406042354, 'AUC': 0.816624329472174}, 'val': {'avg_pre': 0.3379000495517638, 'bal_acc': 0.5378406510848942, 'AUC': 0.7681901358741525}, 'pos_w': 0.11341390574228451, 'neg_w': 0.8865860942577155, 'rd_idx': [0, 1, 2, 3, 4]}

3  :  0-1-2-3-4-5_rst
{'name': '0-1-2-3-4-5_rst', 'rd_type': ['motorway', 'trunk', 'primary', 'secondary', 'tertiary', 'unclassified'], 'train': {'avg_pre': 0.503084284773963, 'bal_acc': 0.5855526164832994, 'A

In [35]:
# Copy the records, then order the copy by each record's 'rd_idx' list
# (lists compare element by element); record_list itself is left untouched.
sorted_record_list = list(record_list)
sorted_record_list.sort(key=lambda rec: rec['rd_idx'])

In [41]:
# Print the validation results as LaTeX table rows ('&'-separated cells,
# terminated by '\\'). Header and data rows share one format string so they
# always have the same number of cells and the same column widths.
row_fmt = "%-3s& %-80s & %-9s & %-7s & %-9s \\\\ \n"

print(row_fmt % ('no.', 'road types', 'pos_ratio', 'avg_pre', 'val_AUC'))
# A separate loop name keeps the file counter `cnt` from the parsing cell intact.
for row_no, r in enumerate(sorted_record_list, start=1):
    print(row_fmt % ("%d." % row_no,
                     ', '.join(r['rd_type']),
                     "%.2f" % r['pos_w'],
                     "%.3f" % r['val']['avg_pre'],
                     "%.3f" % r['val']['AUC']))

road types                                                                       & pos_ratio & bl_acc avg_pre train_AUC | bl_acc avg_pre val_AUC   \\ 

1.& motorway, trunk, primary                                                         & 0.03      & 0.136   & 0.801     \\ 

2.& motorway, trunk, primary, secondary                                              & 0.07      & 0.242   & 0.796     \\ 

3.& motorway, trunk, primary, secondary, tertiary                                    & 0.11      & 0.338   & 0.768     \\ 

4.& motorway, trunk, primary, secondary, tertiary, unclassified                      & 0.22      & 0.433   & 0.722     \\ 

5.& motorway, trunk, primary, secondary, tertiary, unclassified, track               & 0.34      & 0.518   & 0.671     \\ 

6.& motorway, trunk, primary, secondary, tertiary, unclassified, track, residential, service, road, living_street & 0.36      & 0.548   & 0.676     \\ 

7.& motorway, trunk, primary, secondary, residential, service, road, living

In [None]:
import re
# Read the whole table file into `table`; the context manager closes the
# handle even if read() raises.
with open('./norm_T   up_sample   train_acc   recall   val_acc') as file:
    table = file.read()

In [None]:
# Replace every span that starts at "- " and runs greedily (within one line,
# since '.' does not match a newline) to a space followed by a backslash
# with a single backslash. The result is the cell's displayed value.
re.compile('- .* \\\\').sub('\\\\', table)