In [1]:
import pandas as pd
import numpy as np
import os
import re
from collections import defaultdict

In [2]:
def agg_by_threshold(thresholds):
    files = sorted(os.listdir('logs/'))
    
    by_threshold = []
    for threshold in thresholds:
        by_threshold += [[file for file in files if threshold in file.split('_')[-1]]]
    return by_threshold

In [3]:
def create_key(original_line):
    result = []
    of_interest = ['block_depth', 
                   'const_factor', 
                   'learning_rate', 
                   'linear_dim', 
                   'percentile']
    for item in of_interest:
        pattern = re.compile(item+'=[\d.]*')
        result += [re.search(pattern, original_line).group(0)]
    return ','.join(result)

In [4]:
def process_single_log_file(file_path):
    split_colon = lambda x: x.split(':')

    test_data = (pd.read_csv(file_path)
                   .iloc[:-1, 2:]
                   .iloc[1::2]
                   .iloc[:, 1:8]
                   .applymap(split_colon))
    
    if test_data.empty:
        return None
    
    names = (test_data.applymap(lambda x: x[0])
                      .iloc[0]
                      .values
                      .tolist())
    names = list(map(lambda x: x.replace(' ', ''), names))
    dataframe = (test_data.applymap(lambda x: float(x[-1]))
                     .copy())
    dataframe.columns = names
    
    return dataframe

In [5]:
def get_key_and_df(file_name):
    file_path = 'logs/' + file_name

    with open(file_path, 'r') as f:
        line = f.readlines()[0]
        key = create_key(line)
    
    df = process_single_log_file(file_path)
    
    if df is None:
        return None, None
    else:
        return key, df 

In [6]:
def get_relevant_key_df(file_name):
    key, df = get_key_and_df(file_name)
    if df is None:
        return None, None
    
    relevant_df = df[df.F1Score > 0.05].copy()
    if len(relevant_df) > 0:
        return key, relevant_df
    else:
        return None, None

In [7]:
def garner_relevant_dictionary():
    
    thresholds = ['0.1', '0.2', '0.3', '0.35', '0.65', '0.7', '0.8', '0.9']
    files_by_thresholds = agg_by_threshold(thresholds)

    # Not sure if this should be default_dict
    so_far = defaultdict(list)
    for files in files_by_thresholds:
        for file in files:
            key, df = get_relevant_key_df(file)
            if key is not None:
                so_far[key].append(df)
    
    return so_far

In [8]:
my_dict = garner_relevant_dictionary()
# iter_my_dict = iter(my_dict.items())

In [58]:
training_results = []
pct = 0.8
statement = 'percentile={}'.format(pct)
for key, df in my_dict.items():
    if statement in key : # and 'block_depth=3' in key:
        training_results.append((key, df[0]))

In [59]:
# training_results_sorted = sorted(training_results, 
#                                  key=lambda x: (x[1].Mean*x[1].F1Score).mean(), 
#                                  reverse=True)
reverse = pct > 0.5

training_results_sorted = sorted(training_results, 
                                 key=lambda x: (x[1].Mean).mean(), 
                                 reverse=reverse)

In [60]:
for key, result in training_results_sorted:
    print(key)

block_depth=5,const_factor=8,learning_rate=0.007,linear_dim=3,percentile=0.8
block_depth=3,const_factor=8,learning_rate=0.01,linear_dim=2,percentile=0.8
block_depth=5,const_factor=8,learning_rate=0.007,linear_dim=5,percentile=0.8
block_depth=3,const_factor=8,learning_rate=0.007,linear_dim=5,percentile=0.85
block_depth=5,const_factor=8,learning_rate=0.007,linear_dim=5,percentile=0.85
block_depth=3,const_factor=8,learning_rate=0.007,linear_dim=3,percentile=0.8
block_depth=5,const_factor=8,learning_rate=0.007,linear_dim=6,percentile=0.85
block_depth=3,const_factor=16,learning_rate=0.007,linear_dim=6,percentile=0.85
block_depth=3,const_factor=8,learning_rate=0.007,linear_dim=6,percentile=0.85
block_depth=6,const_factor=4,learning_rate=0.007,linear_dim=5,percentile=0.85
block_depth=3,const_factor=2,learning_rate=0.007,linear_dim=2,percentile=0.8
block_depth=6,const_factor=4,learning_rate=0.007,linear_dim=3,percentile=0.85
block_depth=3,const_factor=8,learning_rate=0.007,linear_dim=3,percent

In [61]:
print('F1Score, Mean, Precision, Count')
for key, result in training_results_sorted:
    print('{:.5f}, {:.5f}, {:.5f}, {}'.format(
          result.F1Score.mean(), 
          result.Mean.mean(), 
          result.Precision.mean(), 
          result.Mean.count()))

F1Score, Mean, Precision, Count
0.17922, 0.01338, 0.39327, 30
0.22208, 0.01149, 0.37211, 19
0.24413, 0.01102, 0.34047, 29
0.16932, 0.01080, 0.26266, 28
0.12625, 0.01028, 0.28027, 26
0.16053, 0.00990, 0.33468, 28
0.12834, 0.00972, 0.26941, 28
0.10877, 0.00963, 0.29102, 24
0.11362, 0.00918, 0.24601, 29
0.12926, 0.00891, 0.23334, 24
0.14407, 0.00890, 0.34747, 24
0.12807, 0.00889, 0.25183, 25
0.09826, 0.00886, 0.26536, 26
0.11461, 0.00883, 0.26455, 25
0.15034, 0.00836, 0.29979, 26
0.18125, 0.00835, 0.33398, 29
0.19071, 0.00819, 0.32979, 22
0.17758, 0.00801, 0.34084, 28
0.08482, 0.00764, 0.28134, 22
0.16853, 0.00739, 0.31126, 27
0.19617, 0.00719, 0.32692, 25
0.18793, 0.00712, 0.31359, 29
0.12554, 0.00703, 0.32067, 28
0.19330, 0.00694, 0.30793, 29
0.20437, 0.00669, 0.29850, 18
0.15242, 0.00666, 0.31667, 30
0.17367, 0.00652, 0.30034, 28
0.19241, 0.00638, 0.33580, 27
0.19568, 0.00603, 0.31015, 28
0.11825, 0.00601, 0.26624, 24
0.15309, 0.00585, 0.25472, 25
0.16789, 0.00580, 0.27878, 29
0.15554,

In [65]:
ranks = 4
print(training_results_sorted[ranks][0])
training_results_sorted[ranks][1]

block_depth=5,const_factor=8,learning_rate=0.007,linear_dim=5,percentile=0.85


Unnamed: 0,BCE,F1Score,ROC_AUC,Precision,Recall,Mean,Stdev
13,0.42354,0.06412,0.58594,0.5473,0.03405,0.02624,0.03656
15,0.4282,0.05445,0.58305,0.42988,0.02907,0.02012,0.03243
17,0.43975,0.07715,0.57452,0.38641,0.04285,0.01757,0.03396
19,0.4582,0.1346,0.57209,0.2999,0.08677,0.0117,0.0348
21,0.47012,0.11354,0.56182,0.29395,0.07036,0.01109,0.03495
23,0.46491,0.14713,0.57156,0.29655,0.09784,0.0117,0.03414
25,0.46986,0.10516,0.563,0.29926,0.06378,0.01204,0.03515
27,0.49435,0.11795,0.55239,0.23981,0.07821,0.00723,0.03515
29,0.47911,0.09836,0.55442,0.26185,0.06055,0.00914,0.03481
31,0.49411,0.15324,0.56459,0.2586,0.10888,0.00949,0.03562
