# Generate log file

In [1]:
%matplotlib inline
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
import re

import pandas as pd

## List all logs files

In [2]:
import glob
files = glob.glob('../results/logs/**/events.out.tfevents.*', recursive=True)
#files = glob.glob('../results/logs_2019_03_09 _RBM_RBMCF_100_250_500_1000/**/events.out.tfevents.*', recursive=True)

len(files), files[0:3]

(16,
 ['../results/logs/kfold=0/kfold-intern=0/batch_size=64/class=RBMCF/visible_size=702/hidden_size=500/regularization=NoRegularization-0.0/learning_rate=Adam-0.001-0.9-0.999-1e-08/sampling_method=CD-1/momentum=0/1553233398.0850723/events.out.tfevents.1553233401.paulo-notebook',
  '../results/logs/kfold=0/kfold-intern=0/batch_size=64/class=RBMCF/visible_size=702/hidden_size=500/regularization=NoRegularization-0.0/learning_rate=Adam-0.0001-0.9-0.999-1e-08/sampling_method=CD-1/momentum=0/1553240274.3316448/events.out.tfevents.1553240278.paulo-notebook',
  '../results/logs/kfold=0/kfold-intern=0/batch_size=64/class=RBMCF/visible_size=702/hidden_size=500/regularization=NoRegularization-0.0/learning_rate=Adam-0.02-0.9-0.999-1e-08/sampling_method=CD-1/momentum=0/1553220473.393037/events.out.tfevents.1553220478.paulo-notebook'])

## Read a file

In [3]:
file = files[0]

def extract_information(file):
    ea = EventAccumulator(file)
    ea.Reload()
    
    scalars = ea.Tags()['scalars']
    first_scalar = ea.Scalars(scalars[0])
    
    dataframes = pd.DataFrame(first_scalar)[['wall_time', 'step']]
    
    for df_name in ea.Tags()['scalars']:
        df = pd.DataFrame(ea.Scalars(df_name))
        series = df['value'].rename(df_name)
        dataframes = pd.concat([dataframes, series], axis=1)
    
    del ea

    return dataframes

dfs = extract_information(file)
print(dfs.shape[0], 'evaluates')
print(dfs.shape[1], 'data columns')

dfs.head()

97 evaluates
44 data columns


Unnamed: 0,wall_time,step,measure/reconstruction/error,measure/activation/mean,measure/activation/std,measure/parameters/W/mean,measure/parameters/b_h/mean,measure/parameters/b_v/mean,measure/reconstruction_1/hamming,details/measure/top-1/evaluate-0/train,...,details/measure/top-5/evaluate-5/validation,measure/evaluate/top-5/train,measure/evaluate/top-5/validation,measure/evaluate/Free/mean_free_energy_train,measure/evaluate/Free/mean_free_energy_validation,measure/evaluate/Free/ratio_mean_free_energy,measure/evaluate/Free/mean_free_energy_gap,measure/evaluate/Free/mean_free_energy_noisy,measure/evaluate/reconstruction/RMSE_train,measure/evaluate/reconstruction/RMSE_validation
0,1553233000.0,1,0.130536,6.091435,6.147889,-0.000107,0.000715,0.003647,4.554977,0.295139,...,0.568129,0.354938,0.360277,-348.135712,-348.152863,1.000049,1.49234,-346.660522,0.091782,0.091776
1,1553233000.0,101,0.12398,6.018518,5.28322,-0.002389,0.703285,-0.132103,3.04456,0.295139,...,0.692841,0.457176,0.465358,-564.209473,-564.080627,0.999772,15.108948,-548.97168,0.087511,0.08724
2,1553234000.0,201,0.124309,6.07118,5.321902,-0.002778,0.767351,-0.154157,3.009259,0.295139,...,0.699769,0.457562,0.466513,-586.115479,-585.931885,0.999687,16.286255,-569.64563,0.087509,0.087234
3,1553234000.0,301,0.123571,5.947338,5.335769,-0.003052,0.789317,-0.170251,3.016783,0.295139,...,0.699769,0.457562,0.463818,-593.820557,-593.593994,0.999618,17.046021,-576.547974,0.087506,0.087231
4,1553234000.0,401,0.12416,6.049768,5.104005,-0.003319,0.784898,-0.18626,3.045717,0.295139,...,0.699769,0.458816,0.463818,-592.159668,-591.912598,0.999583,17.478394,-574.434204,0.087503,0.087237


In [4]:
def extract_columns(file):
    regex = r'(.+?)=(.+?)\/'
    test_str = file.replace('../results/logs/', '')
    matches = re.finditer(regex, test_str, re.MULTILINE)

    columns = {match.groups()[0]: match.groups()[1] for match in matches}
    columns['path'] = test_str
    return columns

def populate_columns(file, dataframe):
    columns = extract_columns(file)
    
    for k, v in columns.items():
        dataframe[k] = v

    return dataframe

In [5]:
def generate_logs_files(files):
    data = []

    for index, file in enumerate(files):
        print(f'{index+1} of {len(files)}')
        information = extract_information(file)
        information = populate_columns(file, information)
        
        data.append(information)
        
    return pd.concat(data)

data = generate_logs_files(files)

1 of 16
2 of 16
3 of 16
4 of 16
5 of 16
6 of 16
7 of 16
8 of 16
9 of 16
10 of 16
11 of 16
12 of 16
13 of 16
14 of 16
15 of 16
16 of 16


In [6]:
data.index.name = 'evaluate'
data.head(5)

Unnamed: 0_level_0,wall_time,step,measure/reconstruction/error,measure/activation/mean,measure/activation/std,measure/parameters/W/mean,measure/parameters/b_h/mean,measure/parameters/b_v/mean,measure/reconstruction_1/hamming,details/measure/top-1/evaluate-0/train,...,kfold-intern,batch_size,class,visible_size,hidden_size,regularization,learning_rate,sampling_method,momentum,path
evaluate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,1553233000.0,1,0.130536,6.091435,6.147889,-0.000107,0.000715,0.003647,4.554977,0.295139,...,0,64,RBMCF,702,500,NoRegularization-0.0,Adam-0.001-0.9-0.999-1e-08,CD-1,0,kfold=0/kfold-intern=0/batch_size=64/class=RBM...
1,1553233000.0,101,0.12398,6.018518,5.28322,-0.002389,0.703285,-0.132103,3.04456,0.295139,...,0,64,RBMCF,702,500,NoRegularization-0.0,Adam-0.001-0.9-0.999-1e-08,CD-1,0,kfold=0/kfold-intern=0/batch_size=64/class=RBM...
2,1553234000.0,201,0.124309,6.07118,5.321902,-0.002778,0.767351,-0.154157,3.009259,0.295139,...,0,64,RBMCF,702,500,NoRegularization-0.0,Adam-0.001-0.9-0.999-1e-08,CD-1,0,kfold=0/kfold-intern=0/batch_size=64/class=RBM...
3,1553234000.0,301,0.123571,5.947338,5.335769,-0.003052,0.789317,-0.170251,3.016783,0.295139,...,0,64,RBMCF,702,500,NoRegularization-0.0,Adam-0.001-0.9-0.999-1e-08,CD-1,0,kfold=0/kfold-intern=0/batch_size=64/class=RBM...
4,1553234000.0,401,0.12416,6.049768,5.104005,-0.003319,0.784898,-0.18626,3.045717,0.295139,...,0,64,RBMCF,702,500,NoRegularization-0.0,Adam-0.001-0.9-0.999-1e-08,CD-1,0,kfold=0/kfold-intern=0/batch_size=64/class=RBM...


In [7]:
data.to_csv('extracted_log-minibatch64.csv')