# Logs analysis

In [1]:
%matplotlib inline
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
import pandas as pd
import re

## List all log files

In [2]:
import glob
# Recursively collect every event file under the results directory; with
# recursive=True the `**` segment matches any depth of nested run directories.
# NOTE: glob does not guarantee any particular ordering of its results, so
# files[0] may be a different run on another machine or file system.
files = glob.glob('../results/logs/**/events.out.tfevents.*', recursive=True)

# Show how many files were found plus a small sample of their paths.
len(files), files[0:3]

(5,
 ['../results/logs/kfold=2/kfold-intern=0/batch_size=10/class=RBMCF/visible_size=702/hidden_size=500/regularization=NoRegularization-0.0/learning_rate=ConstantLearningRate-0.2/sampling_method=CD-1/momentum=1/1550458705.0008924/events.out.tfevents.1550458708.paulo-notebook',
  '../results/logs/kfold=4/kfold-intern=0/batch_size=10/class=RBMCF/visible_size=702/hidden_size=500/regularization=NoRegularization-0.0/learning_rate=ConstantLearningRate-0.2/sampling_method=CD-1/momentum=1/1550460861.4268224/events.out.tfevents.1550460865.paulo-notebook',
  '../results/logs/kfold=1/kfold-intern=0/batch_size=10/class=RBMCF/visible_size=702/hidden_size=500/regularization=NoRegularization-0.0/learning_rate=ConstantLearningRate-0.2/sampling_method=CD-1/momentum=1/1550457432.9702668/events.out.tfevents.1550457436.paulo-notebook'])

## Read a file

In [3]:
# Use the first discovered event file as a sample input for the extraction below.
file = files[0]

def extract_information(file):
    """Load every scalar series of one TensorBoard event file into a DataFrame.

    Parameters
    ----------
    file : str
        Path handed to ``EventAccumulator`` (an ``events.out.tfevents.*`` file).

    Returns
    -------
    pandas.DataFrame
        The ``wall_time`` and ``step`` columns are taken from the first scalar
        tag; after them comes one column per scalar tag, named after the tag
        and holding that tag's ``value`` field. If the event file has no
        scalar tags, an empty frame with just ``wall_time`` and ``step`` is
        returned instead of raising ``IndexError``.

    Notes
    -----
    Series are aligned by row position, not by ``step``: a tag that was logged
    fewer times than the others is padded with NaN at the end.
    """
    # NOTE: EventAccumulator is created with its default size_guidance, which
    # caps how many scalar events are kept per tag; pass size_guidance
    # explicitly here if long runs ever look truncated.
    accumulator = EventAccumulator(file)
    accumulator.Reload()

    scalar_tags = accumulator.Tags()['scalars']
    if not scalar_tags:
        # Nothing was logged as a scalar: keep the schema, return no rows.
        return pd.DataFrame(columns=['wall_time', 'step'])

    # The first tag provides the shared time/step axis for the whole table.
    first_tag_events = pd.DataFrame(accumulator.Scalars(scalar_tags[0]))
    pieces = [first_tag_events[['wall_time', 'step']]]

    for tag in scalar_tags:
        events = pd.DataFrame(accumulator.Scalars(tag))
        pieces.append(events['value'].rename(tag))

    # Concatenate once instead of re-copying the growing frame for every tag.
    return pd.concat(pieces, axis=1)

# Run the extraction on the sample file and preview the first rows.
dfs = extract_information(file)
dfs.head()

Unnamed: 0,wall_time,step,measure/reconstruction/error,measure/activation/mean,measure/activation/std,measure/parameters/W/mean,measure/parameters/b_h/mean,measure/parameters/b_v/mean,measure/reconstruction_1/hamming,details/measure/top-1/evaluate-0/train,...,details/measure/expectation/normalized/evaluate-5/validation,measure/evaluate/expectation/normalized/RMSE_train_y_predicted,measure/evaluate/expectation/normalized/RMSE_validation_y_predicted,measure/evaluate/expectation/normalized/train,measure/evaluate/expectation/normalized/validation,measure/evaluate/Free/mean_free_energy_train_,measure/evaluate/Free/mean_free_energy_validation,measure/evaluate/Free/diff_mean_free_energy_,measure/evaluate/reconstruction/RMSE_train,measure/evaluate/reconstruction/RMSE_validation
0,1550459000.0,1,0.122436,5.907176,5.125681,-0.037394,-4.359916,-0.001174,3.81501,0.331139,...,0.002632,0.13011,0.130313,0.009655,0.006579,-7702.736816,-2027.242554,-5675.494141,0.085741,0.085742
1,1550459000.0,2,0.120661,5.980908,5.227418,-0.037773,-4.415276,-0.000415,3.472679,0.337722,...,0.015789,0.129915,0.130169,0.012618,0.008772,-8289.332031,-2178.856445,-6110.475586,0.084546,0.084646
2,1550459000.0,3,0.11976,5.926267,4.789822,-0.039035,-4.446284,-0.000905,3.337064,0.323239,...,0.007895,0.130132,0.130053,0.009326,0.010526,-8283.580078,-2168.016357,-6115.563477,0.084176,0.084289
3,1550459000.0,4,0.119144,5.915076,5.400952,-0.03991,-4.456356,0.000332,3.301514,0.319289,...,0.013158,0.130038,0.130428,0.010753,0.004825,-8270.451172,-2162.177246,-6108.273926,0.08362,0.083609
4,1550459000.0,5,0.118754,5.9684,5.378855,-0.040439,-4.443341,0.000478,3.18894,0.319947,...,0.018421,0.12998,0.130111,0.01163,0.009649,-8805.15332,-2303.972168,-6501.181152,0.083298,0.083285


In [4]:
def extract_columns(file, logs_root='../results/logs/'):
    """Parse the ``key=value`` directory names of a log path into a dict.

    Parameters
    ----------
    file : str
        Path of an event file whose parent directories are named
        ``key=value`` (for example ``.../kfold=2/batch_size=10/...``).
    logs_root : str, optional
        Prefix removed from ``file`` before parsing. Defaults to the
        ``'../results/logs/'`` directory used by this notebook.

    Returns
    -------
    dict
        One ``key -> value`` entry (both strings) per ``key=value`` directory,
        plus a ``'path'`` entry holding ``file`` with ``logs_root`` removed.
    """
    relative_path = file.replace(logs_root, '')

    # A key may contain neither '/' nor '=', and a value may not contain '/',
    # so each match is confined to a single directory name. This keeps keys
    # clean even when the path does not start with `logs_root` (otherwise the
    # first key would swallow the leading directories).
    pairs = re.findall(r'([^/=]+)=([^/]+)/', relative_path)

    columns = dict(pairs)
    columns['path'] = relative_path
    return columns

def populate_columns(file, dataframe):
    """Attach the settings encoded in a log path to every row of a frame.

    Each entry returned by ``extract_columns(file)`` becomes a column of
    ``dataframe`` holding that one value in all rows. The frame is modified
    in place and the same object is returned.
    """
    metadata = extract_columns(file)

    for name in metadata:
        # Assigning a scalar broadcasts it down the whole column.
        dataframe[name] = metadata[name]

    return dataframe

In [5]:
def generate_logs_files(files):
    """Build one combined table from a list of event files.

    Every file is read with ``extract_information`` and annotated with the
    settings from its path via ``populate_columns``; a progress line is
    printed per file. The per-file frames are stacked with ``pd.concat``,
    so each file's own row index is kept in the result.
    """
    frames = []

    for index, file in enumerate(files):
        print(f'{index+1} of {len(files)}')
        frames.append(populate_columns(file, extract_information(file)))

    return pd.concat(frames)

# Parse every discovered event file into one combined table (prints one progress line per file).
data = generate_logs_files(files)

1 of 5
2 of 5
3 of 5
4 of 5
5 of 5


In [6]:
# Label the index 'evaluate'. generate_logs_files stacks the per-file frames with
# pd.concat without resetting the index, so these values restart at 0 for each
# log file and are not unique across the combined table.
data.index.name = 'evaluate'
data.head(5)

Unnamed: 0_level_0,wall_time,step,measure/reconstruction/error,measure/activation/mean,measure/activation/std,measure/parameters/W/mean,measure/parameters/b_h/mean,measure/parameters/b_v/mean,measure/reconstruction_1/hamming,details/measure/top-1/evaluate-0/train,...,kfold-intern,batch_size,class,visible_size,hidden_size,regularization,learning_rate,sampling_method,momentum,path
evaluate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,1550459000.0,1,0.122436,5.907176,5.125681,-0.037394,-4.359916,-0.001174,3.81501,0.331139,...,0,10,RBMCF,702,500,NoRegularization-0.0,ConstantLearningRate-0.2,CD-1,1,kfold=2/kfold-intern=0/batch_size=10/class=RBM...
1,1550459000.0,2,0.120661,5.980908,5.227418,-0.037773,-4.415276,-0.000415,3.472679,0.337722,...,0,10,RBMCF,702,500,NoRegularization-0.0,ConstantLearningRate-0.2,CD-1,1,kfold=2/kfold-intern=0/batch_size=10/class=RBM...
2,1550459000.0,3,0.11976,5.926267,4.789822,-0.039035,-4.446284,-0.000905,3.337064,0.323239,...,0,10,RBMCF,702,500,NoRegularization-0.0,ConstantLearningRate-0.2,CD-1,1,kfold=2/kfold-intern=0/batch_size=10/class=RBM...
3,1550459000.0,4,0.119144,5.915076,5.400952,-0.03991,-4.456356,0.000332,3.301514,0.319289,...,0,10,RBMCF,702,500,NoRegularization-0.0,ConstantLearningRate-0.2,CD-1,1,kfold=2/kfold-intern=0/batch_size=10/class=RBM...
4,1550459000.0,5,0.118754,5.9684,5.378855,-0.040439,-4.443341,0.000478,3.18894,0.319947,...,0,10,RBMCF,702,500,NoRegularization-0.0,ConstantLearningRate-0.2,CD-1,1,kfold=2/kfold-intern=0/batch_size=10/class=RBM...


In [7]:
# Persist the combined table as CSV in the current working directory; the
# 'evaluate' index is written out as the first column.
# Alternative output file names, left disabled:
#data.to_csv('extracted_log.csv')
#data.to_csv('extracted_log-rbm_rbm-cf_final.csv')
data.to_csv('extracted_log-rbm-cf_final.csv')