# Logs analysis

In [1]:
%matplotlib inline
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
import pandas as pd
import re

## List all logs files

In [2]:
import glob
files = glob.glob('../results/logs/**/events.out.tfevents.*', recursive=True)

files[0:3]

['../results/logs/batch_size=100/class=RBMCF/visible_size=702/hidden_size=100/regularization=NoRegularization-0.0/learning_rate=ConstantLearningRate-0.01/sampling_method=CD-1/momentum=1/1542219439.61487/events.out.tfevents.1542219442.paulo-notebook',
 '../results/logs/batch_size=100/class=RBMCF/visible_size=702/hidden_size=100/regularization=NoRegularization-0.0/learning_rate=ConstantLearningRate-0.01/sampling_method=CD-1/momentum=0.9/1542219284.007691/events.out.tfevents.1542219286.paulo-notebook',
 '../results/logs/batch_size=100/class=RBMCF/visible_size=702/hidden_size=100/regularization=NoRegularization-0.0/learning_rate=ConstantLearningRate-0.01/sampling_method=CD-5/momentum=1/1542220276.1884933/events.out.tfevents.1542220279.paulo-notebook']

## Read a file

In [3]:
file = files[0]

def extract_information(file):
    ea = EventAccumulator(file)
    ea.Reload()

    train = pd.DataFrame(ea.Scalars('measure/evaluate/train'))
    validation = pd.DataFrame(ea.Scalars('measure/evaluate/validation'))
    return train, validation

train, validation = extract_information(file)
train.head()

Unnamed: 0,wall_time,step,value
0,1542219000.0,1,0.005898
1,1542219000.0,26,0.019753
2,1542219000.0,51,0.009191
3,1542219000.0,76,0.015775
4,1542219000.0,101,0.015912


In [4]:
def extract_columns(file):
    regex = r'(.+?)=(.+?)\/'
    test_str = file.replace('../results/logs/', '')
    matches = re.finditer(regex, test_str, re.MULTILINE)

    return {match.groups()[0]: match.groups()[1] for match in matches}

def populate_columns(file, dataframe, data_type):
    dataframe['data_type'] = data_type
    columns = extract_columns(file)
    
    for k, v in columns.items():
        dataframe[k] = v

    return dataframe

In [5]:
def generate_logs_files(files):
    trains = []
    validations = []

    for file in files:
        train, validation = extract_information(file)
        train = populate_columns(file, train, 'train')
        validation = populate_columns(file, train, 'validation')
        
        trains.append(train)
        validations.append(validation)
        
    trains = pd.concat(trains)
    validations = pd.concat(validations)
    
    return trains, validations

trains, validations = generate_logs_files(files)

In [6]:
trains.head(5)

Unnamed: 0,wall_time,step,value,data_type,batch_size,class,visible_size,hidden_size,regularization,learning_rate,sampling_method,momentum
0,1542219000.0,1,0.005898,validation,100,RBMCF,702,100,NoRegularization-0.0,ConstantLearningRate-0.01,CD-1,1
1,1542219000.0,26,0.019753,validation,100,RBMCF,702,100,NoRegularization-0.0,ConstantLearningRate-0.01,CD-1,1
2,1542219000.0,51,0.009191,validation,100,RBMCF,702,100,NoRegularization-0.0,ConstantLearningRate-0.01,CD-1,1
3,1542219000.0,76,0.015775,validation,100,RBMCF,702,100,NoRegularization-0.0,ConstantLearningRate-0.01,CD-1,1
4,1542219000.0,101,0.015912,validation,100,RBMCF,702,100,NoRegularization-0.0,ConstantLearningRate-0.01,CD-1,1
