In [1]:
import os
import tensorflow as tf
from tensorflow.core.util import event_pb2
from tensorflow.python.lib.io import tf_record

2024-03-21 14:41:35.603747: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-03-21 14:41:35.607183: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-03-21 14:41:35.651784: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
logdir = "logs/"
models_files = [x for x in os.listdir(logdir) if "best_model" in x]
models_files

['angles_best_model_20240321_142727',
 'gradients_best_model_20240321_142340',
 'movement_vectors_best_model_20240321_143112',
 'positions_best_model_20240321_143507',
 'max_ang_diff_best_model_20240321_141225',
 'max_movement_vector_distance_normalised_best_model_20240321_141957',
 'max_grad_diff_best_model_20240321_123118',
 'max_movement_vector_distance_best_model_20240321_141610']

In [3]:
mappings = {"Training": "train_loss", "Validation": "val_loss", "Accuracy": "val_acc"}

In [4]:
def extract_event_values(file_path):
    values = []
    for record in tf_record.tf_record_iterator(file_path):
        event = event_pb2.Event()
        event.ParseFromString(record)
        for value in event.summary.value:
            values.append(value.simple_value)
    return values

In [5]:
def extract_model_data(model_name):
    subdirs = [x for x in os.listdir(logdir + model_name) if "events" not in x]
    model_vals = {}
    for _, key in mappings.items():
        model_vals[key] = []

    for subdir in subdirs:
        events_file = [
            x for x in os.listdir(logdir + model_name + "/" + subdir) if "events" in x
        ]
        model_value_to_add = mappings[subdir.split("_")[-1]]
        record_path = logdir + model_name + "/" + subdir + "/" + events_file[0]
        values = extract_event_values(record_path)
        model_vals[model_value_to_add] = values
    return model_vals

In [13]:
models_data = {}
for model_name in models_files:
    # for each model, get the subdirectories
    model_save_name = model_name.split("_best_model")[0]
    model_data = extract_model_data(model_name)
    models_data[model_save_name] = model_data

In [14]:
models_data

{'angles': {'train_loss': [0.03667757660150528,
   0.01722247153520584,
   0.025857824832201004,
   0.015292179770767689,
   0.021555617451667786,
   0.007667058147490025,
   0.011237910017371178,
   0.0016919811023399234,
   0.0026631378568708897,
   0.008064287714660168],
  'val_loss': [0.02829521894454956,
   0.09344732761383057,
   0.026611341163516045,
   0.02539271116256714,
   0.03230423107743263,
   0.06341363489627838,
   0.020828254520893097,
   0.0526580736041069,
   0.03684510290622711,
   0.06732960045337677],
  'val_acc': [0.9924721717834473,
   0.9804961681365967,
   0.9905902743339539,
   0.9929854869842529,
   0.9936698079109192,
   0.9856287240982056,
   0.9953806400299072,
   0.9948673844337463,
   0.9953806400299072,
   0.9950385093688965]},
 'gradients': {'train_loss': [0.044568371027708054,
   0.009702349081635475,
   0.03140696510672569,
   0.009983424097299576,
   0.03225817158818245,
   0.028800563886761665,
   0.001833591377362609,
   0.017099035903811455,
   

In [21]:
import pandas as pd

raw_data = pd.DataFrame(models_data).T
# remove [] from train_loss and val_loss and val_acc
raw_data["train_loss"].values

array([list([0.03667757660150528, 0.01722247153520584, 0.025857824832201004, 0.015292179770767689, 0.021555617451667786, 0.007667058147490025, 0.011237910017371178, 0.0016919811023399234, 0.0026631378568708897, 0.008064287714660168]),
       list([0.044568371027708054, 0.009702349081635475, 0.03140696510672569, 0.009983424097299576, 0.03225817158818245, 0.028800563886761665, 0.001833591377362609, 0.017099035903811455, 0.00426280265673995, 0.0009630948770791292]),
       list([0.028078412637114525, 0.0496220663189888, 0.02177427150309086, 0.014406715519726276, 0.007904350757598877, 0.013996398076415062, 0.0234769806265831, 0.027278725057840347, 0.02122286520898342, 0.0007057776092551649]),
       list([0.034043263643980026, 0.02789740450680256, 0.013502223417162895, 0.010805541649460793, 0.015522312372922897, 0.011969365179538727, 0.0053049116395413876, 0.012537915259599686, 0.009446930140256882, 0.059896472841501236]),
       list([0.026314791291952133, 0.013214362785220146, 0.02010416

In [26]:
data = raw_data.copy()
data["val_acc_mean"] = data["val_acc"].apply(lambda x: sum(x) / len(x))
data["val_acc_peak"] = data["val_acc"].apply(lambda x: max(x))
data.drop(columns=["val_acc", "train_loss", "val_loss"], inplace=True)
data["val_acc_peak"]

angles                                     0.995381
gradients                                  0.994354
movement_vectors                           0.993499
positions                                  0.995723
max_ang_diff                               0.996749
max_movement_vector_distance_normalised    0.994525
max_grad_diff                              0.997434
max_movement_vector_distance               0.996236
Name: val_acc_peak, dtype: float64

In [25]:
data.to_csv("metadata_models_val_data.csv")