In [59]:
import json
import os
import glob
import pandas as pd
import plotly.express as px

In [60]:
# Root folder that every model's log pattern below is built from.
logs_dir = './logs'

# One entry per model: a display name plus a glob pattern that matches any
# JSON file sitting exactly one directory below that model's folder.
llama_1 = dict(name='llama-1', path='{}/llama/llama-1/*/*.json'.format(logs_dir))
llama_2 = dict(name='llama-2', path='{}/llama/llama-2/*/*.json'.format(logs_dir))

mistral = dict(name='mistral', path='{}/mistral/*/*.json'.format(logs_dir))

# Note: the phi-1_5 display name uses an underscore, its folder a hyphen.
phi_1 = dict(name='phi-1', path='{}/phi/phi-1/*/*.json'.format(logs_dir))
phi_1_5 = dict(name='phi-1_5', path='{}/phi/phi-1-5/*/*.json'.format(logs_dir))
phi_2 = dict(name='phi-2', path='{}/phi/phi-2/*/*.json'.format(logs_dir))

# Order here is the order in which models are processed downstream.
models = [
    llama_1,
    llama_2,
    mistral,
    phi_1,
    phi_1_5,
    phi_2,
]

In [61]:
# Peek at a single log file to see which keys the 'stats' entry of its first
# evaluation carries.
with open('logs/phi/phi-1-5/6gim80lk/evaluations.json', 'r') as sample_log:
    data = json.load(sample_log)

first_evaluation = data['evaluations'][0]
print(first_evaluation['stats'].keys())

dict_keys(['target_0', 'target_1', 'target_2', 'target_3'])


In [62]:
# Build one DataFrame per model holding, for every evaluation index, the mean
# of each score over all log files matched by the model's glob pattern.
#
# Result: `dfs` maps model name -> DataFrame with the columns
#   models, steps, eval_scores, move_scores, coin_scores, partial_coin_scores
#
# The mean at an evaluation index is taken over the files that actually
# contain that index, so the number of files and the number of evaluations
# per file are both free to vary.

# DataFrame column -> key read from every entry of an evaluation's 'stats'.
stat_columns = {
    'move_scores': 'move_percentage',
    'coin_scores': 'coins_percentage',
    'partial_coin_scores': 'partial_coins',
}

dfs = {}

for model in models:
    run_files = glob.glob(model['path'])
    if not run_files:
        # Nothing to average: a frame of zeros would plot like a real result.
        print('No evaluation logs match {!r}; skipping {}'.format(model['path'], model['name']))
        continue

    steps = []       # 'step' value reported at each evaluation index
    run_counts = []  # how many files contributed to each evaluation index
    totals = {
        'eval_scores': [],
        'move_scores': [],
        'coin_scores': [],
        'partial_coin_scores': [],
    }

    for run_file in run_files:
        with open(run_file, 'r') as eval_file:
            data = json.load(eval_file)

        for j, evaluation in enumerate(data['evaluations']):
            if j == len(steps):
                # First file to reach this index: grow every accumulator by one slot.
                steps.append(evaluation['step'])
                run_counts.append(0)
                for sums in totals.values():
                    sums.append(0)

            # As before, the step recorded for an index is the last one seen.
            steps[j] = evaluation['step']
            run_counts[j] += 1
            totals['eval_scores'][j] += evaluation['eval_score']

            # Each stat is first averaged over the entries of this evaluation's
            # 'stats' dict, then accumulated across files.
            stats = evaluation['stats']
            for column, stat_key in stat_columns.items():
                per_target = [target_stats[stat_key] for target_stats in stats.values()]
                totals[column][j] += sum(per_target) / len(per_target)

    # Divide each sum by the number of files that fed it (not by the number of
    # evaluation slots) to obtain the mean across files.
    means = {
        column: [total / count for total, count in zip(sums, run_counts)]
        for column, sums in totals.items()
    }

    dfs[model['name']] = pd.DataFrame({'models': [model['name']] * len(steps),
                                       'steps': steps,
                                       'eval_scores': means['eval_scores'],
                                       'move_scores': means['move_scores'],
                                       'coin_scores': means['coin_scores'],
                                       'partial_coin_scores': means['partial_coin_scores']
                                       })

In [63]:
# Stack the per-model frames into one long table; the 'models' column keeps
# track of which rows belong to which model.
model_frames = list(dfs.values())
df = pd.concat(model_frames)

In [64]:
# Line chart of the 'eval_scores' column against 'steps', one trace per model.
fig = px.line(
    df,
    x='steps',
    y='eval_scores',
    color='models',
    title='Mean eval score',
)
fig.show()

In [65]:
# Line chart of the 'move_scores' column against 'steps', one trace per model.
fig = px.line(
    df,
    x='steps',
    y='move_scores',
    color='models',
    title='Mean move score',
)
fig.show()

In [66]:
# Line chart of the 'coin_scores' column against 'steps', one trace per model.
fig = px.line(
    df,
    x='steps',
    y='coin_scores',
    color='models',
    title='Mean coin score',
)
fig.show()

In [67]:
# Line chart of the 'partial_coin_scores' column against 'steps', one trace
# per model.
fig = px.line(
    df,
    x='steps',
    y='partial_coin_scores',
    color='models',
    title='Mean partial coin score',
)
fig.show()