In [1]:
import pandas as pd
import plotly.express as px
import re

In [22]:
# Identifiers of the checkpoints being compared.
models = [
    'indobert_base_p1',
    'indobert_base_p2',
]

In [23]:
# Read the per-model training-progress score files, p1 first and p2 second.
p1_df, p2_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        "result/training_progress_scores_p1.csv",
        "result/training_progress_scores_p2.csv",
    )
)

In [24]:
# Collect both score frames in one list, p1 first and p2 second.
dataset = [p1_df,p2_df]

In [25]:
# Tag every frame with the name of the model it belongs to.  `assign` returns
# a new frame, so the tagged copies are stored back into `dataset`; merely
# rebinding a loop variable would discard them.
dataset = [frame.assign(model=name) for frame, name in zip(dataset, models)]

In [26]:
# Add a constant `model` column identifying which checkpoint each frame holds.
p1_df, p2_df = (
    p1_df.assign(model="indobert_base_p1"),
    p2_df.assign(model="indobert_base_p2"),
)

In [27]:
# Remove rows that are exact duplicates of an earlier row in the same frame.
p1_df, p2_df = (frame.drop_duplicates() for frame in (p1_df, p2_df))

In [28]:
# Number the remaining rows 1..N in their current order and store that as the
# epoch (assumes one row per epoch after de-duplication — TODO confirm).
# `assign` builds a new frame rather than inserting a column into a frame that
# was derived by filtering, and `range` is accepted directly as column data.
p1_df = p1_df.assign(epoch=range(1, len(p1_df) + 1))
p2_df = p2_df.assign(epoch=range(1, len(p2_df) + 1))

In [29]:
# Sanity check: list the distinct epoch numbers present in the p1 frame.
pd.unique(p1_df['epoch'])

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10], dtype=int64)

In [30]:
# Re-create the list from the current p1_df and p2_df objects.
dataset = [p1_df,p2_df]

In [31]:
def _losses_to_long(scores):
    """Reshape one model's per-epoch scores into long form.

    Parameters
    ----------
    scores : pandas.DataFrame
        Frame with at least the columns ``epoch``, ``train_loss``,
        ``eval_loss`` and ``model``.

    Returns
    -------
    pandas.DataFrame
        One row per (epoch, loss kind) with the columns ``loss``, ``type``,
        ``epoch`` and ``model`` and a fresh 0..N-1 index.  For every epoch the
        ``train_loss`` row comes first, followed by the ``eval_loss`` row.
    """
    # Collapse to one row per epoch, taking the column-wise minimum.
    per_epoch = (
        scores[['epoch', 'train_loss', 'eval_loss', 'model']]
        .groupby('epoch')
        .min()
        .reset_index()
    )
    long_form = per_epoch.melt(
        id_vars=['epoch', 'model'],
        value_vars=['train_loss', 'eval_loss'],
        var_name='type',
        value_name='loss',
    )
    # melt stacks every train_loss row ahead of the eval_loss rows; a stable
    # sort on epoch interleaves them as train/eval pairs per epoch.
    long_form = long_form.sort_values('epoch', kind='stable').reset_index(drop=True)
    return long_form[['loss', 'type', 'epoch', 'model']]


# Long-form loss table for each model, in the same order as `dataset`.
# No global named `type` is created, so the builtin stays usable in later cells.
new_dataset = [_losses_to_long(scores) for scores in dataset]


In [32]:
# Stack the long-form frames of every model (not just the first two) into one
# table.  Each piece keeps its own index labels, so labels repeat across models.
new_df_loss = pd.concat(new_dataset)
new_df_loss

Unnamed: 0,loss,type,epoch,model
0,0.418477,train_loss,1,indobert_base_p1
1,0.32012,eval_loss,1,indobert_base_p1
2,0.223725,train_loss,2,indobert_base_p1
3,0.300937,eval_loss,2,indobert_base_p1
4,0.039448,train_loss,3,indobert_base_p1
5,0.211314,eval_loss,3,indobert_base_p1
6,0.133527,train_loss,4,indobert_base_p1
7,0.181528,eval_loss,4,indobert_base_p1
8,0.15291,train_loss,5,indobert_base_p1
9,0.245237,eval_loss,5,indobert_base_p1


In [33]:
# Read each model's score from the first line of its score file, convert it to
# a percentage rounded to two decimals, and attach it to that model's rows.
accuracy = []
for model in models:
    # Forward slash on purpose: "\s" inside a normal string literal is an
    # invalid escape sequence, and a backslash separator only resolves on
    # Windows.  "/" works on every platform and matches the other paths here.
    with open(f"result/score_{model}.txt") as file:
        first_line = file.readline()
    # The score is the first "<digit>.<digits>" token preceded by whitespace.
    match = re.search(r'\s+(\d\.\d+)', first_line)
    if match is None:
        raise ValueError(
            f"no score found on the first line of result/score_{model}.txt"
        )
    acc = round(float(match.group(1)) * 100, 2)
    accuracy.append(acc)
    print(accuracy)
    new_df_loss.loc[new_df_loss['model'] == model, 'acc'] = acc

[93.2]
[93.2, 91.34]


In [34]:
# Display the combined loss table to inspect the per-model `acc` column.
new_df_loss

Unnamed: 0,loss,type,epoch,model,acc
0,0.418477,train_loss,1,indobert_base_p1,93.2
1,0.32012,eval_loss,1,indobert_base_p1,93.2
2,0.223725,train_loss,2,indobert_base_p1,93.2
3,0.300937,eval_loss,2,indobert_base_p1,93.2
4,0.039448,train_loss,3,indobert_base_p1,93.2
5,0.211314,eval_loss,3,indobert_base_p1,93.2
6,0.133527,train_loss,4,indobert_base_p1,93.2
7,0.181528,eval_loss,4,indobert_base_p1,93.2
8,0.15291,train_loss,5,indobert_base_p1,93.2
9,0.245237,eval_loss,5,indobert_base_p1,93.2


In [35]:
# Put the scores in descending order (highest first) and show them.
accuracy = list(accuracy)
accuracy.sort(reverse=True)
accuracy

[93.2, 91.34]

In [51]:
# Order the rows from the highest-scoring model to the lowest.  The stable
# sort keeps each model's rows in their existing order, and a single sort
# cannot duplicate rows when two models share the same score (which filtering
# once per entry of `accuracy` would do).
new_df = (
    new_df_loss
    .sort_values('acc', ascending=False, kind='stable')
    .reset_index(drop=True)
)

In [52]:
# Confirm which models are present and in what order they appear.
pd.unique(new_df['model'])

array(['indobert_base_p1', 'indobert_base_p2'], dtype=object)

In [53]:
# Loss curves per epoch: one facet per model, one line per loss type.
fig = px.line(
    new_df, x='epoch', y='loss', color='type', markers=True,
    facet_col='model', facet_col_wrap=4, width=1700, height=800,
)
# Label each facet with its score.  `accuracy` is sorted from highest to
# lowest, which is the order the facets are expected to appear in (assumes
# `new_df` is ordered the same way — TODO confirm).
# x/y are fractions of the facet's axis domain: x is 0.5 for the first facet
# and 0.7 for the second, y is 0.7 for both.
# Subplot rows and columns are 1-based; this figure has a single facet row.
for facet_col, (score, x_pos) in enumerate(zip(accuracy, (0.5, 0.7)), start=1):
    fig.add_annotation(
        xref="x domain",
        yref="y domain",
        x=x_pos,
        y=0.7,
        text=f"F1 Score Best Model: {score}",
        row=1, col=facet_col,
    )
fig.show()

In [9]:
# Replace `new_df` with the contents of a semicolon-separated CSV file.
# NOTE(review): no visible cell writes result/training_progress.csv — confirm
# how and when that file is produced.
new_df = pd.read_csv('result/training_progress.csv', sep=';')

In [10]:
# Peek at the last five rows of the loaded table.
new_df.tail(5)

Unnamed: 0,loss,type,epoch,model,acc
35,0.114907,val_loss,6,indobert_base_p2,91.8
36,0.09563,val_loss,7,indobert_base_p2,91.8
37,0.091967,val_loss,8,indobert_base_p2,91.8
38,0.107147,val_loss,9,indobert_base_p2,91.8
39,0.084674,val_loss,10,indobert_base_p2,91.8


In [11]:
# Loss curves per epoch: one facet per model, one line per loss type.
fig = px.line(
    new_df, x='epoch', y='loss', color='type', markers=True,
    facet_col='model', facet_col_wrap=4, width=1700, height=800,
)
# One score per model, taken from the first row of each model in order of
# first appearance — the order in which the facets are laid out by default.
# This avoids relying on hard-coded row labels to reach the second model.
# assumes `acc` is constant within a model — TODO confirm
acc_per_model = new_df.drop_duplicates(subset='model')['acc']
# x/y are fractions of the facet's axis domain: x is 0.5 for the first facet
# and 0.7 for the second, y is 0.7 for both.
# Subplot rows and columns are 1-based; this figure has a single facet row.
for facet_col, (acc, x_pos) in enumerate(zip(acc_per_model, (0.5, 0.7)), start=1):
    fig.add_annotation(
        xref="x domain",
        yref="y domain",
        x=x_pos,
        y=0.7,
        text=f"F1 Score Best Model: {acc}",
        row=1, col=facet_col,
    )
fig.show()