# Weights and Biases Logger

This notebook logs the evaluation metrics of every model on the dev set and the test set to Weights & Biases.

In [1]:
import os
from configparser import ConfigParser

import pandas as pd
import wandb

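The `.cfg` file is a config file that stores your personal W&B API key. This notebook reads `./notebook.cfg` and looks up the `auth_key` option in the `[wandb_api_key]` section, so use `wandb_api_key` as the section name.
The format inside the file looks like this: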
```
[<YOUR_API_KEY_NAME>]
auth_key: <HERE_IS_YOUR_API_KEY>
```
`.cfg` files are listed in `.gitignore`, so the file holding your key is not committed to the repository.

In [2]:
# Load the W&B API key from the local config file.
parser = ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list of
# files it actually parsed, so an empty result means the config file was not loaded.
loaded_files = parser.read("./notebook.cfg")
if not loaded_files:
    raise FileNotFoundError(
        "Could not read './notebook.cfg'; create it with a [wandb_api_key] "
        "section containing an 'auth_key' entry."
    )
wandb_api_auth_key = parser.get("wandb_api_key", "auth_key")
# Display only the key length as a sanity check, never the key itself.
len(wandb_api_auth_key)

40

In [3]:
# Authenticate with W&B using the key read from the config file above.
# As the last expression in the cell, the call's return value is displayed as the cell output.
wandb.login(key=wandb_api_auth_key)

wandb: Currently logged in as: minhng (bossy_beaver). Use `wandb login --relogin` to force relogin
wandb: Appending key for api.wandb.ai to your netrc file: C:\Users\Minh UBC\.netrc


True

# Validation set

In [4]:
# Weights & Biases destination used when logging runs from this notebook.
PROJECT_NAME = "BioLaySumm2024"  # wandb project name
ENTITY = "bossy_beaver"  # change to your wandb team name

# Directory that holds the score files to log.
# Alternative directory (currently disabled): './Proxy_val_Results'
PATH_TO_VAL_METRICS = "./data/output/mini_dev_set/scores/"

# Names of the score files for the validation runs.
VAL_METRICS_FILES = [
    "elife_scores.txt",
    "plos_scores.txt",
    "scores.txt",
]

In [12]:
# Example arguments for wandb_log_eval_metrics (kept commented out, for reference only):
# file = 'elife_scores.txt'
# data_src = 'eLife_dev'
# sample_portion = 0.1
# name = 'mixtral_api'
# tags = ['milestone4', 'mixtral_api', 'mini_dev_set']

In [6]:
def _parse_metric_lines(lines):
    """Parse ``name: value`` lines into a ``{name: float}`` dictionary.

    Blank lines are skipped. Every other line is split on its last ':' so the
    numeric value is always the trailing field.

    Raises:
        ValueError: if a non-blank line contains no ':' or its value is not a
            valid float.
    """
    metrics = {}
    for line_no, raw_line in enumerate(lines, start=1):
        text = raw_line.strip()
        if not text:
            # Tolerate empty/trailing lines instead of failing on the unpack.
            continue
        key, separator, value = text.rpartition(':')
        if not separator:
            raise ValueError(
                f"Line {line_no} is not in 'name: value' format: {text!r}"
            )
        metrics[key.strip()] = float(value)
    return metrics


def wandb_log_eval_metrics(file, data_src, sample_portion, name, tags, job_type='eval',
                           project='BioLaySumm2024', entity='bossy_beaver', metrics_dir=None):
    """Read a scores file and log its metrics as a single W&B run.

    Args:
        file: Name of the scores file; each non-blank line is ``name: value``.
        data_src: Label of the evaluated data, stored in the run config.
        sample_portion: Portion of the data that was evaluated, stored in the
            run config.
        name: Display name of the W&B run.
        tags: List of tags attached to the run.
        job_type: W&B job type of the run.
        project: W&B project that receives the run.
        entity: W&B user or team that owns the project.
        metrics_dir: Directory containing ``file``. When ``None`` the
            module-level ``PATH_TO_VAL_METRICS`` is used.

    Raises:
        FileNotFoundError: if the scores file does not exist.
        ValueError: if a line of the scores file cannot be parsed.
    """
    if metrics_dir is None:
        metrics_dir = PATH_TO_VAL_METRICS
    scores_path = os.path.join(metrics_dir, file)

    # Read and parse first, so a missing or malformed file never leaves an
    # empty run behind in the W&B project.
    with open(scores_path, 'r') as scores_file:
        metrics = _parse_metric_lines(scores_file)
    print(metrics)

    # Pass the run metadata as the run config so it is stored with the run.
    run_config = {'data_src': data_src, 'sample_portion': sample_portion}
    run = wandb.init(project=project, entity=entity, job_type=job_type,
                     tags=tags, name=name, config=run_config)
    try:
        run.log(metrics)
    finally:
        # Always close the run, even if logging fails.
        run.finish()


In [8]:
# eLife scores on a 10% sample of the dev set; the run is named and tagged as
# the Mixtral 8x7B Colab run.
wandb_log_eval_metrics(
    file='elife_scores.txt',
    data_src='eLife_dev',
    sample_portion=0.1,
    name='elife_mixtral_8x7b_colab',
    tags=['milestone5', 'elife', 'colab', 'mixtral_8x7b', 'mini_dev_set'],
)

{'ROUGE1': 0.35194966651419907, 'ROUGE2': 0.07484516647009286, 'ROUGEL': 0.32395962291791336, 'BERTScore': 0.828493575255076, 'FKGL': 15.308333333333332, 'DCRS': 10.618333333333334, 'CLI': 16.187083333333334, 'LENS': 59.7051124551227, 'AlignScore': 0.6490997783839703, 'SummaC': 0.5200653014083704}


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
AlignScore,▁
BERTScore,▁
CLI,▁
DCRS,▁
FKGL,▁
LENS,▁
ROUGE1,▁
ROUGE2,▁
ROUGEL,▁
SummaC,▁

0,1
AlignScore,0.6491
BERTScore,0.82849
CLI,16.18708
DCRS,10.61833
FKGL,15.30833
LENS,59.70511
ROUGE1,0.35195
ROUGE2,0.07485
ROUGEL,0.32396
SummaC,0.52007


In [9]:
# PLOS scores on 50 samples of the dev set (sample_portion=0.036); the run is
# named and tagged as the Mixtral 8x7B Colab run.
wandb_log_eval_metrics(
    file='plos_scores.txt',
    data_src='PLOS_dev',
    sample_portion=0.036,
    name='plos_mixtral_8x7b_colab',
    tags=['milestone5', 'plos', 'colab', 'mixtral_8x7b', 'mini_dev_set'],
)

{'ROUGE1': 0.4384269690268519, 'ROUGE2': 0.15129554115439517, 'ROUGEL': 0.40586138217870593, 'BERTScore': 0.8534935617446899, 'FKGL': 13.718, 'DCRS': 10.1466, 'CLI': 15.174599999999998, 'LENS': 60.27202732370523, 'AlignScore': 0.7503356927633286, 'SummaC': 0.6279243922233582}


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
AlignScore,▁
BERTScore,▁
CLI,▁
DCRS,▁
FKGL,▁
LENS,▁
ROUGE1,▁
ROUGE2,▁
ROUGEL,▁
SummaC,▁

0,1
AlignScore,0.75034
BERTScore,0.85349
CLI,15.1746
DCRS,10.1466
FKGL,13.718
LENS,60.27203
ROUGE1,0.43843
ROUGE2,0.1513
ROUGEL,0.40586
SummaC,0.62792


In [11]:
# Combined scores (scores.txt) on a 10% sample of the dev set; the run is named
# and tagged as the Mixtral 8x7B Colab run.
wandb_log_eval_metrics(
    file='scores.txt',
    data_src='combined_dev',
    sample_portion=0.1,
    name='combined_mixtral_8x7b_colab',
    tags=['milestone5', 'colab', 'mixtral_8x7b', 'mini_dev_set'],
)

{'ROUGE1': 0.3951883177705255, 'ROUGE2': 0.113070353812244, 'ROUGEL': 0.36491050254830965, 'BERTScore': 0.840993568499883, 'FKGL': 14.513166666666667, 'DCRS': 10.382466666666666, 'CLI': 15.680841666666666, 'LENS': 59.98856988941397, 'AlignScore': 0.6997177355736495, 'SummaC': 0.5739948468158642}


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
AlignScore,▁
BERTScore,▁
CLI,▁
DCRS,▁
FKGL,▁
LENS,▁
ROUGE1,▁
ROUGE2,▁
ROUGEL,▁
SummaC,▁

0,1
AlignScore,0.69972
BERTScore,0.84099
CLI,15.68084
DCRS,10.38247
FKGL,14.51317
LENS,59.98857
ROUGE1,0.39519
ROUGE2,0.11307
ROUGEL,0.36491
SummaC,0.57399


# Test Set

In [10]:
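# NOTE: the override below is commented out, so PATH_TO_VAL_METRICS still points at the
# dev-set scores directory set in the "Validation set" section. Re-enable it (and check
# that the path exists) before running the test-set cells; otherwise dev-set scores are
# logged under the test-set run names.
# PATH_TO_VAL_METRICS = './output/test_set/scores/'
# TEST_METRICS_FILES = ['elife_scores.txt', 'plos_scores.txt', 'scores.txt']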

In [21]:
# Disabled cell: the call is commented out, so the output kept below comes from an earlier execution.
# # elife dummy baseline on test set
# wandb_log_eval_metrics(file='elife_scores.txt', data_src='eLife_test', sample_portion=1, 
#                        name='elife_mixtral_8x7b_api', tags=['milestone4', 'api', 'mixtral_8x7b', 'test'])

{'ROUGE1': 0.36767078504764394, 'ROUGE2': 0.07775751179930965, 'ROUGEL': 0.33860339300850645, 'BERTScore': 0.8308823684933769, 'FKGL': 15.845643153526972, 'DCRS': 11.176182572614108, 'CLI': 17.12738589211618, 'LENS': 53.31051884333611, 'AlignScore': 0.8289315695584563, 'SummaC': 0.6359409374567483}


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
AlignScore,▁
BERTScore,▁
CLI,▁
DCRS,▁
FKGL,▁
LENS,▁
ROUGE1,▁
ROUGE2,▁
ROUGEL,▁
SummaC,▁

0,1
AlignScore,0.82893
BERTScore,0.83088
CLI,17.12739
DCRS,11.17618
FKGL,15.84564
LENS,53.31052
ROUGE1,0.36767
ROUGE2,0.07776
ROUGEL,0.3386
SummaC,0.63594


In [22]:
# PLOS scores on the full test set (sample_portion=1); the run is named and
# tagged as the Mixtral 8x7B API run.
# wandb_log_eval_metrics reads the file from PATH_TO_VAL_METRICS, so that must
# point at the test-set scores directory before this cell is run.
wandb_log_eval_metrics(
    file='plos_scores.txt',
    data_src='PLOS_test',
    sample_portion=1,
    name='plos_mixtral_8x7b_api',
    tags=['milestone4', 'api', 'mixtral_8x7b', 'test'],
)

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888278356, max=1.0…

{'ROUGE1': 0.4585145989579301, 'ROUGE2': 0.1567383636706995, 'ROUGEL': 0.4182240773887399, 'BERTScore': 0.8570368931477153, 'FKGL': 15.472238372093022, 'DCRS': 11.107834302325582, 'CLI': 16.737296511627907, 'LENS': 58.78431951188854, 'AlignScore': 0.8135919511848844, 'SummaC': 0.6438957830717744}


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
AlignScore,▁
BERTScore,▁
CLI,▁
DCRS,▁
FKGL,▁
LENS,▁
ROUGE1,▁
ROUGE2,▁
ROUGEL,▁
SummaC,▁

0,1
AlignScore,0.81359
BERTScore,0.85704
CLI,16.7373
DCRS,11.10783
FKGL,15.47224
LENS,58.78432
ROUGE1,0.45851
ROUGE2,0.15674
ROUGEL,0.41822
SummaC,0.6439


In [23]:
# Combined scores (scores.txt) on the full test set (sample_portion=1); the run
# is named and tagged as the Mixtral 8x7B API run.
# wandb_log_eval_metrics reads the file from PATH_TO_VAL_METRICS, so that must
# point at the test-set scores directory before this cell is run.
wandb_log_eval_metrics(
    file='scores.txt',
    data_src='combined_test',
    sample_portion=1,
    name='combined_mixtral_8x7b_api',
    tags=['milestone4', 'api', 'mixtral_8x7b', 'test'],
)

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888278356, max=1.0…

{'ROUGE1': 0.41309269200278703, 'ROUGE2': 0.11724793773500457, 'ROUGEL': 0.3784137351986232, 'BERTScore': 0.8439596308205461, 'FKGL': 15.658940762809998, 'DCRS': 11.142008437469844, 'CLI': 16.932341201872042, 'LENS': 56.047419177612326, 'AlignScore': 0.8212617603716703, 'SummaC': 0.6399183602642613}


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
AlignScore,▁
BERTScore,▁
CLI,▁
DCRS,▁
FKGL,▁
LENS,▁
ROUGE1,▁
ROUGE2,▁
ROUGEL,▁
SummaC,▁

0,1
AlignScore,0.82126
BERTScore,0.84396
CLI,16.93234
DCRS,11.14201
FKGL,15.65894
LENS,56.04742
ROUGE1,0.41309
ROUGE2,0.11725
ROUGEL,0.37841
SummaC,0.63992
