# Download artifacts/data from wandb for figure generation

**Purpose:** This notebook downloads artifacts from Weights & Biases (wandb) to retrieve the results of the 10x10 K-fold cross-validations and of the ChemBERTa-version and loss-function sweeps.

**Dependency:** `hyperparameter_sweep.ipynb`, `Kfold_crossvalidation_sweep.ipynb`. This notebook requires that the results have already been logged to Weights & Biases: the ChemBERTa-version and loss-function results by `hyperparameter_sweep.ipynb`, and the 10x10 K-fold cross-validation results by `Kfold_crossvalidation_sweep.ipynb`.

**Consecutive scripts:** After running this notebook, the following notebook may be executed: `generate_figures_for_publication.ipynb`.

## Imports

In [1]:
import pandas as pd
import wandb
import json
import os
from tqdm.notebook import tqdm

## Download

In [2]:
def GetRuns(project, sweepid, endpoints):
    """Collect the runs of a wandb sweep whose ``endpoints`` config matches.

    Parameters
    ----------
    project : str
        Project path in the form ``<entity>/<project-name>``. A trailing
        slash is tolerated.
    sweepid : str
        Identifier of the sweep whose runs are listed.
    endpoints : object
        Value compared with ``==`` against each run's ``config['endpoints']``;
        only runs that match are returned.

    Returns
    -------
    pandas.DataFrame
        One row per matching run with the columns ``summary``, ``config``,
        ``name``, ``run_ids`` and ``sweepid`` (the same sweep id on every row).
    """
    api = wandb.Api()

    # Strip a trailing slash so the sweep path never contains an empty "//" segment.
    runs = api.sweep(project.rstrip('/') + '/' + sweepid).runs

    summary_list, config_list, name_list, ids = [], [], [], []
    for run in runs:
        # A run without an 'endpoints' entry cannot match; skip it instead of
        # letting a KeyError abort the whole listing.
        if 'endpoints' not in run.config or run.config['endpoints'] != endpoints:
            continue

        # .summary contains the output keys/values for metrics like accuracy.
        #  We call ._json_dict to omit large files
        summary_list.append(run.summary._json_dict)

        # .config contains the hyperparameters.
        #  We remove special values that start with _.
        config_list.append(
            {k: v for k, v in run.config.items() if not k.startswith('_')})

        # .name is the human-readable name of the run.
        name_list.append(run.name)
        ids.append(run.id)

    return pd.DataFrame({
        "summary": summary_list,
        "config": config_list,
        "name": name_list,
        "run_ids": ids,
        'sweepid': sweepid
        })

In [3]:
def LoadArtifact(wandbrun, runid, entity, project, artifact_name, fname, version, type, save_cls_embeddings):
    """Load a wandb run-table artifact as a DataFrame, downloading it if needed.

    If ``./artifacts/run-<runid>-<artifact_name>-<version>`` already exists it is
    read directly; otherwise the artifact is fetched through
    ``wandbrun.use_artifact(...).download()``.

    Parameters
    ----------
    wandbrun : object
        Active wandb run; only used (``use_artifact``) when no local copy exists.
    runid, entity, project, artifact_name, version : str
        Components of the artifact reference
        ``<entity>/<project>/run-<runid>-<artifact_name>:<version>``.
    fname : str
        Base name of the ``<fname>.table.json`` file inside the artifact directory.
    type : str
        Artifact type forwarded to ``use_artifact``.
    save_cls_embeddings : bool
        When falsy, the ``CLS_embeddings`` column is dropped as well.

    Returns
    -------
    pandas.DataFrame
        The table built from the JSON ``columns``/``data`` entries, minus a
        fixed set of columns (those that are absent are silently ignored).
    """
    local_dir = f'./artifacts/run-{runid}-{artifact_name}-{version}'
    if os.path.isdir(local_dir):
        table_path = f'{local_dir}/{fname}.table.json'
    else:
        artifact = wandbrun.use_artifact(f'{entity}/{project}/run-{runid}-{artifact_name}:{version}', type=type)
        artifact_dir = artifact.download()
        table_path = f'{artifact_dir}/{fname}.table.json'

    # Context manager so the file handle is closed even if parsing fails.
    with open(table_path, encoding='utf-8') as table_file:
        jsonfile = json.load(table_file)

    cols = jsonfile['columns']
    data = jsonfile['data']
    df = pd.DataFrame(data=data, columns=cols)

    columns = ['internal_id', 'Conc_sign', 'species_group', 'Pubchem_CID', 'xlogp', 'mw', 'Canonical_SMILES','Lineage','OneHotEnc_effect','OneHotEnc_endpoint']

    if not save_cls_embeddings:
        print('Dropping CLS embeddings\n')
        columns.append('CLS_embeddings')

    # errors='ignore' skips columns that are not present, replacing the former
    # per-column try/except that swallowed every exception.
    return df.drop(columns=columns, errors='ignore')

In [4]:
def CombineKFoldPredictions(wandbrun, runs_df, project, version, artifact_name, name, save_cls_embeddings, entity='ecotoxformer'):
    """Load the prediction table of every run in ``runs_df`` and stack them.

    Each run's table is loaded with ``LoadArtifact`` and tagged with the
    ``seed`` and ``fold_id`` taken from that run's ``config`` entry.

    Parameters
    ----------
    wandbrun : object
        Active wandb run, forwarded to ``LoadArtifact``.
    runs_df : pandas.DataFrame
        Must provide a ``run_ids`` column and a ``config`` column whose
        entries contain the keys ``'seed'`` and ``'fold_id'``.
    project, version, artifact_name : str
        Forwarded to ``LoadArtifact`` to identify the artifact of each run.
    name : str
        Base name of the table file, forwarded as ``fname``.
    save_cls_embeddings : bool
        Forwarded to ``LoadArtifact``.
    entity : str, optional
        wandb entity owning the project (default ``'ecotoxformer'``).

    Returns
    -------
    pandas.DataFrame
        All tables concatenated with a fresh index, with added ``seed`` and
        ``fold_id`` columns.

    Raises
    ------
    ValueError
        If ``runs_df`` has no rows.
    """
    if len(runs_df) == 0:
        raise ValueError('runs_df contains no runs to combine')

    # Iterate positionally so a non-default index on runs_df cannot break lookups.
    run_entries = zip(runs_df['run_ids'], runs_df['config'])

    fold_frames = []
    for run_id, config in tqdm(run_entries, total=len(runs_df)):
        fold_df = LoadArtifact(
            wandbrun = wandbrun,
            runid = run_id,
            entity = entity,
            project = project,
            artifact_name = artifact_name,
            fname = name,
            version = version,
            type = 'run_table',
            save_cls_embeddings = save_cls_embeddings)

        fold_df[['seed', 'fold_id']] = [config['seed'], config['fold_id']]
        fold_frames.append(fold_df)

    # Single concat at the end instead of re-copying the growing frame per run.
    return pd.concat(fold_frames, ignore_index=True)

def CombinePredictions(wandbrun, runs_df, project, version, artifact_name, name, save_cls_embeddings, entity='ecotoxformer'):
    """Load the prediction table of every run in ``runs_df`` and stack them.

    Each run's table is loaded with ``LoadArtifact`` and tagged with the
    ``base_model`` and ``loss_fun`` values of the corresponding ``runs_df`` row.

    Parameters
    ----------
    wandbrun : object
        Active wandb run, forwarded to ``LoadArtifact``.
    runs_df : pandas.DataFrame
        Must provide the columns ``run_ids``, ``base_model`` and ``loss_fun``.
    project, version, artifact_name : str
        Forwarded to ``LoadArtifact`` to identify the artifact of each run.
    name : str
        Base name of the table file, forwarded as ``fname``.
    save_cls_embeddings : bool
        Forwarded to ``LoadArtifact``.
    entity : str, optional
        wandb entity owning the project (default ``'ecotoxformer'``).

    Returns
    -------
    pandas.DataFrame
        All tables concatenated with a fresh index, with added ``base_model``
        and ``loss_fun`` columns.

    Raises
    ------
    ValueError
        If ``runs_df`` has no rows.
    """
    if len(runs_df) == 0:
        raise ValueError('runs_df contains no runs to combine')

    # Iterate positionally so a non-default index on runs_df cannot break lookups.
    run_entries = zip(runs_df['run_ids'], runs_df['base_model'], runs_df['loss_fun'])

    run_frames = []
    for run_id, base_model, loss_fun in tqdm(run_entries, total=len(runs_df)):
        run_frame = LoadArtifact(
            wandbrun = wandbrun,
            runid = run_id,
            entity = entity,
            project = project,
            artifact_name = artifact_name,
            fname = name,
            version = version,
            type = 'run_table',
            save_cls_embeddings = save_cls_embeddings)

        run_frame[['base_model', 'loss_fun']] = [base_model, loss_fun]
        run_frames.append(run_frame)

    # Single concat at the end instead of re-copying the growing frame per run.
    return pd.concat(run_frames, ignore_index=True)

In [5]:
# Authenticate with Weights & Biases before any API or artifact call is made.
wandb.login()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mstyrbjornkall[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [6]:
# Start a run whose handle is passed to LoadArtifact, which calls
# wandbrun.use_artifact(...) to download artifacts that are not yet on disk.
wandbrun = wandb.init(project="artifacts-analysis", job_type='tmp_run')

In [7]:
# Settings for the K-fold cross-validation download.
PROJECT_NAME = '100Fold_CV_RDKit_invertebrates'  # wandb project name (the entity is prepended where it is used)
SWEEP_ID = 'u1c9loib'
ENDPOINT = ['EC50','EC10','NOEC']  # only runs whose config['endpoints'] equals this list are selected
SAVE_CLS_EMBEDDINGS = False  # False -> the 'CLS_embeddings' column is dropped from every loaded table
ARTIFACT_NAME = 'BestValidationResults'
DOWNLOADED_ARTIFACT_NAME = 'Best Validation Results' #Split ARTIFACT_NAME by capital letters
ARTIFACT_VERSION = 'v0'
# NOTE(review): ENDPOINT includes 'NOEC' while the file name only mentions EC50/EC10 - confirm this is intended.
FILENAME = 'EC50EC10_invertebrates_withoverlap_predictions_100x_CV_RDKit' #Filename to which results will be saved

In [8]:
# GetRuns appends '/<sweepid>' itself, so the project path is passed without a
# trailing slash; otherwise the sweep path would contain an empty '//' segment.
runs_df = GetRuns(project=f"ecotoxformer/{PROJECT_NAME}", sweepid=SWEEP_ID, endpoints=ENDPOINT)

In [9]:
# Inspect the selected runs (one row per run returned by GetRuns) before downloading.
runs_df

Unnamed: 0,summary,config,name,run_ids,sweepid
0,{'Training df': {'path': 'media/table/Training...,"{'lr': 0.0002, 'seed': 44, 'label': 'mgperL', ...",silver-sweep-100,fr33fhmi,u1c9loib
1,"{'Learning Rate': 0, 'validation epoch': 39, '...","{'lr': 0.0002, 'seed': 43, 'label': 'mgperL', ...",neat-sweep-99,v6h9qwrx,u1c9loib
2,"{'training epoch': 39, 'Validation Median Loss...","{'lr': 0.0002, 'seed': 42, 'label': 'mgperL', ...",wandering-sweep-98,n5q9w1qg,u1c9loib
3,"{'Training Loss function': 0.3971881876731741,...","{'lr': 0.0002, 'seed': 41, 'label': 'mgperL', ...",fresh-sweep-97,wsql82ql,u1c9loib
4,"{'Training Loss function': 0.3986595672639934,...","{'lr': 0.0002, 'seed': 50, 'label': 'mgperL', ...",sage-sweep-96,9va03qnq,u1c9loib
...,...,...,...,...,...
95,"{'_step': 3602, 'Training df': {'nrows': 44358...","{'lr': 0.0002, 'seed': 45, 'label': 'mgperL', ...",swift-sweep-5,oicbyak6,u1c9loib
96,"{'Validation Mean Loss': 0.7832218408584595, '...","{'lr': 0.0002, 'seed': 43, 'label': 'mgperL', ...",stellar-sweep-3,k6rnxzu9,u1c9loib
97,{'Validation Loss function': 0.724690765142440...,"{'lr': 0.0002, 'seed': 44, 'label': 'mgperL', ...",young-sweep-4,ndyab7l8,u1c9loib
98,"{'Learning Rate': 0, 'Training Median Loss': 0...","{'lr': 0.0002, 'seed': 42, 'label': 'mgperL', ...",youthful-sweep-2,3v9gknqk,u1c9loib


K-Fold CV

In [10]:
# Load the validation table of every selected run (downloading those not yet
# under ./artifacts) and stack them into one frame tagged with seed and fold_id.
concatenated_results = CombineKFoldPredictions(
    wandbrun=wandbrun, 
    runs_df=runs_df,
    project=PROJECT_NAME, 
    version=ARTIFACT_VERSION, 
    artifact_name=ARTIFACT_NAME,
    name=DOWNLOADED_ARTIFACT_NAME,
    save_cls_embeddings=SAVE_CLS_EMBEDDINGS)

[34m[1mwandb[0m:   1 of 1 files downloaded.  


Dropping CLS embeddings



  0%|          | 0/99 [00:00<?, ?it/s]

[34m[1mwandb[0m:   1 of 1 files downloaded.  


Dropping CLS embeddings



[34m[1mwandb[0m:   1 of 1 files downloaded.  


Dropping CLS embeddings



[34m[1mwandb[0m:   1 of 1 files downloaded.  


Dropping CLS embeddings



[34m[1mwandb[0m:   1 of 1 files downloaded.  


Dropping CLS embeddings



[34m[1mwandb[0m:   1 of 1 files downloaded.  


Dropping CLS embeddings



[34m[1mwandb[0m:   1 of 1 files downloaded.  


Dropping CLS embeddings



[34m[1mwandb[0m:   1 of 1 files downloaded.  


Dropping CLS embeddings



[34m[1mwandb[0m:   1 of 1 files downloaded.  


Dropping CLS embeddings



[34m[1mwandb[0m:   1 of 1 files downloaded.  


Dropping CLS embeddings

Dropping CLS embeddings

Dropping CLS embeddings

Dropping CLS embeddings

Dropping CLS embeddings

Dropping CLS embeddings

Dropping CLS embeddings

Dropping CLS embeddings

Dropping CLS embeddings

Dropping CLS embeddings

Dropping CLS embeddings

Dropping CLS embeddings

Dropping CLS embeddings

Dropping CLS embeddings

Dropping CLS embeddings

Dropping CLS embeddings

Dropping CLS embeddings

Dropping CLS embeddings

Dropping CLS embeddings

Dropping CLS embeddings

Dropping CLS embeddings

Dropping CLS embeddings

Dropping CLS embeddings

Dropping CLS embeddings

Dropping CLS embeddings

Dropping CLS embeddings

Dropping CLS embeddings

Dropping CLS embeddings

Dropping CLS embeddings

Dropping CLS embeddings

Dropping CLS embeddings

Dropping CLS embeddings

Dropping CLS embeddings

Dropping CLS embeddings

Dropping CLS embeddings

Dropping CLS embeddings

Dropping CLS embeddings



[34m[1mwandb[0m: Downloading large artifact run-x420euz8-BestValidationResults:v0, 95.20MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:22.7


Dropping CLS embeddings



[34m[1mwandb[0m: Downloading large artifact run-4wowu0z7-BestValidationResults:v0, 67.76MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:15.0


Dropping CLS embeddings



[34m[1mwandb[0m: Downloading large artifact run-6jti1h9b-BestValidationResults:v0, 77.84MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:24.5


Dropping CLS embeddings



[34m[1mwandb[0m: Downloading large artifact run-f3lrqr95-BestValidationResults:v0, 73.91MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:19.7


Dropping CLS embeddings



[34m[1mwandb[0m: Downloading large artifact run-m0ktts82-BestValidationResults:v0, 58.09MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:19.6


Dropping CLS embeddings



[34m[1mwandb[0m: Downloading large artifact run-rbsozyqi-BestValidationResults:v0, 107.56MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:43.6


Dropping CLS embeddings



[34m[1mwandb[0m: Downloading large artifact run-st7wht7j-BestValidationResults:v0, 80.32MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:54.4


Dropping CLS embeddings



[34m[1mwandb[0m: Downloading large artifact run-x6rmbos8-BestValidationResults:v0, 77.58MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:22.6


Dropping CLS embeddings



[34m[1mwandb[0m: Downloading large artifact run-z7ov0t5l-BestValidationResults:v0, 61.41MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:38.4


Dropping CLS embeddings



[34m[1mwandb[0m: Downloading large artifact run-u1gmwyur-BestValidationResults:v0, 62.82MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:24.2


Dropping CLS embeddings



[34m[1mwandb[0m: Downloading large artifact run-7dtbxm3f-BestValidationResults:v0, 75.98MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:18.9


Dropping CLS embeddings



[34m[1mwandb[0m: Downloading large artifact run-lyxlv6v1-BestValidationResults:v0, 69.80MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:20.0


Dropping CLS embeddings



[34m[1mwandb[0m: Downloading large artifact run-ln8ayztz-BestValidationResults:v0, 63.10MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:22.6


Dropping CLS embeddings



[34m[1mwandb[0m: Downloading large artifact run-a45jyn24-BestValidationResults:v0, 77.69MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:35.8


Dropping CLS embeddings



[34m[1mwandb[0m: Downloading large artifact run-56yao1z1-BestValidationResults:v0, 80.85MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:33.0


Dropping CLS embeddings



[34m[1mwandb[0m: Downloading large artifact run-bjmhozaf-BestValidationResults:v0, 94.62MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:33.3


Dropping CLS embeddings



[34m[1mwandb[0m: Downloading large artifact run-nf2fbx2l-BestValidationResults:v0, 75.65MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:22.8


Dropping CLS embeddings



[34m[1mwandb[0m: Downloading large artifact run-n4osomxz-BestValidationResults:v0, 87.94MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:28.2


Dropping CLS embeddings



[34m[1mwandb[0m: Downloading large artifact run-ehnyus33-BestValidationResults:v0, 96.13MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:24.6


Dropping CLS embeddings



[34m[1mwandb[0m: Downloading large artifact run-8hpel387-BestValidationResults:v0, 79.67MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:22.9


Dropping CLS embeddings



[34m[1mwandb[0m: Downloading large artifact run-r91adcnk-BestValidationResults:v0, 88.62MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:19.6


Dropping CLS embeddings



[34m[1mwandb[0m: Downloading large artifact run-v7k5snb7-BestValidationResults:v0, 83.58MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:21.6


Dropping CLS embeddings



[34m[1mwandb[0m: Downloading large artifact run-buvq4os4-BestValidationResults:v0, 99.14MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:21.8


Dropping CLS embeddings



[34m[1mwandb[0m: Downloading large artifact run-2z7xuxle-BestValidationResults:v0, 69.40MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:13.4


Dropping CLS embeddings



[34m[1mwandb[0m: Downloading large artifact run-52dg3y86-BestValidationResults:v0, 78.71MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:15.3


Dropping CLS embeddings



[34m[1mwandb[0m: Downloading large artifact run-5doyc2u7-BestValidationResults:v0, 68.85MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:13.6


Dropping CLS embeddings



[34m[1mwandb[0m: Downloading large artifact run-z5t5plx6-BestValidationResults:v0, 59.85MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:11.8


Dropping CLS embeddings



[34m[1mwandb[0m: Downloading large artifact run-cesrafor-BestValidationResults:v0, 79.01MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:14.0


Dropping CLS embeddings



[34m[1mwandb[0m: Downloading large artifact run-ts5m43tu-BestValidationResults:v0, 85.23MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:17.3


Dropping CLS embeddings



[34m[1mwandb[0m: Downloading large artifact run-w3dju3cg-BestValidationResults:v0, 70.26MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:14.7


Dropping CLS embeddings



[34m[1mwandb[0m: Downloading large artifact run-6039q96g-BestValidationResults:v0, 72.93MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:14.7


Dropping CLS embeddings



[34m[1mwandb[0m: Downloading large artifact run-eccil7o9-BestValidationResults:v0, 67.80MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:14.3


Dropping CLS embeddings



[34m[1mwandb[0m: Downloading large artifact run-1dnjfe79-BestValidationResults:v0, 72.32MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:14.1


Dropping CLS embeddings



[34m[1mwandb[0m: Downloading large artifact run-dnx7f8wk-BestValidationResults:v0, 76.60MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:17.8


Dropping CLS embeddings



[34m[1mwandb[0m: Downloading large artifact run-mqd3gt0s-BestValidationResults:v0, 61.66MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:13.8


Dropping CLS embeddings



[34m[1mwandb[0m: Downloading large artifact run-mhw8srzq-BestValidationResults:v0, 85.66MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:17.6


Dropping CLS embeddings



[34m[1mwandb[0m: Downloading large artifact run-ty4g6cnm-BestValidationResults:v0, 108.31MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:24.1


Dropping CLS embeddings



[34m[1mwandb[0m: Downloading large artifact run-z5prnd3r-BestValidationResults:v0, 88.73MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:19.9


Dropping CLS embeddings



[34m[1mwandb[0m: Downloading large artifact run-boi9zwix-BestValidationResults:v0, 60.81MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:14.8


Dropping CLS embeddings



[34m[1mwandb[0m: Downloading large artifact run-fur9zgt4-BestValidationResults:v0, 72.62MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:17.0


Dropping CLS embeddings



[34m[1mwandb[0m: Downloading large artifact run-kwq2vbc9-BestValidationResults:v0, 83.19MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:17.7


Dropping CLS embeddings



[34m[1mwandb[0m: Downloading large artifact run-a9i2yyhe-BestValidationResults:v0, 84.38MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:21.0


Dropping CLS embeddings



[34m[1mwandb[0m: Downloading large artifact run-jglxs80o-BestValidationResults:v0, 65.67MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:13.9


Dropping CLS embeddings



[34m[1mwandb[0m: Downloading large artifact run-3aafd2rr-BestValidationResults:v0, 63.86MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:14.7


Dropping CLS embeddings



[34m[1mwandb[0m: Downloading large artifact run-4fdqlnzs-BestValidationResults:v0, 79.21MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:17.0


Dropping CLS embeddings



[34m[1mwandb[0m: Downloading large artifact run-7scrkhuy-BestValidationResults:v0, 88.69MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:17.3


Dropping CLS embeddings



[34m[1mwandb[0m: Downloading large artifact run-kjpak4p7-BestValidationResults:v0, 61.10MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:13.0


Dropping CLS embeddings



[34m[1mwandb[0m: Downloading large artifact run-muz6u0t7-BestValidationResults:v0, 79.37MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:14.6


Dropping CLS embeddings



[34m[1mwandb[0m: Downloading large artifact run-wqkg1pnx-BestValidationResults:v0, 62.10MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:13.4


Dropping CLS embeddings



[34m[1mwandb[0m: Downloading large artifact run-oicbyak6-BestValidationResults:v0, 90.81MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:18.8


Dropping CLS embeddings



[34m[1mwandb[0m: Downloading large artifact run-k6rnxzu9-BestValidationResults:v0, 66.11MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:19.9


Dropping CLS embeddings



[34m[1mwandb[0m: Downloading large artifact run-ndyab7l8-BestValidationResults:v0, 59.31MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:22.4


Dropping CLS embeddings



[34m[1mwandb[0m: Downloading large artifact run-3v9gknqk-BestValidationResults:v0, 97.71MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:26.7


Dropping CLS embeddings



[34m[1mwandb[0m: Downloading large artifact run-cj74v3kh-BestValidationResults:v0, 77.68MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:24.9


Dropping CLS embeddings



Base model runs

In [None]:
# NOTE: this cell overwrites `runs_df` and `concatenated_results` from the
# K-fold section. Set FILENAME to a base-model specific name before running the
# save cells, otherwise the K-fold result files are overwritten.
runs_df = pd.DataFrame(['56vivdmi','dl8u33ui','78lq56v1','128aaus7'], columns=['run_ids'])
runs_df['loss_fun'] = ['L1Loss','L1Loss','MSELoss','MSELoss']
runs_df['base_model'] = ['seyonec/SMILES_tokenized_PubChem_shard00_160k','seyonec/PubChem10M_SMILES_BPE_450k','seyonec/SMILES_tokenized_PubChem_shard00_160k','seyonec/PubChem10M_SMILES_BPE_450k']

# Artifacts are numbered 1..5 (BestValidationResults1 ... BestValidationResults5);
# the number is stored as fold_id.
fold_frames = []
for i in tqdm(range(5)):
    df = CombinePredictions(
        wandbrun,
        runs_df,
        'base_model_sweep_RDKit',
        'v0',
        f'BestValidationResults{i+1}',
        f'Best Validation Results {i+1}',
        # Required argument that was previously missing (the call raised TypeError).
        save_cls_embeddings=SAVE_CLS_EMBEDDINGS)
    df['fold_id'] = i+1
    fold_frames.append(df)

# Concatenate once instead of growing an initially empty DataFrame in the loop.
concatenated_results = pd.concat(fold_frames, ignore_index=True)

Save results

In [None]:
# Save the combined predictions as a zipped CSV. archive_name fixes the name of
# the file inside the archive to "<FILENAME>.csv" instead of leaving it to the
# pandas default, which in some versions reuses the "*.csv.zip" name.
concatenated_results.to_csv(
    f'../../data/results/{FILENAME}.csv.zip',
    index=False,
    compression={'method': 'zip', 'archive_name': f'{FILENAME}.csv'})

In [11]:
# Also save the combined predictions as a zip-compressed pickle next to the CSV.
concatenated_results.to_pickle(f'../../data/results/{FILENAME}.pkl.zip', compression='zip')