In [None]:
import os

import pandas as pd
import wandb
from azure.storage.blob import BlobServiceClient, ContentSettings
from dotenv import load_dotenv
from wandb.errors import CommError

In [2]:
# Load variables from a local .env file into the process environment before
# any os.getenv lookups (presumably where BLOB_CONNECTION_STRING is defined —
# confirm against the project's .env template).
load_dotenv()

True

## Azure BlobService Setup

In [3]:
# Azure Storage connection string, read from the environment so the secret is
# never stored in the notebook. os.getenv returns None when the variable is
# unset, in which case the client construction in the next cell will fail.
conn_string = os.getenv("BLOB_CONNECTION_STRING")
# Name of the blob container that receives the exported CSV files.
container_name = "afribench"

In [4]:
# Account-level client built from the connection string; used below only to
# obtain the container client.
blob_service_client = BlobServiceClient.from_connection_string(conn_string)

In [5]:
# Client scoped to the `container_name` container; every blob written by the
# upload loop is addressed through this client.
azure_container_client = blob_service_client.get_container_client(container_name)

In [5]:
# Create the target container on first use.
# `exists()` reports False only when the container is absent, so unrelated
# failures (bad credentials, network errors) surface as themselves instead of
# being mistaken for "container missing" by a blanket `except Exception`.
if not azure_container_client.exists():
    azure_container_client.create_container()

## W&B Setup

In [7]:
# W&B public API client; used by the upload loop to list runs per project and
# to fetch (and later delete) their table artifacts.
api = wandb.Api()

## AfriBench Upload

In [None]:
# AfriBench task identifiers. Each one maps to a W&B project named
# "afribench-<task>" and to a top-level folder in the blob container.
# Keep a trailing comma on every entry: adjacent string literals without a
# comma are silently concatenated into a single (non-existent) task name.
tasks = [
    "afrimgsm",
    "afrimmlu",
    "masakhanews",
    "afrixnli-direct",
    "sib",
    "afriqa",
    "belebele",
    "nollysenti",
    "afrisenti",
    "openaimmlu",
    "naijarc",
]

In [19]:
def _fetch_table_csv(artifact_path, table_name):
    """Fetch a W&B artifact and render one of its tables as CSV text.

    Args:
        artifact_path: Artifact reference passed to ``api.artifact``, e.g.
            ``"<entity>/<project>/run-<run id>-Results:latest"``.
        table_name: Name of the table entry to read from the artifact.

    Returns:
        A ``(artifact, csv_text)`` tuple: the artifact handle (so the caller
        can delete it afterwards) and the table serialised by
        ``DataFrame.to_csv()`` with default options.

    Raises:
        ValueError: If the artifact has no entry called ``table_name``.
        Any exception raised by the W&B API is propagated unchanged; the
        caller treats ``ValueError`` and ``CommError`` as "nothing to upload".
    """
    artifact = api.artifact(artifact_path)
    wandb_table = artifact.get(table_name)
    if wandb_table is None:
        # Surface a missing table as the same error type the caller already
        # skips on, rather than an AttributeError on `.data` below.
        raise ValueError(f"{artifact_path} has no table named {table_name!r}")
    frame = pd.DataFrame(data=wandb_table.data, columns=wandb_table.columns)
    return artifact, frame.to_csv()


def _upload_csv(blob_client, csv_text):
    """Write ``csv_text`` to the blob, tagging it with a ``text/csv`` content type."""
    blob_client.upload_blob(
        data=csv_text,
        overwrite=True,
        content_settings=ContentSettings(content_type="text/csv"),
    )


# Mirror every run's W&B tables into blob storage as
# "<task>/<run name>/<file>.csv". Blobs that already exist are left alone, so
# the cell can be re-run to pick up only new runs.
for task in tasks:
    wandb_project = f"african-research-collective/afribench-{task}"

    for run in api.runs(wandb_project):
        blob_prefix = f"{task}/{run.name}"

        # --- Summary "Results" table -------------------------------------
        results_blob = azure_container_client.get_blob_client(f"{blob_prefix}/results.csv")
        if not results_blob.exists():
            try:
                _, results_csv = _fetch_table_csv(
                    f"{wandb_project}/run-{run.id}-Results:latest",
                    "Results",
                )
            except (ValueError, CommError):
                # NOTE(review): this skips the rest of the run, including the
                # evaluation-results upload below — confirm that is intended.
                continue

            _upload_csv(results_blob, results_csv)

        # --- Full per-example evaluation table ---------------------------
        eval_blob = azure_container_client.get_blob_client(
            f"{blob_prefix}/complete_evaluation_results.csv"
        )
        if not eval_blob.exists():
            try:
                output_artifact, output_csv = _fetch_table_csv(
                    f"{wandb_project}/run-{run.id}-afribench_eval_results:latest",
                    "afribench_eval_results",
                )
            except (ValueError, CommError):
                continue

            _upload_csv(eval_blob, output_csv)
            # Reached only if the upload above did not raise: the artifact is
            # removed from W&B once its contents live in blob storage.
            output_artifact.delete(delete_aliases=True)

[34m[1mwandb[0m:   1 of 1 files downloaded.  
[34m[1mwandb[0m:   1 of 1 files downloaded.  
