---
title: "load_wandb_artifact"
author: "Juma Shafara"
date: "2024-11-01"
---

In [None]:
#| hide

In [None]:
import wandb
import pandas as pd
import os
import shutil

def load_wandb_artifact(
        artifact_name: str = None, 
        entity: str = 'analytic',
        project: str = 'Risk Assessment', 
        version: str = "latest", 
        dataset: bool = True,
        download_dir: str = "artifacts", 
        download: bool = False) -> pd.DataFrame:
    """
    Download a WandB artifact and, if requested, load its first tabular
    file (.csv or .xlsx) into a DataFrame.

    The artifact is always downloaded to `download_dir` first. Unless
    `download` is True, the downloaded directory is removed again before
    the function returns (also when reading the file fails).

    Parameters:
    - artifact_name (str): The name of the artifact to download (required).
    - entity (str): Your WandB username or team name.
    - project (str): The WandB project name.
    - version (str): The version of the artifact to download (default is 'latest').
    - dataset (bool): Whether to look for a .csv/.xlsx file in the artifact
      and load it. When False nothing is read and None is returned.
    - download_dir (str): The local directory to save the downloaded artifact (default is 'artifacts').
    - download (bool): Whether to keep a copy of the artifact on local disk.
      When True, the path of the loaded file (or of the artifact directory
      if no file was loaded) is printed.

    Returns:
    - pd.DataFrame: The loaded dataset, or None when `dataset` is False or
      the artifact contains no .csv/.xlsx file at its top level.

    Raises:
    - ValueError: If `artifact_name` is missing or empty.
    """
    # Fail early instead of asking the API for an artifact literally named "None".
    if not artifact_name:
        raise ValueError("artifact_name is required")

    # Initialize WandB API
    api = wandb.Api()

    # Fetch the artifact
    artifact = api.artifact(f"{entity}/{project}/{artifact_name}:{version}")

    # Download the artifact to the specified directory
    artifact_dir = artifact.download(download_dir)

    # Defined up front so the return below is valid even when dataset=False
    # or no matching file exists.
    df = None
    loaded_path = None

    try:
        if dataset:
            # Sorted so the chosen file is deterministic when several
            # candidates exist (os.listdir order is arbitrary).
            for file in sorted(os.listdir(artifact_dir)):
                file_path = os.path.join(artifact_dir, file)
                lowered = file.lower()
                if lowered.endswith('.csv'):
                    df = pd.read_csv(file_path)
                elif lowered.endswith('.xlsx'):
                    df = pd.read_excel(file_path)
                else:
                    continue
                loaded_path = file_path
                break
    finally:
        # Clean up even if reading raised, so a failed load does not leave
        # a stray download behind when the caller asked for no local copy.
        if not download:
            shutil.rmtree(artifact_dir)

    if download:
        # Tell the caller where the kept copy lives.
        print(loaded_path if loaded_path is not None else artifact_dir)

    return df


In [None]:
#| hide
# Final notebook cell: call nbdev's export entry point.
import nbdev

nbdev.nbdev_export()