# Code for fetching run information from wandb

This notebook retrieves information about the most recent runs from Weights & Biases (wandb). To use it, you need a wandb API key stored in a `.env` file under the name `WANDB_API_KEY`.

In [None]:
# Skip this cell if the packages are already installed in the kernel's environment.
%pip install wandb pandas python-dotenv

In [None]:
import os
import time
import pandas as pd
import wandb
import json
from dotenv import load_dotenv
# Load variables from the .env file into the process environment so that
# WANDB_API_KEY can be read from os.environ.
load_dotenv()

In [None]:
# Read the API key from the environment (None when WANDB_API_KEY is unset)
# and hand it to wandb for authentication.
wandb_api_key = os.getenv('WANDB_API_KEY')
wandb.login(key=wandb_api_key)

In [29]:
# Query the project's runs through the wandb public API, ordered by
# creation time in descending order (newest first).
wandb_api = wandb.Api()
runs = wandb_api.runs(
    'graphite-ai/Graphite-Subnet-V2',
    per_page=1000,
    order="-created_at",
)

In [30]:
# create dfs to store distance and reward info
columns = ['run_id', 'n_nodes', 'validator', 'created_at'] + [i for i in range(256)]
distance_df = pd.DataFrame(columns=columns)
rewards_df = pd.DataFrame(columns=columns)

In [31]:
def extract_run_info(attr):
    """Flatten a run's attribute mapping into its identifying fields.

    Args:
        attr: Mapping with the keys ``'displayName'``, ``'createdAt'`` and
            ``'name'``. ``'displayName'`` must hold a JSON object string
            containing ``'validator'`` and ``'n_nodes'``.

    Returns:
        dict with the keys ``'validator'``, ``'n_nodes'``, ``'created_at'``
        and ``'run_id'`` (in that order).

    Raises:
        KeyError: If any of the expected keys is missing.
        json.JSONDecodeError: If ``attr['displayName']`` is not valid JSON.
    """
    # The display name is a JSON-encoded config, not a plain label.
    display_config = json.loads(attr['displayName'])
    run_info = {field: display_config[field] for field in ('validator', 'n_nodes')}
    run_info['created_at'] = attr['createdAt']
    run_info['run_id'] = attr['name']
    return run_info

def extract_scoring_info(history):
    """Extract distance and reward values from a run's history frame.

    Scans ``history.columns`` for column names starting with ``'distance'``
    and ``'rewards'``. The values of a matching column are converted to
    ``float``, rounded to 5 decimal places and keyed by their row position.
    If several columns share a prefix, the last one in column order wins.

    Args:
        history: pandas DataFrame with string column names (the caller passes
            the result of ``run.history()``).

    Returns:
        Tuple ``(distances, rewards)`` of dicts mapping row position (int) to
        the rounded float value. A dict is empty when ``history`` has no
        column with the corresponding prefix, e.g. for an empty history.

    Raises:
        TypeError, ValueError: If a value in a matching column cannot be
            converted with ``float()``.
    """
    # Start from empty lists so a history without scoring columns yields
    # empty results instead of an UnboundLocalError at the return statement.
    distance_data = []
    reward_data = []
    for column in history.columns:
        if column.startswith('distance'):
            distance_data = [round(float(distance), 5) for distance in history[column].tolist()]
        elif column.startswith('rewards'):
            reward_data = [round(float(reward), 5) for reward in history[column].tolist()]
    return dict(enumerate(distance_data)), dict(enumerate(reward_data))

In [None]:
# get the most recent 100 runs; edit as per your needs
for i in range(100):
    try:
        print(f"extract run #{i}")
        run = runs.next()
        history = run.history()
        run_info = extract_run_info(run._attrs)
        distances, rewards = extract_scoring_info(history)
        distances.update(run_info)
        rewards.update(run_info)
        new_distance_row = pd.Series(distances).to_frame().T
        new_rewards_row = pd.Series(rewards).to_frame().T
        distance_df = pd.concat([distance_df, new_distance_row])
        rewards_df = pd.concat([rewards_df, new_rewards_row])
        time.sleep(1) # apply sleep to avoid exceeding rate limit. Feel free to adjust this to your needs
    except Exception as e:
        # break in the event of an error
        print(e)
        break

In [33]:
# Write both result frames to CSV; update the naming and pathing to your specific needs.
for frame, csv_path in (
    (distance_df, "recent_run_distance_history.csv"),
    (rewards_df, "recent_run_rewards_history.csv"),
):
    frame.to_csv(csv_path)