In [1]:
from collections import defaultdict
import numpy as np
import pandas as pd
import wandb

from utils.metrics import compute_metrics

In [4]:
def _first_available(row, primary_key, fallback_key):
    """Return ``row[primary_key]``, falling back to ``row[fallback_key]``.

    Raises:
        KeyError: if neither key is present in ``row``.
    """
    try:
        return row[primary_key]
    except KeyError:
        return row[fallback_key]


def _is_missing(value):
    """Return True when ``value`` is ``None`` or a float NaN (an empty cell)."""
    # NaN is the only float that is not equal to itself.
    return value is None or (isinstance(value, float) and value != value)


def compute_miner_performance(
        wandb_validator_runs,
        miner_uid=None, 
        start_ts=None,
        end_ts=None):
    """Tally per-miner confusion counts from validator runs and turn them into metrics.

    Only runs whose ``state`` is ``'running'`` are read. Each history row is
    treated as one challenge with a ``label`` and parallel sequences of miner
    predictions (``pred`` or ``predictions``) and miner uids (``miner_uid`` or
    ``miner_uids``). A prediction counts as positive when it is ``> 0.5``;
    predictions equal to ``-1`` are ignored.

    Args:
        wandb_validator_runs: iterable of run objects exposing ``.state`` and
            ``.history()`` (the latter returning a DataFrame).
        miner_uid: if not None, only predictions for this uid are counted.
        start_ts: if not None, rows with ``_timestamp`` below it are skipped.
        end_ts: if not None, rows with ``_timestamp`` above it are skipped.

    Returns:
        pandas.DataFrame with one row per counted uid: a ``uid`` column plus
        the keys of the dict returned by ``compute_metrics``. Empty when no
        prediction was counted.

    Raises:
        KeyError: if a row within the time window lacks ``label`` or both
            spellings of the prediction / uid columns.
    """
    miner_perf = {}
    for run in wandb_validator_runs:
        if run.state != 'running':
            continue

        # NOTE(review): W&B documents run.history() as returning *sampled*
        # rows by default -- confirm whether scan_history() is wanted here.
        history_df = run.history()

        for _, challenge_row in history_df.iterrows():
            if start_ts is not None and challenge_row['_timestamp'] < start_ts:
                continue
            if end_ts is not None and challenge_row['_timestamp'] > end_ts:
                continue

            label = challenge_row['label']
            miner_preds = _first_available(challenge_row, 'pred', 'predictions')
            miner_uids = _first_available(challenge_row, 'miner_uid', 'miner_uids')

            if isinstance(miner_uids, dict):
                continue  # ignore improperly formatted instances

            # Rows that did not log predictions/uids show up as None/NaN
            # cells; zipping those would raise, so skip them.
            if _is_missing(miner_preds) or _is_missing(miner_uids):
                continue

            # Only binary labels can be scored. Skipping here also keeps
            # uids without any countable sample out of the result.
            if label not in (0, 1):
                continue

            # compute correctness of each miner prediction for the current challenge
            for pred, uid in zip(miner_preds, miner_uids):
                if miner_uid is not None and uid != miner_uid:
                    continue

                if pred == -1:
                    continue

                # Seed every bucket so compute_metrics always receives all
                # four counts, even for a miner that never hit one of them.
                counts = miner_perf.setdefault(
                    uid, {'tp': 0, 'fp': 0, 'tn': 0, 'fn': 0})

                predicted_positive = pred > 0.5
                if label == 1:
                    counts['tp' if predicted_positive else 'fn'] += 1
                else:
                    counts['fp' if predicted_positive else 'tn'] += 1

    flattened_metrics = [
        {'uid': uid, **compute_metrics(**counts)}
        for uid, counts in miner_perf.items()
    ]
    metrics_df = pd.DataFrame(flattened_metrics)

    return metrics_df


In [5]:
# Which W&B project to read validator runs from.
entity = "bitmindai"
project = "bitmind-subnet"

api = wandb.Api()
runs = api.runs("/".join((entity, project)))

# Example query: a single miner, from a fixed start time onwards.
metrics_df = compute_miner_performance(
    wandb_validator_runs=runs,
    miner_uid=0,                  # example uid
    start_ts=1724832296.0833144,  # example timestamp
    end_ts=None,
)

metrics_df

Unnamed: 0,uid,accuracy,precision,recall,f1_score,sample_size
0,0,0.845679,0.949943,0.731814,0.826733,2268
