# Wandb Data Analysis

In [13]:
import wandb
import os

wandb.login(key=os.environ.get("WANDB_API_KEY"))

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/paul/.netrc


True

In [None]:
import pandas as pd

pull_data = False
PROJECT = "final-gs-on-a-budget"

api = wandb.Api()

runs = api.runs(f"{PROJECT}")

if pull_data:
    data = []
    for run in runs:
        row = {
            "run_id": run.id,
            "name": run.name,
            "state": run.state,
            "created_at": run.created_at,
            **run.config,  # Get run config parameters
            **run.summary,  # Get final metrics (like loss, accuracy, etc.)
        }
        data.append(row)

    df_runs = pd.DataFrame(data)

    df_runs.to_csv("data/wandb_runs.csv", index=False)

In [54]:
df_runs = pd.read_csv("data/wandb_runs.csv")

In [None]:
df_runs["dataset_name"] = df_runs["name"].apply(lambda x: x.split("-")[1])
df_runs["technique"] = df_runs["name"].apply(lambda x: x.split("-")[0])
df_runs["size"] = df_runs["name"].apply(lambda x: x.split("-")[2])

Index(['run_id', 'name', 'state', 'created_at', '_runtime', '_step',
       '_timestamp', '_wandb.runtime', 'cum_created', 'cum_deleted',
       ...
       'propagated_iteration_after', 'propagated_iteration_begin',
       'propagation_interval', 'sky_seg', 'sparse_loss', 'scene_scale_lr_init',
       'sparse_num', 'technique', 'size', 'dataset_name'],
      dtype='object', length=265)

### Which facets of the data have too many runs associated with them?

In [64]:
multiple_runs = df_runs.groupby(["dataset_name", "size", "technique"])["run_id"].count()
multiple_runs[multiple_runs > 1]

dataset_name  size      technique
truck         extended  default      2
                        mcmc         2
              high      default      2
                        mcmc         2
              low       default      3
                        mcmc         2
              medium    default      2
                        mcmc         2
Name: run_id, dtype: int64

### Which are the runs that I actually require to analyze?

In [73]:
def get_latest_run_per_facet(df: pd.DataFrame) -> pd.DataFrame:
    rows = []

    for dataset in ["truck", "room", "stump"]:
        for size in ["low", "medium", "high"]:
            for technique in df["technique"].unique():
                rows.append(
                    df[
                        (df["dataset_name"] == dataset)
                        & (df["size"] == size)
                        & (df["technique"] == technique)
                    ].sort_values("created_at", ascending=False).iloc[0]
                )
    
    return pd.concat(rows, axis=1).transpose()

runs_to_analyze = get_latest_run_per_facet(df_runs)
runs_to_analyze

Unnamed: 0,run_id,name,state,created_at,_runtime,_step,_timestamp,_wandb.runtime,cum_created,cum_deleted,...,propagated_iteration_after,propagated_iteration_begin,propagation_interval,sky_seg,sparse_loss,scene_scale_lr_init,sparse_num,technique,size,dataset_name
33,k22pnjso,mcmc-truck-low-1,finished,2025-01-17T00:57:34Z,1690.067736,30000,1737077063.969009,1690,226523,226523,...,,,,,,,,mcmc,low,truck
29,umm395kx,default-truck-low-1,finished,2025-01-16T15:38:55Z,1599.125979,30000,1737043454.283376,1599,131494,132039,...,,,,,,,,default,low,truck
7,a158f3953af4af37a3cabd915d4c2cee,mini_splatting-truck-low-1,finished,2024-12-26T17:10:59Z,1080.429874,30000,1735234138.871455,1080,115370,115458,...,,,,,,,,mini_splatting,low,truck
12,aff51c752c4f2374fadae3590499749a,mip_splatting-truck-low-1,finished,2024-12-26T17:13:00Z,992.711235,30000,1735234173.162223,992,63469,63630,...,,,,,,,,mip_splatting,low,truck
13,c2a846e0033d020c92f9a392156da6d8,eagles-truck-low-1,finished,2024-12-26T17:13:01Z,883.257063,30000,1735234063.7558,883,67173,77374,...,,,,,,,,eagles,low,truck
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
87,ea0f6936dcd8e24145ea33618dec612f,mini_splatting-stump-high-1,finished,2025-01-17T04:38:51Z,1221.053574,30000,1737276035.085856,37,2625081,1564631,...,,,,,,,,mini_splatting,high,stump
91,5a2745ca052b6fa3c131bb5974da9c3d,mip_splatting-stump-high-1,finished,2025-01-17T04:47:35Z,1417.500854,30000,1737288278.51617,43,1771279,711073,...,,,,,,,,mip_splatting,high,stump
71,4867c4da20de43a85bea5f43512e1a34,eagles-stump-high-1,finished,2025-01-17T03:44:29Z,1163.159359,30000,1737275953.933852,36,2119820,1215459,...,,,,,,,,eagles,high,stump
75,f5d19e83a1abc01e7247756e1bab37a9,gaussian_pro-stump-high-1,finished,2025-01-17T03:52:52Z,1490.511258,30000,1737087461.89266,1490,2049451,989297,...,12000.0,1000.0,20.0,False,False,,,gaussian_pro,high,stump


### Save the data of a single facet...

In [77]:
pull_data = True

if pull_data:
    runs = []
    for run_id in ["k22pnjso", "umm395kx"]:
        run = api.run(f"{PROJECT}/{run_id}")
        df_run = run.history()
        runs.append(df_run)
    
    df_run = pd.concat(runs, axis=0)

df_run

Unnamed: 0,train/ssim,test_full/psnr,train_every_5th/psnr,train_every_5th/ssim,_runtime,train/psnr,_step,cum_deleted,_timestamp,n_gaussians,test_full/ssim,cum_created
0,0.403687,,,,4.874918,10.663307,59,0,1.737075e+09,117856,,0
1,0.398398,,,,6.949347,12.319818,103,0,1.737075e+09,117856,,0
2,0.364860,,,,10.496916,10.955093,178,0,1.737075e+09,117856,,0
3,0.505366,,,,10.591166,11.418083,180,0,1.737075e+09,117856,,0
4,0.427552,,,,10.685318,11.587824,182,0,1.737075e+09,117856,,0
...,...,...,...,...,...,...,...,...,...,...,...,...
495,0.825604,,,,1511.225366,23.026986,29850,132039,1.737043e+09,117311,,131494
496,0.835190,,,,1512.539728,23.653066,29876,132039,1.737043e+09,117311,,131494
497,0.808983,,,,1515.771467,23.763862,29940,132039,1.737043e+09,117311,,131494
498,0.764916,,,,1517.281494,21.555316,29970,132039,1.737043e+09,117311,,131494
