In [21]:
import wandb
import sys
import matplotlib.pyplot as plt
import scprep
import pandas as pd
sys.path.append('../../src/')
# from evaluate import get_results
from omegaconf import OmegaConf
# from main import load_data
from model2 import Autoencoder
import numpy as np
import os
import glob
import torch
import pathlib
from plotly3d.plot import scatter

def prepare_dm_data(cfg, save_path='../../data/diffusion_model_neurips/', folder_path="../../src/wandb/", run_id=None):
    """Encode a dataset with a run's trained autoencoder and save the latent codes.

    The checkpoint is located by globbing ``folder_path`` for a directory whose
    name contains the run id and taking the first ``*.ckpt`` under its
    ``files/`` sub-directory. The dataset named by ``cfg`` is then encoded and
    written to ``<save_path>/<cfg.data.name>_<cfg.dimensions.latent>_dm.npz``
    with two arrays: ``data`` (encoder output) and ``train_mask`` (the input
    file's ``is_train`` array).

    Args:
        cfg: Run configuration; ``cfg.data.root``, ``cfg.data.name``,
            ``cfg.data.filetype`` and ``cfg.dimensions.latent`` are read.
        save_path: Output directory; created (with parents) if missing.
        folder_path: Prefix under which local wandb run directories are searched.
        run_id: Id of the run whose checkpoint should be loaded. When omitted,
            the id of the notebook-level ``run`` variable is used, which is
            what this function always did before the parameter existed.

    Raises:
        FileNotFoundError: If no run directory or no checkpoint file matches.
    """
    if run_id is None:
        # Backward-compatible fallback: rely on the global `run` set by the calling cell.
        run_id = run.id

    run_pattern = f"{folder_path}*{run_id}*"
    folder_list = glob.glob(run_pattern)
    if not folder_list:
        raise FileNotFoundError(f"No wandb run directory matches {run_pattern!r}")

    ckpt_pattern = f"{folder_list[0]}/files/*.ckpt"
    ckpt_files = glob.glob(ckpt_pattern)
    if not ckpt_files:
        raise FileNotFoundError(f"No checkpoint file matches {ckpt_pattern!r}")
    ckpt_path = ckpt_files[0]

    data_path = os.path.join('../', cfg.data.root, cfg.data.name + cfg.data.filetype)
    # NOTE(review): allow_pickle=True executes pickled payloads on load; only use with trusted data files.
    data = np.load(data_path, allow_pickle=True)

    model = Autoencoder.load_from_checkpoint(ckpt_path)
    model.eval()
    with torch.no_grad():
        # Build the input directly on the model's device so the encoder can consume it.
        x = torch.tensor(data['data'], dtype=torch.float32, device=model.device)
        z = model.encoder(x)

    pathlib.Path(save_path).mkdir(exist_ok=True, parents=True)
    save_name = os.path.join(save_path, f'{cfg.data.name}_{cfg.dimensions.latent}_dm.npz')
    # Move the latent codes to host memory first: NumPy cannot serialise a tensor living on an accelerator.
    np.savez(save_name, data=z.detach().cpu().numpy(), train_mask=data['is_train'])

def visualize_dm_data(cfg, folder_path="../../src/wandb/", run_id=None):
    """Encode a dataset with a run's trained autoencoder and show a scatter plot of the latent codes.

    The checkpoint is located by globbing ``folder_path`` for a directory whose
    name contains the run id and taking the first ``*.ckpt`` under its
    ``files/`` sub-directory. Points are coloured by the data file's ``colors``
    array and the figure is titled with ``cfg.data.name``.

    Args:
        cfg: Run configuration; ``cfg.data.root``, ``cfg.data.name`` and
            ``cfg.data.filetype`` are read.
        folder_path: Prefix under which local wandb run directories are searched.
        run_id: Id of the run whose checkpoint should be loaded. When omitted,
            the id of the notebook-level ``run`` variable is used, which is
            what this function always did before the parameter existed.

    Raises:
        FileNotFoundError: If no run directory or no checkpoint file matches.
    """
    if run_id is None:
        # Backward-compatible fallback: rely on the global `run` set by the calling cell.
        run_id = run.id

    run_pattern = f"{folder_path}*{run_id}*"
    folder_list = glob.glob(run_pattern)
    if not folder_list:
        raise FileNotFoundError(f"No wandb run directory matches {run_pattern!r}")

    ckpt_pattern = f"{folder_list[0]}/files/*.ckpt"
    ckpt_files = glob.glob(ckpt_pattern)
    if not ckpt_files:
        raise FileNotFoundError(f"No checkpoint file matches {ckpt_pattern!r}")
    ckpt_path = ckpt_files[0]

    data_path = os.path.join('../', cfg.data.root, cfg.data.name + cfg.data.filetype)
    # NOTE(review): allow_pickle=True executes pickled payloads on load; only use with trusted data files.
    data = np.load(data_path, allow_pickle=True)

    model = Autoencoder.load_from_checkpoint(ckpt_path)
    model.eval()
    with torch.no_grad():
        # Build the input directly on the model's device so the encoder can consume it.
        x = torch.tensor(data['data'], dtype=torch.float32, device=model.device)
        z = model.encoder(x)

    # Hand the plotting helper a host-side NumPy array so it does not depend on the tensor's device.
    latent = z.detach().cpu().numpy()
    scatter(latent, data['colors'], s=2, title=cfg.data.name).show()

In [13]:
# Authenticate against Weights & Biases and open the public API client.
wandb.login()
api = wandb.Api()

# Coordinates of the hyper-parameter sweep analysed in this notebook.
entity = "xingzhis"
project = "dmae"
sweep_id = '8jfjcnrt'
sweep = api.sweep(f"{entity}/{project}/{sweep_id}")

# One flat record per run: summary metrics first, then config values
# (config wins on a key clash), and finally the run id.
runs_data = []
for run in sweep.runs:
    record = dict(run.summary._json_dict)
    record.update(run.config)
    record["run_id"] = run.id
    runs_data.append(record)

# Tabulate the sweep: each row is one run.
df = pd.DataFrame(runs_data)

In [14]:
# List the metric/config column names available in the per-run table.
df.columns

Index(['epoch', '_runtime', 'validation/loss', 'train/dist_loss_step',
       'validation/dist_loss', '_step', '_wandb', 'train/loss_epoch',
       'trainer/global_step', 'train/reconstr_loss_epoch', 'train/loss_step',
       'validation/reconstr_loss', '_timestamp', 'train/dist_loss_epoch',
       'train/reconstr_loss_step', 'data', 'fimm', 'loss', 'path', 'logger',
       'decoder', 'encoder', 'training', 'data.name', 'data.root',
       'cfg/fimm/t', 'dimensions', 'cfg/fimm/knn', 'cfg/path/log',
       'preprocessor', 'cfg/data/name', 'cfg/data/root', 'cfg/path/root',
       'training.mode', 'cfg/fimm/alpha', 'cfg/path/model', 'cfg/logger/tags',
       'cfg/training/lr', 'decoder.dropout', 'encoder.dropout',
       'cfg/data/filetype', 'cfg/logger/entity', 'cfg/training/clip',
       'cfg/training/mode', 'cfg/training/seed', 'dimensions.latent',
       'loss.weights.dist', 'cfg/fimm/bandwidth', 'cfg/logger/project',
       'cfg/training/clamp', 'decoder.batch_norm', 'encoder.batch_n

In [15]:
# For every (latent dimension, dataset) pair, pick the best run of the sweep
# and export the latent embedding produced by its checkpoint.
dims = df['dimensions.latent'].unique()
data_names = df['data.name'].unique()

# Index the sweep's runs by id once instead of re-scanning `sweep.runs` for every pair.
runs_by_id = {sweep_run.id: sweep_run for sweep_run in sweep.runs}
summary_cols = ['data.name', 'dimensions.latent', 'validation/reconstr_loss', 'validation/dist_loss', 'run_id']

dfs = []
for dim in dims:
    for data_name in data_names:
        candidates = df[(df['data.name'] == data_name) & (df['dimensions.latent'] == dim)]
        if candidates.empty:
            # A pair can have no runs (e.g. NaN keys from `unique()`, or a dataset that was
            # never trained at this latent size); skip it rather than fail on `.iloc[0]`.
            continue
        # Lowest validation reconstruction loss wins.
        # NOTE(review): ties are broken by the *larger* validation distance loss — confirm this is intended.
        ids = candidates.sort_values(
            by=['validation/reconstr_loss', 'validation/dist_loss'],
            ascending=[True, False],
        ).iloc[0][summary_cols]
        run_id = ids['run_id']
        # `run` must be bound at notebook level: prepare_dm_data(cfg) finds the checkpoint through it.
        run = runs_by_id[run_id]
        cfg = OmegaConf.create(run.config)
        prepare_dm_data(cfg)
        dfs.append(ids)

  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(


In [17]:
# Stack the selected per-pair records into one table (one row per record),
# order it by dataset and latent size, and write it to disk without the index.
best_runs_wide = pd.concat(dfs, axis=1)
df_name = best_runs_wide.T.sort_values(by=['data.name', 'dimensions.latent'])
df_name.to_csv('../../data/diffusion_model_neurips/differnt_dims.csv', index=False)

In [18]:
# Display the selected run for each dataset / latent-dimension pair.
df_name

Unnamed: 0,data.name,dimensions.latent,validation/reconstr_loss,validation/dist_loss,run_id
16,EB_DiffusionMap,2,0.857137,0.359082,ssiq1pox
15,EB_DiffusionMap,10,0.882789,0.97364,qkffbjya
12,EB_DiffusionMap,20,1.024727,1.576212,6pv8xm4h
11,EB_DiffusionMap,30,1.011602,1.440683,1fqcht0r
2,EB_heatgeo,2,0.71985,0.13804,6w4u7r6d
6,EB_heatgeo,10,0.63676,0.206583,ecpnjm7n
1,EB_heatgeo,20,0.634067,0.263051,6psrwwtf
0,EB_heatgeo,30,0.613053,0.274417,pgqfuqt0
7,EB_heatgeo_Denoise.5,2,0.690665,0.062644,iyf72ksn
4,EB_heatgeo_Denoise.5,10,0.671817,0.067602,bdu9h8kq


In [22]:
# Plot the latent embedding of the best 2-dimensional run for every dataset.
data_names = df['data.name'].unique()

# Index the sweep's runs by id once instead of re-scanning `sweep.runs` for every dataset.
runs_by_id = {sweep_run.id: sweep_run for sweep_run in sweep.runs}
summary_cols = ['data.name', 'dimensions.latent', 'validation/reconstr_loss', 'validation/dist_loss', 'run_id']

for dim in [2]:
    for data_name in data_names:
        candidates = df[(df['data.name'] == data_name) & (df['dimensions.latent'] == dim)]
        if candidates.empty:
            # A dataset can have no run at this latent size (or a NaN name from `unique()`);
            # skip it rather than fail on `.iloc[0]`.
            continue
        # Lowest validation reconstruction loss wins.
        # NOTE(review): ties are broken by the *larger* validation distance loss — confirm this is intended.
        ids = candidates.sort_values(
            by=['validation/reconstr_loss', 'validation/dist_loss'],
            ascending=[True, False],
        ).iloc[0][summary_cols]
        run_id = ids['run_id']
        # `run` must be bound at notebook level: visualize_dm_data(cfg) finds the checkpoint through it.
        run = runs_by_id[run_id]
        cfg = OmegaConf.create(run.config)
        visualize_dm_data(cfg)


Attribute 'preprocessor' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['preprocessor'])`.




Attribute 'preprocessor' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['preprocessor'])`.




Attribute 'preprocessor' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['preprocessor'])`.




Attribute 'preprocessor' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['preprocessor'])`.




Attribute 'preprocessor' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['preprocessor'])`.

