In [2]:
import wandb
import sys
import matplotlib.pyplot as plt
import scprep
import pandas as pd
sys.path.append('../src/')
from evaluate import get_results
from omegaconf import OmegaConf
from main import load_data, make_model
from model import AEDist
import numpy as np
import os
import glob
import demap
from tqdm import tqdm
# from evaluation import compute_encoding_metrics, get_dataset_contents, get_noiseless_name, get_ambient_name, get_data_config, eval_results, compute_recon_metric
from evaluation import compute_all_metrics, get_noiseless_name, get_ambient_name, get_dataset_contents
from transformations import NonTransform

# Authenticate with Weights & Biases and open a handle on its public API.
wandb.login()
api = wandb.Api()

# Coordinates of the sweep whose runs are evaluated in this notebook.
entity, project, sweep_id = "xingzhis", "dmae", 'nj0245gm'

# Resolve the sweep from its "<entity>/<project>/<sweep_id>" path.
sweep = api.sweep(f"{entity}/{project}/{sweep_id}")

# Ids of all runs that belong to the sweep.
run_ids = list(sweep_run.id for sweep_run in sweep.runs)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mxingzhis[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [3]:
# Open one converted toy dataset archive so its contents can be inspected.
data = np.load(file='../toy_data/converted/make_branch.npz')

In [4]:
# Show the names of the arrays stored in the loaded archive.
data.files

['data_gt',
 'colors',
 'data',
 'rotation_matrix',
 'is_train',
 'dist_all',
 'dist',
 'phate']

In [5]:
# Evaluate every run of the sweep: load its local checkpoint, compute the
# metrics, and add a DEMaP score for the PHATE embedding stored with the data.
results = []

# Loop-invariant locations.
folder_path = "../src/wandb/"
data_root = '../toy_data/converted/'
# No separate noiseless file is passed; the ground truth used for the PHATE
# score below is read from the 'data_gt' array of the dataset archive.
noiseless_path = ''

# Read the run collection once instead of re-reading `sweep.runs` every iteration.
sweep_runs = sweep.runs

for run in tqdm(sweep_runs, total=len(sweep_runs)):
    cfg = OmegaConf.create(run.config)

    # Locate the local wandb folder of this run and a checkpoint inside it.
    run_dirs = glob.glob(f"{folder_path}*{run.id}*")
    ckpt_files = glob.glob(f"{run_dirs[0]}/files/*.ckpt") if run_dirs else []
    if not ckpt_files:
        # Skip the run. Falling through would either raise NameError (first
        # iteration) or silently score the previous run's checkpoint under
        # this run's configuration.
        print(f"No checkpoint found for run {run.id}")
        continue
    ckpt_path = ckpt_files[0]

    data_path = os.path.join(data_root, cfg.data.name + cfg.data.filetype)
    ambient_path = os.path.join(data_root, get_ambient_name(cfg.data.name) + '.npy')

    model = AEDist.load_from_checkpoint(ckpt_path)
    res_dict = compute_all_metrics(model, data_path, noiseless_path, ambient_path, w_gt=True)
    res_dict['dist_weight'] = cfg.model.dist_reconstr_weights

    # DEMaP of the stored PHATE embedding against the ground-truth data,
    # restricted to the rows that are not flagged `is_train`.
    # NOTE: allow_pickle=True can execute code embedded in the file; only use
    # it on archives produced by this project.
    with np.load(data_path, allow_pickle=True) as data_noisy:
        test_mask = ~data_noisy['is_train']
        x_noiseless = data_noisy['data_gt'][test_mask]
        x_phate = data_noisy['phate'][test_mask]
    res_dict['demap_phate'] = demap.DEMaP(x_noiseless, x_phate)

    results.append(res_dict)


res_df = pd.DataFrame(results)
res_df.to_csv("toy_results.csv", index=False)

# Round the float64 columns to 3 decimals; every other column is carried over unchanged.
rounded_res_df = res_df.select_dtypes(include=['float64']).round(3)
for col in res_df.select_dtypes(exclude=['float64']).columns:
    rounded_res_df[col] = res_df[col]

# Restore the original column order before saving.
rounded_res_df = rounded_res_df[res_df.columns]
rounded_res_df.to_csv("toy_results_rounded.csv", index=False)

  0%|          | 0/12 [00:00<?, ?it/s]

  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
100%|██████████| 12/12 [00:10<00:00,  1.19it/s]


In [6]:
# Keep only the rows whose distance-reconstruction weights print as
# '[0.9, 0.1, 0]', then drop the columns that are not reported.
has_target_weights = res_df['dist_weight'].map(str) == '[0.9, 0.1, 0]'
res_df_filt = res_df.loc[has_target_weights].drop(columns=['accuracy', 'dist_weight', 'recon score'])

# Shorten each dataset path to a bare name: last '/'-separated component, text
# before the first '.', minus the first five characters (presumably the
# 'make_' prefix — confirm against the data files).
res_df_filt['data'] = res_df_filt['data'].map(lambda path: path.split('/')[-1].split('.')[0][5:])

In [7]:
# Display the filtered per-dataset scores.
res_df_filt

Unnamed: 0,data,demap,demap_phate
1,sphere_branch,0.850682,0.850746
3,mix_surface,0.791811,0.789818
4,mix_density_surface,0.864376,0.86411
6,intersection,0.8581,0.849697
8,clusters,0.782918,0.780779
10,branch,0.883762,0.880847


In [8]:
import wandb
import sys
import matplotlib.pyplot as plt
import scprep
import pandas as pd
sys.path.append('../src/')
from evaluate import get_results
from omegaconf import OmegaConf
import numpy as np
import os
import glob
import demap
from tqdm import tqdm
from evaluation import compute_all_metrics, get_noiseless_name, get_ambient_name
import torch
from model import AEProb, Decoder

class Model():
    """Bundle a separately built encoder and decoder behind one object.

    Provides `encode`, `decode` and `eval` plus a `device` attribute. Both
    sub-modules are moved to CUDA when it is available, otherwise to the CPU.
    """

    def __init__(self, encoder, decoder):
        """Pick the device and keep both modules after moving them onto it."""
        use_cuda = torch.cuda.is_available()
        self.device = torch.device('cuda' if use_cuda else 'cpu')
        self.encoder, self.decoder = encoder.to(self.device), decoder.to(self.device)

    def encode(self, x):
        """Return whatever the encoder's `encode` method produces for `x`."""
        encoded = self.encoder.encode(x)
        return encoded

    def decode(self, x):
        """Return the result of calling the decoder on `x`."""
        decoded = self.decoder(x)
        return decoded

    def eval(self):
        """Call `eval()` on the encoder and then on the decoder."""
        for module in (self.encoder, self.decoder):
            module.eval()

In [9]:
# Directory that contains one sub-folder per experiment; each sub-folder is
# expected to hold 'model.ckpt' and 'decoder.ckpt' (see the loading loop below).
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
root_path = '/gpfs/gibbs/pi/krishnaswamy_smita/dl2282/dmae/results/'

In [10]:
# Experiment folder names (relative to `root_path`) to evaluate. The loading
# loop parses them as sepa_<prob_method>_a<alpha>_knn<knn>_<noisy dataset name>.
data_paths = [
    'sepa_gaussian_jsd_a1.0_knn5_branch_noise0.1_seed1',
    'sepa_gaussian_jsd_a1.0_knn5_clusters_noise0.1_seed1',
    'sepa_gaussian_jsd_a1.0_knn5_intersection_noise0.1_seed1',
    'sepa_gaussian_jsd_a1.0_knn5_mix_density_surface_noise0.1_seed1',
    'sepa_gaussian_jsd_a1.0_knn5_mix_surface_noise0.1_seed1',
    'sepa_gaussian_jsd_a1.0_knn5_sphere_branch_noise0.1_seed1',
]

In [11]:
import re

# Folder-name layout: sepa_<prob_method>_a<alpha>_knn<knn>_<noisy dataset name>.
run_name_pattern = re.compile(r"sepa_(?P<prob_method>\w+)_a(?P<alpha>[\d.]+)_knn(?P<knn>\d+)_(?P<noisy_path>.+)")
# Trailing "_noise<level>_seed<n>" tag. It is stripped by pattern rather than by
# a fixed character count, so other noise levels or multi-digit seeds still parse.
noise_seed_suffix = re.compile(r"_noise[\d.]+_seed\d+$")

data_root = '../toy_data/converted/'

results = []
for data_path1 in tqdm(data_paths):
    if not data_path1.startswith('sepa_'):
        continue

    # Validate the folder name before touching any checkpoint. Without this
    # guard a non-matching name would reuse the previous iteration's values
    # (or raise NameError on the first iteration).
    match = run_name_pattern.search(data_path1)
    if match is None:
        print(f"Skipping {data_path1}: name does not match the expected pattern")
        continue
    prob_method = match.group("prob_method")
    alpha = match.group("alpha")
    knn = match.group("knn")
    noisy_path = match.group("noisy_path")

    # Dataset name = noisy name without its "_noise..._seed..." tag; a name
    # without that tag is used as-is.
    data_name = noise_seed_suffix.sub('', noisy_path)
    probmtd = prob_method

    # Load the saved state dicts onto the CPU so that checkpoints written on a
    # GPU machine can also be read on a CPU-only one; `Model` moves the modules
    # to the active device afterwards.
    enc_path = os.path.join(root_path, data_path1, 'model.ckpt')
    dec_path = os.path.join(root_path, data_path1, 'decoder.ckpt')
    encoder_dict = torch.load(enc_path, map_location='cpu')
    decoder_dict = torch.load(dec_path, map_location='cpu')

    data_path = os.path.join(data_root, 'make_' + data_name + '.npz')
    # NOTE(review): the helpers below receive the name without the 'make_'
    # prefix that `data_path` uses — confirm this is what they expect.
    noiseless_path = os.path.join(data_root, get_noiseless_name(data_name) + '.npz')
    ambient_path = os.path.join(data_root, get_ambient_name(data_name) + '.npy')

    # Rebuild the architecture with the hyper-parameters hard-coded here, then
    # restore the saved weights.
    encoder = AEProb(dim=100, emb_dim=2, layer_widths=[256, 128, 64], activation_fn=torch.nn.ReLU(), prob_method=probmtd, dist_reconstr_weights=[1.0,0.0,0.], )
    encoder.load_state_dict(encoder_dict)
    decoder = Decoder(dim=100, emb_dim=2, layer_widths=[256, 128, 64][::-1], activation_fn=torch.nn.ReLU())
    decoder.load_state_dict(decoder_dict)
    model = Model(encoder, decoder)

    res_dict = compute_all_metrics(model, data_path, noiseless_path, ambient_path, w_gt=True)
    res_dict['probmethod'] = probmtd
    res_dict['alpha'] = alpha
    res_dict['knn'] = knn

    results.append(res_dict)

res_df_aff = pd.DataFrame(results)
res_df_aff.to_csv("affinity_toy_results.csv", index=False)

# Round the float64 columns to 3 decimals; every other column is carried over unchanged.
rounded_res_df_aff = res_df_aff.select_dtypes(include=['float64']).round(3)
for col in res_df_aff.select_dtypes(exclude=['float64']).columns:
    rounded_res_df_aff[col] = res_df_aff[col]

# Restore the original column order before saving.
rounded_res_df_aff = rounded_res_df_aff[res_df_aff.columns]
rounded_res_df_aff.to_csv("affinity_toy_results_rounded.csv", index=False)


  0%|          | 0/6 [00:00<?, ?it/s]

100%|██████████| 6/6 [00:03<00:00,  1.88it/s]


In [12]:
# Drop the columns that are not reported for the affinity models.
res_df_aff_filt = res_df_aff.drop(columns=['accuracy', 'recon score', 'probmethod', 'alpha', 'knn'])

# Shorten each dataset path to a bare name: last '/'-separated component, text
# before the first '.', minus the first five characters.
res_df_aff_filt['data'] = res_df_aff_filt['data'].map(lambda path: path.split('/')[-1].split('.')[0][5:])

In [13]:
# Give the score columns their display labels (both frames are modified in place).
res_df_aff_filt.rename({'demap': 'Affi.'}, axis='columns', inplace=True)
res_df_filt.rename({'demap': 'Dist.', 'demap_phate': 'PHATE'}, axis='columns', inplace=True)

In [14]:
# Display the distance-model table with its renamed columns.
res_df_filt

Unnamed: 0,data,Dist.,PHATE
1,sphere_branch,0.850682,0.850746
3,mix_surface,0.791811,0.789818
4,mix_density_surface,0.864376,0.86411
6,intersection,0.8581,0.849697
8,clusters,0.782918,0.780779
10,branch,0.883762,0.880847


In [15]:
# Display the affinity-model table with its renamed column.
res_df_aff_filt

Unnamed: 0,data,Affi.
0,branch,0.900757
1,clusters,0.798298
2,intersection,0.733879
3,mix_density_surface,0.843479
4,mix_surface,0.816707
5,sphere_branch,0.869579


In [16]:
# Join the two score tables on the dataset name (default inner join).
merged_df = pd.merge(res_df_filt, res_df_aff_filt, on='data')


In [17]:
# Round every column after the first to 3 decimals, column by column.
merged_df.iloc[:, 1:] = merged_df.iloc[:, 1:].apply(pd.Series.round, decimals=3)

In [18]:
# Final column layout of the comparison table.
column_order = ['data', 'Affi.', 'Dist.', 'PHATE']
merged_df = merged_df.reindex(column_order, axis='columns')
