In [None]:
import torch
import umap.umap_ as umap
import numpy as np
import pandas as pd
import sklearn
import umap.umap_ as umap
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns
import scipy

import scope
from scope import model
from scope import computations as cmp
from scope import clustering as cl
from scope import pre_match as pm
from scope import visualizations as vi

import psutil
import pynvml
import time

import random
def setup_seed(seed: int = 42, deterministic: bool = False):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (the default generator, plus the CUDA generators when CUDA is available).

    Args:
        seed: Value handed to each seeding call.
        deterministic: When True, also set the cuDNN backend flags
            ``deterministic=True``, ``benchmark=False`` and ``enabled=False``.
    """
    # Host-side generators, in the same order as before: stdlib, NumPy, PyTorch.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

    if not deterministic:
        return

    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.enabled = False
        
# Seed all generators once in the first cell, leaving the cuDNN flags untouched.
setup_seed(42)


In [None]:
class PerformanceMonitor:
    """Lap timer that also samples process CPU memory and GPU memory.

    Each call to :meth:`start` or :meth:`lap` appends a snapshot to
    ``self.laps``; :meth:`get_results` turns those snapshots into a table in
    which every row shows the time elapsed since the previous snapshot.

    Attributes are documented inline in ``__init__``.

    NOTE(review): GPU memory is read via ``nvmlDeviceGetMemoryInfo(...).used``
    for device index 0; this looks like device-wide usage rather than this
    process's share -- confirm against the NVML documentation.
    NOTE(review): if laps bracket asynchronous CUDA work, the recorded time
    may not include kernels still in flight -- consider synchronising before
    calling ``lap`` if exact GPU timings matter.
    """

    def __init__(self):
        # List of snapshot dicts with keys "name", "time", "cpu_mem", "gpu_mem".
        self.laps = []
        # True while an NVML session opened by this monitor is active.
        try:
            pynvml.nvmlInit()
            self.gpu_enabled = True
        except pynvml.NVMLError:
            print("Warning: Could not initialize NVML. GPU monitoring will be disabled.")
            self.gpu_enabled = False

    def _format_bytes(self, bytes_val):
        """Render a byte count as a ``KB``/``MB``/``GB`` string with two decimals.

        Args:
            bytes_val: Number of bytes, or ``None`` when no reading exists.

        Returns:
            ``"N/A"`` for ``None``; otherwise the value scaled to KB (below
            1 MiB), MB (below 1 GiB) or GB.
        """
        if bytes_val is None:
            return "N/A"
        if bytes_val < 1024**2:
            return f"{bytes_val/1024:.2f} KB"
        elif bytes_val < 1024**3:
            return f"{bytes_val/1024**2:.2f} MB"
        else:
            return f"{bytes_val/1024**3:.2f} GB"

    def get_cpu_mem(self):
        """Return the resident set size (bytes) of the current process."""
        return psutil.Process().memory_info().rss

    def get_gpu_mem(self):
        """Return the ``used`` memory (bytes) NVML reports for GPU index 0.

        Returns:
            The byte count, or ``None`` when GPU monitoring is disabled or the
            NVML query fails.
        """
        if not self.gpu_enabled:
            return None
        try:
            handle = pynvml.nvmlDeviceGetHandleByIndex(0)
            info = pynvml.nvmlDeviceGetMemoryInfo(handle)
            return info.used
        except pynvml.NVMLError:
            return None

    def _record(self, name):
        """Append one snapshot (monotonic time + memory readings) to ``self.laps``."""
        self.laps.append({
            "name": name,
            # Monotonic clock: only differences between laps are reported, and
            # the wall clock may jump (NTP/DST), which would corrupt durations.
            "time": time.perf_counter(),
            "cpu_mem": self.get_cpu_mem(),
            "gpu_mem": self.get_gpu_mem(),
        })

    def start(self, name="Start"):
        """Starts the monitor and records the initial state."""
        self._record(name)
        print(f"Monitoring started for '{name}'...")

    def lap(self, name):
        """Records an intermediate step (a 'lap')."""
        self._record(name)
        print(f"Lap '{name}' recorded.")

    def stop(self):
        """Stops the monitor and releases the NVML session, if one is open.

        Recorded laps are kept, so :meth:`get_results` still works afterwards.
        Laps recorded after ``stop()`` report no GPU memory. Calling ``stop()``
        more than once is safe.
        """
        if self.gpu_enabled:
            try:
                pynvml.nvmlShutdown()
            except pynvml.NVMLError:
                print("Warning: NVML shutdown failed.")
            self.gpu_enabled = False
        print("Monitoring stopped.")

    def get_results(self):
        """Calculates and returns the performance results as a DataFrame.

        Returns:
            A DataFrame with columns ``Step``, ``Time``, ``CPU Memory`` and
            ``GPU Memory`` (one row per snapshot, all values formatted as
            strings), or an empty DataFrame when nothing was recorded.
            ``Time`` is the duration since the previous snapshot; the first
            row is always ``0.00s``.
        """
        if not self.laps:
            return pd.DataFrame()

        rows = []
        previous_time = None
        for entry in self.laps:
            elapsed = 0.0 if previous_time is None else entry["time"] - previous_time
            previous_time = entry["time"]
            rows.append({
                "Step": entry["name"],
                "Time": f"{elapsed:.2f}s",
                "CPU Memory": self._format_bytes(entry["cpu_mem"]),
                "GPU Memory": self._format_bytes(entry["gpu_mem"]),
            })

        return pd.DataFrame(rows)

In [None]:
# Create the notebook-wide monitor and record the baseline snapshot that the
# first lap's duration is measured against.
monitor = PerformanceMonitor()
monitor.start("Experiment Run")

In [None]:
# --- Load the expression table and the per-row metadata ----------------------
# The first CSV column of expr.csv is dropped (presumably a saved index -- confirm).
expr = pd.read_csv('Data/2.2/expr.csv').iloc[:, 1:]
meta = pd.read_csv("Data/2.2/stateFate_inVitro_metadata.csv")

# --- Standardise every column, then project onto 50 principal components -----
scaler = StandardScaler().fit(expr)
xs = pd.DataFrame(scaler.transform(expr), index=expr.index, columns=expr.columns)
# NOTE(review): only `import sklearn` is visible at the top of the notebook;
# `sklearn.decomposition` resolving here presumably relies on another import
# having loaded that submodule -- confirm or import it explicitly.
pca = sklearn.decomposition.PCA(n_components=50)
xp = pd.DataFrame(pca.fit_transform(xs))

# 2-D UMAP model; it is fitted below on the PCA coordinates and reused for plots.
um = umap.UMAP(n_components=2, metric='euclidean', n_neighbors=50)

# --- One tensor of PCA coordinates per observed time point (2, 4, 6) ---------
data_day2, data_day4, data_day6 = (
    torch.from_numpy(xp[meta['Time point'] == day].values) for day in (2, 4, 6)
)

# UMAP is fitted on the three time points stacked in chronological order, so
# the rows of `xu` follow that order rather than the original row order of `xp`.
x_seq = torch.cat((data_day2, data_day4, data_day6))
xu = um.fit_transform(x_seq)

# Bundle every intermediate so the whole preprocessing state can be saved at once.
data = dict(
    expr=expr,
    meta=meta,
    scaler=scaler,
    xs=xs,
    pca=pca,
    xp=xp,
    um=um,
    xu=xu,
)
# torch.save(data,'weinreb.pt')
monitor.lap("Data Loaded & Prepared")

In [None]:

# Per-time-point tensors and their matching time labels; both lists are in the
# same (chronological) order and are reused by every later cell.
data_lst = [data_day2,data_day4,data_day6]
time_steps = [2,4,6]

# Cluster each time point on the GPU (inputs are moved with .cuda(), so this
# cell needs a CUDA device). NOTE(review): the exact meaning of tol/method/max_k
# is defined by scope.clustering.kmeans_auto -- confirm there.
cluster_centers, labels_list, best_k = cl.kmeans_auto([d.cuda() for d in data_lst],tol=1e-4,method='calinski_harabasz',max_k=8)
print(best_k)

monitor.lap("Clustered")

# Statistic computed from the CPU tensors; it is fed into the eps schedule below.
max_variance_differences = cmp.max_adjacent_covariance_diagonal_differences(data_lst)
print(max_variance_differences)

# scal = cmp.eps_scalar()
# Build the eps function later passed to the trainer as `eps`.
# NOTE(review): snr=0.95 / min_eps=0.1 semantics live in scope.computations -- confirm.
eps_func = cmp.piecewise_eps_function(time_steps, max_variance_differences,snr=0.95,min_eps=0.1) 

monitor.lap("Noise Strength Estimated")

In [None]:
# Build the graph over the cluster centers of all time points (centers are
# copied to NumPy on the CPU first), then plot it together with the raw data.
# The plot reuses the UMAP model fitted in the data-preparation cell and is
# written under ./weinreb_results/.
all_points_map, msf_edges = pm.calculate_evolutionary_graph([c.cpu().numpy() for c in cluster_centers],metric='euclidean')
vi.visualize_full_evolution(
    raw_data=[d.cpu().numpy() for d in data_lst],
    centers=[c.cpu().numpy() for c in cluster_centers],
    all_points_map=all_points_map,
    edges=msf_edges,
    time_labels=['Day ' + str(t) for t in time_steps],
    umap_model=um,
    save_path='./weinreb_results/evolution_graph'
)

In [None]:
# Assign every sample to one of the current (pre-simplification) centers of its
# time point and count samples per cluster; the counts feed the simplification.
label_list_for_train = [cl.assign_labels(d.cuda(),cluster_centers[i].cuda()) for i, d in enumerate(data_lst)]
populations_map = cmp.count_cluster_samples(label_list_for_train)

# WARNING: this cell rebinds all_points_map, cluster_centers and msf_edges,
# which it also reads. Re-running it on its own therefore operates on the
# already-simplified values; re-run from the graph-construction cell instead.
# NOTE(review): epsilon_merge=10 / theta_topo=0.5 semantics are defined by
# scope.pre_match.simplify_evolutionary_graph -- confirm there.
all_points_map, cluster_centers, msf_edges = pm.simplify_evolutionary_graph(all_points_map, populations_map, msf_edges, epsilon_merge=10, theta_topo=0.5, metric='euclidean')
vi.visualize_full_evolution(
    raw_data=[d.cpu().numpy() for d in data_lst],
    # The simplified centers are passed as returned (no .cpu().numpy() needed here).
    centers=[c for c in cluster_centers],
    all_points_map=all_points_map,
    edges=msf_edges,
    time_labels=['Day ' + str(t) for t in time_steps],
    umap_model=um,
    save_path='./weinreb_results/evolution_graph_simplified'
)
# Convert the simplified centers back to tensors so later cells can call .cuda() on them.
cluster_centers = [torch.from_numpy(c) for c in cluster_centers]

monitor.lap("Evolution Graph Constructed")

In [None]:
# Recompute the per-sample labels against the simplified cluster centers; these
# labels (not the pre-simplification ones) are what the trainer receives.
label_list_for_train = [cl.assign_labels(d.cuda(),cluster_centers[i].cuda()) for i, d in enumerate(data_lst)]


In [None]:

# Hyper-parameters forwarded to the trainer below.
steps = 128
eps = eps_func

# NOTE(review): eps_test is not used anywhere in the visible notebook.
eps_test = eps_func
# Construct the trainer from the per-time-point data, the eps function, the
# labels from the simplified clustering and the simplified graph edges.
# History is written under model_history/weinreb_prematched (save=True).
# NOTE(review): the remaining keyword arguments are defined by
# scope.model.sb_muti_model -- confirm their meaning there.
sb_object=model.sb_muti_model(data_lst,time_steps,N_pretraining=2,N_finetuning=2,backbone_lr=1e-3,finetuning_lr=1e-3,
                              steps=steps,eps=eps,early_stop=True,patience=8,B=128,lambda_=1e-3,save=True,record_gap=1,save_path='model_history/weinreb_prematched',
                              prematched=True, label_list=label_list_for_train, edges=msf_edges, weighting_strategy='enos', beta=0.999)
monitor.lap("Trainer Loaded")

In [None]:
# Run the trainer's backbone_train() stage, then record a lap so its duration
# and memory readings appear in the performance table.
sb_object.backbone_train()

monitor.lap("Backbone trained")

In [None]:
# Run the trainer's fine_tune() stage and record a lap for it.
# NOTE(review): the meaning of change=4 is defined by scope.model -- confirm.
sb_object.fine_tune(change=4)

monitor.lap("Fine Tuned")

In [None]:
# Evaluate sb_object.v_fore (presumably the backbone's forward model -- confirm)
# on the day-2 data, moved to the GPU and cast to float64.
fore_base=sb_object.eval_fore(data_day2.cuda().double(),sb_object.v_fore)

monitor.lap("Backbone Model Generation")

In [None]:
# Same evaluation as for the backbone, but using sb_object.v_fore_fine_tuned,
# again starting from the day-2 data on the GPU in float64.
fore_ft=sb_object.eval_fore(data_day2.cuda().double(),sb_object.v_fore_fine_tuned)

monitor.lap("Fine Tuned Model Generation")

In [None]:
monitor.stop()

# Build the per-lap table and print it under a banner line.
results_df = monitor.get_results()
print("\n--- Performance Results ---", results_df, sep="\n")

In [None]:
# Number of rows in the day-2 tensor.
print(data_day2.shape[0])