In [1]:
import json
import pandas as pd

# Load the best-hyperparameter summaries, one JSON file per project.
# All four files sit at the same relative location under a common root, so the
# paths are derived from the project directory names instead of being repeated.
RESULTS_ROOT = '/novo/users/iwaq/multi'
PROJECT_DIRS = [
    'project_allCT_1.0k',
    'project_GSE114412_cluster',
    'project_group_5_subset_76_ncells_7',
    'project_GSE114412_subcluster',
]


def _load_best_hyperparameters(project_dir):
    """Read and parse ``results/best_hyperparameters.json`` for one project.

    Parameters
    ----------
    project_dir : str
        Name of the project directory located directly under ``RESULTS_ROOT``.

    Returns
    -------
    The parsed JSON document, as returned by ``json.load``.

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If the file content is not valid JSON.
    """
    path = f'{RESULTS_ROOT}/{project_dir}/results/best_hyperparameters.json'
    # Explicit UTF-8: without it, open() decodes with the locale's preferred
    # encoding, which makes the result depend on the machine running the cell.
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)


# The names json1..json4 are kept because the code below refers to them.
json1, json2, json3, json4 = (
    _load_best_hyperparameters(project_dir) for project_dir in PROJECT_DIRS
)

# Flatten each loaded result document into one row of the summary table.
def _summary_row(result):
    """Return the summary-table row for one best-hyperparameters document.

    Looks up ``dataset`` and the ``best_pca`` / ``best_denoised`` entries of
    ``result`` and stores them under the column names used by the table.
    """
    return {
        'Dataset': result['dataset'],
        # PCA parameters
        'PCA Latent Dim': result['best_pca']['latent_dim'],
        'PCA cLISI': result['best_pca']['clisi_score'],
        # Denoised (LDM) parameters
        'LDM Latent Dim': result['best_denoised']['latent_dim'],
        'LDM Denoise Steps': result['best_denoised']['denoise_steps'],
        'LDM cLISI': result['best_denoised']['clisi_score'],
    }


# One row per document, in the order the documents were loaded.
data = [_summary_row(result) for result in (json1, json2, json3, json4)]

# Tabulate the rows; the columns follow the key order of the row dicts.
df = pd.DataFrame(data)

# Print the summary table framed by two rule lines.
rule_line = "=" * 100
print("\nBest Hyperparameters for Each Dataset:")
print(rule_line)
print(df.to_string(index=False))
print(rule_line)

# Print a per-dataset breakdown of the same values, plus the relative change
# of the LDM cLISI score with respect to the PCA cLISI score.
print("\n\nDetailed View:")
print(rule_line)
for _row_label, entry in df.iterrows():
    pca_score = entry['PCA cLISI']
    ldm_score = entry['LDM cLISI']
    # Signed percentage difference, using the PCA score as the baseline.
    change_pct = (ldm_score - pca_score) / pca_score * 100
    report_lines = [
        f"\nDataset: {entry['Dataset']}",
        "  PCA:",
        f"    Latent Dimension: {entry['PCA Latent Dim']}",
        f"    cLISI Score: {pca_score:.6f}",
        "  LDM (VAE + Denoising):",
        f"    Latent Dimension: {entry['LDM Latent Dim']}",
        f"    Denoise Steps: {entry['LDM Denoise Steps']}",
        f"    cLISI Score: {ldm_score:.6f}",
        f"    Improvement over PCA: {change_pct:+.2f}%",
    ]
    # A single print of the joined lines writes the same text as one print per line.
    print("\n".join(report_lines))


Best Hyperparameters for Each Dataset:
                   Dataset  PCA Latent Dim  PCA cLISI  LDM Latent Dim  LDM Denoise Steps  LDM cLISI
                allCT_1.0k              50   0.992848              50                 40   0.992972
         GSE114412_cluster              20   0.897482              50                 20   0.874853
group_5_subset_76_ncells_7              10   0.984038              40                 10   0.968242
      GSE114412_subcluster              20   0.945856              40                 10   0.930851


Detailed View:

Dataset: allCT_1.0k
  PCA:
    Latent Dimension: 50
    cLISI Score: 0.992848
  LDM (VAE + Denoising):
    Latent Dimension: 50
    Denoise Steps: 40
    cLISI Score: 0.992972
    Improvement over PCA: +0.01%

Dataset: GSE114412_cluster
  PCA:
    Latent Dimension: 20
    cLISI Score: 0.897482
  LDM (VAE + Denoising):
    Latent Dimension: 50
    Denoise Steps: 20
    cLISI Score: 0.874853
    Improvement over PCA: -2.52%

Dataset: group_