In [1]:
import scanpy as sc
import numpy as np
import os
from glob import glob

# -----------------------------
# Input & Output directories
# -----------------------------
input_dir = "dropout_h5ad"
output_dir = "imputed_mean"
os.makedirs(output_dir, exist_ok=True)

# -----------------------------
# Find all .h5ad files
# -----------------------------
files = glob(os.path.join(input_dir, "*.h5ad"))

# -----------------------------
# Mean Imputation for each file (zeros → mean)
# -----------------------------
for f in files:
    print(f"Processing {f} ...")
    # Load dropout dataset
    adata_missing = sc.read_h5ad(f)
    
    # Copy
    adata_mean = adata_missing.copy()
    
    # Convert sparse to dense (if needed)
    X = adata_mean.X.toarray() if not isinstance(adata_mean.X, np.ndarray) else adata_mean.X.copy()
    
    # Compute mean per gene (ignoring zeros)
    gene_means = np.true_divide(
        X.sum(axis=0),
        (X != 0).sum(axis=0),
        where=(X != 0).sum(axis=0) != 0  # avoid div/0
    )
    
    # Replace zeros with corresponding gene mean
    zero_inds = np.where(X == 0)
    X[zero_inds] = np.take(gene_means, zero_inds[1])
    
    # Assign back
    adata_mean.X = X
    
    # Save with matching name
    fname = os.path.basename(f).replace("adata_dropout", "adata_mean_imputed")
    outpath = os.path.join(output_dir, fname)
    adata_mean.write(outpath)
    
    print(f"Saved imputed file to: {outpath}")

print("All files processed with Mean Imputation (zeros → mean)")


Processing dropout_h5ad\adata_dropout_mf10_run1.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Saved imputed file to: imputed_mean\adata_mean_imputed_mf10_run1.h5ad
Processing dropout_h5ad\adata_dropout_mf10_run10.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Saved imputed file to: imputed_mean\adata_mean_imputed_mf10_run10.h5ad
Processing dropout_h5ad\adata_dropout_mf10_run2.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Saved imputed file to: imputed_mean\adata_mean_imputed_mf10_run2.h5ad
Processing dropout_h5ad\adata_dropout_mf10_run3.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Saved imputed file to: imputed_mean\adata_mean_imputed_mf10_run3.h5ad
Processing dropout_h5ad\adata_dropout_mf10_run4.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Saved imputed file to: imputed_mean\adata_mean_imputed_mf10_run4.h5ad
Processing dropout_h5ad\adata_dropout_mf10_run5.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Saved imputed file to: imputed_mean\adata_mean_imputed_mf10_run5.h5ad
Processing dropout_h5ad\adata_dropout_mf10_run6.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Saved imputed file to: imputed_mean\adata_mean_imputed_mf10_run6.h5ad
Processing dropout_h5ad\adata_dropout_mf10_run7.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Saved imputed file to: imputed_mean\adata_mean_imputed_mf10_run7.h5ad
Processing dropout_h5ad\adata_dropout_mf10_run8.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Saved imputed file to: imputed_mean\adata_mean_imputed_mf10_run8.h5ad
Processing dropout_h5ad\adata_dropout_mf10_run9.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Saved imputed file to: imputed_mean\adata_mean_imputed_mf10_run9.h5ad
Processing dropout_h5ad\adata_dropout_mf20_run1.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Saved imputed file to: imputed_mean\adata_mean_imputed_mf20_run1.h5ad
Processing dropout_h5ad\adata_dropout_mf20_run10.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Saved imputed file to: imputed_mean\adata_mean_imputed_mf20_run10.h5ad
Processing dropout_h5ad\adata_dropout_mf20_run2.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Saved imputed file to: imputed_mean\adata_mean_imputed_mf20_run2.h5ad
Processing dropout_h5ad\adata_dropout_mf20_run3.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Saved imputed file to: imputed_mean\adata_mean_imputed_mf20_run3.h5ad
Processing dropout_h5ad\adata_dropout_mf20_run4.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Saved imputed file to: imputed_mean\adata_mean_imputed_mf20_run4.h5ad
Processing dropout_h5ad\adata_dropout_mf20_run5.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Saved imputed file to: imputed_mean\adata_mean_imputed_mf20_run5.h5ad
Processing dropout_h5ad\adata_dropout_mf20_run6.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Saved imputed file to: imputed_mean\adata_mean_imputed_mf20_run6.h5ad
Processing dropout_h5ad\adata_dropout_mf20_run7.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Saved imputed file to: imputed_mean\adata_mean_imputed_mf20_run7.h5ad
Processing dropout_h5ad\adata_dropout_mf20_run8.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Saved imputed file to: imputed_mean\adata_mean_imputed_mf20_run8.h5ad
Processing dropout_h5ad\adata_dropout_mf20_run9.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")
  X[zero_inds] = np.take(gene_means, zero_inds[1])


Saved imputed file to: imputed_mean\adata_mean_imputed_mf20_run9.h5ad
Processing dropout_h5ad\adata_dropout_mf30_run1.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Saved imputed file to: imputed_mean\adata_mean_imputed_mf30_run1.h5ad
Processing dropout_h5ad\adata_dropout_mf30_run10.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Saved imputed file to: imputed_mean\adata_mean_imputed_mf30_run10.h5ad
Processing dropout_h5ad\adata_dropout_mf30_run2.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Saved imputed file to: imputed_mean\adata_mean_imputed_mf30_run2.h5ad
Processing dropout_h5ad\adata_dropout_mf30_run3.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Saved imputed file to: imputed_mean\adata_mean_imputed_mf30_run3.h5ad
Processing dropout_h5ad\adata_dropout_mf30_run4.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Saved imputed file to: imputed_mean\adata_mean_imputed_mf30_run4.h5ad
Processing dropout_h5ad\adata_dropout_mf30_run5.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")
  X[zero_inds] = np.take(gene_means, zero_inds[1])


Saved imputed file to: imputed_mean\adata_mean_imputed_mf30_run5.h5ad
Processing dropout_h5ad\adata_dropout_mf30_run6.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Saved imputed file to: imputed_mean\adata_mean_imputed_mf30_run6.h5ad
Processing dropout_h5ad\adata_dropout_mf30_run7.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Saved imputed file to: imputed_mean\adata_mean_imputed_mf30_run7.h5ad
Processing dropout_h5ad\adata_dropout_mf30_run8.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Saved imputed file to: imputed_mean\adata_mean_imputed_mf30_run8.h5ad
Processing dropout_h5ad\adata_dropout_mf30_run9.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")
  X[zero_inds] = np.take(gene_means, zero_inds[1])


Saved imputed file to: imputed_mean\adata_mean_imputed_mf30_run9.h5ad
All files processed with Mean Imputation (zeros → mean)


In [None]:
import scanpy as sc
import scvi
import os
from glob import glob

# -----------------------------
# Input & Output directories
# -----------------------------
input_dir = "dropout_h5ad"
output_dir = "imputed_scvi"
os.makedirs(output_dir, exist_ok=True)

# -----------------------------
# Find all .h5ad files
# -----------------------------
files = glob(os.path.join(input_dir, "*.h5ad"))

# -----------------------------
# scVI Imputation for each file
# -----------------------------
for f in files:
    print(f"Processing {f} ...")
    # Load dropout dataset
    adata_missing = sc.read_h5ad(f)
    
    # Copy
    adata_scvi = adata_missing.copy()
    
    # Setup for scVI
    scvi.model.SCVI.setup_anndata(adata_scvi)
    
    # Train model
    model = scvi.model.SCVI(adata_scvi)
    model.train(max_epochs=200)  # you can adjust based on dataset size
    
    # Get imputed / denoised expression
    adata_scvi.layers["scvi_imputed"] = model.get_normalized_expression(adata_scvi)
    
    # Save with matching name
    fname = os.path.basename(f).replace("adata_dropout", "adata_scvi_imputed")
    outpath = os.path.join(output_dir, fname)
    adata_scvi.write(outpath)
    
    print(f"Saved imputed file to: {outpath}")

print("All files processed with scVI Imputation")


ValueError: Unable to compare versions for packaging>=20.0: need=20.0 found=None. This is unusual. Consider reinstalling packaging.

In [15]:
!
!pip install packaging==23.2







In [2]:
import scanpy as sc
import numpy as np
import os
from glob import glob
from sklearn.experimental import enable_iterative_imputer  # noqa
from sklearn.impute import IterativeImputer

# -----------------------------
# Input & Output directories
# -----------------------------
input_dir = "dropout_h5ad"
output_dir = "imputed_iterative"
os.makedirs(output_dir, exist_ok=True)

files = glob(os.path.join(input_dir, "*.h5ad"))

# Configure imputer
imputer = IterativeImputer(
    max_iter=10,
    random_state=0,
    sample_posterior=False
)

for f in files:
    print(f"\nProcessing {f} ...")

    adata_missing = sc.read_h5ad(f)
    adata_missing.obs_names_make_unique()
    adata_missing.var_names_make_unique()

    adata_imp = adata_missing.copy()

    # Convert to dense
    X = adata_imp.X.toarray() if not isinstance(adata_imp.X, np.ndarray) else adata_imp.X.copy()

    # Replace ALL zeros with NaN (since in practice we don’t know which are dropout)
    X[X == 0] = np.nan

    # Drop genes with all NaN (cannot impute)
    valid_genes = ~(np.all(np.isnan(X), axis=0))
    dropped = np.where(~valid_genes)[0]
    if len(dropped) > 0:
        print(f"⚠️ Dropping {len(dropped)} all-zero genes")

    X_valid = X[:, valid_genes]

    # Iterative imputation
    X_imp_valid = imputer.fit_transform(X_valid)

    # Reconstruct full matrix
    X_imp = np.zeros_like(X)
    X_imp[:, valid_genes] = X_imp_valid
    # genes dropped remain zero

    # Save
    adata_imp.X = X_imp
    fname = os.path.basename(f).replace("adata_dropout", "adata_iterative_imputed")
    outpath = os.path.join(output_dir, fname)
    adata_imp.write(outpath)

    print(f"✅ Saved imputed file to: {outpath}")

print("\nAll files processed with Iterative Imputation (zeros → NaN → imputed)")



Processing dropout_h5ad\adata_dropout_mf10_run1.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


⚠️ Dropping 6 all-zero genes


KeyboardInterrupt: 

In [7]:
import scanpy as sc
import numpy as np
import os
from glob import glob
from sklearn.experimental import enable_iterative_imputer  # noqa
from sklearn.impute import IterativeImputer
from sklearn.ensemble import ExtraTreesRegressor

# -----------------------------
# Input & Output directories
# -----------------------------
input_dir = "dropout_h5ad"
output_dir = "imputed_iterative"
os.makedirs(output_dir, exist_ok=True)

files = glob(os.path.join(input_dir, "*.h5ad"))

# Iterative Imputer with faster estimator
imputer = IterativeImputer(
    estimator=ExtraTreesRegressor(n_estimators=10, random_state=0, n_jobs=-1),
    max_iter=2,
    random_state=0
)

for f in files:
    print(f"\nProcessing {f} ...")

    adata_missing = sc.read_h5ad(f)
    adata_missing.obs_names_make_unique()
    adata_missing.var_names_make_unique()

    # Copy for imputation
    adata_imp = adata_missing.copy()

    # Step 1: Identify HVGs
    sc.pp.highly_variable_genes(adata_imp, n_top_genes=200, flavor="cell_ranger")
    hvg_mask = adata_imp.var["highly_variable"].values
    print(f"Selected {hvg_mask.sum()} HVGs out of {adata_imp.n_vars} genes")

    # Step 2: Convert to dense
    X = adata_imp.X.toarray() if not isinstance(adata_imp.X, np.ndarray) else adata_imp.X.copy()

    # Step 3: Replace zeros with NaN (treat as missing)
    X[X == 0] = np.nan

    # Step 4: Impute only HVGs
    X_hvg = X[:, hvg_mask]
    valid_genes = ~(np.all(np.isnan(X_hvg), axis=0))
    X_hvg_valid = X_hvg[:, valid_genes]

    print(f"Running imputation on HVGs matrix {X_hvg_valid.shape} ...")
    X_hvg_imp_valid = imputer.fit_transform(X_hvg_valid)

    # Step 5: Reconstruct HVG matrix
    X_hvg_imp = np.zeros_like(X_hvg)
    X_hvg_imp[:, valid_genes] = X_hvg_imp_valid
    # dropped HVGs stay zero

    # Step 6: Insert HVGs back into full matrix
    X_final = X.copy()
    X_final[:, hvg_mask] = X_hvg_imp

    # Step 7: Assign back
    adata_imp.X = X_final

    # Save
    fname = os.path.basename(f).replace("adata_dropout", "adata_iterative_imputed")
    outpath = os.path.join(output_dir, fname)
    adata_imp.write(outpath)

    print(f"✅ Saved imputed file to: {outpath}")

print("\nAll files processed with Iterative Imputation (HVGs only)")



Processing dropout_h5ad\adata_dropout_mf10_run1.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Selected 200 HVGs out of 2000 genes
Running imputation on HVGs matrix (14746, 200) ...




✅ Saved imputed file to: imputed_iterative\adata_iterative_imputed_mf10_run1.h5ad

Processing dropout_h5ad\adata_dropout_mf10_run10.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Selected 200 HVGs out of 2000 genes
Running imputation on HVGs matrix (14746, 200) ...




✅ Saved imputed file to: imputed_iterative\adata_iterative_imputed_mf10_run10.h5ad

Processing dropout_h5ad\adata_dropout_mf10_run2.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Selected 200 HVGs out of 2000 genes
Running imputation on HVGs matrix (14746, 200) ...




✅ Saved imputed file to: imputed_iterative\adata_iterative_imputed_mf10_run2.h5ad

Processing dropout_h5ad\adata_dropout_mf10_run3.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Selected 200 HVGs out of 2000 genes
Running imputation on HVGs matrix (14746, 200) ...




✅ Saved imputed file to: imputed_iterative\adata_iterative_imputed_mf10_run3.h5ad

Processing dropout_h5ad\adata_dropout_mf10_run4.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Selected 200 HVGs out of 2000 genes
Running imputation on HVGs matrix (14746, 200) ...




✅ Saved imputed file to: imputed_iterative\adata_iterative_imputed_mf10_run4.h5ad

Processing dropout_h5ad\adata_dropout_mf10_run5.h5ad ...
Selected 200 HVGs out of 2000 genes


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Running imputation on HVGs matrix (14746, 200) ...




✅ Saved imputed file to: imputed_iterative\adata_iterative_imputed_mf10_run5.h5ad

Processing dropout_h5ad\adata_dropout_mf10_run6.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Selected 200 HVGs out of 2000 genes
Running imputation on HVGs matrix (14746, 200) ...




✅ Saved imputed file to: imputed_iterative\adata_iterative_imputed_mf10_run6.h5ad

Processing dropout_h5ad\adata_dropout_mf10_run7.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Selected 200 HVGs out of 2000 genes
Running imputation on HVGs matrix (14746, 200) ...




✅ Saved imputed file to: imputed_iterative\adata_iterative_imputed_mf10_run7.h5ad

Processing dropout_h5ad\adata_dropout_mf10_run8.h5ad ...
Selected 200 HVGs out of 2000 genes


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Running imputation on HVGs matrix (14746, 200) ...




✅ Saved imputed file to: imputed_iterative\adata_iterative_imputed_mf10_run8.h5ad

Processing dropout_h5ad\adata_dropout_mf10_run9.h5ad ...
Selected 200 HVGs out of 2000 genes


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Running imputation on HVGs matrix (14746, 200) ...




✅ Saved imputed file to: imputed_iterative\adata_iterative_imputed_mf10_run9.h5ad

Processing dropout_h5ad\adata_dropout_mf20_run1.h5ad ...
Selected 200 HVGs out of 2000 genes


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Running imputation on HVGs matrix (14746, 200) ...




✅ Saved imputed file to: imputed_iterative\adata_iterative_imputed_mf20_run1.h5ad

Processing dropout_h5ad\adata_dropout_mf20_run10.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Selected 200 HVGs out of 2000 genes
Running imputation on HVGs matrix (14746, 200) ...




✅ Saved imputed file to: imputed_iterative\adata_iterative_imputed_mf20_run10.h5ad

Processing dropout_h5ad\adata_dropout_mf20_run2.h5ad ...
Selected 200 HVGs out of 2000 genes


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Running imputation on HVGs matrix (14746, 200) ...




✅ Saved imputed file to: imputed_iterative\adata_iterative_imputed_mf20_run2.h5ad

Processing dropout_h5ad\adata_dropout_mf20_run3.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Selected 200 HVGs out of 2000 genes
Running imputation on HVGs matrix (14746, 200) ...




✅ Saved imputed file to: imputed_iterative\adata_iterative_imputed_mf20_run3.h5ad

Processing dropout_h5ad\adata_dropout_mf20_run4.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Selected 200 HVGs out of 2000 genes
Running imputation on HVGs matrix (14746, 200) ...




✅ Saved imputed file to: imputed_iterative\adata_iterative_imputed_mf20_run4.h5ad

Processing dropout_h5ad\adata_dropout_mf20_run5.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Selected 200 HVGs out of 2000 genes
Running imputation on HVGs matrix (14746, 200) ...




✅ Saved imputed file to: imputed_iterative\adata_iterative_imputed_mf20_run5.h5ad

Processing dropout_h5ad\adata_dropout_mf20_run6.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Selected 200 HVGs out of 2000 genes
Running imputation on HVGs matrix (14746, 200) ...




✅ Saved imputed file to: imputed_iterative\adata_iterative_imputed_mf20_run6.h5ad

Processing dropout_h5ad\adata_dropout_mf20_run7.h5ad ...
Selected 200 HVGs out of 2000 genes


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Running imputation on HVGs matrix (14746, 200) ...




✅ Saved imputed file to: imputed_iterative\adata_iterative_imputed_mf20_run7.h5ad

Processing dropout_h5ad\adata_dropout_mf20_run8.h5ad ...
Selected 200 HVGs out of 2000 genes


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Running imputation on HVGs matrix (14746, 200) ...




✅ Saved imputed file to: imputed_iterative\adata_iterative_imputed_mf20_run8.h5ad

Processing dropout_h5ad\adata_dropout_mf20_run9.h5ad ...
Selected 200 HVGs out of 2000 genes


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Running imputation on HVGs matrix (14746, 200) ...




✅ Saved imputed file to: imputed_iterative\adata_iterative_imputed_mf20_run9.h5ad

Processing dropout_h5ad\adata_dropout_mf30_run1.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Selected 200 HVGs out of 2000 genes
Running imputation on HVGs matrix (14746, 200) ...




✅ Saved imputed file to: imputed_iterative\adata_iterative_imputed_mf30_run1.h5ad

Processing dropout_h5ad\adata_dropout_mf30_run10.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Selected 200 HVGs out of 2000 genes
Running imputation on HVGs matrix (14746, 200) ...




✅ Saved imputed file to: imputed_iterative\adata_iterative_imputed_mf30_run10.h5ad

Processing dropout_h5ad\adata_dropout_mf30_run2.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Selected 200 HVGs out of 2000 genes
Running imputation on HVGs matrix (14746, 200) ...




✅ Saved imputed file to: imputed_iterative\adata_iterative_imputed_mf30_run2.h5ad

Processing dropout_h5ad\adata_dropout_mf30_run3.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Selected 200 HVGs out of 2000 genes
Running imputation on HVGs matrix (14746, 200) ...




✅ Saved imputed file to: imputed_iterative\adata_iterative_imputed_mf30_run3.h5ad

Processing dropout_h5ad\adata_dropout_mf30_run4.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Selected 200 HVGs out of 2000 genes
Running imputation on HVGs matrix (14746, 200) ...




✅ Saved imputed file to: imputed_iterative\adata_iterative_imputed_mf30_run4.h5ad

Processing dropout_h5ad\adata_dropout_mf30_run5.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Selected 200 HVGs out of 2000 genes
Running imputation on HVGs matrix (14746, 200) ...




✅ Saved imputed file to: imputed_iterative\adata_iterative_imputed_mf30_run5.h5ad

Processing dropout_h5ad\adata_dropout_mf30_run6.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Selected 200 HVGs out of 2000 genes
Running imputation on HVGs matrix (14746, 200) ...




✅ Saved imputed file to: imputed_iterative\adata_iterative_imputed_mf30_run6.h5ad

Processing dropout_h5ad\adata_dropout_mf30_run7.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Selected 200 HVGs out of 2000 genes
Running imputation on HVGs matrix (14746, 200) ...




✅ Saved imputed file to: imputed_iterative\adata_iterative_imputed_mf30_run7.h5ad

Processing dropout_h5ad\adata_dropout_mf30_run8.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Selected 200 HVGs out of 2000 genes
Running imputation on HVGs matrix (14746, 200) ...




✅ Saved imputed file to: imputed_iterative\adata_iterative_imputed_mf30_run8.h5ad

Processing dropout_h5ad\adata_dropout_mf30_run9.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Selected 200 HVGs out of 2000 genes
Running imputation on HVGs matrix (14746, 200) ...




✅ Saved imputed file to: imputed_iterative\adata_iterative_imputed_mf30_run9.h5ad

All files processed with Iterative Imputation (HVGs only)


In [4]:
pip install --user scikit-misc

Collecting scikit-miscNote: you may need to restart the kernel to use updated packages.

  Downloading scikit_misc-0.5.1-cp312-cp312-win_amd64.whl.metadata (5.0 kB)
Downloading scikit_misc-0.5.1-cp312-cp312-win_amd64.whl (157 kB)
Installing collected packages: scikit-misc
Successfully installed scikit-misc-0.5.1


In [8]:
import scanpy as sc
import numpy as np
import os
from glob import glob
from sklearn.impute import KNNImputer

# -----------------------------
# Input & Output directories
# -----------------------------
input_dir = "dropout_h5ad"
output_dir = "imputed_knn"
os.makedirs(output_dir, exist_ok=True)

files = glob(os.path.join(input_dir, "*.h5ad"))

# Define KNNImputer
imputer = KNNImputer(
    n_neighbors=10,      # number of neighbors to use
    weights="uniform",   # could also try "distance"
    metric="nan_euclidean"
)

for f in files:
    print(f"\nProcessing {f} ...")

    adata_missing = sc.read_h5ad(f)
    adata_missing.obs_names_make_unique()
    adata_missing.var_names_make_unique()

    # Copy for imputation
    adata_imp = adata_missing.copy()

    # Step 1: Identify HVGs
    sc.pp.highly_variable_genes(adata_imp, n_top_genes=200, flavor="cell_ranger")
    hvg_mask = adata_imp.var["highly_variable"].values
    print(f"Selected {hvg_mask.sum()} HVGs out of {adata_imp.n_vars} genes")

    # Step 2: Convert to dense
    X = adata_imp.X.toarray() if not isinstance(adata_imp.X, np.ndarray) else adata_imp.X.copy()

    # Step 3: Replace zeros with NaN (treat as missing)
    X[X == 0] = np.nan

    # Step 4: Impute only HVGs
    X_hvg = X[:, hvg_mask]
    valid_genes = ~(np.all(np.isnan(X_hvg), axis=0))  # drop all-NaN genes
    X_hvg_valid = X_hvg[:, valid_genes]

    print(f"Running KNNImputer on HVGs matrix {X_hvg_valid.shape} ...")
    X_hvg_imp_valid = imputer.fit_transform(X_hvg_valid)

    # Step 5: Reconstruct HVG matrix
    X_hvg_imp = np.zeros_like(X_hvg)
    X_hvg_imp[:, valid_genes] = X_hvg_imp_valid
    # dropped HVGs stay zero

    # Step 6: Insert HVGs back into full matrix
    X_final = X.copy()
    X_final[:, hvg_mask] = X_hvg_imp

    # Step 7: Assign back
    adata_imp.X = X_final

    # Save
    fname = os.path.basename(f).replace("adata_dropout", "adata_knn_imputed")
    outpath = os.path.join(output_dir, fname)
    adata_imp.write(outpath)

    print(f"✅ Saved imputed file to: {outpath}")

print("\nAll files processed with KNN Imputation (HVGs only)")



Processing dropout_h5ad\adata_dropout_mf10_run1.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Selected 200 HVGs out of 2000 genes
Running KNNImputer on HVGs matrix (14746, 200) ...
✅ Saved imputed file to: imputed_knn\adata_knn_imputed_mf10_run1.h5ad

Processing dropout_h5ad\adata_dropout_mf10_run10.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Selected 200 HVGs out of 2000 genes
Running KNNImputer on HVGs matrix (14746, 200) ...
✅ Saved imputed file to: imputed_knn\adata_knn_imputed_mf10_run10.h5ad

Processing dropout_h5ad\adata_dropout_mf10_run2.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Selected 200 HVGs out of 2000 genes
Running KNNImputer on HVGs matrix (14746, 200) ...
✅ Saved imputed file to: imputed_knn\adata_knn_imputed_mf10_run2.h5ad

Processing dropout_h5ad\adata_dropout_mf10_run3.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Selected 200 HVGs out of 2000 genes
Running KNNImputer on HVGs matrix (14746, 200) ...
✅ Saved imputed file to: imputed_knn\adata_knn_imputed_mf10_run3.h5ad

Processing dropout_h5ad\adata_dropout_mf10_run4.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Selected 200 HVGs out of 2000 genes
Running KNNImputer on HVGs matrix (14746, 200) ...
✅ Saved imputed file to: imputed_knn\adata_knn_imputed_mf10_run4.h5ad

Processing dropout_h5ad\adata_dropout_mf10_run5.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Selected 200 HVGs out of 2000 genes
Running KNNImputer on HVGs matrix (14746, 200) ...
✅ Saved imputed file to: imputed_knn\adata_knn_imputed_mf10_run5.h5ad

Processing dropout_h5ad\adata_dropout_mf10_run6.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Selected 200 HVGs out of 2000 genes
Running KNNImputer on HVGs matrix (14746, 200) ...
✅ Saved imputed file to: imputed_knn\adata_knn_imputed_mf10_run6.h5ad

Processing dropout_h5ad\adata_dropout_mf10_run7.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Selected 200 HVGs out of 2000 genes
Running KNNImputer on HVGs matrix (14746, 200) ...
✅ Saved imputed file to: imputed_knn\adata_knn_imputed_mf10_run7.h5ad

Processing dropout_h5ad\adata_dropout_mf10_run8.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Selected 200 HVGs out of 2000 genes
Running KNNImputer on HVGs matrix (14746, 200) ...
✅ Saved imputed file to: imputed_knn\adata_knn_imputed_mf10_run8.h5ad

Processing dropout_h5ad\adata_dropout_mf10_run9.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Selected 200 HVGs out of 2000 genes
Running KNNImputer on HVGs matrix (14746, 200) ...
✅ Saved imputed file to: imputed_knn\adata_knn_imputed_mf10_run9.h5ad

Processing dropout_h5ad\adata_dropout_mf20_run1.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Selected 200 HVGs out of 2000 genes
Running KNNImputer on HVGs matrix (14746, 200) ...
✅ Saved imputed file to: imputed_knn\adata_knn_imputed_mf20_run1.h5ad

Processing dropout_h5ad\adata_dropout_mf20_run10.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Selected 200 HVGs out of 2000 genes
Running KNNImputer on HVGs matrix (14746, 200) ...
✅ Saved imputed file to: imputed_knn\adata_knn_imputed_mf20_run10.h5ad

Processing dropout_h5ad\adata_dropout_mf20_run2.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Selected 200 HVGs out of 2000 genes
Running KNNImputer on HVGs matrix (14746, 200) ...
✅ Saved imputed file to: imputed_knn\adata_knn_imputed_mf20_run2.h5ad

Processing dropout_h5ad\adata_dropout_mf20_run3.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Selected 200 HVGs out of 2000 genes
Running KNNImputer on HVGs matrix (14746, 200) ...
✅ Saved imputed file to: imputed_knn\adata_knn_imputed_mf20_run3.h5ad

Processing dropout_h5ad\adata_dropout_mf20_run4.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Selected 200 HVGs out of 2000 genes
Running KNNImputer on HVGs matrix (14746, 200) ...
✅ Saved imputed file to: imputed_knn\adata_knn_imputed_mf20_run4.h5ad

Processing dropout_h5ad\adata_dropout_mf20_run5.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Selected 200 HVGs out of 2000 genes
Running KNNImputer on HVGs matrix (14746, 200) ...
✅ Saved imputed file to: imputed_knn\adata_knn_imputed_mf20_run5.h5ad

Processing dropout_h5ad\adata_dropout_mf20_run6.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Selected 200 HVGs out of 2000 genes
Running KNNImputer on HVGs matrix (14746, 200) ...
✅ Saved imputed file to: imputed_knn\adata_knn_imputed_mf20_run6.h5ad

Processing dropout_h5ad\adata_dropout_mf20_run7.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Selected 200 HVGs out of 2000 genes
Running KNNImputer on HVGs matrix (14746, 200) ...
✅ Saved imputed file to: imputed_knn\adata_knn_imputed_mf20_run7.h5ad

Processing dropout_h5ad\adata_dropout_mf20_run8.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Selected 200 HVGs out of 2000 genes
Running KNNImputer on HVGs matrix (14746, 200) ...
✅ Saved imputed file to: imputed_knn\adata_knn_imputed_mf20_run8.h5ad

Processing dropout_h5ad\adata_dropout_mf20_run9.h5ad ...
Selected 200 HVGs out of 2000 genes


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Running KNNImputer on HVGs matrix (14746, 200) ...
✅ Saved imputed file to: imputed_knn\adata_knn_imputed_mf20_run9.h5ad

Processing dropout_h5ad\adata_dropout_mf30_run1.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Selected 200 HVGs out of 2000 genes
Running KNNImputer on HVGs matrix (14746, 200) ...
✅ Saved imputed file to: imputed_knn\adata_knn_imputed_mf30_run1.h5ad

Processing dropout_h5ad\adata_dropout_mf30_run10.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Selected 200 HVGs out of 2000 genes
Running KNNImputer on HVGs matrix (14746, 200) ...
✅ Saved imputed file to: imputed_knn\adata_knn_imputed_mf30_run10.h5ad

Processing dropout_h5ad\adata_dropout_mf30_run2.h5ad ...
Selected 200 HVGs out of 2000 genes


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Running KNNImputer on HVGs matrix (14746, 200) ...
✅ Saved imputed file to: imputed_knn\adata_knn_imputed_mf30_run2.h5ad

Processing dropout_h5ad\adata_dropout_mf30_run3.h5ad ...
Selected 200 HVGs out of 2000 genes


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Running KNNImputer on HVGs matrix (14746, 200) ...
✅ Saved imputed file to: imputed_knn\adata_knn_imputed_mf30_run3.h5ad

Processing dropout_h5ad\adata_dropout_mf30_run4.h5ad ...
Selected 200 HVGs out of 2000 genes


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Running KNNImputer on HVGs matrix (14746, 200) ...
✅ Saved imputed file to: imputed_knn\adata_knn_imputed_mf30_run4.h5ad

Processing dropout_h5ad\adata_dropout_mf30_run5.h5ad ...
Selected 200 HVGs out of 2000 genes


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Running KNNImputer on HVGs matrix (14746, 200) ...
✅ Saved imputed file to: imputed_knn\adata_knn_imputed_mf30_run5.h5ad

Processing dropout_h5ad\adata_dropout_mf30_run6.h5ad ...
Selected 200 HVGs out of 2000 genes


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Running KNNImputer on HVGs matrix (14746, 200) ...
✅ Saved imputed file to: imputed_knn\adata_knn_imputed_mf30_run6.h5ad

Processing dropout_h5ad\adata_dropout_mf30_run7.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Selected 200 HVGs out of 2000 genes
Running KNNImputer on HVGs matrix (14746, 200) ...
✅ Saved imputed file to: imputed_knn\adata_knn_imputed_mf30_run7.h5ad

Processing dropout_h5ad\adata_dropout_mf30_run8.h5ad ...
Selected 200 HVGs out of 2000 genes


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Running KNNImputer on HVGs matrix (14746, 200) ...
✅ Saved imputed file to: imputed_knn\adata_knn_imputed_mf30_run8.h5ad

Processing dropout_h5ad\adata_dropout_mf30_run9.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Selected 200 HVGs out of 2000 genes
Running KNNImputer on HVGs matrix (14746, 200) ...
✅ Saved imputed file to: imputed_knn\adata_knn_imputed_mf30_run9.h5ad

All files processed with KNN Imputation (HVGs only)


In [10]:
import scanpy as sc
import numpy as np
import os
from glob import glob
from fancyimpute import SoftImpute

# -----------------------------
# Input & Output directories
# -----------------------------
input_dir = "dropout_h5ad"
output_dir = "imputed_softimpute"
os.makedirs(output_dir, exist_ok=True)

files = glob(os.path.join(input_dir, "*.h5ad"))

# Define SoftImpute
imputer = SoftImpute(
    max_rank=None,      # None means full rank, or set to e.g. 100
    init_fill_method="zero",  # starting point for missing values
    max_iters=100,
    verbose=True,
    convergence_threshold=1e-5
)

for f in files:
    print(f"\nProcessing {f} ...")

    adata_missing = sc.read_h5ad(f)
    adata_missing.obs_names_make_unique()
    adata_missing.var_names_make_unique()

    # Copy for imputation
    adata_imp = adata_missing.copy()

    # Step 1: Identify HVGs
    sc.pp.highly_variable_genes(adata_imp, n_top_genes=200, flavor="cell_ranger")
    hvg_mask = adata_imp.var["highly_variable"].values
    print(f"Selected {hvg_mask.sum()} HVGs out of {adata_imp.n_vars} genes")

    # Step 2: Convert to dense
    X = adata_imp.X.toarray() if not isinstance(adata_imp.X, np.ndarray) else adata_imp.X.copy()

    # Step 3: Replace zeros with NaN (treat as missing)
    X[X == 0] = np.nan

    # Step 4: Impute only HVGs
    X_hvg = X[:, hvg_mask]
    valid_genes = ~(np.all(np.isnan(X_hvg), axis=0))  # drop all-NaN genes
    X_hvg_valid = X_hvg[:, valid_genes]

    print(f"Running SoftImpute on HVGs matrix {X_hvg_valid.shape} ...")
    X_hvg_imp_valid = imputer.fit_transform(X_hvg_valid)

    # Step 5: Reconstruct HVG matrix
    X_hvg_imp = np.zeros_like(X_hvg)
    X_hvg_imp[:, valid_genes] = X_hvg_imp_valid
    # dropped HVGs stay zero

    # Step 6: Insert HVGs back into full matrix
    X_final = X.copy()
    X_final[:, hvg_mask] = X_hvg_imp

    # Step 7: Assign back
    adata_imp.X = X_final

    # Save
    fname = os.path.basename(f).replace("adata_dropout", "adata_softimpute_imputed")
    outpath = os.path.join(output_dir, fname)
    adata_imp.write(outpath)

    print(f"✅ Saved imputed file to: {outpath}")

print("\nAll files processed with SoftImpute (HVGs only)")


(CVXPY) Aug 26 09:44:31 AM: Encountered unexpected exception importing solver MPAX:
ImportError('DLL load failed while importing utils: A dynamic link library (DLL) initialization routine failed.')



Processing dropout_h5ad\adata_dropout_mf10_run1.h5ad ...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Selected 200 HVGs out of 2000 genes
Running SoftImpute on HVGs matrix (14746, 200) ...
[SoftImpute] Max Singular Value of X_init = 365.181335




[SoftImpute] Iter 1: observed MAE=0.269784 rank=159
[SoftImpute] Iter 2: observed MAE=0.270434 rank=157
[SoftImpute] Iter 3: observed MAE=0.270995 rank=155
[SoftImpute] Iter 4: observed MAE=0.271501 rank=151
[SoftImpute] Iter 5: observed MAE=0.271868 rank=147
[SoftImpute] Iter 6: observed MAE=0.272086 rank=144
[SoftImpute] Iter 7: observed MAE=0.272367 rank=142
[SoftImpute] Iter 8: observed MAE=0.272634 rank=139
[SoftImpute] Iter 9: observed MAE=0.272904 rank=137
[SoftImpute] Iter 10: observed MAE=0.273160 rank=136
[SoftImpute] Iter 11: observed MAE=0.273366 rank=135
[SoftImpute] Iter 12: observed MAE=0.273494 rank=134
[SoftImpute] Iter 13: observed MAE=0.273401 rank=130
[SoftImpute] Iter 14: observed MAE=0.273212 rank=128
[SoftImpute] Iter 15: observed MAE=0.272972 rank=126
[SoftImpute] Iter 16: observed MAE=0.272765 rank=124
[SoftImpute] Iter 17: observed MAE=0.272442 rank=122
[SoftImpute] Iter 18: observed MAE=0.272164 rank=121
[SoftImpute] Iter 19: observed MAE=0.271856 rank=119
[S

  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Running SoftImpute on HVGs matrix (14746, 200) ...
[SoftImpute] Max Singular Value of X_init = 361.773712




[SoftImpute] Iter 1: observed MAE=0.266905 rank=154
[SoftImpute] Iter 2: observed MAE=0.267436 rank=151
[SoftImpute] Iter 3: observed MAE=0.267797 rank=147
[SoftImpute] Iter 4: observed MAE=0.268189 rank=146
[SoftImpute] Iter 5: observed MAE=0.268548 rank=143
[SoftImpute] Iter 6: observed MAE=0.268852 rank=140
[SoftImpute] Iter 7: observed MAE=0.269191 rank=137
[SoftImpute] Iter 8: observed MAE=0.269403 rank=134
[SoftImpute] Iter 9: observed MAE=0.269553 rank=131
[SoftImpute] Iter 10: observed MAE=0.269671 rank=129
[SoftImpute] Iter 11: observed MAE=0.269804 rank=129
[SoftImpute] Iter 12: observed MAE=0.269907 rank=128
[SoftImpute] Iter 13: observed MAE=0.269947 rank=127
[SoftImpute] Iter 14: observed MAE=0.269826 rank=124
[SoftImpute] Iter 15: observed MAE=0.269675 rank=122
[SoftImpute] Iter 16: observed MAE=0.269460 rank=120
[SoftImpute] Iter 17: observed MAE=0.269285 rank=119
[SoftImpute] Iter 18: observed MAE=0.269085 rank=117
[SoftImpute] Iter 19: observed MAE=0.268838 rank=115
[S

  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Selected 200 HVGs out of 2000 genes
Running SoftImpute on HVGs matrix (14746, 200) ...
[SoftImpute] Max Singular Value of X_init = 364.962158




[SoftImpute] Iter 1: observed MAE=0.273817 rank=165
[SoftImpute] Iter 2: observed MAE=0.274503 rank=163
[SoftImpute] Iter 3: observed MAE=0.274990 rank=160
[SoftImpute] Iter 4: observed MAE=0.275406 rank=155
[SoftImpute] Iter 5: observed MAE=0.275734 rank=154
[SoftImpute] Iter 6: observed MAE=0.276040 rank=151
[SoftImpute] Iter 7: observed MAE=0.276343 rank=149
[SoftImpute] Iter 8: observed MAE=0.276693 rank=146
[SoftImpute] Iter 9: observed MAE=0.276975 rank=144
[SoftImpute] Iter 10: observed MAE=0.277206 rank=141
[SoftImpute] Iter 11: observed MAE=0.277338 rank=140
[SoftImpute] Iter 12: observed MAE=0.277391 rank=138
[SoftImpute] Iter 13: observed MAE=0.277375 rank=135
[SoftImpute] Iter 14: observed MAE=0.277157 rank=133
[SoftImpute] Iter 15: observed MAE=0.276979 rank=131
[SoftImpute] Iter 16: observed MAE=0.276728 rank=129
[SoftImpute] Iter 17: observed MAE=0.276441 rank=126
[SoftImpute] Iter 18: observed MAE=0.276124 rank=125
[SoftImpute] Iter 19: observed MAE=0.275908 rank=124
[S

  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Running SoftImpute on HVGs matrix (14746, 200) ...
[SoftImpute] Max Singular Value of X_init = 361.787933




[SoftImpute] Iter 1: observed MAE=0.290959 rank=160
[SoftImpute] Iter 2: observed MAE=0.291668 rank=159
[SoftImpute] Iter 3: observed MAE=0.292212 rank=154
[SoftImpute] Iter 4: observed MAE=0.292626 rank=152
[SoftImpute] Iter 5: observed MAE=0.293033 rank=149
[SoftImpute] Iter 6: observed MAE=0.293342 rank=145
[SoftImpute] Iter 7: observed MAE=0.293620 rank=142
[SoftImpute] Iter 8: observed MAE=0.293980 rank=141
[SoftImpute] Iter 9: observed MAE=0.294345 rank=140
[SoftImpute] Iter 10: observed MAE=0.294680 rank=139
[SoftImpute] Iter 11: observed MAE=0.294892 rank=137
[SoftImpute] Iter 12: observed MAE=0.294975 rank=134
[SoftImpute] Iter 13: observed MAE=0.294878 rank=131
[SoftImpute] Iter 14: observed MAE=0.294721 rank=129
[SoftImpute] Iter 15: observed MAE=0.294585 rank=127
[SoftImpute] Iter 16: observed MAE=0.294410 rank=126
[SoftImpute] Iter 17: observed MAE=0.294246 rank=124
[SoftImpute] Iter 18: observed MAE=0.293964 rank=121
[SoftImpute] Iter 19: observed MAE=0.293665 rank=119
[S

  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Selected 200 HVGs out of 2000 genes
Running SoftImpute on HVGs matrix (14746, 200) ...
[SoftImpute] Max Singular Value of X_init = 357.612518




[SoftImpute] Iter 1: observed MAE=0.268292 rank=161
[SoftImpute] Iter 2: observed MAE=0.268901 rank=160
[SoftImpute] Iter 3: observed MAE=0.269290 rank=155
[SoftImpute] Iter 4: observed MAE=0.269639 rank=154
[SoftImpute] Iter 5: observed MAE=0.270056 rank=151
[SoftImpute] Iter 6: observed MAE=0.270333 rank=147
[SoftImpute] Iter 7: observed MAE=0.270509 rank=143
[SoftImpute] Iter 8: observed MAE=0.270730 rank=141
[SoftImpute] Iter 9: observed MAE=0.270920 rank=139
[SoftImpute] Iter 10: observed MAE=0.271133 rank=137
[SoftImpute] Iter 11: observed MAE=0.271254 rank=135
[SoftImpute] Iter 12: observed MAE=0.271262 rank=132
[SoftImpute] Iter 13: observed MAE=0.271237 rank=131
[SoftImpute] Iter 14: observed MAE=0.271142 rank=128
[SoftImpute] Iter 15: observed MAE=0.270878 rank=127
[SoftImpute] Iter 16: observed MAE=0.270701 rank=125
[SoftImpute] Iter 17: observed MAE=0.270480 rank=123
[SoftImpute] Iter 18: observed MAE=0.270267 rank=121
[SoftImpute] Iter 19: observed MAE=0.270009 rank=119
[S

  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Running SoftImpute on HVGs matrix (14746, 200) ...
[SoftImpute] Max Singular Value of X_init = 364.323425




[SoftImpute] Iter 1: observed MAE=0.269756 rank=160
[SoftImpute] Iter 2: observed MAE=0.270429 rank=156
[SoftImpute] Iter 3: observed MAE=0.270952 rank=154
[SoftImpute] Iter 4: observed MAE=0.271430 rank=154
[SoftImpute] Iter 5: observed MAE=0.271835 rank=150
[SoftImpute] Iter 6: observed MAE=0.272182 rank=148
[SoftImpute] Iter 7: observed MAE=0.272522 rank=145
[SoftImpute] Iter 8: observed MAE=0.272785 rank=143
[SoftImpute] Iter 9: observed MAE=0.273067 rank=141
[SoftImpute] Iter 10: observed MAE=0.273281 rank=140
[SoftImpute] Iter 11: observed MAE=0.273484 rank=138
[SoftImpute] Iter 12: observed MAE=0.273498 rank=136
[SoftImpute] Iter 13: observed MAE=0.273396 rank=134
[SoftImpute] Iter 14: observed MAE=0.273245 rank=131
[SoftImpute] Iter 15: observed MAE=0.273039 rank=129
[SoftImpute] Iter 16: observed MAE=0.272753 rank=126
[SoftImpute] Iter 17: observed MAE=0.272458 rank=124
[SoftImpute] Iter 18: observed MAE=0.272171 rank=122
[SoftImpute] Iter 19: observed MAE=0.271895 rank=120
[S

  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Selected 200 HVGs out of 2000 genes
Running SoftImpute on HVGs matrix (14746, 200) ...
[SoftImpute] Max Singular Value of X_init = 363.938629




[SoftImpute] Iter 1: observed MAE=0.268249 rank=162
[SoftImpute] Iter 2: observed MAE=0.268920 rank=158
[SoftImpute] Iter 3: observed MAE=0.269437 rank=156
[SoftImpute] Iter 4: observed MAE=0.269970 rank=154
[SoftImpute] Iter 5: observed MAE=0.270343 rank=150
[SoftImpute] Iter 6: observed MAE=0.270598 rank=147
[SoftImpute] Iter 7: observed MAE=0.270871 rank=145
[SoftImpute] Iter 8: observed MAE=0.271147 rank=141
[SoftImpute] Iter 9: observed MAE=0.271452 rank=140
[SoftImpute] Iter 10: observed MAE=0.271781 rank=139
[SoftImpute] Iter 11: observed MAE=0.271949 rank=137
[SoftImpute] Iter 12: observed MAE=0.272013 rank=135
[SoftImpute] Iter 13: observed MAE=0.271947 rank=132
[SoftImpute] Iter 14: observed MAE=0.271840 rank=130
[SoftImpute] Iter 15: observed MAE=0.271720 rank=129
[SoftImpute] Iter 16: observed MAE=0.271509 rank=125
[SoftImpute] Iter 17: observed MAE=0.271179 rank=123
[SoftImpute] Iter 18: observed MAE=0.270893 rank=122
[SoftImpute] Iter 19: observed MAE=0.270598 rank=120
[S

  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Running SoftImpute on HVGs matrix (14746, 200) ...
[SoftImpute] Max Singular Value of X_init = 364.581360




[SoftImpute] Iter 1: observed MAE=0.290060 rank=149
[SoftImpute] Iter 2: observed MAE=0.290798 rank=147
[SoftImpute] Iter 3: observed MAE=0.291406 rank=145
[SoftImpute] Iter 4: observed MAE=0.291929 rank=142
[SoftImpute] Iter 5: observed MAE=0.292414 rank=139
[SoftImpute] Iter 6: observed MAE=0.292852 rank=136
[SoftImpute] Iter 7: observed MAE=0.293183 rank=134
[SoftImpute] Iter 8: observed MAE=0.293605 rank=133
[SoftImpute] Iter 9: observed MAE=0.293925 rank=131
[SoftImpute] Iter 10: observed MAE=0.294195 rank=129
[SoftImpute] Iter 11: observed MAE=0.294418 rank=127
[SoftImpute] Iter 12: observed MAE=0.294529 rank=125
[SoftImpute] Iter 13: observed MAE=0.294602 rank=124
[SoftImpute] Iter 14: observed MAE=0.294603 rank=121
[SoftImpute] Iter 15: observed MAE=0.294453 rank=119
[SoftImpute] Iter 16: observed MAE=0.294222 rank=116
[SoftImpute] Iter 17: observed MAE=0.294001 rank=116
[SoftImpute] Iter 18: observed MAE=0.293814 rank=114
[SoftImpute] Iter 19: observed MAE=0.293591 rank=112
[S

  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Running SoftImpute on HVGs matrix (14746, 200) ...
[SoftImpute] Max Singular Value of X_init = 360.744141
[SoftImpute] Iter 1: observed MAE=0.265596 rank=157
[SoftImpute] Iter 2: observed MAE=0.266199 rank=152
[SoftImpute] Iter 3: observed MAE=0.266603 rank=151
[SoftImpute] Iter 4: observed MAE=0.267018 rank=147
[SoftImpute] Iter 5: observed MAE=0.267353 rank=144
[SoftImpute] Iter 6: observed MAE=0.267686 rank=140
[SoftImpute] Iter 7: observed MAE=0.267967 rank=137
[SoftImpute] Iter 8: observed MAE=0.268280 rank=135
[SoftImpute] Iter 9: observed MAE=0.268651 rank=133
[SoftImpute] Iter 10: observed MAE=0.268947 rank=131
[SoftImpute] Iter 11: observed MAE=0.269160 rank=130
[SoftImpute] Iter 12: observed MAE=0.269297 rank=129
[SoftImpute] Iter 13: observed MAE=0.269227 rank=126
[SoftImpute] Iter 14: observed MAE=0.269123 rank=124
[SoftImpute] Iter 15: observed MAE=0.268909 rank=121
[SoftImpute] Iter 16: observed MAE=0.268692 rank=120
[SoftImpute] Iter 17: observed MAE=0.268498 rank=118
[S

  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Selected 200 HVGs out of 2000 genes
Running SoftImpute on HVGs matrix (14746, 200) ...
[SoftImpute] Max Singular Value of X_init = 360.715790
[SoftImpute] Iter 1: observed MAE=0.269868 rank=157
[SoftImpute] Iter 2: observed MAE=0.270429 rank=152
[SoftImpute] Iter 3: observed MAE=0.270926 rank=151
[SoftImpute] Iter 4: observed MAE=0.271265 rank=147
[SoftImpute] Iter 5: observed MAE=0.271584 rank=143
[SoftImpute] Iter 6: observed MAE=0.271816 rank=141
[SoftImpute] Iter 7: observed MAE=0.272086 rank=137
[SoftImpute] Iter 8: observed MAE=0.272385 rank=136
[SoftImpute] Iter 9: observed MAE=0.272639 rank=134
[SoftImpute] Iter 10: observed MAE=0.272885 rank=133
[SoftImpute] Iter 11: observed MAE=0.273074 rank=131
[SoftImpute] Iter 12: observed MAE=0.273135 rank=130
[SoftImpute] Iter 13: observed MAE=0.273152 rank=127
[SoftImpute] Iter 14: observed MAE=0.273052 rank=125
[SoftImpute] Iter 15: observed MAE=0.272950 rank=124
[SoftImpute] Iter 16: observed MAE=0.272766 rank=122
[SoftImpute] Iter 1

  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Running SoftImpute on HVGs matrix (14746, 200) ...
[SoftImpute] Max Singular Value of X_init = 327.586487
[SoftImpute] Iter 1: observed MAE=0.249884 rank=177
[SoftImpute] Iter 2: observed MAE=0.250118 rank=173
[SoftImpute] Iter 3: observed MAE=0.250263 rank=172
[SoftImpute] Iter 4: observed MAE=0.250326 rank=169
[SoftImpute] Iter 5: observed MAE=0.250338 rank=166
[SoftImpute] Iter 6: observed MAE=0.250347 rank=162
[SoftImpute] Iter 7: observed MAE=0.250330 rank=158
[SoftImpute] Iter 8: observed MAE=0.250262 rank=156
[SoftImpute] Iter 9: observed MAE=0.250192 rank=153
[SoftImpute] Iter 10: observed MAE=0.250145 rank=151
[SoftImpute] Iter 11: observed MAE=0.250128 rank=148
[SoftImpute] Iter 12: observed MAE=0.250107 rank=145
[SoftImpute] Iter 13: observed MAE=0.250036 rank=143
[SoftImpute] Iter 14: observed MAE=0.249980 rank=142
[SoftImpute] Iter 15: observed MAE=0.249850 rank=139
[SoftImpute] Iter 16: observed MAE=0.249725 rank=137
[SoftImpute] Iter 17: observed MAE=0.249495 rank=134
[S

  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Selected 200 HVGs out of 2000 genes
Running SoftImpute on HVGs matrix (14746, 200) ...




[SoftImpute] Max Singular Value of X_init = 325.797882
[SoftImpute] Iter 1: observed MAE=0.247506 rank=167
[SoftImpute] Iter 2: observed MAE=0.247722 rank=165
[SoftImpute] Iter 3: observed MAE=0.247809 rank=162
[SoftImpute] Iter 4: observed MAE=0.247850 rank=161
[SoftImpute] Iter 5: observed MAE=0.247905 rank=158
[SoftImpute] Iter 6: observed MAE=0.247939 rank=157
[SoftImpute] Iter 7: observed MAE=0.247972 rank=154
[SoftImpute] Iter 8: observed MAE=0.247934 rank=151
[SoftImpute] Iter 9: observed MAE=0.247849 rank=147
[SoftImpute] Iter 10: observed MAE=0.247783 rank=145
[SoftImpute] Iter 11: observed MAE=0.247773 rank=143
[SoftImpute] Iter 12: observed MAE=0.247791 rank=140
[SoftImpute] Iter 13: observed MAE=0.247814 rank=140
[SoftImpute] Iter 14: observed MAE=0.247851 rank=138
[SoftImpute] Iter 15: observed MAE=0.247859 rank=137
[SoftImpute] Iter 16: observed MAE=0.247871 rank=136
[SoftImpute] Iter 17: observed MAE=0.247754 rank=133
[SoftImpute] Iter 18: observed MAE=0.247599 rank=131


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Selected 200 HVGs out of 2000 genes
Running SoftImpute on HVGs matrix (14746, 200) ...
[SoftImpute] Max Singular Value of X_init = 326.711884




[SoftImpute] Iter 1: observed MAE=0.249112 rank=172
[SoftImpute] Iter 2: observed MAE=0.249354 rank=169
[SoftImpute] Iter 3: observed MAE=0.249399 rank=167
[SoftImpute] Iter 4: observed MAE=0.249383 rank=164
[SoftImpute] Iter 5: observed MAE=0.249362 rank=161
[SoftImpute] Iter 6: observed MAE=0.249248 rank=159
[SoftImpute] Iter 7: observed MAE=0.249215 rank=158
[SoftImpute] Iter 8: observed MAE=0.249236 rank=155
[SoftImpute] Iter 9: observed MAE=0.249253 rank=152
[SoftImpute] Iter 10: observed MAE=0.249253 rank=150
[SoftImpute] Iter 11: observed MAE=0.249286 rank=148
[SoftImpute] Iter 12: observed MAE=0.249339 rank=146
[SoftImpute] Iter 13: observed MAE=0.249361 rank=144
[SoftImpute] Iter 14: observed MAE=0.249331 rank=141
[SoftImpute] Iter 15: observed MAE=0.249266 rank=139
[SoftImpute] Iter 16: observed MAE=0.249157 rank=136
[SoftImpute] Iter 17: observed MAE=0.249000 rank=135
[SoftImpute] Iter 18: observed MAE=0.248784 rank=132
[SoftImpute] Iter 19: observed MAE=0.248579 rank=131
[S

  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Running SoftImpute on HVGs matrix (14746, 200) ...
[SoftImpute] Max Singular Value of X_init = 326.609863




[SoftImpute] Iter 1: observed MAE=0.246502 rank=169
[SoftImpute] Iter 2: observed MAE=0.246734 rank=165
[SoftImpute] Iter 3: observed MAE=0.246825 rank=164
[SoftImpute] Iter 4: observed MAE=0.246926 rank=162
[SoftImpute] Iter 5: observed MAE=0.246942 rank=158
[SoftImpute] Iter 6: observed MAE=0.246953 rank=155
[SoftImpute] Iter 7: observed MAE=0.246986 rank=153
[SoftImpute] Iter 8: observed MAE=0.246981 rank=150
[SoftImpute] Iter 9: observed MAE=0.247001 rank=149
[SoftImpute] Iter 10: observed MAE=0.247007 rank=147
[SoftImpute] Iter 11: observed MAE=0.247073 rank=145
[SoftImpute] Iter 12: observed MAE=0.247139 rank=143
[SoftImpute] Iter 13: observed MAE=0.247180 rank=141
[SoftImpute] Iter 14: observed MAE=0.247172 rank=139
[SoftImpute] Iter 15: observed MAE=0.247112 rank=137
[SoftImpute] Iter 16: observed MAE=0.246977 rank=134
[SoftImpute] Iter 17: observed MAE=0.246826 rank=133
[SoftImpute] Iter 18: observed MAE=0.246710 rank=131
[SoftImpute] Iter 19: observed MAE=0.246546 rank=130
[S

  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Running SoftImpute on HVGs matrix (14746, 200) ...
[SoftImpute] Max Singular Value of X_init = 326.191650
[SoftImpute] Iter 1: observed MAE=0.261341 rank=165
[SoftImpute] Iter 2: observed MAE=0.261572 rank=164
[SoftImpute] Iter 3: observed MAE=0.261765 rank=163
[SoftImpute] Iter 4: observed MAE=0.261901 rank=160
[SoftImpute] Iter 5: observed MAE=0.261935 rank=158
[SoftImpute] Iter 6: observed MAE=0.261952 rank=154
[SoftImpute] Iter 7: observed MAE=0.261813 rank=149
[SoftImpute] Iter 8: observed MAE=0.261672 rank=146
[SoftImpute] Iter 9: observed MAE=0.261610 rank=143
[SoftImpute] Iter 10: observed MAE=0.261614 rank=141
[SoftImpute] Iter 11: observed MAE=0.261601 rank=139
[SoftImpute] Iter 12: observed MAE=0.261616 rank=138
[SoftImpute] Iter 13: observed MAE=0.261639 rank=136
[SoftImpute] Iter 14: observed MAE=0.261651 rank=135
[SoftImpute] Iter 15: observed MAE=0.261616 rank=133
[SoftImpute] Iter 16: observed MAE=0.261443 rank=131
[SoftImpute] Iter 17: observed MAE=0.261298 rank=129
[S

  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Running SoftImpute on HVGs matrix (14746, 200) ...
[SoftImpute] Max Singular Value of X_init = 325.789398




[SoftImpute] Iter 1: observed MAE=0.247443 rank=165
[SoftImpute] Iter 2: observed MAE=0.247678 rank=163
[SoftImpute] Iter 3: observed MAE=0.247803 rank=161
[SoftImpute] Iter 4: observed MAE=0.247833 rank=158
[SoftImpute] Iter 5: observed MAE=0.247822 rank=157
[SoftImpute] Iter 6: observed MAE=0.247840 rank=154
[SoftImpute] Iter 7: observed MAE=0.247863 rank=149
[SoftImpute] Iter 8: observed MAE=0.247756 rank=146
[SoftImpute] Iter 9: observed MAE=0.247696 rank=143
[SoftImpute] Iter 10: observed MAE=0.247640 rank=141
[SoftImpute] Iter 11: observed MAE=0.247625 rank=140
[SoftImpute] Iter 12: observed MAE=0.247623 rank=138
[SoftImpute] Iter 13: observed MAE=0.247644 rank=137
[SoftImpute] Iter 14: observed MAE=0.247653 rank=136
[SoftImpute] Iter 15: observed MAE=0.247622 rank=133
[SoftImpute] Iter 16: observed MAE=0.247482 rank=132
[SoftImpute] Iter 17: observed MAE=0.247394 rank=130
[SoftImpute] Iter 18: observed MAE=0.247269 rank=129
[SoftImpute] Iter 19: observed MAE=0.247082 rank=126
[S

  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Selected 200 HVGs out of 2000 genes
Running SoftImpute on HVGs matrix (14746, 200) ...




[SoftImpute] Max Singular Value of X_init = 325.086182
[SoftImpute] Iter 1: observed MAE=0.260939 rank=162
[SoftImpute] Iter 2: observed MAE=0.261151 rank=160
[SoftImpute] Iter 3: observed MAE=0.261224 rank=157
[SoftImpute] Iter 4: observed MAE=0.261235 rank=154
[SoftImpute] Iter 5: observed MAE=0.261158 rank=153
[SoftImpute] Iter 6: observed MAE=0.261138 rank=151
[SoftImpute] Iter 7: observed MAE=0.261112 rank=148
[SoftImpute] Iter 8: observed MAE=0.261081 rank=146
[SoftImpute] Iter 9: observed MAE=0.261052 rank=142
[SoftImpute] Iter 10: observed MAE=0.261007 rank=140
[SoftImpute] Iter 11: observed MAE=0.261041 rank=138
[SoftImpute] Iter 12: observed MAE=0.261077 rank=136
[SoftImpute] Iter 13: observed MAE=0.261096 rank=135
[SoftImpute] Iter 14: observed MAE=0.261132 rank=134
[SoftImpute] Iter 15: observed MAE=0.261161 rank=132
[SoftImpute] Iter 16: observed MAE=0.261046 rank=131
[SoftImpute] Iter 17: observed MAE=0.260937 rank=128
[SoftImpute] Iter 18: observed MAE=0.260753 rank=126


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Running SoftImpute on HVGs matrix (14746, 200) ...
[SoftImpute] Max Singular Value of X_init = 329.590912




[SoftImpute] Iter 1: observed MAE=0.245714 rank=159
[SoftImpute] Iter 2: observed MAE=0.245943 rank=159
[SoftImpute] Iter 3: observed MAE=0.246082 rank=156
[SoftImpute] Iter 4: observed MAE=0.246124 rank=155
[SoftImpute] Iter 5: observed MAE=0.246169 rank=151
[SoftImpute] Iter 6: observed MAE=0.246199 rank=149
[SoftImpute] Iter 7: observed MAE=0.246147 rank=147
[SoftImpute] Iter 8: observed MAE=0.246176 rank=145
[SoftImpute] Iter 9: observed MAE=0.246226 rank=142
[SoftImpute] Iter 10: observed MAE=0.246268 rank=139
[SoftImpute] Iter 11: observed MAE=0.246313 rank=137
[SoftImpute] Iter 12: observed MAE=0.246370 rank=135
[SoftImpute] Iter 13: observed MAE=0.246426 rank=134
[SoftImpute] Iter 14: observed MAE=0.246452 rank=132
[SoftImpute] Iter 15: observed MAE=0.246444 rank=131
[SoftImpute] Iter 16: observed MAE=0.246380 rank=128
[SoftImpute] Iter 17: observed MAE=0.246238 rank=126
[SoftImpute] Iter 18: observed MAE=0.246086 rank=124
[SoftImpute] Iter 19: observed MAE=0.245942 rank=124
[S

  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Running SoftImpute on HVGs matrix (14746, 200) ...
[SoftImpute] Max Singular Value of X_init = 327.762421




[SoftImpute] Iter 1: observed MAE=0.265213 rank=163
[SoftImpute] Iter 2: observed MAE=0.265421 rank=160
[SoftImpute] Iter 3: observed MAE=0.265508 rank=156
[SoftImpute] Iter 4: observed MAE=0.265473 rank=155
[SoftImpute] Iter 5: observed MAE=0.265434 rank=151
[SoftImpute] Iter 6: observed MAE=0.265299 rank=149
[SoftImpute] Iter 7: observed MAE=0.265169 rank=147
[SoftImpute] Iter 8: observed MAE=0.265037 rank=143
[SoftImpute] Iter 9: observed MAE=0.264927 rank=140
[SoftImpute] Iter 10: observed MAE=0.264939 rank=139
[SoftImpute] Iter 11: observed MAE=0.264987 rank=137
[SoftImpute] Iter 12: observed MAE=0.265038 rank=135
[SoftImpute] Iter 13: observed MAE=0.265114 rank=134
[SoftImpute] Iter 14: observed MAE=0.265173 rank=133
[SoftImpute] Iter 15: observed MAE=0.265178 rank=130
[SoftImpute] Iter 16: observed MAE=0.265034 rank=128
[SoftImpute] Iter 17: observed MAE=0.264921 rank=126
[SoftImpute] Iter 18: observed MAE=0.264742 rank=124
[SoftImpute] Iter 19: observed MAE=0.264556 rank=123
[S

  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Running SoftImpute on HVGs matrix (14746, 200) ...
[SoftImpute] Max Singular Value of X_init = 329.085449




[SoftImpute] Iter 1: observed MAE=0.248026 rank=167
[SoftImpute] Iter 2: observed MAE=0.248285 rank=165
[SoftImpute] Iter 3: observed MAE=0.248411 rank=160
[SoftImpute] Iter 4: observed MAE=0.248426 rank=158
[SoftImpute] Iter 5: observed MAE=0.248452 rank=156
[SoftImpute] Iter 6: observed MAE=0.248433 rank=152
[SoftImpute] Iter 7: observed MAE=0.248363 rank=149
[SoftImpute] Iter 8: observed MAE=0.248298 rank=146
[SoftImpute] Iter 9: observed MAE=0.248287 rank=145
[SoftImpute] Iter 10: observed MAE=0.248326 rank=144
[SoftImpute] Iter 11: observed MAE=0.248369 rank=142
[SoftImpute] Iter 12: observed MAE=0.248448 rank=141
[SoftImpute] Iter 13: observed MAE=0.248561 rank=141
[SoftImpute] Iter 14: observed MAE=0.248636 rank=139
[SoftImpute] Iter 15: observed MAE=0.248626 rank=137
[SoftImpute] Iter 16: observed MAE=0.248535 rank=134
[SoftImpute] Iter 17: observed MAE=0.248386 rank=133
[SoftImpute] Iter 18: observed MAE=0.248227 rank=130
[SoftImpute] Iter 19: observed MAE=0.248000 rank=128
[S

  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Selected 200 HVGs out of 2000 genes
Running SoftImpute on HVGs matrix (14746, 200) ...




[SoftImpute] Max Singular Value of X_init = 289.446198
[SoftImpute] Iter 1: observed MAE=0.233399 rank=179
[SoftImpute] Iter 2: observed MAE=0.233383 rank=177
[SoftImpute] Iter 3: observed MAE=0.233337 rank=177
[SoftImpute] Iter 4: observed MAE=0.233244 rank=172
[SoftImpute] Iter 5: observed MAE=0.233089 rank=170
[SoftImpute] Iter 6: observed MAE=0.232881 rank=167
[SoftImpute] Iter 7: observed MAE=0.232649 rank=165
[SoftImpute] Iter 8: observed MAE=0.232373 rank=164
[SoftImpute] Iter 9: observed MAE=0.232137 rank=162
[SoftImpute] Iter 10: observed MAE=0.231927 rank=159
[SoftImpute] Iter 11: observed MAE=0.231742 rank=158
[SoftImpute] Iter 12: observed MAE=0.231599 rank=156
[SoftImpute] Iter 13: observed MAE=0.231424 rank=153
[SoftImpute] Iter 14: observed MAE=0.231284 rank=149
[SoftImpute] Iter 15: observed MAE=0.231110 rank=148
[SoftImpute] Iter 16: observed MAE=0.230988 rank=146
[SoftImpute] Iter 17: observed MAE=0.230866 rank=143
[SoftImpute] Iter 18: observed MAE=0.230755 rank=142


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Running SoftImpute on HVGs matrix (14746, 200) ...
[SoftImpute] Max Singular Value of X_init = 304.501587




[SoftImpute] Iter 1: observed MAE=0.230769 rank=166
[SoftImpute] Iter 2: observed MAE=0.230858 rank=164
[SoftImpute] Iter 3: observed MAE=0.230884 rank=163
[SoftImpute] Iter 4: observed MAE=0.230853 rank=163
[SoftImpute] Iter 5: observed MAE=0.230714 rank=159
[SoftImpute] Iter 6: observed MAE=0.230491 rank=157
[SoftImpute] Iter 7: observed MAE=0.230317 rank=155
[SoftImpute] Iter 8: observed MAE=0.230113 rank=153
[SoftImpute] Iter 9: observed MAE=0.229901 rank=150
[SoftImpute] Iter 10: observed MAE=0.229718 rank=149
[SoftImpute] Iter 11: observed MAE=0.229549 rank=147
[SoftImpute] Iter 12: observed MAE=0.229397 rank=146
[SoftImpute] Iter 13: observed MAE=0.229265 rank=144
[SoftImpute] Iter 14: observed MAE=0.229181 rank=144
[SoftImpute] Iter 15: observed MAE=0.229115 rank=142
[SoftImpute] Iter 16: observed MAE=0.228989 rank=140
[SoftImpute] Iter 17: observed MAE=0.228854 rank=138
[SoftImpute] Iter 18: observed MAE=0.228740 rank=137
[SoftImpute] Iter 19: observed MAE=0.228573 rank=134
[S

  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Running SoftImpute on HVGs matrix (14746, 200) ...
[SoftImpute] Max Singular Value of X_init = 289.800995




[SoftImpute] Iter 1: observed MAE=0.240550 rank=177
[SoftImpute] Iter 2: observed MAE=0.240583 rank=174
[SoftImpute] Iter 3: observed MAE=0.240538 rank=174
[SoftImpute] Iter 4: observed MAE=0.240476 rank=171
[SoftImpute] Iter 5: observed MAE=0.240293 rank=166
[SoftImpute] Iter 6: observed MAE=0.240131 rank=164
[SoftImpute] Iter 7: observed MAE=0.239941 rank=161
[SoftImpute] Iter 8: observed MAE=0.239758 rank=158
[SoftImpute] Iter 9: observed MAE=0.239527 rank=154
[SoftImpute] Iter 10: observed MAE=0.239324 rank=151
[SoftImpute] Iter 11: observed MAE=0.239127 rank=150
[SoftImpute] Iter 12: observed MAE=0.238992 rank=150
[SoftImpute] Iter 13: observed MAE=0.238921 rank=149
[SoftImpute] Iter 14: observed MAE=0.238832 rank=147
[SoftImpute] Iter 15: observed MAE=0.238729 rank=146
[SoftImpute] Iter 16: observed MAE=0.238614 rank=144
[SoftImpute] Iter 17: observed MAE=0.238483 rank=142
[SoftImpute] Iter 18: observed MAE=0.238326 rank=139
[SoftImpute] Iter 19: observed MAE=0.238184 rank=137
[S

  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Running SoftImpute on HVGs matrix (14746, 200) ...
[SoftImpute] Max Singular Value of X_init = 294.829254
[SoftImpute] Iter 1: observed MAE=0.242000 rank=175
[SoftImpute] Iter 2: observed MAE=0.242144 rank=173
[SoftImpute] Iter 3: observed MAE=0.242209 rank=171
[SoftImpute] Iter 4: observed MAE=0.242172 rank=167
[SoftImpute] Iter 5: observed MAE=0.242095 rank=166
[SoftImpute] Iter 6: observed MAE=0.241988 rank=162
[SoftImpute] Iter 7: observed MAE=0.241862 rank=161
[SoftImpute] Iter 8: observed MAE=0.241730 rank=159
[SoftImpute] Iter 9: observed MAE=0.241558 rank=157
[SoftImpute] Iter 10: observed MAE=0.241377 rank=154
[SoftImpute] Iter 11: observed MAE=0.241218 rank=151
[SoftImpute] Iter 12: observed MAE=0.241064 rank=149
[SoftImpute] Iter 13: observed MAE=0.240915 rank=148
[SoftImpute] Iter 14: observed MAE=0.240772 rank=146
[SoftImpute] Iter 15: observed MAE=0.240638 rank=144
[SoftImpute] Iter 16: observed MAE=0.240513 rank=141
[SoftImpute] Iter 17: observed MAE=0.240311 rank=139
[S

  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Selected 200 HVGs out of 2000 genes
Running SoftImpute on HVGs matrix (14746, 200) ...
[SoftImpute] Max Singular Value of X_init = 296.561981




[SoftImpute] Iter 1: observed MAE=0.232088 rank=175
[SoftImpute] Iter 2: observed MAE=0.232201 rank=174
[SoftImpute] Iter 3: observed MAE=0.232238 rank=173
[SoftImpute] Iter 4: observed MAE=0.232143 rank=168
[SoftImpute] Iter 5: observed MAE=0.232000 rank=166
[SoftImpute] Iter 6: observed MAE=0.231845 rank=165
[SoftImpute] Iter 7: observed MAE=0.231625 rank=162
[SoftImpute] Iter 8: observed MAE=0.231350 rank=159
[SoftImpute] Iter 9: observed MAE=0.231117 rank=158
[SoftImpute] Iter 10: observed MAE=0.230886 rank=154
[SoftImpute] Iter 11: observed MAE=0.230671 rank=150
[SoftImpute] Iter 12: observed MAE=0.230470 rank=148
[SoftImpute] Iter 13: observed MAE=0.230298 rank=146
[SoftImpute] Iter 14: observed MAE=0.230173 rank=145
[SoftImpute] Iter 15: observed MAE=0.230057 rank=143
[SoftImpute] Iter 16: observed MAE=0.229948 rank=142
[SoftImpute] Iter 17: observed MAE=0.229775 rank=139
[SoftImpute] Iter 18: observed MAE=0.229591 rank=136
[SoftImpute] Iter 19: observed MAE=0.229386 rank=134
[S

  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Running SoftImpute on HVGs matrix (14746, 200) ...
[SoftImpute] Max Singular Value of X_init = 294.606873




[SoftImpute] Iter 1: observed MAE=0.236374 rank=170
[SoftImpute] Iter 2: observed MAE=0.236417 rank=168
[SoftImpute] Iter 3: observed MAE=0.236394 rank=166
[SoftImpute] Iter 4: observed MAE=0.236276 rank=162
[SoftImpute] Iter 5: observed MAE=0.236113 rank=161
[SoftImpute] Iter 6: observed MAE=0.235967 rank=159
[SoftImpute] Iter 7: observed MAE=0.235782 rank=156
[SoftImpute] Iter 8: observed MAE=0.235616 rank=154
[SoftImpute] Iter 9: observed MAE=0.235379 rank=152
[SoftImpute] Iter 10: observed MAE=0.235163 rank=150
[SoftImpute] Iter 11: observed MAE=0.234957 rank=149
[SoftImpute] Iter 12: observed MAE=0.234772 rank=146
[SoftImpute] Iter 13: observed MAE=0.234661 rank=145
[SoftImpute] Iter 14: observed MAE=0.234542 rank=142
[SoftImpute] Iter 15: observed MAE=0.234490 rank=141
[SoftImpute] Iter 16: observed MAE=0.234443 rank=139
[SoftImpute] Iter 17: observed MAE=0.234372 rank=138
[SoftImpute] Iter 18: observed MAE=0.234273 rank=136
[SoftImpute] Iter 19: observed MAE=0.234165 rank=134
[S

  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Running SoftImpute on HVGs matrix (14746, 200) ...
[SoftImpute] Max Singular Value of X_init = 288.746521




[SoftImpute] Iter 1: observed MAE=0.231918 rank=171
[SoftImpute] Iter 2: observed MAE=0.231922 rank=169
[SoftImpute] Iter 3: observed MAE=0.231848 rank=168
[SoftImpute] Iter 4: observed MAE=0.231706 rank=163
[SoftImpute] Iter 5: observed MAE=0.231475 rank=162
[SoftImpute] Iter 6: observed MAE=0.231194 rank=158
[SoftImpute] Iter 7: observed MAE=0.230890 rank=154
[SoftImpute] Iter 8: observed MAE=0.230622 rank=153
[SoftImpute] Iter 9: observed MAE=0.230389 rank=152
[SoftImpute] Iter 10: observed MAE=0.230180 rank=151
[SoftImpute] Iter 11: observed MAE=0.229971 rank=149
[SoftImpute] Iter 12: observed MAE=0.229758 rank=146
[SoftImpute] Iter 13: observed MAE=0.229482 rank=143
[SoftImpute] Iter 14: observed MAE=0.229271 rank=142
[SoftImpute] Iter 15: observed MAE=0.229094 rank=139
[SoftImpute] Iter 16: observed MAE=0.228911 rank=138
[SoftImpute] Iter 17: observed MAE=0.228718 rank=136
[SoftImpute] Iter 18: observed MAE=0.228551 rank=135
[SoftImpute] Iter 19: observed MAE=0.228406 rank=134
[S

  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Selected 200 HVGs out of 2000 genes
Running SoftImpute on HVGs matrix (14746, 200) ...
[SoftImpute] Max Singular Value of X_init = 289.161194
[SoftImpute] Iter 1: observed MAE=0.231653 rank=171
[SoftImpute] Iter 2: observed MAE=0.231744 rank=170
[SoftImpute] Iter 3: observed MAE=0.231761 rank=167
[SoftImpute] Iter 4: observed MAE=0.231678 rank=167
[SoftImpute] Iter 5: observed MAE=0.231589 rank=165
[SoftImpute] Iter 6: observed MAE=0.231491 rank=164
[SoftImpute] Iter 7: observed MAE=0.231364 rank=163
[SoftImpute] Iter 8: observed MAE=0.231144 rank=159
[SoftImpute] Iter 9: observed MAE=0.230897 rank=156
[SoftImpute] Iter 10: observed MAE=0.230664 rank=154
[SoftImpute] Iter 11: observed MAE=0.230453 rank=152
[SoftImpute] Iter 12: observed MAE=0.230291 rank=151
[SoftImpute] Iter 13: observed MAE=0.230162 rank=149
[SoftImpute] Iter 14: observed MAE=0.230023 rank=147
[SoftImpute] Iter 15: observed MAE=0.229910 rank=145
[SoftImpute] Iter 16: observed MAE=0.229781 rank=143
[SoftImpute] Iter 1

  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Running SoftImpute on HVGs matrix (14746, 200) ...
[SoftImpute] Max Singular Value of X_init = 286.116241




[SoftImpute] Iter 1: observed MAE=0.231145 rank=170
[SoftImpute] Iter 2: observed MAE=0.231157 rank=169
[SoftImpute] Iter 3: observed MAE=0.231084 rank=166
[SoftImpute] Iter 4: observed MAE=0.230978 rank=166
[SoftImpute] Iter 5: observed MAE=0.230845 rank=163
[SoftImpute] Iter 6: observed MAE=0.230663 rank=161
[SoftImpute] Iter 7: observed MAE=0.230456 rank=158
[SoftImpute] Iter 8: observed MAE=0.230258 rank=157
[SoftImpute] Iter 9: observed MAE=0.230061 rank=154
[SoftImpute] Iter 10: observed MAE=0.229900 rank=153
[SoftImpute] Iter 11: observed MAE=0.229740 rank=150
[SoftImpute] Iter 12: observed MAE=0.229534 rank=147
[SoftImpute] Iter 13: observed MAE=0.229382 rank=146
[SoftImpute] Iter 14: observed MAE=0.229253 rank=145
[SoftImpute] Iter 15: observed MAE=0.229107 rank=142
[SoftImpute] Iter 16: observed MAE=0.228923 rank=140
[SoftImpute] Iter 17: observed MAE=0.228795 rank=139
[SoftImpute] Iter 18: observed MAE=0.228669 rank=136
[SoftImpute] Iter 19: observed MAE=0.228521 rank=135
[S

  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


Selected 200 HVGs out of 2000 genes
Running SoftImpute on HVGs matrix (14746, 200) ...




[SoftImpute] Max Singular Value of X_init = 290.512604
[SoftImpute] Iter 1: observed MAE=0.240997 rank=179
[SoftImpute] Iter 2: observed MAE=0.240999 rank=176
[SoftImpute] Iter 3: observed MAE=0.240977 rank=174
[SoftImpute] Iter 4: observed MAE=0.240909 rank=172
[SoftImpute] Iter 5: observed MAE=0.240737 rank=166
[SoftImpute] Iter 6: observed MAE=0.240503 rank=164
[SoftImpute] Iter 7: observed MAE=0.240312 rank=162
[SoftImpute] Iter 8: observed MAE=0.240145 rank=160
[SoftImpute] Iter 9: observed MAE=0.239951 rank=158
[SoftImpute] Iter 10: observed MAE=0.239725 rank=154
[SoftImpute] Iter 11: observed MAE=0.239472 rank=152
[SoftImpute] Iter 12: observed MAE=0.239264 rank=151
[SoftImpute] Iter 13: observed MAE=0.239072 rank=148
[SoftImpute] Iter 14: observed MAE=0.238879 rank=146
[SoftImpute] Iter 15: observed MAE=0.238674 rank=143
[SoftImpute] Iter 16: observed MAE=0.238522 rank=141
[SoftImpute] Iter 17: observed MAE=0.238404 rank=140
[SoftImpute] Iter 18: observed MAE=0.238294 rank=139


In [9]:
pip install fancyimpute


Collecting fancyimpute
  Using cached fancyimpute-0.7.0.tar.gz (25 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting knnimpute>=0.1.0 (from fancyimpute)
  Using cached knnimpute-0.1.0.tar.gz (8.3 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting cvxpy (from fancyimpute)
  Downloading cvxpy-1.7.2-cp312-cp312-win_amd64.whl.metadata (9.8 kB)
Collecting cvxopt (from fancyimpute)
  Downloading cvxopt-1.3.2-cp312-cp312-win_amd64.whl.metadata (1.4 kB)
Collecting nose (from fancyimpute)
  Using cached nose-1.3.7-py3-none-any.whl.metadata (1.7 kB)
Collecting osqp>=0.6.2 (from cvxpy->fancyimpute)
  Downloading osqp-1.0.4-cp312-cp312-win_amd64.whl.metadata (2.1 kB)
Collecting clarabel>=0.5.0 (from cvxpy->fancyimpute)
  Downloading clarabel-0.11.1-cp39-abi3-win_amd64.whl.metadata (4.9 kB)
Collecting scs>=3.2.4.post1 (from cvxpy->fancyimpute)
  Downloading scs-3.2.8-

  DEPRECATION: Building 'fancyimpute' using the legacy setup.py bdist_wheel mechanism, which will be removed in a future version. pip 25.3 will enforce this behaviour change. A possible replacement is to use the standardized build interface by setting the `--use-pep517` option, (possibly combined with `--no-build-isolation`), or adding a `pyproject.toml` file to the source tree of 'fancyimpute'. Discussion can be found at https://github.com/pypa/pip/issues/6334
  DEPRECATION: Building 'knnimpute' using the legacy setup.py bdist_wheel mechanism, which will be removed in a future version. pip 25.3 will enforce this behaviour change. A possible replacement is to use the standardized build interface by setting the `--use-pep517` option, (possibly combined with `--no-build-isolation`), or adding a `pyproject.toml` file to the source tree of 'knnimpute'. Discussion can be found at https://github.com/pypa/pip/issues/6334


In [12]:
import scanpy as sc
import torch
import torch.nn as nn
import os
from glob import glob

# -----------------------------
# Simple GAN for Imputation
# -----------------------------
class Generator(nn.Module):
    def __init__(self, input_dim, hidden_dim=256):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, input_dim),
            nn.ReLU()
        )

    def forward(self, x):
        return self.net(x)

class Discriminator(nn.Module):
    def __init__(self, input_dim, hidden_dim=256):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.net(x)

# -----------------------------
# Imputation function
# -----------------------------
def gan_impute(adata, epochs=100, lr=1e-4):
    # Convert sparse matrix → dense numpy
    if hasattr(adata.X, "toarray"):  
        X = adata.X.toarray()  
    else:
        X = adata.X  
    
    X = torch.tensor(X, dtype=torch.float32)

    input_dim = X.shape[1]
    G, D = Generator(input_dim), Discriminator(input_dim)

    g_opt = torch.optim.Adam(G.parameters(), lr=lr)
    d_opt = torch.optim.Adam(D.parameters(), lr=lr)
    loss_fn = nn.BCELoss()

    for epoch in range(epochs):
        # Train discriminator
        real = X
        noise = torch.randn_like(real)
        fake = G(noise)

        d_real = D(real)
        d_fake = D(fake.detach())
        d_loss = loss_fn(d_real, torch.ones_like(d_real)) + loss_fn(d_fake, torch.zeros_like(d_fake))
        d_opt.zero_grad()
        d_loss.backward()
        d_opt.step()

        # Train generator
        fake = G(noise)
        d_fake = D(fake)
        g_loss = loss_fn(d_fake, torch.ones_like(d_fake))
        g_opt.zero_grad()
        g_loss.backward()
        g_opt.step()

    # Use generator output as imputation
    imputed = G(torch.randn_like(X)).detach().numpy()
    adata.layers["imputed"] = imputed
    return adata


# -----------------------------
# Run GAN Imputation
# -----------------------------
input_dir = "dropout_h5ad"
output_dir = "imputed_gan"
os.makedirs(output_dir, exist_ok=True)

files = glob(os.path.join(input_dir, "*.h5ad"))

for f in files:
    print(f"Processing {f} with GAN...")
    adata = sc.read_h5ad(f)

    adata = gan_impute(adata, epochs=200)

    out = os.path.join(output_dir, os.path.basename(f).replace("dropout", "adata_gan_imputed"))
    adata.write_h5ad(out)
    print(f"✅ Saved {out}")


Processing dropout_h5ad\adata_dropout_mf10_run1.h5ad with GAN...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


✅ Saved imputed_gan\adata_adata_gan_imputed_mf10_run1.h5ad
Processing dropout_h5ad\adata_dropout_mf10_run10.h5ad with GAN...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


✅ Saved imputed_gan\adata_adata_gan_imputed_mf10_run10.h5ad
Processing dropout_h5ad\adata_dropout_mf10_run2.h5ad with GAN...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


✅ Saved imputed_gan\adata_adata_gan_imputed_mf10_run2.h5ad
Processing dropout_h5ad\adata_dropout_mf10_run3.h5ad with GAN...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


✅ Saved imputed_gan\adata_adata_gan_imputed_mf10_run3.h5ad
Processing dropout_h5ad\adata_dropout_mf10_run4.h5ad with GAN...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


✅ Saved imputed_gan\adata_adata_gan_imputed_mf10_run4.h5ad
Processing dropout_h5ad\adata_dropout_mf10_run5.h5ad with GAN...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


✅ Saved imputed_gan\adata_adata_gan_imputed_mf10_run5.h5ad
Processing dropout_h5ad\adata_dropout_mf10_run6.h5ad with GAN...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


✅ Saved imputed_gan\adata_adata_gan_imputed_mf10_run6.h5ad
Processing dropout_h5ad\adata_dropout_mf10_run7.h5ad with GAN...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


✅ Saved imputed_gan\adata_adata_gan_imputed_mf10_run7.h5ad
Processing dropout_h5ad\adata_dropout_mf10_run8.h5ad with GAN...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


✅ Saved imputed_gan\adata_adata_gan_imputed_mf10_run8.h5ad
Processing dropout_h5ad\adata_dropout_mf10_run9.h5ad with GAN...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


✅ Saved imputed_gan\adata_adata_gan_imputed_mf10_run9.h5ad
Processing dropout_h5ad\adata_dropout_mf20_run1.h5ad with GAN...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


✅ Saved imputed_gan\adata_adata_gan_imputed_mf20_run1.h5ad
Processing dropout_h5ad\adata_dropout_mf20_run10.h5ad with GAN...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


✅ Saved imputed_gan\adata_adata_gan_imputed_mf20_run10.h5ad
Processing dropout_h5ad\adata_dropout_mf20_run2.h5ad with GAN...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


✅ Saved imputed_gan\adata_adata_gan_imputed_mf20_run2.h5ad
Processing dropout_h5ad\adata_dropout_mf20_run3.h5ad with GAN...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


✅ Saved imputed_gan\adata_adata_gan_imputed_mf20_run3.h5ad
Processing dropout_h5ad\adata_dropout_mf20_run4.h5ad with GAN...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


✅ Saved imputed_gan\adata_adata_gan_imputed_mf20_run4.h5ad
Processing dropout_h5ad\adata_dropout_mf20_run5.h5ad with GAN...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


✅ Saved imputed_gan\adata_adata_gan_imputed_mf20_run5.h5ad
Processing dropout_h5ad\adata_dropout_mf20_run6.h5ad with GAN...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


✅ Saved imputed_gan\adata_adata_gan_imputed_mf20_run6.h5ad
Processing dropout_h5ad\adata_dropout_mf20_run7.h5ad with GAN...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


✅ Saved imputed_gan\adata_adata_gan_imputed_mf20_run7.h5ad
Processing dropout_h5ad\adata_dropout_mf20_run8.h5ad with GAN...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


✅ Saved imputed_gan\adata_adata_gan_imputed_mf20_run8.h5ad
Processing dropout_h5ad\adata_dropout_mf20_run9.h5ad with GAN...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


✅ Saved imputed_gan\adata_adata_gan_imputed_mf20_run9.h5ad
Processing dropout_h5ad\adata_dropout_mf30_run1.h5ad with GAN...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


✅ Saved imputed_gan\adata_adata_gan_imputed_mf30_run1.h5ad
Processing dropout_h5ad\adata_dropout_mf30_run10.h5ad with GAN...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


✅ Saved imputed_gan\adata_adata_gan_imputed_mf30_run10.h5ad
Processing dropout_h5ad\adata_dropout_mf30_run2.h5ad with GAN...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


✅ Saved imputed_gan\adata_adata_gan_imputed_mf30_run2.h5ad
Processing dropout_h5ad\adata_dropout_mf30_run3.h5ad with GAN...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


✅ Saved imputed_gan\adata_adata_gan_imputed_mf30_run3.h5ad
Processing dropout_h5ad\adata_dropout_mf30_run4.h5ad with GAN...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


✅ Saved imputed_gan\adata_adata_gan_imputed_mf30_run4.h5ad
Processing dropout_h5ad\adata_dropout_mf30_run5.h5ad with GAN...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


✅ Saved imputed_gan\adata_adata_gan_imputed_mf30_run5.h5ad
Processing dropout_h5ad\adata_dropout_mf30_run6.h5ad with GAN...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


✅ Saved imputed_gan\adata_adata_gan_imputed_mf30_run6.h5ad
Processing dropout_h5ad\adata_dropout_mf30_run7.h5ad with GAN...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


✅ Saved imputed_gan\adata_adata_gan_imputed_mf30_run7.h5ad
Processing dropout_h5ad\adata_dropout_mf30_run8.h5ad with GAN...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


✅ Saved imputed_gan\adata_adata_gan_imputed_mf30_run8.h5ad
Processing dropout_h5ad\adata_dropout_mf30_run9.h5ad with GAN...


  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("var")


✅ Saved imputed_gan\adata_adata_gan_imputed_mf30_run9.h5ad


In [1]:
import scanpy as sc
import scvi
import numpy as np
import os
from glob import glob

# -----------------------------
# Input & Output directories
# -----------------------------
input_dir = "dropout_h5ad"
output_dir = "imputed_scvae"
os.makedirs(output_dir, exist_ok=True)

files = glob(os.path.join(input_dir, "*.h5ad"))

for f in files:
    print(f"\nProcessing {f} with scVAE ...")

    adata = sc.read_h5ad(f)
    adata.obs_names_make_unique()
    adata.var_names_make_unique()

    # -----------------------------
    # Setup scVI
    # -----------------------------
    scvi.model.SCVI.setup_anndata(
        adata,
        layer=None,            # counts are in adata.X
        categorical_covariate_keys=None,
        continuous_covariate_keys=None
    )

    # -----------------------------
    # Train SCVI model
    # -----------------------------
    model = scvi.model.SCVI(
        adata,
        n_latent=30,           # latent dimensions
        gene_likelihood="zinb" # scRNA-seq usually ZINB
    )
    model.train(
        max_epochs=400,
        early_stopping=True,
        early_stopping_patience=45
    )

    # -----------------------------
    # Get latent representation
    # -----------------------------
    Z = model.get_latent_representation()
    adata.obsm["X_scVI"] = Z

    # -----------------------------
    # Denoised (imputed) expression
    # -----------------------------
    denoised = model.get_normalized_expression(library_size=1e4)
    adata.layers["scVI_imputed"] = denoised.values

    # -----------------------------
    # Save
    # -----------------------------
    fname = os.path.basename(f).replace("adata_dropout", "adata_scVI_imputed")
    outpath = os.path.join(output_dir, fname)
    adata.write(outpath)

    print(f"✅ Saved scVAE-imputed file to: {outpath}")

print("\nAll files processed with scVAE (scVI) imputation")


ValueError: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject

In [14]:
import packaging
print(packaging.__version__)


25.0


In [15]:
pip install packaging==25.0


Collecting packaging==25.0
  Using cached packaging-25.0-py3-none-any.whl.metadata (3.3 kB)
Using cached packaging-25.0-py3-none-any.whl (66 kB)
Installing collected packages: packaging
  Attempting uninstall: packaging
    Found existing installation: packaging None
Note: you may need to restart the kernel to use updated packages.


error: uninstall-no-record-file

× Cannot uninstall packaging None
╰─> The package's contents are unknown: no RECORD file was found for packaging.

hint: You might be able to recover from this via: pip install --force-reinstall --no-deps packaging==23.2


In [17]:
pip install --force-reinstall --no-deps packaging==23.2


Note: you may need to restart the kernel to use updated packages.Collecting packaging==23.2
  Using cached packaging-23.2-py3-none-any.whl.metadata (3.2 kB)
Using cached packaging-23.2-py3-none-any.whl (53 kB)
Installing collected packages: packaging
  Attempting uninstall: packaging
    Found existing installation: packaging None



error: uninstall-no-record-file

× Cannot uninstall packaging None
╰─> The package's contents are unknown: no RECORD file was found for packaging.

hint: You might be able to recover from this via: pip install --force-reinstall --no-deps packaging==23.2
