# DeepHeal Tutorial

This tutorial demonstrates how to use the `DeepHeal` package programmatically to learn low-dimensional representations of drug-induced proteomic changes.

In [None]:
import pandas as pd
import numpy as np
from deepheal.deepheal import DeepHeal
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

## 1. Create Synthetic Data

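For this tutorial, we will create a synthetic dataset that simulates proteomics log2 fold-changes. We will simulate 100 samples with 500 protein features each, where every sample belongs to one of 3 drug classes. To give the classes some structure, the feature means of each sample are shifted by an amount proportional to its class label.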

In [None]:
# Parameters
SEED = 42  # fixed seed so a fresh "Restart & Run All" regenerates the same dataset
n_samples = 100
n_features = 500
n_classes = 3
class_shift = 0.5  # per-class offset added to every feature of a sample

# Dedicated generator: reproducible without touching NumPy's global RNG state
rng = np.random.default_rng(SEED)

# Generate random features (standard-normal noise, one row per sample)
X = rng.standard_normal((n_samples, n_features))

# Assign each sample an integer class label in [0, n_classes)
y = rng.integers(0, n_classes, size=n_samples)

# Add some structure based on class: shift each sample's features by
# class_shift * label (broadcast over the feature axis instead of a row loop)
X += class_shift * y[:, np.newaxis]

# Create Sample IDs
sample_ids = [f"Sample_{i}" for i in range(n_samples)]
drug_classes = [f"Class_{label}" for label in y]

# Create Input DataFrame (Features), with the sample identifier as first column
input_df = pd.DataFrame(X, columns=[f"Prot_{i}" for i in range(n_features)])
input_df.insert(0, "Sample_ID", sample_ids)

# Create Meta DataFrame (Labels), row-aligned with input_df
meta_df = pd.DataFrame({
    "Sample_ID": sample_ids,
    "Drug_Class": drug_classes
})

# Sanity checks: one row per sample, identical sample order in both frames
assert input_df.shape == (n_samples, n_features + 1)
assert meta_df["Sample_ID"].is_unique
assert input_df["Sample_ID"].equals(meta_df["Sample_ID"])

print("Input Data:")
print(input_df.head())
print("\nMeta Data:")
print(meta_df.head())

## 2. Initialize and Train DeepHeal

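We initialize the model with the desired hyperparameters. Here we pass `domain_key=None`, so no batch correction is applied (this is what `no_batch=True` amounts to: it is implied whenever no domain key is provided or it is set to `None`), since we are only doing simple dimensionality reduction.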

In [None]:
# Feature matrix for the model: every column except the sample identifier
features = input_df.drop(columns="Sample_ID")

# Hyperparameters gathered in one place so they are easy to tune
deepheal_config = dict(
    save_dir="tutorial_output",
    latent_dim=16,
    n_epochs=10,
    batch_size=32,
    lr=1e-3,
    verbose=True,
    domain_key=None,  # No batch correction
)

# Build the model from the configuration above
model = DeepHeal(**deepheal_config)

# Hand the features and the sample metadata to the model
model.set_data(features, meta_data=meta_df)

# Fit the model; save_model=True is passed through to DeepHeal.train
model.train(save_model=True)

## 3. Generate Embeddings

After training, we can extract the low-dimensional embeddings for our samples.

In [None]:
# Run the trained model on the feature matrix to obtain the embeddings
embeddings = model.predict(features)
print(f"Embeddings shape: {embeddings.shape}")

# Name the embedding columns z1..zK, where K is the embedding width
latent_columns = [f"z{dim}" for dim in range(1, embeddings.shape[1] + 1)]

# Tabulate the embeddings and attach each sample's drug class
# (the label column is matched to the rows by index)
embedding_df = pd.DataFrame(embeddings, columns=latent_columns).assign(
    Drug_Class=meta_df["Drug_Class"]
)
embedding_df.head()

## 4. Visualization

Let's visualize the learned embeddings using PCA to see if the classes separate.

In [None]:
# Project the embeddings onto their first two principal components
pca = PCA(n_components=2)
pca_result = pca.fit_transform(embeddings)

# Build the label array once so each class mask is a plain comparison
class_labels = np.asarray(drug_classes)

# One scatter series per drug class, drawn on an explicit Axes
fig, ax = plt.subplots(figsize=(8, 6))
for cls in np.unique(class_labels):
    members = class_labels == cls
    ax.scatter(pca_result[members, 0], pca_result[members, 1], label=cls, alpha=0.7)

ax.set_title("PCA of DeepHeal Embeddings")
ax.set_xlabel("PC1")
ax.set_ylabel("PC2")
ax.legend()
plt.show()