# Signature Heatmap Analysis

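This notebook demonstrates how to visualize signature feature expression as a hierarchically clustered heatmap. The expression matrix used here is randomly generated mock data; in practice it would come from actual dataset feature data.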

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from amprenta_rag.client import RAGClient
import os
import numpy as np

# Build the API client. The endpoint is read from the API_URL environment
# variable, with a fallback to the Docker host address.
api_url = os.getenv('API_URL', 'http://host.docker.internal:8000')
client = RAGClient(api_url=api_url)
print(f'Connected to {api_url}')

## Load Signatures

In [None]:
# Request up to 10 signatures from the API and report how many came back
signatures = client.signatures.list(limit=10)
print(f"Loaded {len(signatures)} signatures")

# Preview the names of the first five signatures
preview = signatures[:5]
for signature in preview:
    print(f"  - {signature.name}")

## Create Expression Matrix

In [None]:
# Build a mock expression matrix with one row per feature and one column per
# sample. A real analysis would fill this from actual dataset feature data.
np.random.seed(42)  # fixed seed so the mock values are reproducible

# Example feature labels and 20 sample labels (Sample_1 .. Sample_20)
feature_names = ["BRCA1", "TP53", "EGFR", "MYC", "PIK3CA", "AKT1", "PTEN", "KRAS"]
sample_names = [f"Sample_{i}" for i in range(1, 21)]
n_features, n_samples = len(feature_names), len(sample_names)

# Standard-normal draws stand in for expression values
expression_matrix = np.random.randn(n_features, n_samples)

# Attach the feature/sample labels
df_expression = pd.DataFrame(expression_matrix, index=feature_names, columns=sample_names)

print(f"Expression matrix shape: {df_expression.shape}")
print(f"Features: {n_features}")
print(f"Samples: {n_samples}")
print("\nFirst few rows:")
print(df_expression.head())

## Normalize/Scale Data

In [None]:
from sklearn.preprocessing import StandardScaler

# Z-score each feature (row) across its samples. StandardScaler standardizes
# column-wise, so the matrix is transposed on the way in and transposed back
# on the way out.
scaler = StandardScaler()
scaled_values = scaler.fit_transform(df_expression.T).T
df_scaled = pd.DataFrame(
    scaled_values,
    index=df_expression.index,
    columns=df_expression.columns,
)

# Summary statistics over the whole scaled matrix
print("Data normalized (Z-score)")
print(f"Mean: {df_scaled.values.mean():.4f}")
print(f"Std: {df_scaled.values.std():.4f}")

## Generate Clustered Heatmap

In [None]:
# Draw a hierarchically clustered heatmap of the scaled expression matrix.
# sns.clustermap creates its own figure (sized by `figsize`), so no separate
# plt.figure() call is made here; one would only leave a blank extra figure.
clustered = sns.clustermap(
    df_scaled,
    method='ward',       # Ward linkage is only well defined with the Euclidean metric
    metric='euclidean',
    cmap='RdBu_r',
    center=0,
    vmin=-2,             # clip the color scale to +/-2 so outliers do not wash it out
    vmax=2,
    figsize=(14, 10),
    cbar_kws={"label": "Normalized Expression"},
    row_cluster=True,
    col_cluster=True,
    xticklabels=True,
    yticklabels=True
)

# Title the clustermap's own figure explicitly rather than relying on
# pyplot's notion of the "current" figure.
clustered.ax_heatmap.figure.suptitle(
    'Signature Expression Heatmap (Clustered)', fontsize=16, fontweight='bold', y=1.02
)
# plt.tight_layout() is intentionally not called: clustermap already arranges
# its dendrogram, heatmap and colorbar axes, and re-running tight_layout
# afterwards can shift that arrangement.
plt.show()

## Save Figure

In [None]:
# Destination file for the heatmap image (a relative path, so it resolves
# against the notebook's working directory).
output_path = "signature_heatmap.png"
# Saving is disabled by default. Uncomment the two lines below to write the
# clustermap figure to `output_path` and print a confirmation:
# clustered.savefig(output_path, dpi=300, bbox_inches='tight')
# print(f"Figure saved to {output_path}")