# scClone2DR Fold Change Analysis: Ground Truth vs. Predicted vs. Observed

This notebook compares three perspectives on drug-response fold change (ground truth, predicted, and observed) using simulated data at two difficulty levels.

## Overview

The analysis evaluates how well scClone2DR's predictions match both the ground truth (known from simulation) and the observed data (with noise), helping to distinguish model accuracy from data quality.

## Three Fold Change Perspectives

1. **Ground Truth**: The true underlying fold changes used to generate the simulated data
2. **Predicted**: The model's predictions from scClone2DR after training
3. **Observed**: The actual measurements from the noisy simulated data

## Experimental Settings

### Setting 1: Hard (R=5, θ=10³)
- Lower drug response signal (R=5)
- Lower negative binomial parameter (θ=1000) → more noise
- Regularization: L1=0.1, L2=0.1
- Training steps: 2000

### Setting 2: Easy (R=20, θ=10⁵)
- Stronger drug response signal (R=20)
- Higher negative binomial parameter (θ=100,000) → less noise
- Regularization: L1=0.01, L2=0.01
- Training steps: 2000 (the same `n_steps` value is passed to both settings in the code below)

## Output Figure

A 2×3 grid of scatter plots showing pairwise comparisons:
- **Column 1**: Ground Truth vs. Predicted
- **Column 2**: Ground Truth vs. Observed
- **Column 3**: Predicted vs. Observed

Each row corresponds to one experimental setting, allowing visual assessment of:
- Model prediction accuracy relative to ground truth
- Data quality (how noise affects observations)
- Consistency between predictions and observations

In [None]:
import sys
sys.path.append('../')
import scClone2DR as sccdr
import matplotlib.pyplot as plt
import numpy as np
import torch
import pandas as pd
# Number of optimisation steps; the same value is passed to every
# `train(...)` call in this notebook.
n_steps = 2000
# Re-create the `np.float_` alias, which NumPy 2.0 removed.
# NOTE(review): presumably a dependency still references `np.float_` --
# confirm which one and drop this shim once it has been updated.
np.float_ = np.float64

# R=5, theta = 1000

In [None]:
# Hard setting (R=5): build a fresh model, simulate the reference dataset and
# split it in two halves.
# NOTE(review): the notebook text describes this setting as theta=10^3 --
# confirm how `neg_bin_n=1` maps onto that value.
modelscClone2DR = sccdr.models.scClone2DR()
simu_settings = {'C': 24, 'R': 5, 'N': 100, 'Kmax': 7, 'D': 30, 'theta_rna': 15}
data_ref = modelscClone2DR.get_simulated_training_data(
    simu_settings,
    neg_bin_n=1,
    mode_nu="noise_correction",
    mode_theta="not shared decoupled",
)
# The simulated dataset is passed as both arguments here, i.e. 'pi' is
# computed from the simulation's own parameters.
data_ref['pi'] = modelscClone2DR.compute_survival_probas_subclone_features(data_ref, data_ref)

# Samples [0, N/2) go to training, samples [N/2, N) are held out.
n_train = int(0.5 * data_ref['N'])
idxs_train = list(range(n_train))
idxs_test = list(range(n_train, data_ref['N']))

data_train, data_test = modelscClone2DR.get_data_split_simu(data_ref, idxs_train, idxs_test)

In [None]:
# Fit the hard-setting model on its training split with L1 and L2 penalties
# of 0.1 each, running for the shared `n_steps` steps.
params_svi = modelscClone2DR.train(data_train, penalty_l1=0.1, penalty_l2=0.1 , n_steps=n_steps)

In [None]:
# Post-process the fitted parameters and compute the statistics for the hard
# setting.
params_svi = modelscClone2DR.convert_to_tensor(params_svi)
# Same helper as used on the simulated data, but now fed the fitted parameters.
params_svi['pi'] = modelscClone2DR.compute_survival_probas_subclone_features(data_ref, params_svi)
# NOTE(review): statistics are computed against the full `data_ref` (training
# and held-out samples together), not against `data_test` -- confirm intended.
# Presumably this call fills `modelscClone2DR.results`, which the figure cell
# reads -- verify.
modelscClone2DR.compute_all_stats(data_ref, data_ref, params_svi)

# R=20, theta = 100000

In [None]:
# Easy setting (R=20): build a second, independent model, simulate its
# reference dataset and split it in two halves.
# NOTE(review): the notebook text describes this setting as theta=10^5 --
# confirm how `neg_bin_n=100` maps onto that value.
modelscClone2DR_easy = sccdr.models.scClone2DR()
simu_settings_easy = {'C': 24, 'R': 20, 'N': 100, 'Kmax': 7, 'D': 30, 'theta_rna': 15}
data_ref_easy = modelscClone2DR_easy.get_simulated_training_data(
    simu_settings_easy,
    neg_bin_n=100,
    mode_nu="noise_correction",
    mode_theta="not shared decoupled",
)
# The simulated dataset is passed as both arguments here, i.e. 'pi' is
# computed from the simulation's own parameters.
data_ref_easy['pi'] = modelscClone2DR_easy.compute_survival_probas_subclone_features(data_ref_easy, data_ref_easy)

# Samples [0, N/2) go to training, samples [N/2, N) are held out.
# NOTE: `idxs_train`, `idxs_test` and `data_test` are rebound here, so the
# hard-setting values are no longer reachable under those names.
n_train_easy = int(0.5 * data_ref_easy['N'])
idxs_train = list(range(n_train_easy))
idxs_test = list(range(n_train_easy, data_ref_easy['N']))

data_train_easy, data_test = modelscClone2DR_easy.get_data_split_simu(data_ref_easy, idxs_train, idxs_test)

In [None]:
# Fit the easy-setting model on its training split with L1 and L2 penalties
# of 0.01 each, running for the shared `n_steps` steps.
params_svi_easy = modelscClone2DR_easy.train(data_train_easy, penalty_l1=0.01, penalty_l2=0.01 , n_steps=n_steps)

In [None]:
# Post-process the fitted parameters and compute the statistics for the easy
# setting.
params_svi_easy = modelscClone2DR_easy.convert_to_tensor(params_svi_easy)
# Same helper as used on the simulated data, but now fed the fitted parameters.
params_svi_easy['pi'] = modelscClone2DR_easy.compute_survival_probas_subclone_features(data_ref_easy, params_svi_easy)
# NOTE(review): statistics are computed against the full `data_ref_easy`
# (training and held-out samples together) -- confirm intended.
# Presumably this call fills `modelscClone2DR_easy.results`, which the figure
# cell reads -- verify.
modelscClone2DR_easy.compute_all_stats(data_ref_easy, data_ref_easy, params_svi_easy)

# Figure

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
from sklearn.metrics import explained_variance_score

# =========================
# Label colors
# =========================
# Maps a keyword to the colour of any axis label containing it.
# `set_colored_labels` looks each key up as a substring of the lower-cased
# label text, so keys must be lower case.
LABEL_COLORS = {
    "ground truth": "#1f77b4",  # blue
    "predicted": "#d62728",     # red
    "observed": "#2ca02c",      # green
}

# =========================
# Helpers
# =========================
def clean_xy(x, y):
    """Keep only the (x, y) pairs in which both values are finite.

    Both inputs are converted with ``np.asarray``; a pair is dropped as soon
    as either member is NaN or +/-inf. Returns the two filtered arrays.
    """
    xs, ys = np.asarray(x), np.asarray(y)
    keep = np.logical_and(np.isfinite(xs), np.isfinite(ys))
    return xs[keep], ys[keep]


def add_identity_line(ax, x, y):
    """Draw the y = x reference line and give both axes the same range.

    Parameters
    ----------
    ax
        Target axes; its ``plot``, ``set_xlim`` and ``set_ylim`` methods are
        called.
    x, y : array-like
        Coordinates of the plotted points. They are converted with
        ``np.asarray``, so plain sequences are accepted as well as arrays.

    The line spans the joint minimum and maximum of ``x`` and ``y``, and both
    axis limits are set to that same interval. If either input is empty
    (for example after every pair was filtered out as non-finite) there is no
    range to draw, so the axes are left untouched instead of raising.
    """
    x = np.asarray(x)
    y = np.asarray(y)
    if x.size == 0 or y.size == 0:
        # min()/max() of a zero-size array raises; nothing sensible to draw.
        return

    lim_min = min(x.min(), y.min())
    lim_max = max(x.max(), y.max())
    # zorder=0 keeps the reference line underneath the scatter points.
    ax.plot([lim_min, lim_max], [lim_min, lim_max],
            color='black', linewidth=1, zorder=0)
    ax.set_xlim(lim_min, lim_max)
    ax.set_ylim(lim_min, lim_max)


def set_colored_labels(ax, xlabel, ylabel, fontsize):
    """Label both axes of ``ax`` and tint each label by the keyword it contains.

    Every key of ``LABEL_COLORS`` is searched (case-insensitively) in each
    label; on a hit the matching axis label takes that key's colour. Labels
    without any keyword keep their default colour.
    """
    ax.set_xlabel(xlabel, fontsize=fontsize)
    ax.set_ylabel(ylabel, fontsize=fontsize)

    x_text = xlabel.lower()
    y_text = ylabel.lower()
    for keyword in LABEL_COLORS:
        shade = LABEL_COLORS[keyword]
        if keyword in x_text:
            ax.xaxis.label.set_color(shade)
        if keyword in y_text:
            ax.yaxis.label.set_color(shade)


# =========================
# Parameters
# =========================
size_dots = 2                    # marker size (`s=`) of every scatter plot
custom_font_size = 20            # font size of the row descriptions left of the grid
custom_fontsize_labelsaxis = 16  # font size of the x/y axis labels
fontsizelegend = 18              # NOTE(review): not referenced anywhere in the visible code

# =========================
# Figure
# =========================
fig = plt.figure(figsize=(15, 10))
gs = GridSpec(2, 3, width_ratios=[1, 1, 1])

# One entry per grid column: (results key on x, results key on y,
# x-axis label, y-axis label). Every row uses the same three comparisons.
panel_columns = [
    ('fold_change_true', 'fold_change_pred',
     'Fold change (ground truth)', 'Fold change (predicted)'),
    ('fold_change_true', 'fold_change_data',
     'Fold change (ground truth)', 'Fold change (observed)'),
    ('fold_change_pred', 'fold_change_data',
     'Fold change (predicted)', 'Fold change (observed)'),
]
# One entry per grid row: the fitted model whose `results` are plotted.
panel_models = [modelscClone2DR, modelscClone2DR_easy]

# Panels are created row by row, left to right (same order as before).
panel_axes = []
for row, model in enumerate(panel_models):
    for col, (x_key, y_key, x_label, y_label) in enumerate(panel_columns):
        ax = fig.add_subplot(gs[row, col])
        # Drop pairs with a NaN/inf on either side before plotting.
        fc_x, fc_y = clean_xy(model.results[x_key], model.results[y_key])
        ax.scatter(fc_x, fc_y, s=size_dots, zorder=2)
        add_identity_line(ax, fc_x, fc_y)
        set_colored_labels(ax, x_label, y_label, custom_fontsize_labelsaxis)
        panel_axes.append(ax)

# The annotation code further down refers to the panels by these names, so
# they are kept as-is even though the digits do not match grid coordinates:
# row 0 -> ax00, ax01, ax10; row 1 -> ax11, ax12, ax02.
ax00, ax01, ax10, ax11, ax12, ax02 = panel_axes

# =========================
# Layout, row descriptions and panel letters
# =========================
# Run the layout pass first: it moves the axes, so positions read from
# `get_position()` beforehand do not correspond to the final figure.
left_margin = 0.1  # strip on the left reserved for the row descriptions
plt.tight_layout(rect=[left_margin, 0, 1, 1])

# Right edge of the rotated row descriptions, placed inside the reserved strip.
left_x = left_margin - 0.025

# One description per row, vertically centred on that row's first panel.
row_descriptions = [
    (ax00, r"$R=5$, $\theta=10^{3}$"),
    (ax11, r"$R=20$, $\theta=10^{5}$"),
]
for row_ax, description in row_descriptions:
    row_box = row_ax.get_position()
    fig.text(
        left_x,
        row_box.y0 + row_box.height / 2,
        description,
        ha='right',
        va='center',
        rotation=90,
        fontsize=custom_font_size
    )

# Panel letters (a)-(f), row by row, in the top-left corner of each panel.
for letter, panel_ax in zip("abcdef", (ax00, ax01, ax10, ax11, ax12, ax02)):
    panel_ax.text(0.01, 0.98, f"({letter})", transform=panel_ax.transAxes,
                  fontsize=16, fontweight='bold', va='top', ha='left')

plt.show()
