# Explore Ki67 as a proxy for aggressiveness

In [None]:
import os
import numpy as np
import anndata as ad
import pandas as pd
import scanpy as sc
from plotnine import *

In [None]:
# Load the AnnData object; its obs table carries the "annotation_consensus" labels used below.
adata = ad.read_h5ad("../../data/adata_consensus_cell_types.h5ad")

In [None]:
# Clinical table; the CSV's third column (index_col=2) becomes the index and is
# matched against the "fov" column of adata.obs.
clini = pd.read_csv("../../data/summary_clinical_data_modified.csv", index_col=2)

# Merge only once: re-running this cell on an already merged obs table would
# create "_x"/"_y" duplicate columns and make the "pT group" lookup below fail.
if "pT group" not in adata.obs.columns:
    # validate="many_to_one" raises if the clinical table lists a FOV more than
    # once, which would otherwise duplicate cells in the merged obs table.
    adata.obs = adata.obs.merge(
        clini, left_on="fov", right_index=True, how="left", validate="many_to_one"
    )

adata.obs["Stage"] = adata.obs["pT group"]

# E1 and E2 samples are annotated 'SCT' while E3 and E4 are 'Colon-no.'
# Tags are applied in this order (E4, E3, E2, E1); a later tag overrides an
# earlier one if a FOV name contains both.
stage_by_fov_tag = {"E4": "Colon-no.", "E3": "Colon-no.", "E2": "SCT", "E1": "SCT"}
for fov_tag, stage_label in stage_by_fov_tag.items():
    # Plain substring match on the FOV name (regex=False: the tags are literals).
    # NOTE(review): a tag such as "E1" would also match a name containing "E10" — confirm FOV naming.
    adata.obs.loc[adata.obs.fov.str.contains(fov_tag, regex=False), "Stage"] = stage_label

We want a DataFrame with, for each cancer cell, the corresponding FOV, pT stage, and Ki67 level.

In [None]:
# Per-cell table restricted to cells labelled "Cancer_cell", keeping only the
# FOV, Ki67 and Stage columns.
df = adata[adata.obs["annotation_consensus"] == "Cancer_cell"].obs[["fov", "Ki67", "Stage"]]
# Keep only the cells that have a Stage value
df = df[df["Stage"].notna()]

In [None]:
# Ki67 of cancer cells per stage: half violins with a white dot at the median.
# The visible range is restricted through the coordinate system instead of ylim():
# ylim() sets scale limits, which discards cells with Ki67 > 0.5 before the
# violins and medians are computed, whereas coordinate limits only zoom the axis.
(
    ggplot(df, aes(x="Stage", y="Ki67"))
    + geom_violin(aes(fill="Stage"), style="right", scale="width", width=1.25)
    + stat_summary(fun_y=np.median, geom="point", color="white", size=2)
    + theme_classic()
    + theme(axis_text_x=element_text(rotation=90, hjust=1))
    + coord_flip(ylim=(0, 0.5))
)

## Compare metabolism of Ki67hi and Ki67lo cancer cells

In [None]:
# Cells labelled "Cancer_cell" whose Stage is neither 'SCT' nor 'Colon-no.'.
df = pd.DataFrame(
    adata[
        (adata.obs["annotation_consensus"] == "Cancer_cell")
        & ~((adata.obs["Stage"] == "SCT") | (adata.obs["Stage"] == "Colon-no."))
    ].obs
)

In [None]:
# Ki67 cut-offs: 20th and 80th percentile over the cells currently in df.
agg_low, agg_hi = np.percentile(df["Ki67"], [20, 80])

# Strictly above the upper cut-off -> "High", strictly below the lower cut-off
# -> "Low", everything else -> "Intermediate". The first matching rule wins.
df["Aggressiveness"] = np.select(
    [df["Ki67"].to_numpy() > agg_hi, df["Ki67"].to_numpy() < agg_low],
    ["High", "Low"],
    default="Intermediate",
)

In [None]:
# PanCK values per cell, read from the expression matrix X. The row filter is
# the same one used to build df, so the values line up with df's rows by position.
df["PanCK"] = (
    adata[
        (adata.obs["annotation_consensus"] == "Cancer_cell")
        & ~((adata.obs["Stage"] == "SCT") | (adata.obs["Stage"] == "Colon-no."))
    ]
    .X[:, adata.var_names == "PanCK"]
    .flatten()
    .tolist()
)

In [None]:
# Marker columns that are reshaped and plotted below.
all_metab_col = [
    "CA9", "CD98", "CytC", "MSH2", "MCT1", "ASCT2",
    "LDH", "GS", "GLS", "ATP5A", "CS", "PKM2",
    "GLUT1", "MSH6", "ARG1", "CPT1A",
]
# Identifier columns carried along when reshaping to long format.
meta_col = ["Aggressiveness", "Stage", "fov"]

In [None]:
# Display violin plots of metabolic marker intensity per cell
# Split by aggressiveness (low = right violin, high = left violin);
# "Intermediate" cells are not drawn.

# One colour per class, keyed by level. A positional colour list would be
# assigned to whatever levels are present, so a missing class would silently
# shift the colours of the median dots away from those of the violins.
aggressiveness_colors = {"High": "#beaed4", "Low": "#7fc980"}

# Convert to long format: one row per (cell, marker)
df_long = pd.melt(df.loc[:, all_metab_col + meta_col], id_vars=meta_col, value_vars=all_metab_col)

gp = (
    ggplot(df_long, aes(x="variable", y="value"))
    + geom_violin(
        data=df_long.loc[df_long.Aggressiveness == "Low", :],
        fill=aggressiveness_colors["Low"],
        style="right",
        scale="width",
        width=1,
    )
    + geom_violin(
        data=df_long.loc[df_long.Aggressiveness == "High", :],
        fill=aggressiveness_colors["High"],
        style="left",
        scale="width",
        width=1,
    )
    # Median per marker and class; mapping fill to the class gives one dot per class.
    + stat_summary(
        data=df_long.loc[df_long.Aggressiveness != "Intermediate", :],
        mapping=aes(fill="Aggressiveness"),
        fun_y=np.median,
        geom="point",
        color="white",
        size=2,
    )
    + scale_fill_manual(values=aggressiveness_colors)
    + theme_classic()
    # Zoom with coordinate limits rather than ylim(): scale limits would drop
    # out-of-range values before densities and medians are computed.
    + coord_flip(ylim=(0, 1))
    + ylab("Normalized intensity")
    + xlab("Protein")
)

# Make sure the output folder exists so a fresh checkout can run this cell.
os.makedirs("../../figures/fig3", exist_ok=True)
ggsave(gp, "../../figures/fig3/intensity_ki67_cancer.pdf", width=6, height=4)
gp

In [None]:
# Number of cells per aggressiveness class in the current df
df["Aggressiveness"].value_counts()

In [None]:
# Repeat for healthy stage: cells labelled "Cancer_cell" in 'Colon-no.' samples.
# The view is built once and used for both obs and X, so the metadata rows and
# the PanCK values below always refer to the same cells in the same order.
healthy_view = adata[(adata.obs["annotation_consensus"] == "Cancer_cell") &
                     (adata.obs["Stage"] == "Colon-no.")]
df = pd.DataFrame(healthy_view.obs)

# Thresholds are kept identical: agg_low / agg_hi are reused from the earlier
# cell and are not recomputed on these cells.
df["Aggressiveness"] = ["High" if x > agg_hi else "Low" if x < agg_low else "Intermediate" for x in df["Ki67"]]

# PanCK values per cell, read from the expression matrix X
df["PanCK"] = healthy_view.X[:, adata.var_names == "PanCK"].flatten().tolist()

# One colour per class, keyed by level. A positional colour list would be
# assigned to whatever levels are present, so a missing class (possible here,
# since the cut-offs were not computed on these cells) would silently shift the
# colours of the median dots away from those of the violins.
aggressiveness_colors = {"High": "#beaed4", "Low": "#7fc980"}

# Convert to long format: one row per (cell, marker)
df_long = pd.melt(df.loc[:, all_metab_col + meta_col], id_vars=meta_col, value_vars=all_metab_col)

gp = (
    ggplot(df_long, aes(x="variable", y="value"))
    # low = right violin, high = left violin; "Intermediate" cells are not drawn
    + geom_violin(
        data=df_long.loc[df_long.Aggressiveness == "Low", :],
        fill=aggressiveness_colors["Low"],
        style="right",
        scale="width",
        width=1,
    )
    + geom_violin(
        data=df_long.loc[df_long.Aggressiveness == "High", :],
        fill=aggressiveness_colors["High"],
        style="left",
        scale="width",
        width=1,
    )
    # Median per marker and class; mapping fill to the class gives one dot per class.
    + stat_summary(
        data=df_long.loc[df_long.Aggressiveness != "Intermediate", :],
        mapping=aes(fill="Aggressiveness"),
        fun_y=np.median,
        geom="point",
        color="white",
        size=2,
    )
    + scale_fill_manual(values=aggressiveness_colors)
    + theme_classic()
    # Zoom with coordinate limits rather than ylim(): scale limits would drop
    # out-of-range values before densities and medians are computed.
    + coord_flip(ylim=(0, 1))
    + ylab("Normalized intensity")
    + xlab("Protein")
)

# Make sure the output folder exists so a fresh checkout can run this cell.
os.makedirs("../../figures/fig3", exist_ok=True)
ggsave(gp, "../../figures/fig3/intensity_ki67_healthy.pdf", width=6, height=4)
gp

Do CPT1A, CytC and MCT1 differ more between Ki67-high and Ki67-low cells in malignant samples than in healthy ones?  
Double-check that the Ki67+ cells in healthy samples are truly healthy.

In [None]:
# Number of cells per aggressiveness class in the current df
df["Aggressiveness"].value_counts()

## Proportion of Ki67+ per sample

In [None]:
# Fraction of Ki67-high cells per FOV, compared across stages.
# agg_hi is the upper Ki67 cut-off computed in an earlier cell.
df = pd.DataFrame(adata[(adata.obs["annotation_consensus"] == "Cancer_cell")].obs)

# Check if Ki67 is higher than agg_hi
df['high_Ki67'] = df['Ki67'] > agg_hi

# One row per FOV: the mean of the boolean flag is the fraction of Ki67-high
# cells; Stage takes the first non-missing value found within the FOV.
# observed=True restricts the result to FOVs that actually occur when "fov" is
# categorical and avoids relying on pandas' changing default for that option.
df_frac = df.groupby("fov", observed=True).agg(
    Fraction=("high_Ki67", "mean"),
    Stage=("Stage", "first"),
)
# FOVs without a Stage value cannot be placed on the Stage axis; drop them, as
# is done for the per-cell Ki67 plot.
df_frac = df_frac.dropna(subset=["Stage"])

(
    ggplot(df_frac, aes(x="Stage", y="Fraction"))
    + geom_violin(aes(fill="Stage"), style="right", scale="width", width=1.25)
    + stat_summary(fun_y=np.median, geom="point", color="white", size=2)
    + theme_classic()
    + theme(axis_text_x=element_text(rotation=90, hjust=1))
    + coord_flip()
    + ylim(0, 1)
)