In [2]:
import os
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import spearmanr
import statsmodels.api as sm

In [4]:
# Set the working directory
path = "/Users/teresaglauner/Documents/Hamburg_folder/CRC_TMA"
input_path = os.path.join(path, "Final/FOV_pixel_intensities_per_patients_of_all_channels.xlsx")
images_path = os.path.join(path, "image_data/")

In [5]:
# Read in the data and drop observations with no disease stage
dat = pd.read_excel(input_path)
dat = dat[dat['group'] != ""]

In [6]:
# Define metabolic markers
metabolic = ["ATP5A", "GLUT1", "LDH", "ARG1", "GLS", "GS", "CS", "ASCT2",
             "CytC", "MCT1", "CA9", "PKM2", "CPT1A", "CD98"]

In [7]:
# Define stage colors
stage_colors = ["#f7beba", "#FC7873", "#C72636", "#990b02"]

In [8]:
# Create output directories if they don't exist
if not os.path.exists(path):
    os.makedirs(path)

In [19]:
def plot_density(dt, dir_name="density/", markers=metabolic, ip=images_path, col="mean"):
    pa = os.path.join(ip, dir_name)
    if not os.path.exists(pa):
        os.makedirs(pa)

    for mar in markers:
        sns.kdeplot(dt[dt['target'] == mar][col], fill=True, label=mar)
        percentiles = np.percentile(dt[dt['target'] == mar][col], [99, 99.5, 99.9])
        for percentile in percentiles:
            plt.axvline(percentile, color='black', linestyle='dashed', label=f"{mar} - {percentile:.2f}")
        plt.xlabel(mar)
        plt.legend()
        plt.savefig(os.path.join(pa, f"{mar}.png"))
        plt.close()


def make_heatmap(mat, ip=images_path, h=2, w=8, file_name="heatmap"):
    sns.set_theme()
    plt.figure(figsize=(w, h))
    sns.heatmap(mat, cmap="viridis", annot=True, fmt=".2f", linewidths=.5, cbar_kws={'label': 'Median Value'})
    plt.xticks(rotation=45, ha='right')
    plt.savefig(os.path.join(ip, f"{file_name}.png"))
    plt.close()


def make_big_heatmap(dt, markers, file_name, ip=images_path, h=8, w=8):
    pa = os.path.join(ip, file_name)
    if not os.path.exists(pa):
        os.makedirs(pa)

    temp = dt[dt['target'].isin(markers)].pivot_table(index=['recording', 'group'], columns='target', values='scaled', aggfunc='mean')
    side_table = temp.reset_index()['group']
    # Remove the 'recording' prefix from markers
    markers = [m.replace('recording', '') for m in markers]
    mat = temp.reset_index()[['recording'] + markers].set_index('recording').clip(upper=1).T
    make_heatmap(mat, ip, h, w, file_name)


def make_boxplots(dt, cor_dt, ip=images_path, dir_name="boxplots/", sc=stage_colors, markers=metabolic):
    pa = os.path.join(ip, dir_name)
    if not os.path.exists(pa):
        os.makedirs(pa)

    for mar in markers:
        temp = dt[dt['target'] == mar]
        temp.loc[temp['scaled'] > 1, 'scaled'] = 1

        plt.figure(figsize=(2, 3))
        sns.boxplot(x='group', y='scaled', data=temp, palette=sc, showfliers=False)
        plt.text(1, temp['scaled'].max(), f"R={cor_dt[cor_dt['target'] == mar]['cor'].values[0]:.2f}", ha='left', va='bottom')
        plt.text(0.9, temp['scaled'].max(), f"q={cor_dt[cor_dt['target'] == mar]['q.value'].values[0]:.2f}", ha='left', va='bottom')
        plt.xlabel("Stage")
        plt.ylabel("Mean intensity")
        plt.title(mar)
        plt.savefig(os.path.join(pa, f"{mar}.png"))
        plt.close()

# Other functions (makeRegBoxplots, make_heatmap, make_big_heatmap) can be similarly translated

In [14]:
# Visualize distributions
dat['scaled'] = dat.groupby('target')['mean'].transform(lambda x: x / x.quantile(0.99))
dat['stage'] = dat['group'].str.extract('(\d+)').astype(float)

# Visualize distributions
plot_density(dat, dir_name="density_unscaled/")
plot_density(dat, dir_name="density_scaled/", col="scaled")


  if pd.api.types.is_categorical_dtype(vector):
  with pd.option_context('mode.use_inf_as_na', True):
  if pd.api.types.is_categorical_dtype(vector):
  with pd.option_context('mode.use_inf_as_na', True):
  if pd.api.types.is_categorical_dtype(vector):
  with pd.option_context('mode.use_inf_as_na', True):
  if pd.api.types.is_categorical_dtype(vector):
  with pd.option_context('mode.use_inf_as_na', True):
  if pd.api.types.is_categorical_dtype(vector):
  with pd.option_context('mode.use_inf_as_na', True):
  if pd.api.types.is_categorical_dtype(vector):
  with pd.option_context('mode.use_inf_as_na', True):
  if pd.api.types.is_categorical_dtype(vector):
  with pd.option_context('mode.use_inf_as_na', True):
  if pd.api.types.is_categorical_dtype(vector):
  with pd.option_context('mode.use_inf_as_na', True):
  if pd.api.types.is_categorical_dtype(vector):
  with pd.option_context('mode.use_inf_as_na', True):
  if pd.api.types.is_categorical_dtype(vector):
  with pd.option_context('mode.use

In [15]:
# Make median heatmaps
prot_matrix = dat.groupby(['group', 'target'])['scaled'].median().unstack()
met_matrix = prot_matrix[metabolic]
make_heatmap(prot_matrix, file_name="heatmap_all")
make_heatmap(met_matrix, file_name="heatmap_met", w=6, h=2.5)

In [20]:
# Make hierarchically clustered heatmaps of total observations
make_big_heatmap(dat, markers=dat['target'].unique(), file_name="heatmap_big_all")
make_big_heatmap(dat, markers=metabolic, file_name="heatmap_big_met", w=6)

In [24]:
# Correlate markers with disease stage
cor_dat = dat.groupby('target')[['mean', 'stage']].apply(lambda x: pd.Series([spearmanr(x['mean'], x['stage']).correlation, spearmanr(x['mean'], x['stage']).pvalue], index=['correlation', 'p.value'])).reset_index()
# Print the DataFrame to inspect its structure
print(cor_dat)
# Assign column names
cor_dat.columns = ['target', 'correlation', 'p.value']
# Add the q.value column
cor_dat['q.value'] = cor_dat['p.value'].apply(lambda x: np.nan if pd.isnull(x) else RLM(x, M=1).fit().pvalues[0])
# Sort the DataFrame
cor_dat.sort_values(by='q.value', inplace=True)


      target  correlation  p.value
0       ARG1          NaN      NaN
1      ASCT2          NaN      NaN
2      ATP5A          NaN      NaN
3        CA9          NaN      NaN
4       CD14          NaN      NaN
5      CD163          NaN      NaN
6       CD20          NaN      NaN
7       CD31          NaN      NaN
8       CD3e          NaN      NaN
9        CD4          NaN      NaN
10      CD45          NaN      NaN
11      CD68          NaN      NaN
12       CD7          NaN      NaN
13       CD8          NaN      NaN
14      CD98          NaN      NaN
15     CPT1A          NaN      NaN
16        CS          NaN      NaN
17      CytC          NaN      NaN
18       DCN          NaN      NaN
19     FoxP3          NaN      NaN
20       GLS          NaN      NaN
21     GLUT1          NaN      NaN
22        GS          NaN      NaN
23    HLADRa          NaN      NaN
24      Ki67          NaN      NaN
25       LDH          NaN      NaN
26      MCT1          NaN      NaN
27       MPO        

In [25]:
# Visualize Boxplots
sig_markers = cor_dat[(cor_dat['q.value'] < 0.05) & cor_dat['target'].isin(metabolic)]['target']
make_boxplots(dat, cor_dat, markers=sig_markers)

# Similar translation for the remaining code