In [1]:
import anndata as ad
from lifelines import CoxPHFitter
from lifelines import KaplanMeierFitter
from lifelines.statistics import pairwise_logrank_test
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import scanpy as sc
import seaborn as sns

In [2]:
# Functions

def restructure_visiopharm_id(name):
    tma_number = name.split('_')[1]
    core_row = name.split(',')[1]
    core_col = name.split(',')[2][:1]
    if len(tma_number) < 2:
        tma_number = '0' + tma_number
    if len(core_row) < 2:
        core_row = '0' + core_row
    new_name = tma_number + '-' + core_row + '-' + core_col
    return new_name

def z_score(frame, column):
    z_scores = (frame[column] - np.mean(frame[column])) / np.std(frame[column])
    return z_scores

In [None]:
# Read the raw visiopharm output data
# We work with the nuclear signal of each marker, calculating the z_score to normalise the data across TMAs
# Concatenate all frames together

frames_list = []
root_dir = '/mnt/cephfs/home/users/krakovic/data/KRAKOVIC/bioclavis_vpharm/v2_022024'

markers = ['CKPAN', 'CD4', 'CD8', 'CD68', 'SMA', 'Ki67']

for file in os.scandir(root_dir):
    frame = pd.read_csv(file.path, sep='\t')
    frame['core'] = frame['Name'].apply(lambda x: restructure_visiopharm_id(x))
    frame['TMA'] = frame['core'].apply(lambda x: x.split('-')[0])
    frame = frame.rename(columns=lambda x: x[4:] if x.startswith('3.') else x)
    frame.rename(columns=lambda x: x.split('MeanIntensity')[1] if x.startswith('MeanIntensity') and x.endswith('Nuclei') else x, inplace=True)
    frame.rename(columns=lambda x: x.split('Nuclei')[0] if x.endswith('Nuclei') else x, inplace=True)
    for marker in markers:
        frame[f'{marker}_z'] = z_score(frame=frame, column=marker)
    frames_list.append(frame)

data = pd.concat(frames_list)
data

In [4]:
# Cells in these areas will be excluded from analysis

exclude_ROIs = ['Background', 'Default', 'Necrosis']

data_filtered_ROIs = data[~data['ObjectInfoNuclei - ROIName'].isin(exclude_ROIs)]
data_filtered_ROIs = data_filtered_ROIs.dropna(subset=['ObjectInfoNuclei - ROIName'])

#Backup the filtered regions
# data_filtered_ROIs.to_csv('/mnt/cephfs/home/users/krakovic/data/KRAKOVIC/bioclavis_vpharm/all_tma_data_filteredROIs_042024.csv')
data_filtered_ROIs.to_csv('all_tma_data_filteredROIs_042024_ki67.csv')

In [3]:
# data_filtered_ROIs = pd.read_csv('/mnt/cephfs/home/users/krakovic/data/KRAKOVIC/bioclavis_vpharm/all_tma_data_filteredROIs_042024.csv')

  data_filtered_ROIs = pd.read_csv('/mnt/cephfs/home/users/krakovic/data/KRAKOVIC/bioclavis_vpharm/all_tma_data_filteredROIs_042024.csv')


In [6]:
# Filter the data to the core elements we require for phenotyping and cluster alignment
# Ie. Cell IDs, coordinates and expression values

data_cellmeans = data_filtered_ROIs[['TMA', 'core', 'ObjectInfoNuclei - ROIName', 'CellX', 'CellY', 'CKPAN_z', 'CD4_z', 'CD8_z', 'CD68_z', 'SMA_z', 'Ki67_z']]
data_cellmeans.rename(columns={'ObjectInfoNuclei - ROIName':'compartment'}, inplace=True)
data_cellmeans = data_cellmeans.dropna()
data_cellmeans


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_cellmeans.rename(columns={'ObjectInfoNuclei - ROIName':'compartment'}, inplace=True)


Unnamed: 0,TMA,core,compartment,CellX,CellY,CKPAN_z,CD4_z,CD8_z,CD68_z,SMA_z,Ki67_z
1,22,22-05-J,Tumour,8526.375294,-47209.294961,2.308457,-0.450230,-0.204453,-0.302017,-0.049015,-0.225220
2,22,22-05-J,Tumour,8502.780534,-47222.466136,3.451042,-0.263806,-0.194394,-0.387213,1.035879,-0.227051
3,22,22-05-J,Tumour,8510.929662,-47224.074935,2.488115,-0.475064,-0.279479,-0.315544,-0.155966,0.633958
4,22,22-05-J,Tumour,8517.623256,-47224.496313,2.098992,-0.376636,-0.268872,-0.287480,0.892927,-0.052510
5,22,22-05-J,Stroma,8522.685445,-47229.076912,0.711897,-0.517915,-0.206411,1.691172,0.079570,0.027650
...,...,...,...,...,...,...,...,...,...,...,...
776711,07,07-03-E,Tumour,16055.293306,-60032.408448,-0.546098,0.278704,2.150438,2.299076,-0.126520,-0.190065
776712,07,07-03-E,Stroma,16074.000349,-60032.173736,-0.490777,-0.108997,2.573555,-0.118711,-0.028570,-0.292293
776713,07,07-03-E,Stroma,16094.125869,-60035.216743,-0.399116,-0.183728,-0.164026,-0.202981,0.727282,-0.264193
776714,07,07-03-E,Tumour,16043.593802,-60034.513577,-0.440560,-0.336552,2.331294,-0.441513,0.132079,-0.279672


In [None]:
# data_cellmeans.to_csv('/mnt/cephfs/home/users/krakovic/data/KRAKOVIC/bioclavis_vpharm/all_tma_data_filteredROIs_042024_dropna.csv')
data_cellmeans.to_csv('all_tma_data_filteredROIs_042024_ki67_dropna.csv')