# TCR Analysis

## Environment

In [1]:
import os
import sys
from pathlib import Path
from tqdm import tqdm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scanpy as sc
import warnings
warnings.filterwarnings('ignore')

3000


In [2]:
%load_ext autoreload
%autoreload 2
# Directory holding the local analysis modules; it is appended to sys.path
# (once) so they can be imported from this notebook.
package_path = r'E:\TMC\PRISM_Code\analysis_cell_typing'
if sys.path.count(package_path) == 0:
    sys.path.append(package_path)

# # cell-typing
# from cell_typing import QC_plot, general_preprocess, preprocess_of_UMAP, UMAP_genes_plot, UMAP_obs_plot, annotate
# # spatial
# from spatial import create_hull, show_cluster, ROI_mask_load
# # correlation analysis
# from correlation import matrix_for_heatmap

In [3]:
# Root of the processed-data tree and the identifier of the run analysed here.
BASE_DIR = Path(r'F:\spatial_data\processed')
RUN_ID = '20241216_ZCH_TNBC_BZ01_CA2_5um_TCR_Tcell_only'

# Per-run directory and the sub-folders used by this notebook.
src_dir = BASE_DIR.joinpath(f'{RUN_ID}_processed')
stc_dir, read_dir, seg_dir, analysis_dir = (
    src_dir.joinpath(folder)
    for folder in ('stitched', 'readout', 'segmented', 'analysis')
)

# Only the analysis folder is created here; mkdir() without parents=True
# raises if the run directory itself does not exist.
analysis_dir.mkdir(exist_ok=True)

## Load expression data

In [4]:
# Read the expression table (first CSV column becomes the row index) and
# discard the 'False_pos' column before wrapping it in an AnnData object.
df = pd.read_csv(seg_dir / "expression_matrix.csv", index_col=0).drop(columns='False_pos')

adata = sc.AnnData(df)
# Gene names are upper-cased.
adata.var.index = adata.var.index.str.upper()

# Constant annotations, broadcast to every row of adata.obs.
adata.obs['dataset'] = "PRISM"
adata.obs['tissue'] = 'TNBC_BZ01_CA2'

# Store the current (untransformed) state in .raw.
adata.raw = adata

# Last expression of the cell: display the gene names.
gene_list = adata.var.index
gene_list

Index(['CD3D', 'CD4', 'CD8A', 'CLONOTYPE1_TRB', 'CLONOTYPE2_TRA',
       'CLONOTYPE2_TRB', 'CLONOTYPE3_TRA', 'CLONOTYPE3_TRB', 'CLONOTYPE4_TRA',
       'CLONOTYPE4_TRA-2', 'CLONOTYPE4_TRB', 'CLONOTYPE5_TRA',
       'CLONOTYPE5_TRB'],
      dtype='object')

In [5]:
# Load the spatial table and relabel its rows with their positional index as
# strings, so rows can be selected with the AnnData obs names.
centroid = pd.read_csv(seg_dir / 'dapi_predict.csv')
centroid.index = centroid.index.astype(str)

# Keep only the rows present in adata, in adata's order (KeyError if any is missing).
centroid_sub = centroid.loc[adata.obs_names]

# Coordinates are stored as an (n_obs, 2) array in (Y, X) column order.
adata.obsm['spatial'] = centroid_sub[['Y', 'X']].to_numpy()

In [6]:
# Display the AnnData summary (dimensions plus the obs / obsm fields set so far).
adata

AnnData object with n_obs × n_vars = 60543 × 13
    obs: 'dataset', 'tissue'
    obsm: 'spatial'

## Gene colocalization

In [7]:
# Read the expression table again as a plain DataFrame: the first CSV column
# and the 'False_pos' column are dropped, and the default integer row index
# is kept.
exp_mtx = (
    pd.read_csv(seg_dir / 'expression_matrix.csv')
    .iloc[:, 1:]
    .drop(columns='False_pos')
)
print(exp_mtx.shape)
exp_mtx.head()

(60543, 13)


Unnamed: 0,CD3D,CD4,CD8A,CLONOTYPE1_TRB,CLONOTYPE2_TRA,CLONOTYPE2_TRB,CLONOTYPE3_TRA,CLONOTYPE3_TRB,CLONOTYPE4_TRA,CLONOTYPE4_TRA-2,CLONOTYPE4_TRB,CLONOTYPE5_TRA,CLONOTYPE5_TRB
0,5.0,3.0,2.0,2.0,1.0,4.0,2.0,0.0,2.0,6.0,5.0,0.0,0.0
1,8.0,1.0,2.0,1.0,1.0,2.0,4.0,1.0,0.0,1.0,2.0,0.0,0.0
2,4.0,0.0,3.0,0.0,1.0,0.0,2.0,1.0,1.0,3.0,1.0,1.0,2.0
3,2.0,0.0,2.0,1.0,1.0,1.0,2.0,1.0,0.0,4.0,1.0,0.0,1.0
4,14.0,1.0,3.0,2.0,5.0,3.0,5.0,3.0,3.0,2.0,6.0,0.0,1.0


In [8]:
# Pairwise correlation between the columns (genes) of the expression table,
# using DataFrame.corr() defaults.
# NOTE: this rebinds the notebook-level name `df` to a copy of `exp_mtx`.
df = exp_mtx.copy()
correlation_matrix = df.corr()

# Replace the diagonal (each gene against itself) with NaN so those cells are
# left blank in the heatmap. A boolean mask is used instead of writing into
# `correlation_matrix.values`: that array is not guaranteed to be a writable
# view of the frame, and it is read-only when pandas Copy-on-Write is active,
# which makes np.fill_diagonal raise.
diagonal = np.eye(len(correlation_matrix), dtype=bool)
correlation_matrix = correlation_matrix.mask(diagonal)

# Explicit figure/axes so the save and close calls act on this figure rather
# than on whatever pyplot currently considers the active one.
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=False, cmap='coolwarm', ax=ax)
ax.set_title('Gene Expression Correlation', fontsize=16)
fig.savefig(analysis_dir / 'gene_expression_correlation.png')
plt.close(fig)

In [9]:
# Display the column labels of the expression table.
exp_mtx.columns

Index(['CD3D', 'CD4', 'CD8A', 'CLONOTYPE1_TRB', 'CLONOTYPE2_TRA',
       'CLONOTYPE2_TRB', 'CLONOTYPE3_TRA', 'CLONOTYPE3_TRB', 'CLONOTYPE4_TRA',
       'CLONOTYPE4_TRA-2', 'CLONOTYPE4_TRB', 'CLONOTYPE5_TRA',
       'CLONOTYPE5_TRB'],
      dtype='object')

## Detailed analysis

In [None]:
# Compute QC metrics and store them on adata in place. percent_top=None skips
# the "percentage of counts in the top N genes" metrics — presumably because
# the default cutoffs exceed this 13-gene panel (TODO confirm).
sc.pp.calculate_qc_metrics(adata, percent_top=None, inplace=True)

# Normalize every cell to a total of 1e4 counts, then apply log(1 + x).
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)
# Regress out the per-cell "total_counts" covariate (expected in adata.obs
# from calculate_qc_metrics above) before scaling.
sc.pp.regress_out(adata, ["total_counts"])
# Scale data to unit variance and zero mean
sc.pp.scale(adata)

: 

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

# `adata` is expected to be an AnnData object.
# Wrap adata.X in a DataFrame whose columns are labelled with the gene names.
data = pd.DataFrame(adata.X, columns=adata.var_names)

# Options shared by every clustermap: rows are clustered, columns are left in
# their existing order, row labels are hidden and the colour range is fixed
# to [-3, 3].
clustermap_options = dict(
    cmap='coolwarm',
    yticklabels=False,
    row_cluster=True,
    col_cluster=False,
    vmin=-3,
    vmax=3,
)

# One figure per linkage method, each saved under the analysis directory.
linkage_methods = ('single', 'complete', 'average', 'weighted', 'centroid', 'median', 'ward')
for method in linkage_methods:
    g = sns.clustermap(data, method=method, **clustermap_options)

    # Save the figure just created, then close it so figures do not pile up
    # in memory across iterations.
    plt.savefig(analysis_dir / f'clustermap_{method}.png')
    plt.close()