In [None]:
# Wang, Jixin et al. “Pan-Cancer Proteomics Analysis to Identify Tumor-Enriched and Highly Expressed Cell Surface Antigens as Potential Targets for Cancer Therapeutics.”
# Molecular & cellular proteomics : MCP vol. 22,9 (2023)
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC10494184/
# https://zenodo.org/records/7991979

In [None]:
# CPTAC_FragPipe_TMT-iBAQ_ProtCorr_v02
# Alberto Bejarano 2024

In [None]:
# intensity-based absolute quantification (iBAQ); tandem mass tag (TMT); total protein approach (TPA)

In [None]:
# Record the wall-clock start time (read again by the last cell to report the total
# run time) and print a human-readable timestamp for this run.
import time
from datetime import datetime

start = time.time()
print(datetime.now().strftime("%B %d, %Y %H:%M:%S"))

In [None]:
#!pip install pyreadr
#!pip uninstall seaborn
#!pip install seaborn==0.11.2

In [None]:
import pandas as pd
# Notebook-wide pandas display settings.
# NOTE(review): the explicit float_format below appears to take precedence over
# display.precision when floats are rendered - confirm which of the two is intended.
pd.options.display.precision = 2
pd.options.display.max_columns = 200
pd.options.display.width = 1000
pd.set_option('display.float_format', '{:.3f}'.format)
#        -         -        -        -         -        -         -        -        -         -
from scipy.stats import mannwhitneyu, normaltest
from statannotations.Annotator import Annotator
from statannot import add_stat_annotation
from openpyxl import load_workbook
import matplotlib.pyplot as plt
from scipy import stats
#from tqdm import tqdm
import seaborn as sns; print(sns.__version__) # Install version 0.11.2
import numpy as np
import pyreadr
import re
#        -         -        -        -         -        -         -        -        -         -
# Silence warnings for the rest of the notebook.
import warnings; warnings.filterwarnings("ignore") 
# NOTE(review): the filter above has no category restriction, so the two
# category-specific filters below look redundant - confirm before removing.
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=RuntimeWarning)

In [None]:
# Small exploratory gene set.
# NOTE: GenesOfInterest is reassigned in the next cell, so on a top-to-bottom run
# this list is not the one used downstream.
GenesOfInterest = ['MSLN', 'RAB5A', 'RAB5B', 'ERBB3', 'ERBB2', 'CTLA4']
#GenesOfInterest = ["TP53", "MYC", 'ERBB2']

In [None]:
# Genes of interest, written out five per tumor type (tumor-type code in the trailing
# comment of each row). Several genes are listed under more than one tumor type;
# dict.fromkeys() drops the repeats while keeping first-seen order, so any per-gene
# loop handles each gene exactly once.
GenesOfInterest = list(dict.fromkeys([
                    'VAMP8', 'PYCARD', 'TMED2', 'IER3IP1', 'DPM3',      # BRCA
                    'ELANE', 'PRTN3', 'CEACAM5', 'FCER1G', 'GPRC5A',    # COAD
                    'MIF', 'CRYAB', 'LGALS1', 'ANXA4', 'ANXA2',         # ccRCC
                    'BGN', 'ANXA1', 'CD59', 'CTSE', 'THBS1',            # PDAC
                    'HSPA5', 'HSPD1', 'SEC61B', 'PDIA4', 'SPCS3',       # LUAD
                    'CALR', 'MT-CO2', 'CD9', 'ATP5IF1', 'COX6C',        # UCEC
                    'CALR', 'HSPD1', 'HSP90AB1', 'C1QBP', 'PDIA4',      # OV
                    'ELANE', 'PRTN3', 'HLA-B', 'SLC2A1', 'TAPBP',       # HNSCC
                    'APOA1', 'ANXA5', 'ANXA1', 'ANXA2', 'TF',           # GBM
                    'HSP90AB1', 'HSPA5', 'SLC25A5', 'HSPD1', 'VDAC2'    # LSCC
                   ]))

In [None]:
# Correlation method passed to pandas Series.corr in the plotting loop.
method = 'spearman' # alternatives: 'pearson', 'kendall'

In [None]:
# Read the phenotype .RData file. The returned object is used below as a mapping
# (it is queried with .keys() and indexed by key).
path = './data/iBAQ/CPTAC_PAN_T_N_iBAQ_pheno.RData'
result = pyreadr.read_r(path)

In [None]:
# Take the first object stored in the phenotype file as the phenotype table.
keys = [*result.keys()]  # to inspect them: print(f"Keys in {path}: {keys}")
iBAQ_pheno_df = result[keys[0]]

# Name the row labels 'Sample', then move them into an ordinary column so the table
# can be merged on 'Sample' later. The two prints show the index name before and
# after the reset.
iBAQ_pheno_df.index.name = 'Sample'
print(iBAQ_pheno_df.index.name)
iBAQ_pheno_df = iBAQ_pheno_df.reset_index()
print(iBAQ_pheno_df.index.name)

iBAQ_pheno_df.sample(2)

In [None]:
# Read the iBAQ .RData file. NOTE: this rebinds `path` and `result`, which until
# here referred to the phenotype file.
path = './data/iBAQ/CPTAC_PAN_T_N_iBAQ.RData'
result = pyreadr.read_r(path)

In [None]:
# Take the first object stored in the iBAQ file as the expression table.
keys = list(result.keys()); #print(f"Keys in {datafile}: {keys}")
df = result[keys[0]]
# Zero-filling of missing values is disabled, so missing entries stay as they were loaded.
#df.fillna(0, inplace=True)

In [None]:
# Move the row labels into an ordinary 'GeneSymbol' column.
# Guarded so the cell can be re-run: once the column exists, a second reset_index()
# would fail trying to insert 'GeneSymbol' again.
if 'GeneSymbol' not in df.columns:
    df.index.name = 'GeneSymbol'
    print(df.index.name)
    df = df.reset_index()
print(df.index.name)

In [None]:
# Spot-check two randomly chosen rows of the expression table.
df.sample(2)

In [None]:
# Keep only the rows whose gene symbol is on the list of interest,
# then show one random row as a sanity check.
df_filtered = df.loc[df["GeneSymbol"].isin(GenesOfInterest)]
df_filtered.sample()

In [None]:
# Reshape from wide (one column per sample) to long (one row per gene/sample pair).
# The frame is melted as it is: calling reset_index() first would add the old row
# numbers as an 'index' column, which melt would then treat as one more "sample".
df_narrow = pd.melt(df_filtered, id_vars=['GeneSymbol'], var_name='Sample', value_name='iBAQ')
df_narrow.sample()

In [None]:
# Column order applied to the merged long-format expression table.
col_order = ['GeneSymbol', 'Sample', 'TumorType', 'TissueType', 'iBAQ', 'log2_iBAQ']

In [None]:
# Attach the phenotype columns to every gene/sample row, give them descriptive names,
# add a log2(iBAQ + 1) column and put the columns in the agreed order.
# Built as one chain (no inplace edits) so re-running the cell always starts from
# df_narrow; the log transform is vectorized instead of applied row by row.
# NOTE(review): assumes the 'iBAQ' column is numeric - confirm for the loaded data.
iBAQ_ProtExpression_df = (
    pd.merge(df_narrow, iBAQ_pheno_df, on='Sample')
    .rename(columns={'ind': 'TumorType', 'Tissue': 'TissueType'})
    .assign(log2_iBAQ=lambda merged: np.log2(merged['iBAQ'] + 1))
    .loc[:, col_order]
)
iBAQ_ProtExpression_df.sample(5)

In [None]:
%%time
for gene in GenesOfInterest:
    data = iBAQ_ProtExpression_df[iBAQ_ProtExpression_df['GeneSymbol'] == gene]
    sample_counts = data.groupby(['GeneSymbol', 'TumorType', 'TissueType']).size().reset_index(name='SampleCount')
    sample_counts = sample_counts.sort_values(by=['TumorType', 'SampleCount', 'TissueType'], ascending=[True, False, True])
    #print(sample_counts)

In [None]:
# Inspect the column dtypes of the merged long-format table.
iBAQ_ProtExpression_df.dtypes

In [None]:
# Row count of the long-format table, printed before and after an optional truncation
# step. The truncation is commented out, so both numbers are currently the same.
print(len(iBAQ_ProtExpression_df))
#prot_exp_df = prot_exp_df.head(200)
print(len(iBAQ_ProtExpression_df))

In [None]:
# Genes present in the table, in order of first appearance; printed sorted, in yellow.
gene_list = iBAQ_ProtExpression_df['GeneSymbol'].unique().tolist()
print('\033[33m', sorted(gene_list), '\033[0m')

In [None]:
# Tissue types present in the table, in order of first appearance; printed sorted, in yellow.
tissue_list = iBAQ_ProtExpression_df['TissueType'].unique().tolist()
print('\033[33m', sorted(tissue_list), '\033[0m')

In [None]:
# Tumor types present in the table, in order of first appearance; printed sorted, in yellow.
tumortype_list = iBAQ_ProtExpression_df['TumorType'].unique().tolist()
print('\033[33m', sorted(tumortype_list), '\033[0m')

In [None]:
# Back to wide format for plotting: one row per sample (with its tumor and tissue
# type) and one column per gene, holding the log2_iBAQ values.
df_wide = (
    iBAQ_ProtExpression_df
    .pivot(
        index=['Sample', 'TumorType', 'TissueType'],
        columns='GeneSymbol',
        values='log2_iBAQ',
    )
    .reset_index()
)
df_wide.sample(3)

In [None]:
# Points are colored by tissue type.
hue = 'TissueType'
hue_order = ["Normal", "Tumor"]
# Explicit label -> color mapping, so each tissue type keeps the same color in every panel.
palette = {'Normal': '#a1c9f4', 'Tumor': '#f2b482'}

In [None]:
# Table the plotting loop reads from.
data = df_wide

# Keyword arguments for the scatter layer (small, translucent, thinly outlined points
# colored by tissue type) and for the regression line.
scatter_kws = dict(s=6, alpha=0.25, linewidth=0.2, hue=hue, palette=palette, edgecolor='#1E1E1E')
line_kas = dict(color='red', alpha=0.25, lw=0.5)

In [None]:
N_ROWS, N_COLS = 2, 5        # panel grid of each figure: one panel per tumor type
LABEL_PANEL = N_COLS + 1     # first panel of the second row is the only one with axis labels
AXIS_PAD = 1.05              # amount added to both ends of each axis, in data units


def _corr_color(r):
    """Return the annotation color for a correlation coefficient.

    Strength is judged on |r|, so positive and negative correlations are treated alike:
    |r| >= 0.75 -> 'green', 0.5 <= |r| < 0.75 -> 'orange', |r| < 0.5 -> 'red'.
    An undefined coefficient (NaN) is shown in neutral grey.
    """
    if pd.isna(r):
        return '#7E7E7E'
    strength = abs(r)
    if strength >= 0.75:
        return 'green'
    if strength >= 0.5:
        return 'orange'
    return 'red'


# One figure per unordered gene pair; inside it, one scatter + regression panel per tumor type.
for i, a in enumerate(gene_list):
    for b in gene_list[i + 1:]:
        print(a, b)
        fig = plt.figure(figsize=(12/2.54, 6/2.54), dpi=300)   # 12 cm x 6 cm

        for count_feat, tumor in enumerate(tumortype_list, start=1):
            try:
                df00 = data.loc[data.TumorType == tumor]
                n = len(df00)
                # Raises ValueError (handled below) if there are more tumor types than panels.
                ax = fig.add_subplot(N_ROWS, N_COLS, count_feat)

                # Scatter layer, colored by tissue type; the legend is suppressed.
                sns.scatterplot(data=df00, x=a, y=b, ax=ax, legend=False, **scatter_kws)

                # Widen the axes before the fit is drawn: with truncate=False the
                # regression line extends to the current x limits.
                xmin, xmax = ax.get_xlim()
                ymin, ymax = ax.get_ylim()
                ax.set_xlim(xmin - AXIS_PAD, xmax + AXIS_PAD)
                ax.set_ylim(ymin - AXIS_PAD, ymax + AXIS_PAD)

                # Regression line with a 95% confidence band (no second scatter layer).
                sns.regplot(data=df00, x=a, y=b, scatter=False, fit_reg=True, ci=95,
                            truncate=False, line_kws=line_kas, ax=ax)

                # Correlation between the two genes within this tumor type.
                r = df00[a].corr(df00[b], method=method)

                ax.set_title(' ' + tumor, color='#b8bdbf', fontsize=6, loc='left', pad=2)
                textkws = {'weight': 500, 'va': 'center', 'alpha': 0.8}
                bboxkws = {'facecolor': 'white', 'edgecolor': 'white', 'boxstyle': 'round', 'pad': 0.2}
                ax.text(0.60, 0.95, f"r = {r:,.2f}", fontsize=5, ha='center', color=_corr_color(r),
                        transform=ax.transAxes, bbox=bboxkws, **textkws)
                # n is the number of rows for this tumor type (rows with a missing value included).
                ax.text(0.97, 0.02, f"n = {n:,.0f}", fontsize=4, ha='right', color='#7E7E7E',
                        transform=ax.transAxes, bbox=bboxkws, **textkws)

                # Minimal styling: no ticks, thin light-grey left/bottom spines.
                ax.set_xticks([]); ax.set_yticks([])
                ax.set_xticklabels([]); ax.set_yticklabels([])
                ax.spines['left'].set_color('#b8bdbf')
                ax.spines['bottom'].set_color('#b8bdbf')
                for spine in ax.spines.values():
                    spine.set_linewidth(.25)

                if count_feat == LABEL_PANEL:
                    ax.set_xlabel(a, fontsize=4, color='#b8bdbf', x=0.2)
                    ax.set_ylabel(b, fontsize=4, color='#b8bdbf', y=0.2)
                else:
                    ax.set_xlabel('')
                    ax.set_ylabel('')
            except ValueError as err:
                # Best effort: report the panel that failed and carry on with the next one.
                print(f'An exception occurred for {tumor} ({a} vs {b}): {err}')

        sns.despine(fig=fig)   # drop the top/right spines of every panel
        fig.tight_layout(pad=0.5)
        fig.suptitle('Correlation Prot Expression '+str(a)+' vs '+ str(b), color='#b8bdbf', fontsize=6, x=0.01, y=1.05, ha='left')
        plt.show()
        plt.close(fig)         # release the figure; this loop creates one per gene pair
        #plt.savefig("tcga.png", format="png", bbox_inches="tight", dpi=300)

In [None]:
# Report the total wall-clock time since `start` was recorded in the first code cell.
# The label uses the notebook name given in the header comment (..._v02).
print("'CPTAC_FragPipe_TMT-iBAQ_ProtCorr_v02' script run time:", f'{time.time()-start:.0f}', "seconds.")