In [None]:
#!/usr/bin/env python

"""
Preliminary analyses focused on looking at TIL data.
"""

__author__ = "Daniel Ranti"
__license__ = "Open Access"
__version__ = "1.0.1"
__maintainer__ = "Daniel Ranti"
__email__ = "daniel.l.ranti@gmail.com"
__status__ = "Development"

# Standard Imports
import os
import pandas as pd
import numpy as np
import scipy
import logging

# Third Party Imports
from anndata import AnnData
import anndata as ad
import scanpy as sc
import scanpy.external as sce
import graphtools as gt
import phate
import scprep
import meld
import cmocean
import sklearn
from FlowCytometryTools import FCMeasurement
from joblib import Parallel, delayed

# Plotting Imports
import seaborn as sns
import matplotlib.pyplot as plt

# Notebook-wide plotting defaults.
# scanpy log verbosity is independent of the figure settings, so set it first.
sc.settings.verbosity = 3

# Matplotlib base font size, followed by scanpy's own figure parameters.
plt.rc("font", size=14)
sc.set_figure_params(
    dpi=100,
    dpi_save=200,
    fontsize=14,
    figsize=(8, 8),
    facecolor="white",
    transparent=True,
)

# FOR LOGGING
# A named logger that echoes DEBUG-and-above messages to the stream handler
# with a timestamped format.
logger = logging.getLogger("CYTOF_analysis_script")
logger.setLevel(logging.DEBUG)
ch = logging.StreamHandler()
ch.setLevel(logging.DEBUG)
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
ch.setFormatter(formatter)
# logging.getLogger returns the same object on every call, so re-running this
# cell would otherwise stack one more handler each time and print every
# message several times. Only attach a handler when none is present yet.
if not logger.handlers:
    logger.addHandler(ch)

# making sure plots & clusters are reproducible
np.random.seed(42)

# Figure Directory
FIGDIR = "figures/"

In [None]:
import warnings

# Suppress all warnings for the rest of the session.
warnings.filterwarnings(action='ignore')
# Re-apply scanpy figure parameters: transparent backgrounds, 300 dpi when saving.
sc.settings.set_figure_params(dpi_save=300, transparent=True)

# Looking at Ex Vivo TILs

In [None]:
# Load the ex vivo CD8 TIL event tables, merge them into one AnnData object and
# compute embeddings (UMAP, t-SNE) plus PhenoGraph/Leiden clusters.
os.chdir(
    "/sc/arion/projects/nmibc_bcg/CancerDiscovery/data/til_experiment_May2022/exvivo_til_cd8/"
)
# Listing and importing the exported event tables (tab-separated, first row skipped).
# `directory` was previously undefined here; it is defined the same way as in the
# other loading cells of this notebook.
directory = os.fsencode(".")
fcs_list = []
# sorted(): os.listdir returns entries in arbitrary order, which would make the
# concatenation order (and therefore the clustering) vary between runs.
for file in sorted(os.listdir(directory)):
    filename = os.fsdecode(file)
    # Skip sub-directories and figure files: this notebook saves figures relative
    # to the current working directory, so they end up next to the data.
    # NOTE(review): confirm no other non-table files live in this directory.
    if not os.path.isfile(filename):
        continue
    if filename.lower().endswith((".png", ".pdf", ".svg")):
        continue
    fcs_list.append(pd.read_csv(filename, sep='\t', skiprows=[0]))
if not fcs_list:
    raise FileNotFoundError("No event tables found in {}".format(os.getcwd()))

# One AnnData per file; `adata_list` was previously used without being initialised.
adata_list = []
for fcs in fcs_list:
    temp = AnnData(fcs)
    temp.var_names_make_unique()
    adata_list.append(temp)

# Pairwise outer-join concatenation, same as before.
total_mtx = adata_list[0]
for mtx in adata_list[1:]:
    total_mtx = AnnData.concatenate(total_mtx, mtx, join="outer")

# Drop the event counter column. .copy() turns the view into a real object so the
# in-place scanpy calls below do not write into a view of `total_mtx`.
mtx_abx = total_mtx[:, ~total_mtx.var.index.isin(['Event #'])].copy()

# Dimensionality reduction
sc.pp.neighbors(mtx_abx)
sc.tl.umap(mtx_abx)
sc.tl.tsne(mtx_abx)
# Clustering
sc.tl.pca(mtx_abx, n_comps=10)
sce.tl.phenograph(mtx_abx, clustering_algo="leiden", k=50)
sc.tl.dendrogram(mtx_abx, groupby='pheno_leiden')

In [None]:
# Keep only the part of each variable name that precedes the first underscore.
new_idx = [item.split('_')[0] for item in mtx_abx.var.index]
mtx_abx.var.index = new_idx

In [None]:
# t-SNE coloured by each marker in turn, colour scale capped at 50000.
tsne_markers = [
    'GITR', 'CD161', 'CXCR3', 'LAG3', 'CCR6', 'NKG2A',
    '2B4', 'TIGIT', 'TIM3', 'CD103', 'CCR4', 'CD25',
]
sc.pl.tsne(mtx_abx, color=tsne_markers, vmax=50000, size=5)

In [None]:
# t-SNE coloured by PhenoGraph/Leiden cluster, labels drawn on the embedding.
fig, axs = plt.subplots(nrows=1, ncols=1, figsize=(8, 8), constrained_layout=True)
on_data_legend = dict(
    legend_loc='on data',
    legend_fontweight=1000,
    legend_fontsize=20,
    legend_fontoutline=4,
)
sc.pl.tsne(
    mtx_abx,
    color='pheno_leiden',
    size=25,
    ax=axs,
    save="21jul2022 C8s tsne of phenograph",
    **on_data_legend,
)

In [None]:
# NKG2A on the t-SNE embedding, colour scale capped at 50000.
fig, axs = plt.subplots(nrows=1, ncols=1, figsize=(8, 7), constrained_layout=True)
sc.pl.tsne(mtx_abx, color='NKG2A', vmax=50000, size=25, ax=axs, save='21Jul2022 NKG2A')

In [None]:
# CXCR3 on the t-SNE embedding, colour scale capped at 30000.
fig, axs = plt.subplots(nrows=1, ncols=1, figsize=(8, 7), constrained_layout=True)
sc.pl.tsne(mtx_abx, color='CXCR3', vmax=30000, size=25, ax=axs, save='21Jul2022 CXCR3')

In [None]:
# One t-SNE figure per marker; each marker has its own colour-scale cap.
for marker, marker_vmax in (('PD1', 25000), ('GITR', 50000), ('LAG3', 50000)):
    fig, axs = plt.subplots(nrows=1, ncols=1, figsize=(8, 7), constrained_layout=True)
    sc.pl.tsne(
        mtx_abx,
        color=marker,
        vmax=marker_vmax,
        size=25,
        ax=axs,
        save=f'21Jul2022 {marker}',
    )

In [None]:
# Per-cluster marker matrix plot (column-scaled) with dendrogram and group totals.
matrix_markers = [
    'GITR', 'CD161', 'CXCR3', 'LAG3', 'CCR6', 'NKG2A', '2B4',
    'TIGIT', 'TIM3', 'CD103', 'CCR4', 'CD25', 'CD56', 'PD1',
]
ax = sc.pl.matrixplot(
    mtx_abx,
    var_names=matrix_markers,
    groupby='pheno_leiden',
    standard_scale='var',
    cmap='Blues',
    colorbar_title='column scaled\nexpression',
    swap_axes=False,
    dendrogram=True,
    figsize=(10, 15),
    return_fig=True,
)
with_totals = ax.add_totals(size=1)
with_totals.show()
ax.savefig('21Jul2022 matrixplot', transparent=True, dpi=300)


# Old Analyses (Archived)

In [None]:
# Load the CD8 K562 co-culture FCS files, derive per-file conditions from the
# file names, preprocess them and plot clusters/markers on a t-SNE embedding.
os.chdir(
    "/sc/arion/projects/nmibc_bcg/CancerDiscovery/data/til_experiment_May2022/"
)
# Listing and importing the FCS files; extracting their condition as well
fcs_list = []
fcs_conditions = []
clinical_list = []
directory = os.fsencode(".")
celltype = '_CD8_'
for file in os.listdir(directory):
    filename = os.fsdecode(file)
    # Only CD8 FCS files, excluding the control acquisitions.
    if not filename.endswith(".fcs") or celltype not in filename:
        continue
    if ('No_surface_Ab' in filename) or ('TIL_only' in filename):
        continue
    # Underscore-separated tokens between the first and the last three tokens
    # hold: sample ID, K562 line, and (optionally) the antibody condition.
    condition_list = filename.split('_')[1:-3]
    temp = {
        'sampleID': condition_list[0],
        'k562_type': condition_list[1],
        'celltype': celltype,
        # Explicit length check instead of a bare `except:` so that unrelated
        # errors are no longer silently swallowed.
        'antibody_condition': condition_list[2] if len(condition_list) > 2 else 'None',
    }
    clinical_list.append(temp)
    fcs_conditions.append('NK')
    fcs_list.append(FCMeasurement(ID=file, datafile=filename))
conditions_df = pd.DataFrame(clinical_list)
conditions_df = conditions_df.replace({
    'K562HLAEPDL1':'E+/PD-L1+', 
    'K562wt':'Wildtype', 
    'K562HLAE':'E+', 
    'aNKG2AaPDL': 'aNKG2AaPDL1'
})

# E+ VS POST
filter_dict = {
    "condition_column": "k562_type",
    "timepoint_column": "antibody_condition",
    "conditions_of_interest": ["Wildtype","E+","E+/PD-L1+"],
    "timepoints_of_interest": ["None", "aNKG2A", "aPDL1","aNKG2AaPDL1"],
}

# NOTE(review): preprocess_cytof is not defined in the visible part of this
# notebook; it must be defined or imported before this cell runs.
# The identical filter_dict / preprocess_cytof section used to be repeated
# twice; it now runs once.
mtx_prepost = preprocess_cytof(
    fcs_list,
    fcs_conditions,
    conditions_df.to_dict('records'),
    filter_dict,
)

# Restrict to the channels of interest. .copy() materialises the view so the
# in-place scanpy calls below do not operate on a view of `mtx_prepost`.
mtx_abx = mtx_prepost[:,mtx_prepost.var.index.isin(['Perforin', 'granzymeA', 'CD45', 'CD3','CD8', 'CD56', 'CD4', 'TIGIT', 'TIM3', 'PD1', 'CD137', 'CD107a', 'LAG3','NKG2A', 'IFNg', 'GM_CSF',])].copy()
sc.pp.neighbors(mtx_abx)
sc.tl.umap(mtx_abx)
sc.tl.louvain(mtx_abx)
# The plots below use the t-SNE embedding, but the sc.tl.tsne call had been
# commented out. Compute it unless an embedding was inherited from mtx_prepost.
if "X_tsne" not in mtx_abx.obsm:
    sc.tl.tsne(mtx_abx)

sc.settings.set_figure_params(transparent=True,dpi_save=300)
sc.pl.tsne(mtx_abx,color=['louvain'], size=1, title='Louvain Clustering', save='Fig 6 TSNE.png')
sc.pl.tsne(mtx_abx,color=['NKG2A'], size=1, title='NKG2A', save='Fig 6 TSNE NKG2A.png', vmax=100000)
sc.pl.tsne(mtx_abx,color=['IFNg'], size=1, title='IFN-g', save='Fig 6 TSNE IFNG.png', vmax=100000)
sc.pl.tsne(mtx_abx,color=['PD1'], size=1, title='PD-1', save='Fig 6 TSNE PD1.png', vmax=100000)
sc.pl.tsne(mtx_abx,color=['TIGIT'], size=1, title='TIGIT', save='Fig 6 TSNE TIGIT.png', vmax=100000)

In [None]:
# Column-scaled marker expression per Louvain cluster (markers on rows).
louvain_panel = [
    'Perforin', 'granzymeA', 'CD45', 'CD3', 'CD8', 'CD56', 'CD4', 'TIGIT',
    'TIM3', 'PD1', 'CD137', 'CD107a', 'LAG3', 'NKG2A', 'IFNg', 'GM_CSF',
]
sc.pl.matrixplot(
    mtx_abx,
    var_names=louvain_panel,
    groupby='louvain',
    standard_scale='var',
    cmap='Blues',
    colorbar_title='column scaled\nexpression',
    swap_axes=True,
)

In [None]:
# Cells co-cultured with the E+/PD-L1+ K562 line, restricted to the channels of
# interest, then split into the two untreated-vs-treated comparisons.
channels_of_interest = [
    'Perforin', 'granzymeA', 'CD45', 'CD3', 'CD8', 'CD56', 'CD4', 'TIGIT',
    'TIM3', 'PD1', 'CD137', 'CD107a', 'LAG3', 'NKG2A', 'IFNg', 'GM_CSF',
]
is_e_pdl1 = mtx_prepost.obs['k562_type'] == 'E+/PD-L1+'
e_pdL1_cells = mtx_prepost[is_e_pdl1]
is_channel_of_interest = e_pdL1_cells.var.index.isin(channels_of_interest)
e_pdL1_cells = e_pdL1_cells[:, is_channel_of_interest]

antibody = e_pdL1_cells.obs['antibody_condition']
e_pdL1_None_Anti2 = e_pdL1_cells[antibody.isin(['None', 'aNKG2AaPDL1'])]
e_pdL1_None_Anti1 = e_pdL1_cells[antibody.isin(['None', 'aNKG2A'])]



In [None]:
# Comparison definitions: which two antibody conditions to contrast, and the
# obs column that holds them.
meld_dict = {
    "E+,PD-L1+: None vs Anti NKG2A,PD-L1": {
        "condition1": "None",
        "condition2": "aNKG2AaPDL1",
        "condition_key": "antibody_condition",
    },
    "E+,PD-L1+: None vs Anti NKG2A alone": {
        "condition1": "None",
        "condition2": "aNKG2A",
        "condition_key": "antibody_condition",
    },
}

# Data subsets, listed in the same order as the keys of `meld_dict`.
comparison_subsets = [e_pdL1_None_Anti2, e_pdL1_None_Anti1]

for (iteration, comparison), adata in zip(meld_dict.items(), comparison_subsets):
    logger.info(f"{iteration} Iteration has begun")
    condition_key = comparison["condition_key"]
    condition1 = comparison["condition1"]
    condition2 = comparison["condition2"]

    # PhenoGraph clustering on the first 10 principal components.
    logger.info("Phenograph")
    k = 30
    sc.tl.pca(adata, n_comps=10)
    communities, graph, Q = sce.tl.phenograph(adata.obsm["X_pca"], k=k)
    adata.obs["PhenoGraph_clusters"] = pd.Categorical(communities)
    adata.uns["PhenoGraph_Q"] = Q
    adata.uns["PhenoGraph_k"] = k

    # Drawing UMAP
    logger.info("Drawing UMAP")
    sc.pp.neighbors(adata, n_neighbors=30, n_pcs=10)
    sc.tl.umap(adata)
    sc.pl.umap(
        adata,
        color=["PhenoGraph_clusters", "antibody_condition"],
        title=f"PhenoGraph Assigned Clusters: {iteration}",
        save=f"phenograph {iteration} TIL K562 Experiment.png",
    )

    # MELD
    logger.info("Running Meld")
    metadata = run_meld_cytof(
        combined_adata=adata,
        condition1=condition1,
        condition2=condition2,
        condition_key=condition_key,
        cluster_key="PhenoGraph_clusters",
    )
    # Optional export of the annotated metadata (disabled):
    # metadata.to_csv(
    #     "cytof_anndata/cytof_annotated_metadata {} {}.csv".format(condition_key, condition2)
    # )


In [None]:
# Rank markers between antibody conditions, then plot the ranking.
sc.tl.rank_genes_groups(
    e_pdL1_cells,
    groupby='antibody_condition',
    method='t-test_overestim_var',
)
sc.pl.rank_genes_groups(e_pdL1_cells)

In [None]:
# Ranking results as a DataFrame; group=None requests every group.
dedf = sc.get.rank_genes_groups_df(
    e_pdL1_cells,
    group=None,
)

In [None]:
import math

# Displays e raised to arcsinh(X), element-wise, for the E+/PD-L1+ expression matrix.
arcsinh_expression = np.arcsinh(e_pdL1_cells.X)
math.e ** arcsinh_expression

In [None]:
# Untreated vs. combined-antibody cells, with the condition labels recoded to 0/1.
paired_conditions = ['aNKG2AaPDL1', 'None']
in_pair = e_pdL1_cells.obs['antibody_condition'].isin(paired_conditions)
temp = e_pdL1_cells[in_pair]
label_codes = {'None': 0, 'aNKG2AaPDL1': 1}
temp.obs = temp.obs.replace(label_codes)
temp

In [None]:
# import diffxpy.api as de
# test = de.test.wald(
#     data=e_pdL1_cells,
#     formula_loc="~ 1 + antibody_condition",
#     factor_loc_totest="antibody_condition"
# )

# Ex-Vivo CD56+ CD8s

In [None]:
# Load the 'T_cells' FCS files of the CD56 experiment, merge them, restrict to
# the marker panel, and compute the t-SNE embedding and Louvain clusters.
os.chdir(
    "/sc/arion/projects/nmibc_bcg/CancerDiscovery/data/til_experiment_May2022/K562TILs_CD56/"
)
# Listing and importing the FCS files
fcs_list = []
# Reset (unused in this cell) so values from the earlier loading cell do not leak.
fcs_conditions = []
clinical_list = []
directory = os.fsencode(".")
filekey = 'T_cells'
# sorted(): os.listdir order is arbitrary; a fixed order keeps the concatenated
# matrix (and downstream clustering) reproducible.
for file in sorted(os.listdir(directory)):
    filename = os.fsdecode(file)
    if filename.endswith(".fcs") and filekey in filename:
        fcs_list.append(FCMeasurement(ID=file, datafile=filename))
if not fcs_list:
    raise FileNotFoundError(
        "No '{}' .fcs files found in {}".format(filekey, os.getcwd())
    )

adata_list = []
for fcs in fcs_list:
    temp = AnnData(fcs.data)
    temp.var_names_make_unique()
    adata_list.append(temp)
total_mtx = adata_list[0]
for mtx in adata_list[1:]:
    total_mtx = AnnData.concatenate(total_mtx, mtx, join="outer")

# .copy() materialises the view so filter_cells and the other in-place scanpy
# calls below do not operate on a view of `total_mtx`.
mtx_abx = total_mtx[:,total_mtx.var.index.isin(['GITR','CD161','CXCR3','LAG3','CCR6','NKG2A','2B4','TIGIT','TIM3','CD103','CCR4','CD8','CD25','PD1'])].copy()
sc.pp.filter_cells(mtx_abx, min_genes=1)

sc.pp.neighbors(mtx_abx)
sc.tl.tsne(mtx_abx)
sc.tl.louvain(mtx_abx)
sc.settings.set_figure_params(transparent=True,dpi_save=300)

# The commented-out plotting calls that used to follow were removed: two
# continuation lines of one of them had been left un-commented, which made the
# whole cell fail to parse. The plots live in the following cells.

In [None]:
# Two t-SNE figures with shared styling: checkpoint markers, then Louvain clusters.
cd56_tsne_style = dict(size=5, wspace=0.3, vmax=100000)
for colour_by, figure_suffix in (
    (['GITR', 'LAG3', 'NKG2A', 'TIGIT'], 'checkpoints on cd56s'),
    ('louvain', 'louvaincd56s'),
):
    sc.pl.tsne(mtx_abx, color=colour_by, save=figure_suffix, **cd56_tsne_style)


In [None]:
# PCA (10 components), PhenoGraph clustering with Leiden (k=50), then a
# dendrogram over the resulting 'pheno_leiden' groups.
sc.tl.pca(
    mtx_abx,
    n_comps=10,
)
sce.tl.phenograph(
    mtx_abx,
    k=50,
    clustering_algo="leiden",
)
sc.tl.dendrogram(
    mtx_abx,
    groupby='pheno_leiden',
)

In [None]:
# Column-scaled marker matrix per PhenoGraph cluster (markers on rows), with
# dendrogram and group totals, saved at 300 dpi.
cd56_panel = [
    'GITR', 'CD161', 'CXCR3', 'LAG3', 'CCR6', 'NKG2A', '2B4',
    'TIGIT', 'TIM3', 'CD103', 'CCR4', 'CD8', 'CD25', 'PD1',
]
ax = sc.pl.matrixplot(
    mtx_abx,
    var_names=cd56_panel,
    groupby='pheno_leiden',
    standard_scale='var',
    cmap='Blues',
    colorbar_title='column scaled\nexpression',
    swap_axes=True,
    dendrogram=True,
    figsize=(10, 5),
    return_fig=True,
)
with_totals = ax.add_totals()
with_totals.show()
ax.savefig('cd56 CD8s phenograph 07162022', dpi=300)


In [None]:
# PhenoGraph clusters on the t-SNE embedding, labels on the data, no title.
sc.pl.tsne(
    mtx_abx,
    color='pheno_leiden',
    legend_loc='on data',
    title='',
    size=5,
    wspace=0.3,
    vmax=100000,
    save='phenograph_tsne 07162022',
)


In [None]:
# One untitled t-SNE figure per marker, each with its own colour-scale cap.
marker_caps = [
    ('CCR4', 50000),
    ('NKG2A', 100000),
    ('PD1', 30000),
    ('TIGIT', 100000),
]
for marker, cap in marker_caps:
    sc.pl.tsne(
        mtx_abx,
        color=marker,
        vmax=cap,
        size=5,
        wspace=0.3,
        title='',
        save=f'{marker} 07162022',
    )

In [None]:
# CCR4 on the t-SNE embedding (cap 50000, no lower bound set), untitled.
ccr4_plot_kwargs = dict(
    color='CCR4',
    vmax=50000,
    size=5,
    wspace=0.3,
    title='',
    save='CCR4 07162022',
)
sc.pl.tsne(mtx_abx, **ccr4_plot_kwargs)