# Cells participating in T-cell clusters (group statistics)

In [1]:
import pickle
from termcolor import colored
import sys
from collections import  Counter
# Append the project checkout to sys.path; the `utilities` import that follows
# is presumably resolved from this directory.
# NOTE(review): absolute, machine-specific path — the notebook only runs as-is
# on a machine where this directory exists.
lib = r'D:\Technion studies\Keren Laboratory\python_playground\classifying-response-to-immunotherapy'
sys.path.append(lib)
from utilities.smart_seq_dataset import RNAseq_Dataset
def extract_data_from_pickle(pickle_path):
    """
    Load the pickled dataset stored at ``pickle_path``.

    The pickle must hold exactly three objects; they are unpacked and returned
    in the order they were stored.

    NOTE: unpickling can execute arbitrary code, so only load files from a
    trusted source.

    :param pickle_path: path of the pickle file to read.
    :return: cells_form, gene_names, patients_information
    """
    # The context manager closes the file handle even if unpickling fails.
    with open(pickle_path, "rb") as pickle_file:
        cells_form, gene_names, patients_information = pickle.load(pickle_file)
    return cells_form, gene_names, patients_information


# Location of the pickle file read by extract_data_from_pickle in the next cell.
# The commented-out assignments appear to be earlier/alternative inputs.
# NOTE(review): the active value is an absolute, machine-specific path — adjust
# it before running the notebook elsewhere.
# PICKLE_PATH = r'DATA\1-16291cells.p'
# PICKLE_PATH = r'D:\Technion studies\Keren Laboratory\Data\smart_seq\SmartSeq_RNAseq_DATA.p'
PICKLE_PATH = r'D:\Technion studies\Keren Laboratory\python_playground\outputs\smart_seq\SmartSeq_RNAseq_DATA_3.1.2021.p'

Retrieve Dataset:

In [2]:
# Unpack the three objects stored in the pickle; `patients_information` is the
# per-cell record list that every later cell in this notebook iterates over.
cells, gene_names, patients_information = extract_data_from_pickle(PICKLE_PATH)
# Note the argument order: RNAseq_Dataset is called with
# (cells, patients_information, gene_names), which differs from the order in
# which the pickle returns them.
dataset = RNAseq_Dataset(cells, patients_information, gene_names)

All supervised cell types:

In [4]:
# Labels reported in the tables below, in display order. Each label is looked up
# verbatim in the per-cell 'supervised classification' counts.
# NOTE(review): 'Granolocytes' looks like a misspelling of 'Granulocytes';
# presumably it matches the spelling used in the data, so it is kept — confirm.
cell_types = [
    'T cells',
    'CD4 helper T cells',
    'CD8 Cytotoxic T cells',
    'Regulatory T cells',
    'Regulatory CD4 T cells',
    'Regulatory CD8 T cells',
    'Regulatory CD4_CD8 T cells',
    'NKT cells',
    'NK cells',
    'B cells',
    'Activated T cells',
    'Senescence T cells',
    'Terminal effector',
    'Exhausted T cells',
    'Stem_like T cells',
    'Memory T cells',
    'Memory CD4 T cells',
    'Memory CD8 T cells',
    'Memory CD4_CD8 T cells',
    'Macrophage_immature',
    'Macrophage_mature',
    'Monocyte_immature',
    'Monocyte_mature',
    'cDCs_dendritic_cells',
    'pDCs',
    'myeloid cells_general_immature',
    'myeloid cells_general_mature',
    'Neutrophils',
    'Granolocytes',
    'CD4+CD8+ T cells',
]

In [7]:
# some helper functions
def flatten_list(l):
    """
    Flatten one level of nesting.

    :param l: iterable of iterables.
    :return: a new list holding the items of every inner iterable, in order.
    """
    flat = []
    for inner in l:
        flat.extend(inner)
    return flat
def sort(_counter):
    """
    Return the (key, value) pairs of a mapping ordered by key, ascending.

    :param _counter: mapping exposing ``items()`` (e.g. a ``Counter``).
    :return: list of (key, value) tuples sorted by key.
    """
    def _by_key(entry):
        return entry[0]

    pairs = list(_counter.items())
    pairs.sort(key=_by_key)
    return pairs

def search_in_list(count_list, key):
    """
    Look up the value paired with ``key`` in a list of (key, value) pairs.

    :param count_list: iterable of indexable pairs; when a key repeats, the
        last pair wins.
    :param key: key to search for.
    :return: the paired value, or 0 when ``key`` is not present.
    """
    lookup = {}
    for entry in count_list:
        lookup[entry[0]] = entry[1]
    return lookup.get(key, 0)

def is_overlap(l1 , l2):
    """
    Tell whether two collections share at least one element.

    :param l1: iterable whose items are tested for membership.
    :param l2: container supporting the ``in`` operator.
    :return: True if any item of ``l1`` is in ``l2``, otherwise False.
    """
    # any() stops at the first shared element instead of building the full
    # list of matches only to test whether it is empty.
    return any(item in l2 for item in l1)

Number of cells participating in the T-cell clustering (CD8_B and CD8_G):

In [8]:
# Headline counts: every record in `patients_information` versus the records
# whose 'T-cell 6 cluster' entry is truthy (treated here as "participating").
print("total number of cells:")
print(len(patients_information))
print("number of cells participating in T-cell cluster:")
# Count the matching records directly; the count does not need the
# 'general 11 cluster' field, so no throwaway list of it is built.
print(sum(1 for p in patients_information if p['T-cell 6 cluster']))

total number of cells:
16291
number of cells participating in T-cell cluster:
6350


Distribution of the cells participating in the T-cell clustering across the supervised cell types:

In [9]:
# Table: for every label in `cell_types`, how many cells carry it overall,
# among cells with a truthy 'T-cell 6 cluster' entry, and among the rest.
print(colored("For each cell-type shows amount of cells participating in CD8 clustering (CD8_B and CD8_G):", 'red'), end='\n\n')


# Each 'supervised classification' entry is flattened, i.e. it is treated as a
# collection of labels, so a cell is counted once for every label it carries.
# NOTE: `total`, `is_t` and `is_n_t` are notebook-level names that the next
# table cell rebinds with different contents.
total = sort(Counter(flatten_list([p['supervised classification'] for p in patients_information])))
is_t = sort(Counter(flatten_list([p['supervised classification'] for p in patients_information if p['T-cell 6 cluster']])))
is_n_t = sort(Counter(flatten_list([p['supervised classification'] for p in patients_information if not p['T-cell 6 cluster']])))

print("supervised classification        total cells   |   participating in CD8 Clustering   |   not participating in CD8 Clustering")
for ct in cell_types:
    # search_in_list returns 0 for labels that never occur in a given split.
    print('%30s   %12d%32d%32d' % (ct, search_in_list(total,ct), search_in_list(is_t,ct), search_in_list(is_n_t,ct)))


[31mFor each cell-type shows amount of cells participating in CD8 clustering (CD8_B and CD8_G):[0m

supervised classification        total cells   |   participating in CD8 Clustering   |   not participating in CD8 Clustering
                       T cells           9990                            6350                            3640
            CD4 helper T cells           2196                               0                            2196
         CD8 Cytotoxic T cells           6350                            6350                               0
            Regulatory T cells            140                               0                             140
        Regulatory CD4 T cells            718                               0                             718
        Regulatory CD8 T cells             28                               0                              28
    Regulatory CD4_CD8 T cells             12                               0                              12
   

Distribution of the cells participating in the T-cell clustering across the 11 general clusters:

In [50]:
# Table: for every general cluster id, how many cells fall in it overall,
# among cells with a truthy 'T-cell 6 cluster' entry, and among the rest.
print(colored("Each cluster in General_11_Clusters shows amount of cells participating in T-cells cluster (CD8_B and CD8_G):", 'red'), end='\n\n')

# Unlike the per-cell-type table, the 'general 11 cluster' value is counted
# as-is (no flattening), i.e. one value per cell.
# NOTE: this rebinds the notebook-level names `is_t`, `is_n_t` and `total`.
is_t = sort(Counter([p['general 11 cluster'] for p in patients_information if p['T-cell 6 cluster']]))
is_n_t = sort(Counter([p['general 11 cluster'] for p in patients_information if not p['T-cell 6 cluster']]))
total = sort(Counter([p['general 11 cluster'] for p in patients_information]))
print("number of cells:   total cells   |   participating in t-cell Cluster   |   not participating in T-cell Cluster")
# Cluster ids are looked up as the integers 1..11; ids absent from a split print as 0.
for i in range(1, 12):
    print('cluster %3s  %12d%32d%32d' % ('G'+str(i), search_in_list(total,i), search_in_list(is_t,i), search_in_list(is_n_t,i)))

[31mEach cluster in General_11_Clusters shows amount of cells participating in T-cells cluster (CD8_B and CD8_G):[0m

number of cells:   total cells   |   participating in t-cell Cluster   |   not participating in T-cell Cluster
cluster  G1          1455                               1                            1454
cluster  G2           305                              17                             288
cluster  G3          1391                               1                            1390
cluster  G4           290                               1                             289
cluster  G5          2165                            1246                             919
cluster  G6          2222                            1603                             619
cluster  G7          1740                              34                            1706
cluster  G8          2165                            1225                             940
cluster  G9          1656                        

General cluster distribution over cell-types

In [51]:
# Table: rows are the labels in `cell_types`, columns are the general clusters
# G1..G11; each cell of the table is the number of times that label occurs
# among the cells of that cluster.
print(colored("Each cluster in General_11_Clusters shows amount of appearances of each supervised cell-type", 'red'), end='\n\n')

print("Supervised Classification|Cluster:", end=' ')

# g_clusters[i] holds the sorted (label, count) pairs for cluster id i+1.
g_clusters = [[] for i in range(11)]
for i in range(11):
    # Print the column header and build that cluster's label counts in one pass.
    # NOTE(review): the header cell is one character wider for 'G10'/'G11' than
    # for 'G1'..'G9', so the last headers drift relative to the '%5d |' columns.
    print("%2s "%('G'+str(i+1)), end="     ")
    g_clusters[i] = sort(Counter(flatten_list([p['supervised classification'] for p in patients_information if p['general 11 cluster']==i+1])))
print()
for ct in cell_types:
    print(' %30s' % (ct), end=" ")
    for i in range(11):
        # search_in_list returns 0 when the label never occurs in this cluster.
        print('%5d |' % (search_in_list(g_clusters[i], ct)), end=" ")
    print()

[31mEach cluster in General_11_Clusters shows amount of appearances of each supervised cell-type[0m

Supervised Classification|Cluster: G1      G2      G3      G4      G5      G6      G7      G8      G9      G10      G11      
                        T cells     6 |    36 |    14 |    43 |  1849 |  1796 |   834 |  1660 |  1451 |  1481 |   820 | 
             CD4 helper T cells     3 |    17 |    10 |    42 |   295 |    87 |   630 |   209 |   119 |   611 |   173 | 
          CD8 Cytotoxic T cells     1 |    17 |     1 |     1 |  1246 |  1603 |    34 |  1225 |  1194 |   472 |   556 | 
             Regulatory T cells     2 |     0 |     0 |     0 |     6 |     0 |   107 |     1 |     0 |    23 |     1 | 
         Regulatory CD4 T cells     0 |     0 |     5 |     0 |    19 |     6 |   574 |     1 |     7 |    46 |    60 | 
         Regulatory CD8 T cells     0 |     0 |     0 |     0 |     2 |     5 |     7 |     0 |     9 |     3 |     2 | 
     Regulatory CD4_CD8 T cells     0 |     0