# Cell Types Updated
This notebook calculates the average surface-marker level for each dendrogram cluster of cells.

In [1]:
import pandas as pd
import numpy as np
from clustergrammer_widget import *
# Single Network instance that the cells below reuse for loading, filtering,
# clustering, exporting and widget display.
net = Network(clustergrammer_widget)

In [2]:
# Load the original Plasma data (i.e. not normalized or otherwise processed)
# and export it as a DataFrame.
net.load_file('../cytof_data/Plasma_clean.txt')
df_plasma = net.export_df()

# Load the original PMA data and export it as a DataFrame in the same way.
net.load_file('../cytof_data/PMA_clean.txt')
df_pma = net.export_df()

In [3]:
# Set colors for the two 'Majority-Treatment' values of row category 1
# (presumably applied when the widget is rendered -- confirm against the
# clustergrammer documentation).
net.set_cat_color('row', 1, 'Majority-Treatment: Plasma', 'blue')
net.set_cat_color('row', 1, 'Majority-Treatment: PMA', 'red')

# Downsample Data
The Plasma and PMA datasets are each downsampled to 1000 cell clusters. Downsampling uses only the surface-marker columns.

In [4]:
# Start from the raw Plasma table and filter the columns on category 1 with
# the value 'Marker-type: surface marker'.
net.load_df(df_plasma)
net.filter_cat('col', 1, 'Marker-type: surface marker')
# Z-score normalization along the column axis.
net.normalize(axis='col', norm_type='zscore', keep_orig=False)
# K-means downsample of the rows to 1000 samples. The return value is stored
# in ds_data_plasma but is not read by any cell visible in this notebook.
ds_data_plasma = net.downsample(ds_type='kmeans', axis='row', num_samples=1000)
# NOTE(review): clipping to (-10, 10) runs after the downsample, so the
# k-means step sees the unclipped z-scores -- confirm this order is intended.
net.clip(-10,10)
ds_plasma = net.export_df()

  init_size=init_size)


In [6]:
# Start from the raw PMA table and filter the columns on category 1 with the
# value 'Marker-type: surface marker'.
net.load_df(df_pma)
net.filter_cat('col', 1, 'Marker-type: surface marker')
# Z-score normalization along the column axis.
net.normalize(axis='col', norm_type='zscore', keep_orig=False)
# K-means downsample of the rows to 1000 samples. The return value is stored
# in ds_data_pma but is not read by any cell visible in this notebook.
ds_data_pma = net.downsample(ds_type='kmeans', axis='row', num_samples=1000)
# NOTE(review): clipping to (-10, 10) runs after the downsample, so the
# k-means step sees the unclipped z-scores -- confirm this order is intended.
net.clip(-10,10)
ds_pma = net.export_df()

# Merge Downsampled Plasma and PMA
Downsampled versions of the Plasma and PMA surface marker data will be merged and hierarchically clustered. Dendrogram level 4 and level 3 clusters will be used to generate cell-cluster categories.

In [21]:
# Stack the downsampled Plasma and PMA tables on top of each other.
ds_merge = pd.concat([ds_plasma, ds_pma])

# Drop the "number in clust" entry from every row label: only the first two
# entries of each label (name and category) are kept.
ds_merge.index = [(label[0], label[1]) for label in ds_merge.index.tolist()]

## Level 4 Dendrogram Categories
Level 4 of the dendrogram defines 15 clusters. 

In [30]:
# Cluster the merged table, add row categories derived from dendrogram
# level 4, then cluster again before rendering the widget.
# NOTE(review): the second cluster() call presumably exists so the new
# dendrogram category is included in the visualization -- confirm.
net.load_df(ds_merge)
net.cluster(views=[])
net.dendro_cats('row', dendro_level=4)
net.cluster(views=[])
net.widget()

In [31]:
# Export the table currently held by net (expected to be the level 4
# clustering result from the previous cell).
ds_merge_4 = net.export_df()

In [37]:
# Save the level 4 table as tab-separated text.
ds_merge_4.to_csv('../cytof_data/ds_merge_level_4.txt', sep='\t')

Get the unique level 4 dendrogram categories from the row labels.

In [75]:
# Distinct values found at position 2 of every row label (the slot read here
# as the dendrogram category), in sorted order, plus how many there are.
dendro_cats = sorted({label[2] for label in ds_merge_4.index.tolist()})
num_cats = len(dendro_cats)

## Calculate average values of each dendrogram cluster

In [69]:
# Average every column over the rows of each level 4 dendrogram cluster.
# Cluster labels are rebuilt as 'Group 4: cat-1' ... 'Group 4: cat-<num_cats>'
# so the output rows follow numeric cluster order.
cluster_means = []
rows = []
for i in range(num_cats):
    inst_cat = 'Group 4: cat-' + str(i + 1)

    # Reload the full table on every pass: the filter below is applied to
    # whatever net currently holds.
    net.load_df(ds_merge_4)
    net.filter_cat('row', 2, inst_cat)
    tmp = net.export_df()

    # Column-wise mean of the rows left after filtering.
    cluster_means.append(tmp.mean(axis=0))
    rows.append(inst_cat.replace('cat-', 'cluster-'))

# Build the result with a single concat (one column per cluster) instead of
# growing a DataFrame inside the loop, then transpose so each row is one
# cluster. With zero clusters the result is an empty DataFrame.
if cluster_means:
    mean_merge_4 = pd.concat(cluster_means, axis=1).transpose()
    mean_merge_4.index = rows
else:
    mean_merge_4 = pd.DataFrame()

In [80]:
# Save the per-cluster means as tab-separated text and print the table's
# (rows, columns) shape.
mean_merge_4.to_csv('../cytof_data/mean_merge_4.txt', sep='\t')
print(mean_merge_4.shape)

(15, 18)


In [71]:
# Cluster the table of per-cluster means and render it as a widget.
net.load_df(mean_merge_4)
net.cluster(views=[])
net.widget()

## Level 3 Dendrogram Categories
Level 3 of the dendrogram defines 27 clusters.

In [33]:
# Cluster the merged table, add row categories derived from dendrogram
# level 3, then cluster again before rendering the widget.
# NOTE(review): the second cluster() call presumably exists so the new
# dendrogram category is included in the visualization -- confirm.
net.load_df(ds_merge)
net.cluster(views=[])
net.dendro_cats('row', dendro_level=3)
net.cluster(views=[])
net.widget()

In [34]:
# Export the table currently held by net (expected to be the level 3
# clustering result from the previous cell).
ds_merge_3 = net.export_df()

In [38]:
# Save the level 3 table as tab-separated text.
ds_merge_3.to_csv('../cytof_data/ds_merge_level_3.txt', sep='\t')

In [76]:
# Distinct values found at position 2 of every row label (the slot read here
# as the dendrogram category), in sorted order, plus how many there are.
dendro_cats = sorted({label[2] for label in ds_merge_3.index.tolist()})
num_cats = len(dendro_cats)

In [77]:
# Average every column over the rows of each level 3 dendrogram cluster.
# Cluster labels are rebuilt as 'Group 3: cat-1' ... 'Group 3: cat-<num_cats>'
# so the output rows follow numeric cluster order.
cluster_means = []
rows = []
for i in range(num_cats):
    inst_cat = 'Group 3: cat-' + str(i + 1)

    # Reload the full table on every pass: the filter below is applied to
    # whatever net currently holds.
    net.load_df(ds_merge_3)
    net.filter_cat('row', 2, inst_cat)
    tmp = net.export_df()

    # Column-wise mean of the rows left after filtering.
    cluster_means.append(tmp.mean(axis=0))
    rows.append(inst_cat.replace('cat-', 'cluster-'))

# Build the result with a single concat (one column per cluster) instead of
# growing a DataFrame inside the loop, then transpose so each row is one
# cluster. With zero clusters the result is an empty DataFrame.
if cluster_means:
    mean_merge_3 = pd.concat(cluster_means, axis=1).transpose()
    mean_merge_3.index = rows
else:
    mean_merge_3 = pd.DataFrame()

In [81]:
# Save the per-cluster means as tab-separated text and display the table's
# (rows, columns) shape as the cell output.
mean_merge_3.to_csv('../cytof_data/mean_merge_3.txt', sep='\t')
mean_merge_3.shape

(27, 18)

In [79]:
# Cluster the table of per-cluster means and render it as a widget.
net.load_df(mean_merge_3)
net.cluster(views=[])
net.widget()