# Overview of CyTOF Data
The original data was given as two tab-separated matrices:
* ``Plasma.txt`` (original name: 160202_CGI002_Plasma_Plasma_singlets.fcs_raw_events.txt)
* ``PMA.txt`` (original name: 160202_CGI002_PMA_PMA_singlets.fcs_raw_events.txt)

These files had individual cell measurements as rows and dimensions (e.g. antibodies) as columns. I kept only the dimensions of interest (the surface marker and phospho marker antibody columns) and renamed the resulting files ``Plasma_clean.txt`` and ``PMA_clean.txt``.

# Plasma

In [6]:
import pandas as pd
import numpy as np

# The star import supplies Network and clustergrammer_widget used below.
from clustergrammer_widget import *
# A single Network instance is reused (and reloaded) by every analysis below.
net = Network()

In [7]:
# Load the cleaned Plasma matrix and take a reproducible (fixed random_state)
# random sample of 110000 rows (cells).
net.load_file('cytof_data/Plasma_clean.txt')
net.random_sample(axis='row',num_samples=110000, random_state=99)
# Keep the sampled data as a DataFrame so later sections can reload it.
df_plasma = net.export_df()
df_plasma.shape

(110000, 28)

In [8]:
# Z-score each column, then k-means downsample the rows down to 1000.
net.normalize(axis='col', norm_type='zscore', keep_orig=False)
# The return value is kept so it can be inspected in the next cell.
ds_data_plasma = net.downsample(ds_type='kmeans', axis='row', num_samples=1000)
net.dat['mat'].shape

(1000, 28)

In [12]:
# The value returned by downsample has length 110000 (the number of sampled
# rows) and can be used to link the original rows to the downsampled data.
ds_data_plasma.shape

(110000,)

In [4]:
# Confirm the network matrix is the downsampled one (1000 rows).
net.dat['mat'].shape

(1000, 28)

In [5]:
# Clip z-scores to [-10, 10] since we do not care about extreme outliers.
net.clip(-10,10)
# Save the downsampled, clipped matrix for reuse.
net.write_matrix_to_tsv('cytof_data/ds_plasma.txt')
# Use fixed colors for the majority-treatment row category values
# (blue = Plasma, red = PMA).
net.set_cat_color('row', 1, 'Majority-Treatment: Plasma', 'blue')
net.set_cat_color('row', 1, 'Majority-Treatment: PMA', 'red')

In [6]:
# Build the clustergram (no extra views requested) and show it in the widget.
net.make_clust(views=[])
clustergrammer_widget(network=net.widget())

## Plasma Surface Markers Only

In [7]:
# Inspect the category-to-color mapping currently stored on the network.
net.viz['cat_colors']

{'col': {'cat-0': {'Marker-type: phospho marker': '#aec7e8',
   'Marker-type: surface marker': '#ffbb78'}},
 'row': {'cat-0': {'Majority-Treatment: PMA': 'red',
   'Majority-Treatment: Plasma': 'blue',
   'Treatment: Plasma': '#393b79'},
  'cat-1': {}}}

In [8]:
# Restart from the sampled Plasma data and keep only surface-marker columns.
net.load_df(df_plasma)
net.filter_cat('col', 1, 'Marker-type: surface marker')
# Same pipeline as above: z-score columns, k-means downsample rows to 1000,
# then clip z-scores to [-10, 10].
net.normalize(axis='col', norm_type='zscore', keep_orig=False)
net.downsample(ds_type='kmeans', axis='row', num_samples=1000)
net.clip(-10,10)
net.dat['mat'].shape

(1000, 18)

In [9]:
# Check the stored category colors again after reloading and filtering.
net.viz['cat_colors']

{'col': {'cat-0': {'Marker-type: phospho marker': '#aec7e8',
   'Marker-type: surface marker': '#ffbb78'}},
 'row': {'cat-0': {'Majority-Treatment: PMA': 'red',
   'Majority-Treatment: Plasma': 'blue',
   'Treatment: Plasma': '#393b79'},
  'cat-1': {}}}

In [10]:
# Build the surface-marker-only clustergram and show it in the widget.
net.make_clust(views=[])
clustergrammer_widget(network=net.widget())

# PMA

In [11]:
# Load the cleaned PMA matrix and sample 110000 rows with the same
# random_state used for the Plasma data.
net.load_file('cytof_data/PMA_clean.txt')
net.random_sample(axis='row',num_samples=110000, random_state=99)
# Keep the sampled data as a DataFrame so later sections can reload it.
df_pma = net.export_df()

In [12]:
# NOTE(review): df_pma was just exported from net, so this reload looks
# redundant -- confirm before removing.
net.load_df(df_pma)
df_pma.shape

(110000, 28)

In [13]:
# Z-score each column, then k-means downsample the rows down to 1000.
net.normalize(axis='col', norm_type='zscore', keep_orig=False)
net.downsample(ds_type='kmeans', axis='row', num_samples=1000)
# Clip z-scores to [-10, 10] since we do not care about extreme outliers,
# then save the downsampled matrix for reuse.
net.clip(-10,10)
net.write_matrix_to_tsv('cytof_data/ds_pma.txt')
# Shape check moved to the end of the cell: a notebook only displays a cell's
# final expression, so the original mid-cell placement discarded the value.
net.dat['mat'].shape

In [14]:
# Build the PMA clustergram (no extra views requested) and show it.
net.make_clust(views=[])
clustergrammer_widget(network=net.widget())

## PMA Surface Markers Only

In [15]:
# Restart from the sampled PMA data and keep only surface-marker columns.
net.load_df(df_pma)
net.filter_cat('col', 1, 'Marker-type: surface marker')
# Z-score columns, k-means downsample rows to 1000, clip to [-10, 10].
net.normalize(axis='col', norm_type='zscore', keep_orig=False)
net.downsample(ds_type='kmeans', axis='row', num_samples=1000)
net.clip(-10,10)
net.dat['mat'].shape

(1000, 18)

In [16]:
# Build the PMA surface-marker-only clustergram and show it in the widget.
net.make_clust(views=[])
clustergrammer_widget(network=net.widget())

# Merge Plasma and PMA

In [17]:
# Stack the sampled Plasma and PMA rows into one DataFrame (Plasma first).
df_merge = pd.concat([df_plasma, df_pma])

In [18]:
# Expect 2 x 110000 rows and the same 28 columns.
df_merge.shape

(220000, 28)

In [19]:
# Replace the network's data with the merged Plasma + PMA DataFrame.
net.load_df(df_merge)

In [20]:
# Z-score columns across both treatments, k-means downsample rows to 1000,
# then clip z-scores to [-10, 10].
net.normalize(axis='col', norm_type='zscore', keep_orig=False)
net.downsample(ds_type='kmeans', axis='row', num_samples=1000)
net.clip(-10,10)
net.dat['mat'].shape

(1000, 28)

In [21]:
# Build the merged Plasma + PMA clustergram and show it in the widget.
net.make_clust(views=[])
clustergrammer_widget(network=net.widget())

# Plasma vs PMA based on Surface markers only

In [22]:
# Rebuild the merged DataFrame and reload it so this section starts from the
# unnormalized, unfiltered data.
df_merge = pd.concat([df_plasma, df_pma])
net.load_df(df_merge)

In [23]:
# Keep only surface-marker columns, then z-score columns, k-means downsample
# rows to 1000, and clip z-scores to [-10, 10].
net.filter_cat('col', 1, 'Marker-type: surface marker')
net.normalize(axis='col', norm_type='zscore', keep_orig=False)
net.downsample(ds_type='kmeans', axis='row', num_samples=1000)
net.clip(-10,10)
net.dat['mat'].shape

(1000, 18)

In [24]:
# Build the merged surface-marker-only clustergram and show it in the widget.
net.make_clust(views=[])
clustergrammer_widget(network=net.widget())

# Plasma vs PMA based on Phospho markers only

In [25]:
# Rebuild the merged DataFrame and reload it so this section starts from the
# unnormalized, unfiltered data.
df_merge = pd.concat([df_plasma, df_pma])
net.load_df(df_merge)

In [26]:
# Keep only phospho-marker columns, then z-score columns, k-means downsample
# rows to 1000, and clip z-scores to [-10, 10].
net.filter_cat('col', 1, 'Marker-type: phospho marker')
net.normalize(axis='col', norm_type='zscore', keep_orig=False)
net.downsample(ds_type='kmeans', axis='row', num_samples=1000)
net.clip(-10,10)
net.dat['mat'].shape

(1000, 10)

In [27]:
# Build the merged phospho-marker-only clustergram and show it in the widget.
net.make_clust(views=[])
clustergrammer_widget(network=net.widget())

PMA- and Plasma-treated cells separate more clearly based on phospho markers than on surface markers. This makes sense, since PMA treatment is expected to influence phosphorylation levels.