-
Notifications
You must be signed in to change notification settings - Fork 10
/
imc_generatespillmat_long.Rmd
executable file
·270 lines (203 loc) · 8.12 KB
/
imc_generatespillmat_long.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
---
title: "Spillover estimation IMC"
author: 'Vito Zanotelli et al.'
output:
html_document:
df_print: paged
keep_md: true
---
# Aim
This script shows how to estimate spillover from single metal spots on an agarose coated slide.
Each spot should be imaged with a single acquisition. The name of the acquisition should be the metal that is used:
E.g. PanormaA_1_Yb176_23.txt
When run with the example data it reproduces the spillover estimation shown in Fig S5A as well as Fig 4A
# Script
## load all libraries
```{r Libraries, message=FALSE}
library(CATALYST)
library(data.table)
library(ggplot2)
library(flowCore)
library(dplyr)
library(dtplyr)
library(stringr)
library(ggpmisc)
source('spillover_imc_helpers.R')
```
## setup the configuration variables
```{r Setup}
# list of folders that contain each a complete single stain acquisition (e.g. in case that one wants to run and compare multiple single stains from different days)
fols_ss = c('../data/Figure_S5/Spillover_Matrix_2','../data/Figure_S5/Spillover_Matrix_1' )
# output folder
fol_out = '../data/Figure_S5/'
# name prefix for all output
prefix ='paper_version_'
```
## load single stains
### Data loading
```{r}
# load the data
list_img_ss <-lapply(fols_ss, load_ss_fol)
names(list_img_ss) <- fols_ss
```
### Adapt the column names to be recognized metal names by CATALYST
CATALYST needs to have the metal names in the format (METAL)(MASS)Di
```{r}
list_img_ss = lapply(list_img_ss, function(x) lapply(x, fixnames))
dats_raw = lapply(list_img_ss, imglist2dat)
```
### Extract the single stain masses from the acquisition name
This needs to be changed in case a different naming scheme is used!
```{r Get bc masses}
for (dat in dats_raw){
dat[, metal:= strsplit(.BY[[1]], '_')[[1]][3],by=file]
dat[, mass:= as.numeric(str_extract_all(.BY[[1]], "[0-9]+")[[1]]),by=metal]
}
```
## Visualization of the raw data
In the following section the raw data is visualized
### Calculate per-file medians
```{r}
dats_raw_sum = rbindlist(lapply(dats_raw, calc_file_medians),idcol = T)
```
### Visualize per-file medians
Plots the median of the data. It is recommended to have >200 counts for all the channels.
This is also a good plot to check if the metal spots really contain the correct metal!
```{r fig.height=13, fig.width=20}
dats_raw_sum %>%
ggplot(aes(x=1, y=med, color=.id))+
facet_wrap(~file+metal, scales = 'free_y')+
geom_label(aes(label=variable), size=4)
```
### Optional data bining
If the median per-pixel intensities are to low, it could be worth to sum up some consecuteive pixels to get a better accuracy for the estimation
(here not the case). This is valid because for segmentation based quantitative image analysis usually anyways pixels are aggregated. If the binning is choosen to big, there is however a potential accumulation of background noise.
```{r}
# defines over how many pixels the aggregation should happen
# 1 = no aggregation
npixelbin = 1
dats_agg <- lapply(dats_raw, function(x) aggregate_pixels(x, n=npixelbin))
dats_agg_sum = rbindlist(lapply(dats_agg, calc_file_medians), idcol = T)
```
### Visualize per-file medians after binning
The intensities increase according to the aggregation factor
```{r fig.width=17, fig.height=10}
dats_agg_sum %>%
ggplot(aes(x=1, y=med, color=.id))+
facet_wrap(~file+metal, scales = 'free_y')+
geom_label(aes(label=variable))
```
## CATALYST based compensation
## estimate the spillover
To estimate the spillover, the (aggregated) pixel values are first debarcoded using CATALYST, treating them like single cells. This step acts as a quality filter to remove background/noisy/weak pixels as well as pixels with artefacts (e.g. specles with strong signal in many channels).
If the true metal was correctly encoded in the filename, the 'remove_incorrect_bc' option will check the debarcoding and remove events assigned to the wrong barcode.
Then this identified, strong single stain pixels will be used for the spillover estimation.
```{r Binned}
res = lapply(dats_agg, function(x) re_from_dat(x,
ss_ms=x[!is.na(mass), unique(mass)],
minevents = 40,
correct_bc = T))
sms = lapply(res, function(x) computeSpillmat(x))
```
### save the spillover matrices
```{r}
for (i in seq_along(sms)){
outname = file.path(fol_out, paste0(prefix, basename(fols_ss[i]),'_sm.csv'))
write.csv(sms[[i]],file = outname)
}
```
### Visualization of the spillover matrix
```{r}
for (i in seq_along(sms)){
print(names(dats_agg)[i])
ss_ms = dats_agg[[i]][!is.na(mass), unique(mass)]
p = CATALYST::plotSpillmat(ss_ms,sms[[i]])
print(p)
}
```
### Some quality indicators
Here we calculate e.g. number of debarcoded events/metal, median levels of highest signal and second highest signal
```{r}
for (i in seq_along(res)){
dat = dats_agg[[i]]
re = res[[i]]
name = names(dats_agg)[i]
tdat = dat %>%
mutate(bcid = bc_ids(re)) %>%
filter(bcid != '0') %>%
dplyr::select(-c(Start_push, End_push, Pushes_duration, X , Y ,Z)) %>%
melt.data.table(id.vars = c('metal', 'mass','file', 'bcid')) %>%
do(data.table(.)[, list(med=median(value), n=.N), by=.(variable, metal, mass, bcid,file)])
# find the highest metal, second highest metal
sumdat = tdat[ , .(
highestvariable = variable[med == max(med)],
highestmed = max(med),
secondhighestvariable = variable[med == sort(med,partial=length(med)-1)[length(med)-1]],
secondhighestmed = sort(med,partial=length(med)-1)[length(med)-1],
n=max(n)
) ,by=.( mass, bcid,file)]
print(sumdat)
}
```
```{r Binned uncor}
res_uncor = lapply(dats_agg, function(x) re_from_dat(x,
ss_ms=x[!is.na(mass), unique(mass)],
minevents = 40,
correct_bc = NULL ))
sms_uncor = lapply(res_uncor, function(x) computeSpillmat(x))
```
Assure that the results are exactly the same when enforcing that no debarcoding error happened by using the annotation from the file names:
```{r}
ndig = 8
for (i in seq_along(sms)){
print('all equal?')
print(all(round(sms_uncor[[i]], digits=ndig) == round(sms[[i]],digits = ndig)))
#diffmat = abs(round(sms_uncor[[i]], digits=ndig)-round(sms[[i]],digits = ndig))/round(sms[[i]],digits = ndig)
#match(T,diffmat > 0.01)
}
```
-> The results are exactly equal. Thus just debarcoding - without using any information about where the pixels actually belong - seems to be a vaild option to estimate the spillover.
## The plot below reproduces plots to check the linearity of spillover
### Define a helper function
```{r}
plot_binplot <- function(imgs, fn, x_var, y_var, perc=0.99, nbins=100, fkt=median){
# This function makes a 'quantile' binning, binning the data in nbins that contain an equal number of events.
dat = copy(imgs[[fn]])
print(dat)
#dat[, bins:= cut(get(x_var), seq(0, quantile(get(x_var),perc),length.out = nbins), right=T, include.lowest = T)]
dat = subset(dat, get(x_var) < quantile(get(x_var),perc))
dat[, bins:= ntile(get(x_var), nbins)]
x = melt.data.table(dat, id.vars = 'bins')
x = x[, .(binmean=fkt(value)), by=.(bins, variable)]
x = dcast.data.table(x[!is.na(bins),], 'bins~variable', value.var='binmean')
ggplot(x, aes(x=get(x_var), y=get(y_var))) +
geom_smooth(method = 'lm', alpha =0.5)+
geom_point()+
xlab(x_var) +
ylab(y_var)+
expand_limits(x=0, y=0)+
stat_poly_eq(formula=as.formula('y~x'), aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")),
parse = TRUE) +
theme(aspect.ratio=1)
}
```
Plot the spillover relationships
```{r}
pltimgs=list_img_ss[[1]]
fn = "Er166_27_Er166_28.txt"
x_var = "Er166Di"
y_var= "Er167Di"
p = plot_binplot(pltimgs, fn, x_var, y_var, perc=0.95, nbins=20, fkt = median)
p= p+ggtitle('Fig4 A upper')
print(p)
fn ="Er166_27_Er166_28.txt"
x_var = "Er166Di"
y_var= "Er168Di"
p = plot_binplot(pltimgs, fn, x_var, y_var, perc=0.9, nbins=25, fkt = median)
p= p+ggtitle('Fig4 A lower')
print(p)
```
-> This reproduces Fig 4A
```{r}
sessionInfo()
```