/
04e_replicability_of_top_DMPs.R
236 lines (189 loc) · 12.5 KB
/
04e_replicability_of_top_DMPs.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
library(dplyr)
library(tidyr)
check_replication= function(discovery_names = a1,
discovery_effect_size= a2,
replication_names = a3,
replication_p = a4,
replication_effect_size = a5) {
shared_names = unique(intersect(discovery_names, replication_names))
disc_i=match(shared_names, discovery_names)
rep_i=match(shared_names, replication_names)
N = sum(replication_p[rep_i] <0.05 & sign(discovery_effect_size[disc_i]) == sign(replication_effect_size[rep_i]),na.rm=F )
percent = N*100/length(shared_names)
return( c(N,percent) )
}
###Check replicability of our top genes
## Load our statistics
load('/dcl01/lieber/ajaffe/Steve/Alz/rdas/tidyStats_caseControl_DMC_singleRegion.rda')
## Load watson et al 2016: STG
load('/dcl01/lieber/ajaffe/Steve/Alz/GSE76105/GSE76105_Watson_mergedStats.rda')
colnames(Watson_mergedStats) = paste0(colnames(Watson_mergedStats), ": ", "Watson2016")
## Load lunnon et al 2014: Multiregion
load('/dcl01/lieber/ajaffe/Steve/Alz/GSE59685/GSE59685_Lunnon_mergedStats.rda')
colnames(Lunnon_mergedStats) = paste0(colnames(Lunnon_mergedStats), ": ", "Lunnon2014")
##
sigSingleRegion_tidyStats = singleRegion_tidyStats[ singleRegion_tidyStats[,'adj.P.Val']<0.1, ]
singleRegion_DMP_Replication=group_by(sigSingleRegion_tidyStats,Region, Model, CellTypeAdjustment) %>% summarise(
FDR_10=n() ,
Lunnon_PFC_Dx_N= check_replication( discovery_names=Name,
discovery_effect_size=logFC,
replication_names=Name,
replication_p=Lunnon_mergedStats$`PFC_Dx_P.Value: Lunnon2014`,
replication_effect_size=Lunnon_mergedStats$`PFC_Dx_logFC: Lunnon2014`)[1],
Lunnon_PFC_Dx_Percent = check_replication( discovery_names=Name,
discovery_effect_size=logFC,
replication_names=Name,
replication_p=Lunnon_mergedStats$`PFC_Dx_P.Value: Lunnon2014`,
replication_effect_size=Lunnon_mergedStats$`PFC_Dx_logFC: Lunnon2014`)[2],
Lunnon_STG_Dx_N = check_replication( discovery_names=Name,
discovery_effect_size=logFC,
replication_names=Name,
replication_p=Lunnon_mergedStats$`STG_Dx_P.Value: Lunnon2014`,
replication_effect_size=Lunnon_mergedStats$`STG_Dx_logFC: Lunnon2014`)[1],
Lunnon_STG_Dx_Percent = check_replication( discovery_names=Name,
discovery_effect_size=logFC,
replication_names=Name,
replication_p=Lunnon_mergedStats$`STG_Dx_P.Value: Lunnon2014`,
replication_effect_size=Lunnon_mergedStats$`STG_Dx_logFC: Lunnon2014`)[2],
Lunnon_ERC_Dx_N = check_replication( discovery_names=Name,
discovery_effect_size=logFC,
replication_names=Name,
replication_p=Lunnon_mergedStats$`ERC_Dx_P.Value: Lunnon2014`,
replication_effect_size=Lunnon_mergedStats$`ERC_Dx_logFC: Lunnon2014`)[1],
Lunnon_ERC_Dx_Percent = check_replication( discovery_names=Name,
discovery_effect_size=logFC,
replication_names=Name,
replication_p=Lunnon_mergedStats$`ERC_Dx_P.Value: Lunnon2014`,
replication_effect_size=Lunnon_mergedStats$`ERC_Dx_logFC: Lunnon2014`)[2],
Lunnon_CRB_Dx_N = check_replication( discovery_names=Name,
discovery_effect_size=logFC,
replication_names=Name,
replication_p=Lunnon_mergedStats$`CRB_Dx_P.Value: Lunnon2014`,
replication_effect_size=Lunnon_mergedStats$`CRB_Dx_logFC: Lunnon2014`)[1],
Lunnon_CRB_Dx_Percent = check_replication( discovery_names=Name,
discovery_effect_size=logFC,
replication_names=Name,
replication_p=Lunnon_mergedStats$`CRB_Dx_P.Value: Lunnon2014`,
replication_effect_size=Lunnon_mergedStats$`CRB_Dx_logFC: Lunnon2014`)[2],
Watson_STG_Dx_unAdj_N = check_replication( discovery_names=Name,
discovery_effect_size=logFC,
replication_names=Name,
replication_p=Watson_mergedStats$`STG_Dx_Unadjusted_P.Value: Watson2016`,
replication_effect_size=Watson_mergedStats$`STG_Dx_Unadjusted_logFC: Watson2016`)[1],
Watson_STG_Dx_unAdj_Percent = check_replication( discovery_names=Name,
discovery_effect_size=logFC,
replication_names=Name,
replication_p=Watson_mergedStats$`STG_Dx_Unadjusted_P.Value: Watson2016`,
replication_effect_size=Watson_mergedStats$`STG_Dx_Unadjusted_logFC: Watson2016`)[2],
Watson_STG_Dx_Adj_N = check_replication( discovery_names=Name,
discovery_effect_size=logFC,
replication_names=Name,
replication_p=Watson_mergedStats$`STG_Dx_cellCompAdjusted_P.Value: Watson2016`,
replication_effect_size=Watson_mergedStats$`STG_Dx_cellCompAdjusted_logFC: Watson2016`)[1],
Watson_STG_Dx_Adj_Percent = check_replication( discovery_names=Name,
discovery_effect_size=logFC,
replication_names=Name,
replication_p=Watson_mergedStats$`STG_Dx_cellCompAdjusted_P.Value: Watson2016`,
replication_effect_size=Watson_mergedStats$`STG_Dx_cellCompAdjusted_logFC: Watson2016`)[2]) %>% as.data.frame()
top100SingleRegion_tidyStats = group_by(singleRegion_tidyStats, Region, Model, CellTypeAdjustment) %>% top_n(n = 100, wt = `P.Value`)
singleRegion_DMP_Replication_top100=group_by(top100SingleRegion_tidyStats,Region, Model, CellTypeAdjustment) %>% summarise(
N_CpG=n() ,
Lunnon_PFC_Dx_N= check_replication( discovery_names=Name,
discovery_effect_size=logFC,
replication_names=Name,
replication_p=Lunnon_mergedStats$`PFC_Dx_P.Value: Lunnon2014`,
replication_effect_size=Lunnon_mergedStats$`PFC_Dx_logFC: Lunnon2014`)[1],
Lunnon_PFC_Dx_Percent = check_replication( discovery_names=Name,
discovery_effect_size=logFC,
replication_names=Name,
replication_p=Lunnon_mergedStats$`PFC_Dx_P.Value: Lunnon2014`,
replication_effect_size=Lunnon_mergedStats$`PFC_Dx_logFC: Lunnon2014`)[2],
Lunnon_STG_Dx_N = check_replication( discovery_names=Name,
discovery_effect_size=logFC,
replication_names=Name,
replication_p=Lunnon_mergedStats$`STG_Dx_P.Value: Lunnon2014`,
replication_effect_size=Lunnon_mergedStats$`STG_Dx_logFC: Lunnon2014`)[1],
Lunnon_STG_Dx_Percent = check_replication( discovery_names=Name,
discovery_effect_size=logFC,
replication_names=Name,
replication_p=Lunnon_mergedStats$`STG_Dx_P.Value: Lunnon2014`,
replication_effect_size=Lunnon_mergedStats$`STG_Dx_logFC: Lunnon2014`)[2],
Lunnon_ERC_Dx_N = check_replication( discovery_names=Name,
discovery_effect_size=logFC,
replication_names=Name,
replication_p=Lunnon_mergedStats$`ERC_Dx_P.Value: Lunnon2014`,
replication_effect_size=Lunnon_mergedStats$`ERC_Dx_logFC: Lunnon2014`)[1],
Lunnon_ERC_Dx_Percent = check_replication( discovery_names=Name,
discovery_effect_size=logFC,
replication_names=Name,
replication_p=Lunnon_mergedStats$`ERC_Dx_P.Value: Lunnon2014`,
replication_effect_size=Lunnon_mergedStats$`ERC_Dx_logFC: Lunnon2014`)[2],
Lunnon_CRB_Dx_N = check_replication( discovery_names=Name,
discovery_effect_size=logFC,
replication_names=Name,
replication_p=Lunnon_mergedStats$`CRB_Dx_P.Value: Lunnon2014`,
replication_effect_size=Lunnon_mergedStats$`CRB_Dx_logFC: Lunnon2014`)[1],
Lunnon_CRB_Dx_Percent = check_replication( discovery_names=Name,
discovery_effect_size=logFC,
replication_names=Name,
replication_p=Lunnon_mergedStats$`CRB_Dx_P.Value: Lunnon2014`,
replication_effect_size=Lunnon_mergedStats$`CRB_Dx_logFC: Lunnon2014`)[2],
Watson_STG_Dx_unAdj_N = check_replication( discovery_names=Name,
discovery_effect_size=logFC,
replication_names=Name,
replication_p=Watson_mergedStats$`STG_Dx_Unadjusted_P.Value: Watson2016`,
replication_effect_size=Watson_mergedStats$`STG_Dx_Unadjusted_logFC: Watson2016`)[1],
Watson_STG_Dx_unAdj_Percent = check_replication( discovery_names=Name,
discovery_effect_size=logFC,
replication_names=Name,
replication_p=Watson_mergedStats$`STG_Dx_Unadjusted_P.Value: Watson2016`,
replication_effect_size=Watson_mergedStats$`STG_Dx_Unadjusted_logFC: Watson2016`)[2],
Watson_STG_Dx_Adj_N = check_replication( discovery_names=Name,
discovery_effect_size=logFC,
replication_names=Name,
replication_p=Watson_mergedStats$`STG_Dx_cellCompAdjusted_P.Value: Watson2016`,
replication_effect_size=Watson_mergedStats$`STG_Dx_cellCompAdjusted_logFC: Watson2016`)[1],
Watson_STG_Dx_Adj_Percent = check_replication( discovery_names=Name,
discovery_effect_size=logFC,
replication_names=Name,
replication_p=Watson_mergedStats$`STG_Dx_cellCompAdjusted_P.Value: Watson2016`,
replication_effect_size=Watson_mergedStats$`STG_Dx_cellCompAdjusted_logFC: Watson2016`)[2]) %>% as.data.frame()
singleRegion_DMP_Replication_top100 = singleRegion_DMP_Replication_top100[ ,-grep("_N", colnames(singleRegion_DMP_Replication_top100))]
openxlsx::write.xlsx(list(FDR10_replication=singleRegion_DMP_Replication,top100_replication=singleRegion_DMP_Replication_top100), file='/dcl01/lieber/ajaffe/Steve/Alz/csvs/LIBD_Discovery_Replication_Results.xlsx')
## Load our statistics: ALL REGION
## Load our statistics
load('/dcl01/lieber/ajaffe/Steve/Alz/rdas/tidyStats_caseControl_DMC_allRegion.rda')
##
sigAllRegion_tidyStats = allRegion_tidyStats[ allRegion_tidyStats[,'adj.P.Val']<0.1, ]
allRegion_DMP_Replication=sigAllRegion_tidyStats%>%filter(Interaction=="mainEffect")%>%group_by(Sensitivity, Model) %>% summarise(
FDR_10=n() ,
Lunnon_Main_Dx_N= check_replication( discovery_names=Name,
discovery_effect_size=logFC,
replication_names=Name,
replication_p=Lunnon_mergedStats$`ALL_main_P.Value: Lunnon2014`,
replication_effect_size=Lunnon_mergedStats$`ALL_main_logFC: Lunnon2014`)[1],
Lunnon_Main_Dx_Percent = check_replication( discovery_names=Name,
discovery_effect_size=logFC,
replication_names=Name,
replication_p=Lunnon_mergedStats$`ALL_main_Dx_P.Value: Lunnon2014`,
replication_effect_size=Lunnon_mergedStats$`ALL_main_Dx_logFC: Lunnon2014`)[2],
Watson_STG_Dx_unAdj_N = check_replication( discovery_names=Name,
discovery_effect_size=logFC,
replication_names=Name,
replication_p=Watson_mergedStats$`STG_Dx_Unadjusted_P.Value: Watson2016`,
replication_effect_size=Watson_mergedStats$`STG_Dx_Unadjusted_logFC: Watson2016`)[1],
Watson_STG_Dx_unAdj_Percent = check_replication( discovery_names=Name,
discovery_effect_size=logFC,
replication_names=Name,
replication_p=Watson_mergedStats$`STG_Dx_Unadjusted_P.Value: Watson2016`,
replication_effect_size=Watson_mergedStats$`STG_Dx_Unadjusted_logFC: Watson2016`)[2],
Watson_STG_Dx_Adj_N = check_replication( discovery_names=Name,
discovery_effect_size=logFC,
replication_names=Name,
replication_p=Watson_mergedStats$`STG_Dx_cellCompAdjusted_P.Value: Watson2016`,
replication_effect_size=Watson_mergedStats$`STG_Dx_cellCompAdjusted_logFC: Watson2016`)[1],
Watson_STG_Dx_Adj_Percent = check_replication( discovery_names=Name,
discovery_effect_size=logFC,
replication_names=Name,
replication_p=Watson_mergedStats$`STG_Dx_cellCompAdjusted_P.Value: Watson2016`,
replication_effect_size=Watson_mergedStats$`STG_Dx_cellCompAdjusted_logFC: Watson2016`)[2]) %>% as.data.frame()