forked from rkoeck/UCB_methylome
-
Notifications
You must be signed in to change notification settings - Fork 0
/
02.2_SexPredictionsEst.R
65 lines (39 loc) · 2.23 KB
/
02.2_SexPredictionsEst.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
###########################################################################################################################
# Author: Rebekka Koeck
# Lab: Cellular Genomic Medicine, Clinical Genetics, Maastricht University (Medical Centre +)
# script purpose: sex prediction the sEst package & visualisation of the results
# input: detection p-values (.csv) and raw beta values (.csv) generated in the script 02preprocessingSest.R
# each cohort is run separately as described in 02preprocessingSest.R
# output: updated annotation file containing pedicted sex columns (.csv)
# scatter plot of the results
###########################################################################################################################
# load packages
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(data.table))
suppressPackageStartupMessages(library(sest))
suppressPackageStartupMessages(library(tibble))
suppressPackageStartupMessages(library(ggplot2))
# load the data
betas = fread("rawBetaValues.csv")
pvals = fread("detPval.csv")
ann = fread("annotation.csv")
# correctly format the betas and pvals for the sest tool
betas = betas %>% column_to_rownames("V1") %>% as.matrix()
pvals = pvals %>% column_to_rownames("V1") %>% as.matrix()
# conduct sex estimation
sex = estimateSex(beta.value = betas, detecP = pvals)
# plot the results
prediction = sex$test %>% as.data.frame() %>% rownames_to_column("Basename")
ann = full_join(ann, prediction, by = "Basename")
plot = ann %>% ggplot(aes(x = X.PC1, y = Y.PC1, colour = predicted, shape = gender)) +
geom_point() +
theme_classic() +
scale_shape_manual(values = c("F" = 16, "M" = 17), labels = c("Female", "Male")) +
scale_color_manual(values = c("F" = "plum2", "M" = "skyblue2", "N" = "grey"), labels = c("Female", "Male", "Not specified")) +
labs(shape = "Recorded sex", colour = "Predicted Sex")
# compare the annotated gender to the predicted gender
ann = sex$test %>% rownames_to_column("Basename") %>%
select(Basename, predicted.X, predicted.Y, predicted) %>%
full_join(ann, ., by = "Basename")
mismatch = ann %>% filter(gender != predicted)
# samples with a mismatched sex prediction and recorded sex are removed from downstream analyses