In [None]:
### TWAS signature

In [5]:
library(dplyr)
library(data.table)

In [7]:
#' Extract the liver TWAS signature for one trait.
#'
#' Reads the two liver result tables ("Liver.csv" and "Liver_STARNET1.csv")
#' from the `<trait_str>/<test_type>/` sub-directory of the UTMOST results
#' folder, drops rows containing any missing value, keeps the gene-level
#' statistics and adds an FDR-adjusted p-value. The adjustment is done
#' separately within each file, not across the stacked result.
#'
#' @param trait_str Name of the trait sub-directory (e.g. "CAD").
#' @param test_type Name of the test sub-directory (e.g. "single").
#' @return A data.frame with columns gene, zscore, effect_size, pvalue,
#'   pvalue_adj and tissue; rows of both files are stacked in file order.
extract_twas_signature <- function(trait_str, test_type) {

    res_vec <- c("Liver.csv", "Liver_STARNET1.csv")
    twas_dir <- paste0("/ysm-gpfs/pi/zhao-data/zy92/update_utmost/", trait_str, "/", test_type, "/")

    # Build one table per file and stack them once at the end, instead of
    # growing a data frame with rbind() on every iteration.
    per_tissue <- lapply(res_vec, function(file_nam) {
        cur_df <- as.data.frame(data.table::fread(paste0(twas_dir, file_nam)))
        # Completeness is judged on every column of the file, before the
        # column selection below.
        cur_df <- cur_df[complete.cases(cur_df), ]
        cur_df <- cur_df %>%
            select(gene, zscore, effect_size, pvalue)
        cur_df$pvalue_adj <- p.adjust(cur_df$pvalue, method = "fdr")
        # Tissue label = file name up to the first dot. rep() keeps the
        # assignment valid when no complete rows remain.
        tissue_label <- strsplit(file_nam, ".", fixed = TRUE)[[1]][1]
        cur_df$tissue <- rep(tissue_label, nrow(cur_df))
        cur_df
    })

    do.call(rbind, per_tissue)
}

In [1]:
list.files("/ysm-gpfs/pi/zhao-data/zy92/update_utmost/")

In [2]:
## Main: extract the TWAS signature for every trait

In [13]:
# Traits whose TWAS results are collected.
trait_vec <- c("BMI_GIANT_2015", "CAD", "Fatty_liver", "FC", "T2D")

# One signature table per trait, keyed by trait name and ordered by
# ascending adjusted p-value.
signature_list_twas <- list()
for (trait_str in trait_vec) {
    test_type <- "single"
    signature_list_twas[[trait_str]] <- arrange(
        extract_twas_signature(trait_str, test_type),
        pvalue_adj
    )
}

In [9]:
signature_list_twas[["FC"]]

gene,zscore,effect_size,pvalue,pvalue_adj,tissue
LIPC,-12.760447,-0.3921268,2.726337e-37,1.622988e-33,Liver_STARNET1
PSRC1,6.635514,0.5111294,3.233732e-11,9.625202e-08,Liver_STARNET1
PSRC1,-6.697075,-0.2970437,2.126322e-11,1.654278e-07,Liver
LIPC,-6.221472,-0.0119943,4.925108e-10,1.915867e-06,Liver
SORT1,6.075447,0.3889294,1.236431e-09,2.453492e-06,Liver_STARNET1
CELSR2,-5.925606,-1.7506088,3.111481e-09,8.069106e-06,Liver
CELSR2,5.802983,0.4866653,6.514532e-09,9.695252e-06,Liver_STARNET1
RP11-115J16.1,4.904522,0.1886279,9.365501e-07,1.115057e-03,Liver_STARNET1
PUM2,-4.918214,-3.4752338,8.733733e-07,1.698711e-03,Liver
NLRC5,-4.654694,-3.0163299,3.244629e-06,4.883212e-03,Liver


In [14]:
signature_list_twas[["Fatty_liver"]]

gene,zscore,effect_size,pvalue,pvalue_adj,tissue
C2orf16,-4.458426,-0.67543882,8.256384e-06,0.04850626,Liver_STARNET1
NCF1C,3.905734,1.51229365,9.393980e-05,0.18840909,Liver_STARNET1
GTF2IRD2,3.899959,1.58501471,9.620890e-05,0.18840909,Liver_STARNET1
PMS2P5,-3.691793,-0.71500635,2.226786e-04,0.24795770,Liver_STARNET1
IRAK1BP1,3.662950,0.29575847,2.493276e-04,0.24795770,Liver_STARNET1
ZNF783,-3.658968,-0.63846101,2.532334e-04,0.24795770,Liver_STARNET1
LASP1,3.523096,9.72707656,4.265371e-04,0.35798646,Liver_STARNET1
NPW,4.039009,0.08434401,5.367750e-05,0.40547981,Liver
TMC4,-3.423291,-0.58818007,6.186789e-04,0.44686124,Liver_STARNET1
STAG3L2,-3.395692,-0.71270137,6.845534e-04,0.44686124,Liver_STARNET1


In [15]:
signature_list_twas[["CAD"]]

gene,zscore,effect_size,pvalue,pvalue_adj,tissue
CELSR2,-6.080397,-4.906468e-01,1.198854e-09,3.889522e-06,Liver_STARNET1
PSRC1,-6.039615,-4.659235e-01,1.544822e-09,3.889522e-06,Liver_STARNET1
SORT1,-6.001078,-3.927058e-01,1.960115e-09,3.889522e-06,Liver_STARNET1
CEP41,-5.980833,-6.123768e+02,2.220000e-09,8.090718e-06,Liver
CELSR2,5.931947,3.589004e+00,2.993642e-09,8.090718e-06,Liver
PSRC1,5.920755,3.487337e-01,3.204668e-09,8.090718e-06,Liver
CARF,5.486486,6.329079e-01,4.100084e-08,6.101950e-05,Liver_STARNET1
FAM109A,5.268340,3.642126e-01,1.376630e-07,2.606649e-04,Liver
PPP1CC,5.008477,2.458786e+00,5.486234e-07,8.310547e-04,Liver
RAI1,4.621070,2.820820e-01,3.817665e-06,4.545312e-03,Liver_STARNET1


In [16]:
signature_list_twas[["T2D"]]

gene,zscore,effect_size,pvalue,pvalue_adj,tissue
FRA10AC1,7.626288,4.2376205,2.416095e-14,1.828501e-10,Liver
TCF7L2,5.720536,1.6662900,1.061888e-08,4.018184e-05,Liver
CCDC88C,4.901529,0.5761672,9.509353e-07,3.536818e-03,Liver_STARNET1
SSR1,-4.857587,-0.5632075,1.188247e-06,3.536818e-03,Liver_STARNET1
HLA-DRB5,-4.387375,-0.1151142,1.147270e-05,1.901769e-02,Liver_STARNET1
HLA-DRA,4.363863,0.4865237,1.277856e-05,1.901769e-02,Liver_STARNET1
RFT1,-4.248127,-0.6006725,2.155655e-05,2.416756e-02,Liver_STARNET1
CYP21A2,4.220664,0.4015084,2.435837e-05,2.416756e-02,Liver_STARNET1
VPS13C,4.391237,1.8980788,1.127076e-05,2.843236e-02,Liver
FCHSD2,4.112799,0.3838096,3.908900e-05,3.250585e-02,Liver_STARNET1


In [17]:
signature_list_twas[["BMI_GIANT_2015"]]

gene,zscore,effect_size,pvalue,pvalue_adj,tissue
C1QTNF4,-8.558122,-34.421578025,1.147205e-17,8.700403e-14,Liver
SH2B1,-8.444623,-0.477798745,3.050285e-17,1.156668e-13,Liver
CENPO,-8.195914,-0.633572522,2.486965e-16,6.287047e-13,Liver
SH2B1,-7.853227,-0.168106459,4.054672e-15,2.315707e-11,Liver_STARNET1
TUFM,-7.742102,-0.492387787,9.778635e-15,2.315707e-11,Liver_STARNET1
EIF3C,7.719595,0.111391350,1.166995e-14,2.315707e-11,Liver_STARNET1
NFATC2IP,7.528287,0.110901132,5.141027e-14,7.651134e-11,Liver_STARNET1
CLN3,7.309086,2.138821340,2.689661e-13,5.099597e-10,Liver
ULK3,-6.953543,-0.118658941,3.562239e-12,4.241201e-09,Liver_STARNET1
PSMC3,6.784423,0.148463886,1.165516e-11,1.156386e-08,Liver_STARNET1


In [18]:
#' Keep the rows of a signature table that pass an adjusted p-value cutoff.
#'
#' @param signature_df Data frame with a `pvalue_adj` column.
#' @param cutoff Threshold; only rows with `pvalue_adj` strictly below it
#'   are kept.
#' @return The filtered data frame.
significant_signatures <- function(signature_df, cutoff) {
    kept_rows <- filter(signature_df, pvalue_adj < cutoff)
    return(kept_rows)
}

In [19]:
names(signature_list_twas)

In [21]:
dim(significant_signatures(signature_list_twas[["BMI_GIANT_2015"]], 0.05))

In [26]:
length(unique(significant_signatures(signature_list_twas[["BMI_GIANT_2015"]], 0.05)$gene))

In [27]:
# Apply the 0.05 adjusted p-value cutoff to every trait's signature table.
signature_list_twas_significant <- lapply(
    signature_list_twas,
    significant_signatures,
    cutoff = 0.05
)

In [28]:
signature_list_twas_significant

gene,zscore,effect_size,pvalue,pvalue_adj,tissue
C1QTNF4,-8.558122,-34.421578025,1.147205e-17,8.700403e-14,Liver
SH2B1,-8.444623,-0.477798745,3.050285e-17,1.156668e-13,Liver
CENPO,-8.195914,-0.633572522,2.486965e-16,6.287047e-13,Liver
SH2B1,-7.853227,-0.168106459,4.054672e-15,2.315707e-11,Liver_STARNET1
TUFM,-7.742102,-0.492387787,9.778635e-15,2.315707e-11,Liver_STARNET1
EIF3C,7.719595,0.111391350,1.166995e-14,2.315707e-11,Liver_STARNET1
NFATC2IP,7.528287,0.110901132,5.141027e-14,7.651134e-11,Liver_STARNET1
CLN3,7.309086,2.138821340,2.689661e-13,5.099597e-10,Liver
ULK3,-6.953543,-0.118658941,3.562239e-12,4.241201e-09,Liver_STARNET1
PSMC3,6.784423,0.148463886,1.165516e-11,1.156386e-08,Liver_STARNET1

gene,zscore,effect_size,pvalue,pvalue_adj,tissue
CELSR2,-6.080397,-0.4906468,1.198854e-09,3.889522e-06,Liver_STARNET1
PSRC1,-6.039615,-0.4659235,1.544822e-09,3.889522e-06,Liver_STARNET1
SORT1,-6.001078,-0.3927058,1.960115e-09,3.889522e-06,Liver_STARNET1
CEP41,-5.980833,-612.3768,2.22e-09,8.090718e-06,Liver
CELSR2,5.931947,3.589004,2.993642e-09,8.090718e-06,Liver
PSRC1,5.920755,0.3487337,3.204668e-09,8.090718e-06,Liver
CARF,5.486486,0.6329079,4.100084e-08,6.10195e-05,Liver_STARNET1
FAM109A,5.26834,0.3642126,1.37663e-07,0.0002606649,Liver
PPP1CC,5.008477,2.458786,5.486234e-07,0.0008310547,Liver
RAI1,4.62107,0.282082,3.817665e-06,0.004545312,Liver_STARNET1

gene,zscore,effect_size,pvalue,pvalue_adj,tissue
C2orf16,-4.458426,-0.6754388,8.256384e-06,0.04850626,Liver_STARNET1

gene,zscore,effect_size,pvalue,pvalue_adj,tissue
LIPC,-12.760447,-0.3921268,2.7263369999999998e-37,1.622988e-33,Liver_STARNET1
PSRC1,6.635514,0.5111294,3.233732e-11,9.625202e-08,Liver_STARNET1
PSRC1,-6.697075,-0.2970437,2.126322e-11,1.654278e-07,Liver
LIPC,-6.221472,-0.0119943,4.925108e-10,1.915867e-06,Liver
SORT1,6.075447,0.3889294,1.236431e-09,2.453492e-06,Liver_STARNET1
CELSR2,-5.925606,-1.7506088,3.111481e-09,8.069106e-06,Liver
CELSR2,5.802983,0.4866653,6.514532e-09,9.695252e-06,Liver_STARNET1
RP11-115J16.1,4.904522,0.1886279,9.365501e-07,0.001115057,Liver_STARNET1
PUM2,-4.918214,-3.4752338,8.733733e-07,0.001698711,Liver
NLRC5,-4.654694,-3.0163299,3.244629e-06,0.004883212,Liver

gene,zscore,effect_size,pvalue,pvalue_adj,tissue
FRA10AC1,7.626288,4.2376205,2.416095e-14,1.828501e-10,Liver
TCF7L2,5.720536,1.66629,1.061888e-08,4.018184e-05,Liver
CCDC88C,4.901529,0.5761672,9.509353e-07,0.003536818,Liver_STARNET1
SSR1,-4.857587,-0.5632075,1.188247e-06,0.003536818,Liver_STARNET1
HLA-DRB5,-4.387375,-0.1151142,1.14727e-05,0.01901769,Liver_STARNET1
HLA-DRA,4.363863,0.4865237,1.277856e-05,0.01901769,Liver_STARNET1
RFT1,-4.248127,-0.6006725,2.155655e-05,0.02416756,Liver_STARNET1
CYP21A2,4.220664,0.4015084,2.435837e-05,0.02416756,Liver_STARNET1
VPS13C,4.391237,1.8980788,1.127076e-05,0.02843236,Liver
FCHSD2,4.112799,0.3838096,3.9089e-05,0.03250585,Liver_STARNET1


In [29]:
# Persist the per-trait tables of significant genes as an .RData file;
# the bare file name means it is written to the current working directory.
save(signature_list_twas_significant, file = "signature_list_twas_significant.RData")

In [57]:
#' Positions of repeated values in the sorted input.
#'
#' The input is sorted first (sort() drops NA values), and the returned
#' indices refer to positions in that sorted vector, not in `nums` as passed
#' in. Every occurrence after the first of a value is reported.
#'
#' @param nums An atomic vector (numeric or character).
#' @return An integer vector of positions in `sort(nums)` that repeat an
#'   earlier value, or NULL when there are none (including empty and
#'   single-element input).
get_duplicated_idx <- function(nums) {
    # Fewer than two values cannot contain a repeat. This also covers empty
    # input, which previously iterated over 2:0 and failed.
    if (length(nums) < 2) {
        return(NULL)
    }
    sorted_nums <- sort(nums)
    duplicated_idx <- which(duplicated(sorted_nums))
    # Keep the historical "no duplicates" result of NULL.
    if (length(duplicated_idx) == 0) {
        return(NULL)
    }
    duplicated_idx
}

In [58]:
#' Collapse a signature table to one row per gene.
#'
#' For each gene the row with the smallest `pvalue_adj` is kept; the result
#' is ordered by ascending `pvalue_adj`.
#'
#' Duplicates are detected directly on the arranged rows. Positions computed
#' on a separately sorted copy of the gene names are not safe to use here:
#' sort() follows the session collation, which need not match the order
#' produced by arrange() (NOTE(review): dplyr >= 1.1.0 documents a C-locale
#' default for arrange(); confirm for the installed version).
#'
#' @param signature_df Data frame with `gene` and `pvalue_adj` columns; may
#'   have zero rows.
#' @return The de-duplicated data frame, sorted by `pvalue_adj`.
get_unique_signature <- function(signature_df) {
    signature_df %>%
        # Within a gene, the best (smallest) adjusted p-value comes first ...
        arrange(gene, pvalue_adj) %>%
        # ... so dropping later occurrences keeps exactly that row.
        filter(!duplicated(gene)) %>%
        arrange(pvalue_adj)
}

In [46]:
dim(get_unique_signature(signature_list_twas_significant[["BMI_GIANT_2015"]]))

In [40]:
get_duplicated_idx(c(1,4,2,3,4,1,5))

In [41]:
get_duplicated_idx(c("a", "b", "c", "c", "d"))

### Get the unique signatures

In [59]:
# Reduce each trait's table of significant genes to one row per gene,
# storing the result under the same trait name.
signature_list_twas_significant_unique <- list()
for (trait_str in trait_vec) {
    signature_list_twas_significant_unique[[trait_str]] <-
        signature_list_twas_significant[[trait_str]] %>%
        get_unique_signature()
}


In [61]:
signature_list_twas_significant_unique[["BMI_GIANT_2015"]]

gene,zscore,effect_size,pvalue,pvalue_adj,tissue
C1QTNF4,-8.558122,-34.421578025,1.147205e-17,8.700403e-14,Liver
SH2B1,-8.444623,-0.477798745,3.050285e-17,1.156668e-13,Liver
CENPO,-8.195914,-0.633572522,2.486965e-16,6.287047e-13,Liver
EIF3C,7.719595,0.111391350,1.166995e-14,2.315707e-11,Liver_STARNET1
TUFM,-7.742102,-0.492387787,9.778635e-15,2.315707e-11,Liver_STARNET1
NFATC2IP,7.528287,0.110901132,5.141027e-14,7.651134e-11,Liver_STARNET1
CLN3,7.309086,2.138821340,2.689661e-13,5.099597e-10,Liver
ULK3,-6.953543,-0.118658941,3.562239e-12,4.241201e-09,Liver_STARNET1
PSMC3,6.784423,0.148463886,1.165516e-11,1.156386e-08,Liver_STARNET1
WBSCR16,6.363272,14.030629445,1.975002e-10,2.496403e-07,Liver


In [62]:
signature_list_twas_significant_unique[["Fatty_liver"]]

gene,zscore,effect_size,pvalue,pvalue_adj,tissue
C2orf16,-4.458426,-0.6754388,8.256384e-06,0.04850626,Liver_STARNET1


In [68]:
getwd()

In [69]:
# Persist the de-duplicated significant signatures as an .RData file two
# directory levels above the current working directory (the path is
# relative, so the destination depends on getwd()).
save(signature_list_twas_significant_unique, file = "../../signature_list_twas_significant_unique.RData")

In [63]:
signature_list_twas_significant_unique[["FC"]]

gene,zscore,effect_size,pvalue,pvalue_adj,tissue
LIPC,-12.760447,-0.3921268,2.7263369999999998e-37,1.622988e-33,Liver_STARNET1
PSRC1,6.635514,0.5111294,3.233732e-11,9.625202e-08,Liver_STARNET1
SORT1,6.075447,0.3889294,1.236431e-09,2.453492e-06,Liver_STARNET1
CELSR2,-5.925606,-1.7506088,3.111481e-09,8.069106e-06,Liver
RP11-115J16.1,4.904522,0.1886279,9.365501e-07,0.001115057,Liver_STARNET1
PUM2,-4.918214,-3.4752338,8.733733e-07,0.001698711,Liver
CETP,-4.623897,-0.2630017,3.765973e-06,0.004883212,Liver
NLRC5,-4.654694,-3.0163299,3.244629e-06,0.004883212,Liver
DARS,-4.339186,-0.6944768,1.430112e-05,0.01253211,Liver_STARNET1
FCGR2B,4.251649,0.1527716,2.122026e-05,0.01579053,Liver_STARNET1


In [65]:
signature_list_twas_significant_unique[["T2D"]]

gene,zscore,effect_size,pvalue,pvalue_adj,tissue
FRA10AC1,7.626288,4.2376205,2.416095e-14,1.828501e-10,Liver
TCF7L2,5.720536,1.66629,1.061888e-08,4.018184e-05,Liver
CCDC88C,4.901529,0.5761672,9.509353e-07,0.003536818,Liver_STARNET1
SSR1,-4.857587,-0.5632075,1.188247e-06,0.003536818,Liver_STARNET1
HLA-DRA,4.363863,0.4865237,1.277856e-05,0.01901769,Liver_STARNET1
HLA-DRB5,-4.387375,-0.1151142,1.14727e-05,0.01901769,Liver_STARNET1
CYP21A2,4.220664,0.4015084,2.435837e-05,0.02416756,Liver_STARNET1
RFT1,-4.248127,-0.6006725,2.155655e-05,0.02416756,Liver_STARNET1
VPS13C,4.391237,1.8980788,1.127076e-05,0.02843236,Liver
FCHSD2,4.112799,0.3838096,3.9089e-05,0.03250585,Liver_STARNET1


In [None]:
# NOTE(review): unfinished stub - the definition has no body, and the
# "In [None]" cell label suggests it was never run. Complete or remove it.
split_signature <- function(signat)

In [67]:
getwd()

In [66]:
signature_list_twas

gene,zscore,effect_size,pvalue,pvalue_adj,tissue
C1QTNF4,-8.558122,-34.421578025,1.147205e-17,8.700403e-14,Liver
SH2B1,-8.444623,-0.477798745,3.050285e-17,1.156668e-13,Liver
CENPO,-8.195914,-0.633572522,2.486965e-16,6.287047e-13,Liver
SH2B1,-7.853227,-0.168106459,4.054672e-15,2.315707e-11,Liver_STARNET1
TUFM,-7.742102,-0.492387787,9.778635e-15,2.315707e-11,Liver_STARNET1
EIF3C,7.719595,0.111391350,1.166995e-14,2.315707e-11,Liver_STARNET1
NFATC2IP,7.528287,0.110901132,5.141027e-14,7.651134e-11,Liver_STARNET1
CLN3,7.309086,2.138821340,2.689661e-13,5.099597e-10,Liver
ULK3,-6.953543,-0.118658941,3.562239e-12,4.241201e-09,Liver_STARNET1
PSMC3,6.784423,0.148463886,1.165516e-11,1.156386e-08,Liver_STARNET1

gene,zscore,effect_size,pvalue,pvalue_adj,tissue
CELSR2,-6.080397,-4.906468e-01,1.198854e-09,3.889522e-06,Liver_STARNET1
PSRC1,-6.039615,-4.659235e-01,1.544822e-09,3.889522e-06,Liver_STARNET1
SORT1,-6.001078,-3.927058e-01,1.960115e-09,3.889522e-06,Liver_STARNET1
CEP41,-5.980833,-6.123768e+02,2.220000e-09,8.090718e-06,Liver
CELSR2,5.931947,3.589004e+00,2.993642e-09,8.090718e-06,Liver
PSRC1,5.920755,3.487337e-01,3.204668e-09,8.090718e-06,Liver
CARF,5.486486,6.329079e-01,4.100084e-08,6.101950e-05,Liver_STARNET1
FAM109A,5.268340,3.642126e-01,1.376630e-07,2.606649e-04,Liver
PPP1CC,5.008477,2.458786e+00,5.486234e-07,8.310547e-04,Liver
RAI1,4.621070,2.820820e-01,3.817665e-06,4.545312e-03,Liver_STARNET1

gene,zscore,effect_size,pvalue,pvalue_adj,tissue
C2orf16,-4.458426,-0.67543882,8.256384e-06,0.04850626,Liver_STARNET1
NCF1C,3.905734,1.51229365,9.393980e-05,0.18840909,Liver_STARNET1
GTF2IRD2,3.899959,1.58501471,9.620890e-05,0.18840909,Liver_STARNET1
PMS2P5,-3.691793,-0.71500635,2.226786e-04,0.24795770,Liver_STARNET1
IRAK1BP1,3.662950,0.29575847,2.493276e-04,0.24795770,Liver_STARNET1
ZNF783,-3.658968,-0.63846101,2.532334e-04,0.24795770,Liver_STARNET1
LASP1,3.523096,9.72707656,4.265371e-04,0.35798646,Liver_STARNET1
NPW,4.039009,0.08434401,5.367750e-05,0.40547981,Liver
TMC4,-3.423291,-0.58818007,6.186789e-04,0.44686124,Liver_STARNET1
STAG3L2,-3.395692,-0.71270137,6.845534e-04,0.44686124,Liver_STARNET1

gene,zscore,effect_size,pvalue,pvalue_adj,tissue
LIPC,-12.760447,-0.3921268,2.726337e-37,1.622988e-33,Liver_STARNET1
PSRC1,6.635514,0.5111294,3.233732e-11,9.625202e-08,Liver_STARNET1
PSRC1,-6.697075,-0.2970437,2.126322e-11,1.654278e-07,Liver
LIPC,-6.221472,-0.0119943,4.925108e-10,1.915867e-06,Liver
SORT1,6.075447,0.3889294,1.236431e-09,2.453492e-06,Liver_STARNET1
CELSR2,-5.925606,-1.7506088,3.111481e-09,8.069106e-06,Liver
CELSR2,5.802983,0.4866653,6.514532e-09,9.695252e-06,Liver_STARNET1
RP11-115J16.1,4.904522,0.1886279,9.365501e-07,1.115057e-03,Liver_STARNET1
PUM2,-4.918214,-3.4752338,8.733733e-07,1.698711e-03,Liver
NLRC5,-4.654694,-3.0163299,3.244629e-06,4.883212e-03,Liver

gene,zscore,effect_size,pvalue,pvalue_adj,tissue
FRA10AC1,7.626288,4.2376205,2.416095e-14,1.828501e-10,Liver
TCF7L2,5.720536,1.6662900,1.061888e-08,4.018184e-05,Liver
CCDC88C,4.901529,0.5761672,9.509353e-07,3.536818e-03,Liver_STARNET1
SSR1,-4.857587,-0.5632075,1.188247e-06,3.536818e-03,Liver_STARNET1
HLA-DRB5,-4.387375,-0.1151142,1.147270e-05,1.901769e-02,Liver_STARNET1
HLA-DRA,4.363863,0.4865237,1.277856e-05,1.901769e-02,Liver_STARNET1
RFT1,-4.248127,-0.6006725,2.155655e-05,2.416756e-02,Liver_STARNET1
CYP21A2,4.220664,0.4015084,2.435837e-05,2.416756e-02,Liver_STARNET1
VPS13C,4.391237,1.8980788,1.127076e-05,2.843236e-02,Liver
FCHSD2,4.112799,0.3838096,3.908900e-05,3.250585e-02,Liver_STARNET1
