# pQTL panel

In [1]:
library(readr)
library(dplyr)
library(tidyr)
library(purrr)


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union




In [2]:
# Root folder of the BN project; every data path in this notebook is built
# from it with file.path().
# The location can be overridden by setting the BN_PROJFOLD environment
# variable (e.g. in ~/.Renviron), so the notebook also runs on other
# machines. When the variable is unset or empty, the original path is used.
projfold <- Sys.getenv("BN_PROJFOLD", unset = "")
if (!nzchar(projfold)) {
  projfold <- "/Users/da1078co/Documents/Lund/PhD/Projects/BN"
}

## Olink protein map

Note: gene start and end coordinates are in hg38.

In [3]:
# Read the Olink protein map (TSV) from the project's data folder; the
# column-type message of read_tsv() is switched off.
olinkmap <- file.path(projfold, "data", "olink_protein_map_3k_v1.tsv") |>
  read_tsv(show_col_types = FALSE)
# Preview the first rows.
head(olinkmap)

UKBPPP_ProteinID,olink_target_fullname,OlinkID,UniProt,Assay,Panel,Panel_Lot_Nr,UniProt2,HGNC.symbol,ensembl_id,chr,gene_start,gene_end,Strand,Dilution_factor,block,expansion
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>
AARSD1:Q9BTE6:OID21311:v1,Alanyl-tRNA editing protein Aarsd1,OID21311,Q9BTE6,AARSD1,Oncology,B04412,Q9BTE6,AARSD1,ENSG00000266967,17,42950526,42964498,-1,1:1,B,no
ABHD14B:Q96IU4:OID20921:v1,Protein ABHD14B,OID20921,Q96IU4,ABHD14B,Neurology,B04414,Q96IU4,ABHD14B,ENSG00000114779,3,51968510,51983409,-1,1:1,B,no
ABL1:P00519:OID21280:v1,Tyrosine-protein kinase ABL1,OID21280,P00519,ABL1,Oncology,B04412,P00519,ABL1,ENSG00000097007,9,130713016,130887675,1,1:1,B,no
ACAA1:P09110:OID21269:v1,"3-ketoacyl-CoA thiolase, peroxisomal",OID21269,P09110,ACAA1,Oncology,B04412,P09110,ACAA1,ENSG00000060971,3,38103129,38137242,-1,1:1,A,no
ACAN:P16112:OID20159:v1,Aggrecan core protein,OID20159,P16112,ACAN,Cardiometabolic,B04413,P16112,ACAN,ENSG00000157766,15,88803436,88875353,1,1:10,B,no
ACE2:Q9BYF1:OID20105:v1,Angiotensin-converting enzyme 2,OID20105,Q9BYF1,ACE2,Cardiometabolic,B04413,Q9BYF1,ACE2,ENSG00000130234,X,15494566,15607236,-1,1:1,A,no


## UKB-PPP file map

In [4]:
# Read the UKB-PPP file map (columns `name` and `id`) from the project's
# data folder; the column-type message of read_tsv() is switched off.
olinkfilemap <- file.path(projfold, "data", "olinkfilemapUKB.tsv") |>
  read_tsv(show_col_types = FALSE)
# Preview the first rows.
head(olinkfilemap)

name,id
<chr>,<chr>
A1BG_P04217_OID30771_v1_Inflammation_II.tar,syn52363617
AAMDC_Q9H7C9_OID30236_v1_Cardiometabolic_II.tar,syn52362400
AARSD1_Q9BTE6_OID21311_v1_Oncology.tar,syn51471562
ABCA2_Q9BZC7_OID30146_v1_Cardiometabolic_II.tar,syn52361344
ABHD14B_Q96IU4_OID20921_v1_Neurology.tar,syn51470065
ABL1_P00519_OID21280_v1_Oncology.tar,syn51468700


Extracting the Olink ID from each file name.

In [5]:
# Add an `OlinkID` column holding the "OID..." token of each file name,
# i.e. the underscore-delimited field that starts with "OID".
# A name that does not match the pattern is returned unchanged by sub().
olinkfilemap <- olinkfilemap |>
  mutate(
    OlinkID = sub(".*_(OID[^_]+)_.*", "\\1", name)
  )
# Preview the first rows with the new column.
head(olinkfilemap)

name,id,OlinkID
<chr>,<chr>,<chr>
A1BG_P04217_OID30771_v1_Inflammation_II.tar,syn52363617,OID30771
AAMDC_Q9H7C9_OID30236_v1_Cardiometabolic_II.tar,syn52362400,OID30236
AARSD1_Q9BTE6_OID21311_v1_Oncology.tar,syn51471562,OID21311
ABCA2_Q9BZC7_OID30146_v1_Cardiometabolic_II.tar,syn52361344,OID30146
ABHD14B_Q96IU4_OID20921_v1_Neurology.tar,syn51470065,OID20921
ABL1_P00519_OID21280_v1_Oncology.tar,syn51468700,OID21280


## Edge table

In [7]:
# Read the table of edges to test from the project's data folder; the
# column-type message of read_tsv() is switched off.
edge_tbf <- file.path(projfold, "data", "edge_table_totest.tsv") |>
  read_tsv(show_col_types = FALSE)
# Preview the first rows.
head(edge_tbf)

node1,node2,atype,CHROM,start,end
<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>
AGRP,CD4,SMRHEIDI,16.0,67516474.0,67517450.0
AGRP,Glucose,SMRHEIDI,16.0,67516474.0,67517450.0
AGRP,TG,SMRHEIDI,16.0,67516474.0,67517450.0
APOM,HDL,SMRHEIDI,6.0,31620193.0,31625987.0
APOM,TG,SMRHEIDI,6.0,31620193.0,31625987.0
BasalISR,GlucoseSens,IVWMR,,,


## Query table

In [17]:
# Build the protein query table for UKB-PPP:
#   1. keep the SMRHEIDI edges and take node1 as the gene symbol; the
#      edge-table coordinates are labelled *_hg19 so they stay distinct
#      from gene_start/gene_end of the Olink map;
#   2. drop duplicated symbol/coordinate rows;
#   3. attach the Olink annotation by gene symbol, then the file-map
#      entry by Olink ID.
# Both joins are inner joins, so symbols without an Olink record or
# without a file-map entry do not appear in the result.
protquery_ukbppp <- edge_tbf |>
  filter(atype == "SMRHEIDI") |>
  select(HGNC.symbol = node1, start_hg19 = start, end_hg19 = end) |>
  distinct() |>
  inner_join(olinkmap, by = "HGNC.symbol") |>
  inner_join(olinkfilemap, by = "OlinkID")
# Preview the first rows.
head(protquery_ukbppp)

HGNC.symbol,start_hg19,end_hg19,UKBPPP_ProteinID,olink_target_fullname,OlinkID,UniProt,Assay,Panel,Panel_Lot_Nr,⋯,ensembl_id,chr,gene_start,gene_end,Strand,Dilution_factor,block,expansion,name,id
<chr>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,⋯,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>
AGRP,67516474,67517450,AGRP:O00253:OID20658:v1,Agouti-related protein,OID20658,O00253,AGRP,Inflammation,B04411,⋯,ENSG00000159723,16,67482571,67483547,-1,1:1,C,no,AGRP_O00253_OID20658_v1_Inflammation.tar,syn51471023
APOM,31620193,31625987,APOM:O95445:OID20374:v1,Apolipoprotein M,OID20374,O95445,APOM,Cardiometabolic,B04413,⋯,ENSG00000204444,6,31652416,31658210,1,1:1000,D,no,APOM_O95445_OID20374_v1_Cardiometabolic.tar,syn51471413
CD4,6896024,6929965,CD4:P01730:OID20584:v1,T-cell surface glycoprotein CD4,OID20584,P01730,CD4,Inflammation,B04411,⋯,ENSG00000010610,12,6786858,6820799,1,1:1,B,no,CD4_P01730_OID20584_v1_Inflammation.tar,syn51470975
CDH5,66400593,66438687,CDH5:P33151:OID20243:v1,Cadherin-5,OID20243,P33151,CDH5,Cardiometabolic,B04413,⋯,ENSG00000179776,16,66366622,66404784,1,1:100,C,no,CDH5_P33151_OID20243_v1_Cardiometabolic.tar,syn51469161
CTRC,15764938,15775737,CTRC:Q99895:OID20752:v1,Chymotrypsin-C,OID20752,Q99895,CTRC,Inflammation,B04411,⋯,ENSG00000162438,1,15438442,15449242,1,1:10,D,no,CTRC_Q99895_OID20752_v1_Inflammation.tar,syn51471283
CTSD,1773982,1785803,CTSD:P07339:OID20358:v1,Cathepsin D,OID20358,P07339,CTSD,Cardiometabolic,B04413,⋯,ENSG00000117984,11,1752752,1764573,-1,1:1000,D,no,CTSD_P07339_OID20358_v1_Cardiometabolic.tar,syn51469587


In [18]:
# List the distinct gene symbols present in the query table.
unique(protquery_ukbppp[["HGNC.symbol"]])

## Saving query table

In [19]:
# Write the query table as a TSV file into the project's data folder.
protquery_ukbppp |>
  write_tsv(file.path(projfold, "data", "protquery_ukbppp.tsv"))