# 2. Exploration

In [1]:
try(library(tidyverse), silent=TRUE)
library(lubridate)
library(glue)
library(cowplot)
library(survminer)
library(survival)
library(ggsci)
library(arsenal)
library(yaml)

“package ‘tidyverse’ was built under R version 4.0.3”
── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──

✔ ggplot2 3.3.5     ✔ purrr   0.3.4
✔ tibble  3.1.6     ✔ dplyr   1.0.7
✔ tidyr   1.1.4     ✔ stringr 1.4.0
✔ readr   1.4.0     ✔ forcats 0.5.1

“package ‘ggplot2’ was built under R version 4.0.5”
“package ‘readr’ was built under R version 4.0.5”
“package ‘purrr’ was built under R version 4.0.3”
“package ‘dplyr’ was built under R version 4.0.5”
“package ‘stringr’ was built under R version 4.0.5”
“package ‘forcats’ was built under R version 4.0.3”
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()

In [2]:
# Pick the storage root from the host name: a node whose name contains "sc"
# gets the first root, any other machine gets the second one.
base_path <- if (grepl("sc", Sys.info()[["nodename"]], fixed = TRUE)) {
  "/sc-projects/sc-proj-ukb-cvd"
} else {
  "/data/analysis/ag-reils/ag-reils-shared/cardioRS"
}
print(base_path)

# Input dataset locations, all derived from base_path.
dataset_name <- "210714_metabolomics"
path <- "/data/analysis/ag-reils/steinfej/code/umbrella/pre/ukbb"
data_path <- glue("{base_path}/data")
dataset_path <- glue("{data_path}/3_datasets_post/{dataset_name}")

# Project result locations (figures and derived data).
project_label <- "21_metabolomics_multitask"
project_path <- glue("{base_path}/results/projects/{project_label}")
figures_path <- glue("{project_path}/figures")
data_results_path <- glue("{project_path}/data")

# Output directory for this notebook's figure.
figure_path <- glue("{figures_path}/Figure1")

[1] "/sc-projects/sc-proj-ukb-cvd"


## Load data

In [3]:
# Read the merged dataset and keep only the rows flagged with NMR_FLAG.
data <- glue("{dataset_path}/data_merged.feather") %>%
  arrow::read_feather() %>%
  filter(NMR_FLAG == TRUE)

# Read the per-column description table that accompanies the dataset.
data_description <- glue("{dataset_path}/description.feather") %>%
  arrow::read_feather()

In [4]:
# Participant ids to exclude from the analysis.
# NOTE(review): "XXX" looks like a redacted placeholder; confirm the real ids
# are supplied before running.
eids_withdraws <- "XXX"

In [5]:
# Remove the excluded participants, then set erectile_dysfunction to FALSE on
# rows where sex is "Female"; every other row keeps its recorded value.
data <- data %>%
  filter(!(eid %in% eids_withdraws)) %>%
  mutate(
    erectile_dysfunction = case_when(
      sex == "Female" ~ FALSE,
      TRUE ~ erectile_dysfunction
    )
  )
data

eid,age_at_recruitment,sex,ethnic_background,townsend_deprivation_index_at_recruitment,date_of_attending_assessment_centre,uk_biobank_assessment_centre,birth_date,overall_health_rating,smoking_status,⋯,death_cvd_comp_event,death_cvd_comp_event_time,SCORE_comp_event,SCORE_comp_event_time,ASCVD_comp_event,ASCVD_comp_event_time,QRISK3_comp_event,QRISK3_comp_event_time,MACE_comp_event,MACE_comp_event_time
<int>,<dbl>,<fct>,<fct>,<dbl>,<date>,<chr>,<date>,<ord>,<ord>,⋯,<int>,<dbl>,<int>,<dbl>,<int>,<dbl>,<int>,<dbl>,<int>,<dbl>
1000084,43,Male,White,7.566100,2007-10-18,Sheffield,1964-10-18,Fair,Never,⋯,0,13.42916,0,13.42916,0,13.429158,0,13.429158,0,13.429158
1000107,57,Female,White,-5.399390,2008-08-19,Sheffield,1951-08-19,Excellent,Never,⋯,0,12.59138,0,12.59138,0,12.591376,0,12.591376,0,12.591376
1000128,50,Female,White,2.531930,2010-02-11,Sheffield,1960-02-11,Good,Previous,⋯,0,11.11020,0,11.11020,0,11.110198,0,11.110198,0,11.110198
1000135,46,Female,White,-3.258670,2009-04-03,Sheffield,1963-04-03,Excellent,Never,⋯,0,11.96988,0,11.96988,0,11.969884,0,11.969884,0,11.969884
1000161,42,Male,White,2.551810,2008-10-11,Sheffield,1966-10-11,Fair,Current,⋯,0,12.44627,0,12.44627,0,12.446270,0,12.446270,0,12.446270
1000233,65,Female,White,-5.824130,2008-08-05,Sheffield,1943-08-05,Fair,Never,⋯,0,12.62971,0,12.62971,1,3.671458,1,3.671458,1,3.671458
1000257,50,Male,White,-2.758960,2009-01-10,Sheffield,1959-01-10,Excellent,Never,⋯,0,12.19713,0,12.19713,0,12.197125,0,12.197125,0,12.197125
1000409,55,Female,White,-4.607670,2008-12-12,Sheffield,1953-12-12,Good,Never,⋯,0,12.27652,0,12.27652,0,12.276523,0,12.276523,0,12.276523
1000438,47,Female,White,-2.936650,2009-04-18,Sheffield,1962-04-18,Good,Never,⋯,0,11.92882,0,11.92882,0,11.928816,0,11.928816,0,11.928816
1000532,60,Male,White,-0.158504,2009-01-31,Sheffield,1949-01-31,Fair,Never,⋯,0,12.13963,0,12.13963,0,12.139630,0,12.139630,0,12.139630


In [7]:
# Column-name groups read from the dataset description table.
# NOTE(review): `[-1]` discards the first entry of `covariates` and `targets`;
# presumably that entry is an identifier column. Confirm against
# description.feather.

# Non-target columns that are not derived from polygenic scores.
covariates <- data_description %>%
  filter(isTarget == FALSE, based_on != "PGS") %>%
  pull(covariate)
covariates <- covariates[-1]

# Target (outcome) columns.
targets <- data_description %>%
  filter(isTarget == TRUE) %>%
  pull(covariate)
targets <- targets[-1]

# Non-target columns based on "PGS", excluding columns of dtype "Date".
pgs <- data_description %>%
  filter(isTarget == FALSE, based_on == "PGS", dtype != "Date") %>%
  pull(covariate)

In [8]:
# Make sure the categorical covariates are factors. `across()` replaces the
# superseded `mutate_at()`.
data <- data %>%
  mutate(
    across(
      all_of(c("sex", "overall_health_rating", "smoking_status", "ethnic_background")),
      as.factor
    )
  )

# Put the listed levels first, in this order, for each factor.
data <- data %>%
  mutate(
    sex = fct_relevel(sex, c("Male", "Female")),
    overall_health_rating = fct_relevel(
      overall_health_rating,
      c("Excellent", "Good", "Fair", "Poor")
    ),
    smoking_status = fct_relevel(smoking_status, c("Current", "Previous", "Never"))
  )

In [9]:
# Feature groups: a named list mapping each group label to the column names
# that belong to it. `metabolomics` holds the 168 NMR_* column names.
f <- list(
  basics = c(
    "age_at_recruitment", "sex", "ethnic_background",
    "townsend_deprivation_index_at_recruitment"
  ),
  questionnaire = c("overall_health_rating", "smoking_status"),
  measurements = c(
    "body_mass_index_bmi", "weight", "standing_height",
    "systolic_blood_pressure", "diastolic_blood_pressure"
  ),
  labs = c("cholesterol", "hdl_cholesterol", "ldl_direct", "triglycerides"),
  family_history = c("fh_heart_disease"),
  diagnoses = c(
    "diabetes1", "diabetes2", "chronic_kidney_disease", "atrial_fibrillation",
    "migraine", "rheumatoid_arthritis", "systemic_lupus_erythematosus",
    "severe_mental_illness", "erectile_dysfunction"
  ),
  medications = c(
    "antihypertensives", "ass", "atypical_antipsychotics", "glucocorticoids"
  ),
  metabolomics = c(
    "NMR_3hydroxybutyrate",
    "NMR_acetate",
    "NMR_acetoacetate",
    "NMR_acetone",
    "NMR_alanine",
    "NMR_albumin",
    "NMR_apolipoprotein_a1",
    "NMR_apolipoprotein_b",
    "NMR_average_diameter_for_hdl_particles",
    "NMR_average_diameter_for_ldl_particles",
    "NMR_average_diameter_for_vldl_particles",
    "NMR_cholesterol_in_chylomicrons_and_extremely_large_vldl",
    "NMR_cholesterol_in_idl",
    "NMR_cholesterol_in_large_hdl",
    "NMR_cholesterol_in_large_ldl",
    "NMR_cholesterol_in_large_vldl",
    "NMR_cholesterol_in_medium_hdl",
    "NMR_cholesterol_in_medium_ldl",
    "NMR_cholesterol_in_medium_vldl",
    "NMR_cholesterol_in_small_hdl",
    "NMR_cholesterol_in_small_ldl",
    "NMR_cholesterol_in_small_vldl",
    "NMR_cholesterol_in_very_large_hdl",
    "NMR_cholesterol_in_very_large_vldl",
    "NMR_cholesterol_in_very_small_vldl",
    "NMR_cholesteryl_esters_in_chylomicrons_and_extremely_large_vldl",
    "NMR_cholesteryl_esters_in_hdl",
    "NMR_cholesteryl_esters_in_idl",
    "NMR_cholesteryl_esters_in_ldl",
    "NMR_cholesteryl_esters_in_large_hdl",
    "NMR_cholesteryl_esters_in_large_ldl",
    "NMR_cholesteryl_esters_in_large_vldl",
    "NMR_cholesteryl_esters_in_medium_hdl",
    "NMR_cholesteryl_esters_in_medium_ldl",
    "NMR_cholesteryl_esters_in_medium_vldl",
    "NMR_cholesteryl_esters_in_small_hdl",
    "NMR_cholesteryl_esters_in_small_ldl",
    "NMR_cholesteryl_esters_in_small_vldl",
    "NMR_cholesteryl_esters_in_vldl",
    "NMR_cholesteryl_esters_in_very_large_hdl",
    "NMR_cholesteryl_esters_in_very_large_vldl",
    "NMR_cholesteryl_esters_in_very_small_vldl",
    "NMR_citrate",
    "NMR_clinical_ldl_cholesterol",
    "NMR_concentration_of_chylomicrons_and_extremely_large_vldl_particles",
    "NMR_concentration_of_hdl_particles",
    "NMR_concentration_of_idl_particles",
    "NMR_concentration_of_ldl_particles",
    "NMR_concentration_of_large_hdl_particles",
    "NMR_concentration_of_large_ldl_particles",
    "NMR_concentration_of_large_vldl_particles",
    "NMR_concentration_of_medium_hdl_particles",
    "NMR_concentration_of_medium_ldl_particles",
    "NMR_concentration_of_medium_vldl_particles",
    "NMR_concentration_of_small_hdl_particles",
    "NMR_concentration_of_small_ldl_particles",
    "NMR_concentration_of_small_vldl_particles",
    "NMR_concentration_of_vldl_particles",
    "NMR_concentration_of_very_large_hdl_particles",
    "NMR_concentration_of_very_large_vldl_particles",
    "NMR_concentration_of_very_small_vldl_particles",
    "NMR_creatinine",
    "NMR_degree_of_unsaturation",
    "NMR_docosahexaenoic_acid",
    "NMR_free_cholesterol_in_chylomicrons_and_extremely_large_vldl",
    "NMR_free_cholesterol_in_hdl",
    "NMR_free_cholesterol_in_idl",
    "NMR_free_cholesterol_in_ldl",
    "NMR_free_cholesterol_in_large_hdl",
    "NMR_free_cholesterol_in_large_ldl",
    "NMR_free_cholesterol_in_large_vldl",
    "NMR_free_cholesterol_in_medium_hdl",
    "NMR_free_cholesterol_in_medium_ldl",
    "NMR_free_cholesterol_in_medium_vldl",
    "NMR_free_cholesterol_in_small_hdl",
    "NMR_free_cholesterol_in_small_ldl",
    "NMR_free_cholesterol_in_small_vldl",
    "NMR_free_cholesterol_in_vldl",
    "NMR_free_cholesterol_in_very_large_hdl",
    "NMR_free_cholesterol_in_very_large_vldl",
    "NMR_free_cholesterol_in_very_small_vldl",
    "NMR_glucose",
    "NMR_glutamine",
    "NMR_glycine",
    "NMR_glycoprotein_acetyls",
    "NMR_hdl_cholesterol",
    "NMR_histidine",
    "NMR_isoleucine",
    "NMR_ldl_cholesterol",
    "NMR_lactate",
    "NMR_leucine",
    "NMR_linoleic_acid",
    "NMR_monounsaturated_fatty_acids",
    "NMR_omega3_fatty_acids",
    "NMR_omega6_fatty_acids",
    "NMR_phenylalanine",
    "NMR_phosphatidylcholines",
    "NMR_phosphoglycerides",
    "NMR_phospholipids_in_chylomicrons_and_extremely_large_vldl",
    "NMR_phospholipids_in_hdl",
    "NMR_phospholipids_in_idl",
    "NMR_phospholipids_in_ldl",
    "NMR_phospholipids_in_large_hdl",
    "NMR_phospholipids_in_large_ldl",
    "NMR_phospholipids_in_large_vldl",
    "NMR_phospholipids_in_medium_hdl",
    "NMR_phospholipids_in_medium_ldl",
    "NMR_phospholipids_in_medium_vldl",
    "NMR_phospholipids_in_small_hdl",
    "NMR_phospholipids_in_small_ldl",
    "NMR_phospholipids_in_small_vldl",
    "NMR_phospholipids_in_vldl",
    "NMR_phospholipids_in_very_large_hdl",
    "NMR_phospholipids_in_very_large_vldl",
    "NMR_phospholipids_in_very_small_vldl",
    "NMR_polyunsaturated_fatty_acids",
    "NMR_pyruvate",
    "NMR_remnant_cholesterol_nonhdl_nonldl_cholesterol",
    "NMR_saturated_fatty_acids",
    "NMR_sphingomyelins",
    "NMR_total_cholesterol",
    "NMR_total_cholesterol_minus_hdlc",
    "NMR_total_cholines",
    "NMR_total_concentration_of_branchedchain_amino_acids_leucine_isoleucine_valine",
    "NMR_total_concentration_of_lipoprotein_particles",
    "NMR_total_esterified_cholesterol",
    "NMR_total_fatty_acids",
    "NMR_total_free_cholesterol",
    "NMR_total_lipids_in_chylomicrons_and_extremely_large_vldl",
    "NMR_total_lipids_in_hdl",
    "NMR_total_lipids_in_idl",
    "NMR_total_lipids_in_ldl",
    "NMR_total_lipids_in_large_hdl",
    "NMR_total_lipids_in_large_ldl",
    "NMR_total_lipids_in_large_vldl",
    "NMR_total_lipids_in_lipoprotein_particles",
    "NMR_total_lipids_in_medium_hdl",
    "NMR_total_lipids_in_medium_ldl",
    "NMR_total_lipids_in_medium_vldl",
    "NMR_total_lipids_in_small_hdl",
    "NMR_total_lipids_in_small_ldl",
    "NMR_total_lipids_in_small_vldl",
    "NMR_total_lipids_in_vldl",
    "NMR_total_lipids_in_very_large_hdl",
    "NMR_total_lipids_in_very_large_vldl",
    "NMR_total_lipids_in_very_small_vldl",
    "NMR_total_phospholipids_in_lipoprotein_particles",
    "NMR_total_triglycerides",
    "NMR_triglycerides_in_chylomicrons_and_extremely_large_vldl",
    "NMR_triglycerides_in_hdl",
    "NMR_triglycerides_in_idl",
    "NMR_triglycerides_in_ldl",
    "NMR_triglycerides_in_large_hdl",
    "NMR_triglycerides_in_large_ldl",
    "NMR_triglycerides_in_large_vldl",
    "NMR_triglycerides_in_medium_hdl",
    "NMR_triglycerides_in_medium_ldl",
    "NMR_triglycerides_in_medium_vldl",
    "NMR_triglycerides_in_small_hdl",
    "NMR_triglycerides_in_small_ldl",
    "NMR_triglycerides_in_small_vldl",
    "NMR_triglycerides_in_vldl",
    "NMR_triglycerides_in_very_large_hdl",
    "NMR_triglycerides_in_very_large_vldl",
    "NMR_triglycerides_in_very_small_vldl",
    "NMR_tyrosine",
    "NMR_vldl_cholesterol",
    "NMR_valine"
  )
)

In [55]:
# Sanity check: number of entries in the predictor label map. PANEL_map is
# assigned in a later cell of this notebook, so the lookup is guarded to keep a
# top-to-bottom run from stopping with "object 'PANEL_map' not found".
if (exists("PANEL_map")) length(PANEL_map)

In [10]:
library(ggforestplot)

# Biomarker metadata with an extra join key `metabolite`: the lower-cased
# description with every space replaced by an underscore.
ng_names <- df_NG_biomarker_metadata %>%
  mutate(
    metabolite = description %>%
      tolower() %>%
      str_replace_all(" ", "_")
  )

# Show ten random rows as a quick visual check.
ng_names %>% sample_n(10)

abbreviation,machine_readable_name,name,description,alternative_names,group,subgroup,unit,metabolite
<chr>,<chr>,<chr>,<chr>,<list>,<chr>,<chr>,<chr>,<chr>
Total BCAA,Total_BCAA,Total BCAA,Total concentration of branched-chain amino acids (leucine + isoleucine + valine),"Total BCAA, Total_BCAA",Amino acids,Branched-chain amino acids,deprecated,total_concentration_of_branched-chain_amino_acids_(leucine_+_isoleucine_+_valine)
L-VLDL-C %,L_VLDL_C_pct,L-VLDL-C %,Cholesterol to total lipids ratio in large VLDL,"L-VLDL-C_% , L-VLDL-C % , L_VLDL_C_pct",Relative lipoprotein lipid concentrations,Large VLDL ratios,deprecated,cholesterol_to_total_lipids_ratio_in_large_vldl
M-VLDL-L,M_VLDL_L,M-VLDL-L,Total lipids in medium VLDL,"M-VLDL-L, M_VLDL_L",Lipoprotein subclasses,Medium VLDL,deprecated,total_lipids_in_medium_vldl
IDL-C,IDL_C,IDL-C,Cholesterol in IDL,"IDL-C, IDL_C",Lipoprotein subclasses,IDL,deprecated,cholesterol_in_idl
IDL-TG,IDL_TG,IDL-TG,Triglycerides in IDL,"IDL-TG, IDL_TG",Lipoprotein subclasses,IDL,deprecated,triglycerides_in_idl
VLDL size,VLDL_size,VLDL particle size,Average diameter for VLDL particles,"VLDL-D , VLDL_D , VLDL size, VLDL_size",Lipoprotein particle sizes,Lipoprotein particle sizes,deprecated,average_diameter_for_vldl_particles
LDL size,LDL_size,LDL particle size,Average diameter for LDL particles,"LDL-D , LDL_D , LDL size, LDL_size",Lipoprotein particle sizes,Lipoprotein particle sizes,deprecated,average_diameter_for_ldl_particles
S-LDL-TG %,S_LDL_TG_pct,S-LDL-TG %,Triglycerides to total lipids ratio in small LDL,"S-LDL-TG_% , S-LDL-TG % , S_LDL_TG_pct",Relative lipoprotein lipid concentrations,Small LDL ratios,deprecated,triglycerides_to_total_lipids_ratio_in_small_ldl
S-VLDL-PL %,S_VLDL_PL_pct,S-VLDL-PL %,Phospholipids to total lipids ratio in small VLDL,"S-VLDL-PL_% , S-VLDL-PL % , S_VLDL_PL_pct",Relative lipoprotein lipid concentrations,Small VLDL ratios,deprecated,phospholipids_to_total_lipids_ratio_in_small_vldl
TG/PG,TG_by_PG,TG/PG,Ratio of triglycerides to phosphoglycerides,"TGtoPG , TG/PG , TG_by_PG",Other lipids,Other lipids,deprecated,ratio_of_triglycerides_to_phosphoglycerides


In [49]:
library(fuzzyjoin)

# Take the rows of `matched` that still have no metadata (missing `name`) and
# join them against ng_names again, accepting keys within string distance
# `max_dist`. Returns one row per (metabolite, candidate match) pair.
rematch_unmatched <- function(matched, max_dist) {
  matched %>%
    filter(is.na(name)) %>%
    select(metabolite) %>%
    stringdist_left_join(ng_names, by = "metabolite", max_dist = max_dist) %>%
    rename(metabolite = metabolite.x) %>%
    select(-metabolite.y) %>%
    distinct()
}

# Pass 1: exact join on the metabolite key. Only the leading "NMR_" prefix is
# stripped, so an "NMR_" substring elsewhere in a name would be left intact.
mets1 <- tibble(metabolite = str_remove(f$metabolomics, "^NMR_")) %>%
  distinct() %>%
  left_join(ng_names, by = "metabolite")

# Pass 2 and 3: fuzzy joins for whatever is still unmatched.
# NOTE(review): max_dist = 8 is permissive and can attach a wrong or more than
# one metadata row to a metabolite; verify the resulting matches.
mets2 <- rematch_unmatched(mets1, max_dist = 1)
mets3 <- rematch_unmatched(mets2, max_dist = 8)

# Combine the matched rows of passes 1 and 2 with everything from pass 3.
mets <- bind_rows(
  mets1 %>% filter(!is.na(name)),
  mets2 %>% filter(!is.na(name)),
  mets3
) %>%
  arrange(group, subgroup, description)

# Display order of the metabolite subgroups.
subgroup_order <- c(
  "Amino acids",
  "Branched-chain amino acids",
  "Aromatic amino acids",
  "Fluid balance",
  "Inflammation",
  "Fatty acids",
  "Glycolysis related metabolites",
  "Ketone bodies",

  "Total lipids",
  "Cholesterol",
  "Free cholesterol",
  "Cholesteryl esters",
  "Phospholipids",
  "Triglycerides",
  "Other lipids",

  "Lipoprotein particle sizes",
  "Lipoprotein particle concentrations",
  "Chylomicrons and extremely large VLDL",
  "Very large VLDL",
  "Large VLDL",
  "Medium VLDL",
  "Small VLDL",
  "Very small VLDL",

  "Large LDL",
  "Medium LDL",
  "Small LDL",
  "IDL",
  "Very large HDL",
  "Large HDL",
  "Medium HDL",
  "Small HDL",
  "Apolipoproteins"
)

# Subgroups that are not listed in subgroup_order become NA in the factor.
mets <- mets %>%
  mutate(subgroup = factor(subgroup, levels = subgroup_order)) %>%
  arrange(subgroup, abbreviation)
mets %>% head()

# Dataset column names (with the "NMR_" prefix restored), named by the
# machine-readable biomarker name.
metabolites <- paste0("NMR_", mets$metabolite)
names(metabolites) <- mets$machine_readable_name

metabolite,abbreviation,machine_readable_name,name,description,alternative_names,group,subgroup,unit
<chr>,<chr>,<chr>,<chr>,<chr>,<list>,<chr>,<fct>,<chr>
alanine,Ala,Ala,Alanine,Alanine,Ala,Amino acids,Amino acids,deprecated
glutamine,Gln,Gln,Glutamine,Glutamine,Gln,Amino acids,Amino acids,deprecated
glycine,Gly,Gly,Glycine,Glycine,Gly,Amino acids,Amino acids,deprecated
histidine,His,His,Histidine,Histidine,His,Amino acids,Amino acids,deprecated
isoleucine,Ile,Ile,Isoleucine,Isoleucine,Ile,Amino acids,Branched-chain amino acids,deprecated
leucine,Leu,Leu,Leucine,Leucine,Leu,Amino acids,Branched-chain amino acids,deprecated


In [56]:
# Clinical predictor panel: 39 column names, in the order used for the rows of
# the correlation matrix.
PANEL <- c(
  # basics
  "age_at_recruitment",
  "sex",
  # "ethnic_background", # added
  "education_years",
  "smoking_status", # current smoker
  "alcohol_intake_frequency", # 'Daily or almost daily'
  "daily_physical_activity",
  "daily_healthy_food",

  # family history
  "fh_diabetes",

  # diagnoses
  "diabetes2",

  # physical
  "weight",
  "standing_height",
  "body_mass_index_bmi",
  "waist_hip_ratio",
  "waist_circumference",
  "systolic_blood_pressure",

  # lipids
  "cholesterol",
  "ldl_direct",
  "hdl_cholesterol",
  "triglycerides",

  # diabetes
  "glucose",
  "glycated_haemoglobin_hba1c",

  # kidney
  "creatinine",
  "cystatin_c",
  "urea",
  "urate",

  # liver
  "aspartate_aminotransferase",
  "alanine_aminotransferase",
  "alkaline_phosphatase",
  "albumin",

  # inflammation
  "creactive_protein",

  # blood counts
  "red_blood_cell_erythrocyte_count",
  "white_blood_cell_leukocyte_count",
  "platelet_count",
  "haemoglobin_concentration",
  "haematocrit_percentage",
  "mean_corpuscular_volume",
  "mean_corpuscular_haemoglobin",
  "mean_corpuscular_haemoglobin_concentration",

  # medications
  "antihypertensives"
)

# Display labels for the clinical predictors: names are dataset column names,
# values are the labels written to the exported tables.
# The two mean-corpuscular-haemoglobin labels are spelled "Haemoglobin" and the
# concentration variable is labelled as such, so the two are distinguishable.
PANEL_map <- c(
  age_at_recruitment = "Age at Recruitment",
  sex = "Biological Sex",
  smoking_status = "Current Smoker",
  alcohol_intake_frequency = "Daily Alcohol Intake",
  daily_physical_activity = "Daily Moderate to Vigorous Physical Activity",
  education_years = "Education years",
  daily_healthy_food = "Daily Healthy Food",
  fh_diabetes = "Family History Diabetes",
  diabetes2 = "Type 2 Diabetes",
  body_mass_index_bmi = "BMI",
  waist_hip_ratio = "Waist-Hip-Ratio",
  waist_circumference = "Waist Circumference",
  weight = "Weight (kg)",
  standing_height = "Standing Height (cm)",
  systolic_blood_pressure = "Systolic Blood Pressure (mmHg)",
  cholesterol = "Total Cholesterol (mmol/L)",
  hdl_cholesterol = "HDL Cholesterol (mmol/L)",
  ldl_direct = "LDL Cholesterol (mmol/L)",
  triglycerides = "Triglycerides (mmol/L)",
  glucose = "Glucose (mmol/L)",
  glycated_haemoglobin_hba1c = "Glycated Hemoglobin (%)",
  creatinine = "Creatinine (umol/L)",
  cystatin_c = "Cystatin C (mg/L)",
  urea = "Urea (mmol/L)",
  urate = "Urate (umol/L)",
  aspartate_aminotransferase = "Aspartate Aminotransferase (U/L)",
  alanine_aminotransferase = "Alanine Aminotransferase (U/L)",
  alkaline_phosphatase = "Alkaline Phosphatase (U/L)",
  albumin = "Albumin (g/L)",
  creactive_protein = "C-Reactive Protein (mg/L)",
  red_blood_cell_erythrocyte_count = "Erythrocytes (10^12 cells/L)",
  white_blood_cell_leukocyte_count = "Leucocytes (10^9 cells/L)",
  platelet_count = "Platelets (10^9 cells/L)",
  haemoglobin_concentration = "Haemoglobin (g/dL)",
  haematocrit_percentage = "Haematocrit (%)",
  mean_corpuscular_volume = "Mean Corpuscular Volume",
  mean_corpuscular_haemoglobin = "Mean Corpuscular Haemoglobin (pg)",
  mean_corpuscular_haemoglobin_concentration = "Mean Corpuscular Haemoglobin Concentration (g/dL)",
  antihypertensives = "Antihypertensives"
)

In [29]:
# Read the additional covariate table from the project data directory.
extra_data <- arrow::read_feather(
  glue("{data_results_path}/extra_data_211015.feather")
)

# Attach it to `data` on `eid` and keep only the clinical panel and the
# metabolomics columns.
corr_data <- data %>%
  left_join(extra_data, by = "eid") %>%
  select(all_of(PANEL), all_of(f$metabolomics))

In [30]:
# Read the combined modelling dataset from the project data directory.
data_all <- arrow::read_feather(
  glue("{data_results_path}/data_all_COX_211007_metabolomics.feather")
)

In [31]:
# Restrict to rows of the "test" split and to the clinical panel plus
# metabolomics columns.
data_all_corr <- data_all %>%
  filter(split == "test") %>%
  select(all_of(PANEL), all_of(f$metabolomics))

In [32]:
# Pairwise Pearson correlations (the default method of cor()) between all
# selected columns.
# NOTE(review): cor() needs numeric columns and returns NA for pairs with
# missing values under its defaults; presumably data_all is already encoded
# and imputed. Confirm.
corr <- cor(data_all_corr, method = "pearson")

In [33]:
library(ggcorrplot)

In [93]:
# Cross-block of the correlation matrix: clinical predictors in the rows,
# metabolites in the columns.
temp <- corr[PANEL, metabolites]

# Relabel the columns from dataset column name to machine-readable biomarker name.
colnames(temp) <- colnames(temp) %>%
  recode(!!!setNames(names(metabolites), metabolites))

# Relabel the rows with the display labels of the clinical predictors.
rownames(temp) <- rownames(temp) %>%
  recode(!!!PANEL_map)

In [97]:
library(gt)

# Write the transposed matrix (one row per metabolite, one column per clinical
# predictor) as the supplementary correlation table.
plot_name <- "Suppl_Table_NMRPANEL_CORRMATRIX"
temp %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column("metabolite") %>%
  write_csv(glue("/home/steinfej/code/21_metabolomics_analysis/Round1/Figures/outputs/{plot_name}.csv"))