In [None]:
library(lmPerm)
library(psych)
library(car)
library(ggplot2)
library(lme4)
library(mediation)
#install.packages('MatchIt')
library(MatchIt)
library(dplyr)
library(ukbtools)
library(lavaan)
#install.packages('fsbrain')
#library(fsbrain)

In [None]:
# Load the post-QC ADNI participant list and standardise its column names.
ADNI <- read.table(
  "/dagher/dagher11/filip/OBAD/data/missing_listafter_QC.csv",
  sep = ",", header = TRUE
)
colnames(ADNI) <- c(
  "ID", "List", "Available", "Group", "Group2", "Years",
  "Age", "Sex", "Weight", "Height", "BMI"
)
# Keep every column except List.
ADNI <- select(ADNI, ID, Group, Group2, Available, Years, Age, Sex, Weight, Height, BMI)
# Keep CON/AD rows whose Group2 either repeats Group or is empty.
ADNI <- subset(
  ADNI,
  (Group == "CON" & (Group2 == "CON" | Group2 == "")) |
    (Group == "AD" & (Group2 == "AD" | Group2 == ""))
)
# Drop rows with any missing value, then keep rows flagged Available == 1.
ADNI <- na.omit(ADNI)
ADNI <- subset(ADNI, Available == 1)

In [None]:
# Load the UK Biobank table; empty fields are read as NA.
di <- read.table(
  "/dagher/dagher11/filip/PRSOB/data/brain_data_table.csv",
  header = TRUE, sep = ",", quote = "\"", na.strings = ""
)

### Flag self-reported bipolar disorder (type I or type II); all other rows stay NA
di$bipolar_dis <- NA
di$bipolar_dis[di$bipolar_disorder_status_f20122_0_0 %in%
                 c("Bipolar Type I (Mania)", "Bipolar Type II (Hypomania)")] <- 1

# Mental health issues - self-reported
# Each label is searched for as a LITERAL substring (fixed = TRUE). Treated as
# a regular expression, the parentheses in "Obsessive compulsive disorder (OCD)"
# form a group, so that label could never match its own text.
exclusions_mhi <- c(
  'Schizophrenia', 'Depression',
  'Mania, hypomania, bipolar of manic-depression',
  # NOTE(review): UK Biobank data-coding 1401 spells this label with "or";
  # the original "of" spelling above is kept in case this extract differs.
  'Mania, hypomania, bipolar or manic-depression',
  'Bulimia nervosa', 'Anorexia nervosa',
  'Attention deficit or attention deficit and hyperactivity disorder',
  'Panic attacks', 'A personality disorder',
  'Obsessive compulsive disorder (OCD)',
  'Psychological over-eating or binge-eating',
  "Autism, Asperger's or autistic spectrum disorder",
  'Anxiety, nerves or generalized anxiety disorder',
  'Agoraphobia', 'Social anxiety or social phobia',
  'Any other type of psychosis or psychotic illness'
)

# Flag is 1 when any matching column contains any of the labels; otherwise NA.
di$mental_health_issue <- NA
for (i in grep('mental_health_problems_ever_diagnosed_by_a_professional', colnames(di))) {
  for (d in seq_along(exclusions_mhi)) {
    di$mental_health_issue[grepl(exclusions_mhi[d], di[[i]], fixed = TRUE)] <- 1
  }
}


# Self-reported diabetes: flag is 1 when any column whose name contains
# "diabetes_diagnosed_by_doctor" equals "Yes"; otherwise NA.
di$diabetes <- NA
for (col_idx in grep("diabetes_diagnosed_by_doctor", colnames(di))) {
  di$diabetes[di[[col_idx]] == "Yes"] <- 1
}

# Diabetes from recorded diagnoses: flag is 1 when any column whose name
# contains "icd10" has an entry starting with E10-E14; otherwise NA.
exclusions_diabetes <- c("^E10", "^E11", "^E12", "^E13", "^E14")
di$diabetes2 <- NA
for (col_idx in grep("icd10", colnames(di))) {
  # A single alternation regex is equivalent to testing each prefix in turn.
  di$diabetes2[grepl(paste(exclusions_diabetes, collapse = "|"), di[[col_idx]])] <- 1
}

# Self-reported hypertension: flag is 1 when any
# "vascularheart_problems_diagnosed_by_doctor" column equals
# "High blood pressure"; otherwise NA.
di$hypertension <- NA
for (col_idx in grep("vascularheart_problems_diagnosed_by_doctor", colnames(di))) {
  di$hypertension[di[[col_idx]] == "High blood pressure"] <- 1
}


# Diagnosed hypertension: flag is 1 when any column whose name contains
# "icd10" equals the I10 label exactly; otherwise NA.
di$hypertension2 <- NA
for (col_idx in grep("icd10", colnames(di))) {
  di$hypertension2[di[[col_idx]] == "I10 Essential (primary) hypertension"] <- 1
}

# Self-reported heart attack, angina or stroke: flag is 1 when any
# "vascularheart_problems_diagnosed_by_doctor" column contains one of the
# terms; otherwise NA.
exclusions_vd <- c("Angina", "Heart attack", "Stroke")
di$vascular_heart_diagnoses <- NA
for (col_idx in grep("vascularheart_problems_diagnosed_by_doctor", colnames(di))) {
  di$vascular_heart_diagnoses[grepl(paste(exclusions_vd, collapse = "|"), di[[col_idx]])] <- 1
}

# ICD-10 prefixes used for the neurological/medical exclusion flag:
# G - nervous system; F - mental and behavioural; E0 - thyroid; E10 - diabetes;
# E2-3 - endocrine glands; A8 - viral infections of the CNS;
# C70-72 - malignant neoplasms of brain, meninges and spinal cord;
# I6 - cerebrovascular.
# NOTE(review): the earlier comment said "E11-14 - diabetes", but only ^E10 is
# listed here - confirm which diabetes codes are meant to be excluded.
exclusions_neuro <- c(
  "^G", "^F", "^E0", "^E10", "^E2", "^E3", "^A8", "^C70", "^C71", "^C72", "^I6"
)

# Flag is 1 when any column whose name contains "icd10" has an entry starting
# with one of the prefixes; otherwise NA.
di$neurological_disorder <- NA
for (col_idx in grep("icd10", colnames(di))) {
  di$neurological_disorder[grepl(paste(exclusions_neuro, collapse = "|"), di[[col_idx]])] <- 1
}

# Flag underweight participants (BMI below 18.5); otherwise NA.
# NOTE(review): this reads the 21001.0.0 BMI column, while the obese/lean
# grouping elsewhere in this file reads 21001.2.0 - confirm this is intended.
di$underweight <- NA
di$underweight[which(di$body_mass_index_bmi_21001.0.0 < 18.5)] <- 1

# Build the exclusion flag. Only the neurological_disorder criterion is active;
# the other criteria below are deliberately left commented out.
di$excluded <- NA
#di$excluded[di$bipolar_dis ==1] <- 1
#di$excluded[di$mental_health_issue == 1] <- 1
#di$excluded[di$vascular_heart_diagnoses ==1] <- 1
#di$excluded[di$underweight == 1] <- 1
di$excluded[which(di$neurological_disorder == 1)] <- 1

# 1 -> "excluded"; everything else (including NA) -> "included".
di$included <- car::recode(di$excluded, "1='excluded'; else='included'")

# Keep only the included participants.
di_excluded <- di[which(di$included == "included"), ]

ukbb_all <- di_excluded

nrow(ukbb_all)

# Combined 0/1 indicators: 1 when either the self-reported or the
# diagnosis-based flag is set.
ukbb_all$Hypertension <- 0
ukbb_all$Hypertension[ukbb_all$hypertension %in% "1" | ukbb_all$hypertension2 %in% "1"] <- 1
ukbb_all$Diabetes <- 0
ukbb_all$Diabetes[ukbb_all$diabetes %in% "1" | ukbb_all$diabetes2 %in% "1"] <- 1

# Waist-to-hip ratio and squared age at the 21003.2.0 assessment.
ukbb_all$WHR <- ukbb_all$waist_circumference_48.0.0 / ukbb_all$hip_circumference_49.0.0
ukbb_all$agesq <- ukbb_all$age_when_attended_assessment_centre_21003.2.0^2

UKBB <- ukbb_all


## Drop non-imaging participants from UKB

In [None]:
# Keep only rows with a non-missing left caudal anterior cingulate thickness.
UKBB <- UKBB[which(!is.na(UKBB$mean_thickness_of_caudalanteriorcingulate_left_hemisphere_27174.2.0)), ]

In [None]:
# Save the filtered UKBB table (space-separated, quoted strings, with row names).
write.table(
  UKBB,
  file = "/dagher/dagher11/filip/OBAD/data/UKBB_fullsample_excluded.csv",
  sep = " ", quote = TRUE
)

## Create categories in ADNI and UKB:
1. AD Obese
2. AD Lean
3. Controls Obese
4. Controls Lean

1. Obese
2. Lean

In [None]:
set.seed(2010)

# ADNI: label each participant by diagnosis and weight class.
# BMI > 30 -> obese, BMI < 25 -> lean; rows matching neither rule stay NA and
# are dropped below.
# The column is created explicitly rather than by sub-assigning into a column
# that does not exist yet, so its length and type never depend on which rows
# happen to match.
ADNI$Sample <- rep(NA_character_, nrow(ADNI))
ADNI$Sample[ADNI$BMI > 30 & ADNI$Group == "AD"] <- "ADOB"
ADNI$Sample[ADNI$BMI < 25 & ADNI$Group == "AD"] <- "ADLE"
ADNI$Sample[ADNI$BMI > 30 & ADNI$Group == "CON"] <- "CONOB"
ADNI$Sample[ADNI$BMI < 25 & ADNI$Group == "CON"] <- "CONLE"
ADNI <- ADNI[!is.na(ADNI$Sample), ]
ADNI$Sex <- as.factor(ADNI$Sex)

# UKBB: obese / lean labels from the 21001.2.0 BMI column.
UKBB$Sample <- rep(NA_character_, nrow(UKBB))
UKBB$Sample[UKBB$body_mass_index_bmi_21001.2.0 > 30] <- "OB"
UKBB$Sample[UKBB$body_mass_index_bmi_21001.2.0 < 25] <- "LE"

# Reduce both cohorts to the same five columns so they can be row-bound.
ADNI_AD <- select(ADNI, ID, Age, Sex, BMI, Sample)

UKBB_AD <- select(
  UKBB, eid, age_when_attended_assessment_centre_21003.2.0, sex_31.0.0,
  body_mass_index_bmi_21001.2.0, Sample
)
colnames(UKBB_AD) <- c("ID", "Age", "Sex", "BMI", "Sample")
# NOTE(review): UKBB Sex is recoded to 0/1 here while ADNI Sex is turned into a
# factor above - confirm both cohorts use the same Sex coding before they are
# combined with rbind() for matching.
UKBB_AD$Sex <- car::recode(UKBB_AD$Sex, "'Female'=0; 'Male'=1")
UKBB_AD <- UKBB_AD[!is.na(UKBB_AD$Sample), ]

## Match participants within ADNI:
1. ADOB with CONOB
2. ADLE with CONLE

In [None]:
# Columns carried forward from each matched set. match.data() appends its own
# bookkeeping columns after the input columns; selecting by name instead of
# dropping positions 6:9 keeps the result independent of how many it adds.
matched_cols <- c("ID", "Age", "Sex", "BMI", "Sample")

# 1. Nearest-neighbour match of ADOB to CONOB on BMI, age and sex.
Match <- ADNI_AD
Match <- Match[Match$Sample == "ADOB" | Match$Sample == "CONOB", ]
Match$Log <- Match$Sample == "ADOB"
matched_groups <- matchit(Log ~ BMI + Age + Sex, data = Match, method = "nearest")
summary(matched_groups, standardize = TRUE)
matched_ad <- match.data(matched_groups)
matched_adob <- matched_ad[matched_cols]

# 2. Nearest-neighbour match of ADLE to CONLE on BMI, age and sex.
Match <- ADNI_AD
Match <- Match[Match$Sample == "ADLE" | Match$Sample == "CONLE", ]
Match <- na.omit(Match)
Match$Log <- Match$Sample == "CONLE" ## Because there is more AD than CON
matched_groups <- matchit(Log ~ BMI + Age + Sex, data = Match, method = "nearest")
summary(matched_groups, standardize = TRUE)
matched_ad <- match.data(matched_groups)
matched_adle <- matched_ad[matched_cols]

# Match UKB OB and LE participants with ADNI ADOB and ADLE participants

### DF for comparisons with ADOB
1. OB with ADOB
2. LE with ADOB (without BMI)

### DF for comparisons with ADLE
1. OB with ADLE (without BMI)
2. LE with ADLE

In [None]:
# --- UKB matched to the ADNI obese (ADOB/CONOB) set --------------------------

# UKB obese vs ADOB: match on BMI, age and sex.
Match <- rbind(UKBB_AD, matched_adob)
Match <- Match[Match$Sample == "ADOB" | Match$Sample == "OB", ]
Match$Log <- Match$Sample == "ADOB"
matched_groups <- matchit(Log ~ BMI + Age + Sex, data = Match, method = "nearest", ratio = 1) #20
summary(matched_groups, standardize = TRUE)
matched_ad <- match.data(matched_groups)
matched_adob_ob <- matched_ad

# UKB lean vs ADOB: match on age and sex only (BMI differs by design).
Match <- rbind(UKBB_AD, matched_adob)
Match <- Match[Match$Sample == "ADOB" | Match$Sample == "LE", ]
Match$Log <- Match$Sample == "ADOB"
matched_groups <- matchit(Log ~ Age + Sex, data = Match, method = "nearest", ratio = 1) #20
summary(matched_groups, standardize = TRUE)
matched_ad <- match.data(matched_groups)
matched_adob_le <- matched_ad

# Keep only the UKB rows from both matches.
UKBB_matched_adob <- rbind(
  matched_adob_ob[matched_adob_ob$Sample == "OB", ],
  matched_adob_le[matched_adob_le$Sample == "LE", ]
)


# --- UKB matched to the ADNI lean (ADLE/CONLE) set ---------------------------

# UKB lean vs ADLE: match on BMI, age and sex.
Match <- rbind(UKBB_AD, matched_adle)
Match <- Match[Match$Sample == "ADLE" | Match$Sample == "LE", ]
Match$Log <- Match$Sample == "ADLE"
matched_groups <- matchit(Log ~ BMI + Age + Sex, data = Match, method = "nearest", ratio = 1) #23
summary(matched_groups, standardize = TRUE)
matched_ad <- match.data(matched_groups)
matched_adle_le <- matched_ad

# UKB obese vs ADLE: match on age and sex only.
Match <- rbind(UKBB_AD, matched_adle)
Match <- Match[Match$Sample == "ADLE" | Match$Sample == "OB", ]
Match$Log <- Match$Sample == "ADLE"
matched_groups <- matchit(Log ~ Age + Sex, data = Match, method = "nearest", ratio = 1) #23
summary(matched_groups, standardize = TRUE)
matched_ad <- match.data(matched_groups)
matched_adle_ob <- matched_ad


# Keep only the UKB rows from both matches.
UKBB_matched_adle <- rbind(
  matched_adle_ob[matched_adle_ob$Sample == "OB", ],
  matched_adle_le[matched_adle_le$Sample == "LE", ]
)

## Match full AD sample, without differentiating between OB and LE

In [None]:
# Reload the post-QC ADNI list from disk, discarding the Sample-based
# filtering applied to ADNI earlier in the file.
ADNI <- read.table(
  "/dagher/dagher11/filip/OBAD/data/missing_listafter_QC.csv",
  sep = ",", header = TRUE
)
colnames(ADNI) <- c(
  "ID", "List", "Available", "Group", "Group2", "Years",
  "Age", "Sex", "Weight", "Height", "BMI"
)
# Keep every column except List.
ADNI <- select(ADNI, ID, Group, Group2, Available, Years, Age, Sex, Weight, Height, BMI)
# Keep CON/AD rows whose Group2 either repeats Group or is empty.
ADNI <- subset(
  ADNI,
  (Group == "CON" & (Group2 == "CON" | Group2 == "")) |
    (Group == "AD" & (Group2 == "AD" | Group2 == ""))
)
# Drop rows with any missing value, then keep rows flagged Available == 1.
ADNI <- na.omit(ADNI)
ADNI <- subset(ADNI, Available == 1)
nrow(ADNI)

In [None]:
# Match all AD participants to controls on BMI, age and sex, ignoring the
# obese/lean split; Sample simply mirrors Group here.
Match <- ADNI
Match$Sample <- Match$Group
Match <- Match[Match$Group == "AD" | Match$Group == "CON", ]
Match <- na.omit(Match)
Match$Log <- Match$Group == "AD"
matched_groups <- matchit(Log ~ BMI + Age + Sex, data = Match, method = "nearest")
summary(matched_groups, standardize = TRUE)
matched_ad <- match.data(matched_groups)
# Keep the same five columns as the other matched ADNI tables.
matched_ad_full <- select(matched_ad, ID, Age, Sex, BMI, Sample)

## Save IDs

In [None]:
# Stack the three matched ADNI tables; a participant can appear more than once.
All_IDs <- rbind(matched_ad_full, matched_adle, matched_adob)

In [None]:
# Keep the first occurrence of each participant, then only the ID column.
IDs <- select(All_IDs[!duplicated(All_IDs$ID), ], ID)

In [None]:
# Save the unique ADNI IDs (comma-separated, with row names).
write.table(
  IDs,
  file = "/dagher/dagher11/filip/OBAD/data/IDs_afterQC.csv",
  sep = ","
)

## Match UKB participants to full AD sample - both obese and lean but without matching for BMI this time

In [None]:
# UKB obese vs the full AD group: match on age and sex only.
# Note: matched_adob_ob / matched_adob_le are reused as result names here and
# overwrite the tables of the same name from the ADOB subgroup matching.
Match <- rbind(UKBB_AD, matched_ad_full)
Match <- Match[Match$Sample == "AD" | Match$Sample == "OB", ]
Match$Log <- Match$Sample == "AD"
matched_groups <- matchit(Log ~ Age + Sex, data = Match, method = "nearest", ratio = 1) # 8
summary(matched_groups, standardize = TRUE)
matched_ad <- match.data(matched_groups)
matched_adob_ob <- matched_ad

# UKB lean vs the full control group: match on age and sex only.
Match <- rbind(UKBB_AD, matched_ad_full)
Match <- Match[Match$Sample == "CON" | Match$Sample == "LE", ]
Match$Log <- Match$Sample == "CON"
matched_groups <- matchit(Log ~ Age + Sex, data = Match, method = "nearest", ratio = 1) # 9
summary(matched_groups, standardize = TRUE)
matched_ad <- match.data(matched_groups)
matched_adob_le <- matched_ad

# Keep only the UKB rows from both matches.
UKBB_matched_full <- rbind(
  matched_adob_ob[matched_adob_ob$Sample == "OB", ],
  matched_adob_le[matched_adob_le$Sample == "LE", ]
)

# 6 dataframes:
* matched_adob
* matched_adle
* matched_ad_full
* UKBB_matched_adob
* UKBB_matched_adle
* UKBB_matched_full

In [None]:
# Save the six matched tables: default (space) separator, header row,
# no row names, unquoted strings.
write.table(matched_adob, file = "/dagher/dagher11/filip/OBAD/data/matched_adob_QC.csv",
            row.names = FALSE, col.names = TRUE, quote = FALSE)
write.table(matched_adle, file = "/dagher/dagher11/filip/OBAD/data/matched_adle_QC.csv",
            row.names = FALSE, col.names = TRUE, quote = FALSE)
write.table(matched_ad_full, file = "/dagher/dagher11/filip/OBAD/data/matched_ad_full_QC.csv",
            row.names = FALSE, col.names = TRUE, quote = FALSE)
write.table(UKBB_matched_adob, file = "/dagher/dagher11/filip/OBAD/data/UKBB_matched_adob_QC.csv",
            row.names = FALSE, col.names = TRUE, quote = FALSE)
write.table(UKBB_matched_adle, file = "/dagher/dagher11/filip/OBAD/data/UKBB_matched_adle_QC.csv",
            row.names = FALSE, col.names = TRUE, quote = FALSE)
write.table(UKBB_matched_full, file = "/dagher/dagher11/filip/OBAD/data/UKBB_matched_full_QC.csv",
            row.names = FALSE, col.names = TRUE, quote = FALSE)

## Get ethnicity table for the full sample

### UKBB

In [None]:
# Read back the matched UKB full-sample table (this replaces the earlier IDs object).
IDs <- read.table(
  "/dagher/dagher11/filip/OBAD/data/UKBB_matched_full_QC.csv",
  header = TRUE
)

In [None]:
# Attach the UKBB columns to every matched participant (all rows of IDs are kept).
Ethn <- merge(UKBB, IDs, by.x = "eid", by.y = "ID", all.y = TRUE)

In [None]:
# Two-column table: constant cohort label plus the ethnic background field.
Ethn_UKB <- data.frame(
  Cohort = "UKB",
  Ethnic_background = Ethn$ethnic_background_21000.0.0
)

In [None]:
# List the distinct ethnic background values present in the matched sample.
unique(x = Ethn$ethnic_background_21000.0.0)

In [None]:
# Collapse the three white categories into "White" and map
# "Prefer not to answer" to "Unknown"; all other values are left unchanged.
Ethn_UKB$Ethnic_background <- dplyr::recode(
  Ethn_UKB$Ethnic_background,
  "British" = "White",
  "Any other white background" = "White",
  "Irish" = "White",
  "Prefer not to answer" = "Unknown"
)

### ADNI

In [None]:
# Matched ADNI full sample, as saved earlier in this file.
IDs_ADNI <- read.table(
  "/dagher/dagher11/filip/OBAD/data/matched_ad_full_QC.csv",
  header = TRUE
)
# ADNI demographics table.
Dem_ADNI <- read.table(
  "/dagher/dagher11/filip/Downloads/PTDEMOG.csv",
  sep = ",", header = TRUE
)
# Lookup from patient_id (renamed ID) to rid (renamed RID).
ADNI_dict <- read.table(
  "/dagher/dagher11/filip/OBAD/data/adni_characteristics.csv",
  header = TRUE, sep = ","
)
ADNI_dict <- select(ADNI_dict, patient_id, rid)
colnames(ADNI_dict) <- c("ID", "RID")

In [None]:
# Add the RID to each matched participant; participants without one are kept.
IDs_ADNI <- merge(IDs_ADNI, ADNI_dict, by = "ID", all.x = TRUE)

In [None]:
# Join the demographics by RID, then keep the first row for each RID.
# NOTE(review): rows with a missing RID all count as duplicates of each other,
# so at most one of them survives - confirm every matched ID has an RID.
ADNI_ethn <- merge(IDs_ADNI, Dem_ADNI, by = "RID", all.x = TRUE)
ADNI_ethn <- ADNI_ethn[!duplicated(ADNI_ethn$RID), ]


In [None]:
# Two-column table: constant cohort label plus the PTRACCAT field.
Ethn_ADNI <- data.frame(
  Cohort = "ADNI",
  Ethnic_background = ADNI_ethn$PTRACCAT
)

In [None]:
# Translate the PTRACCAT codes into text labels; 7 and -4 both become "Unknown".
Ethn_ADNI$Ethnic_background <- dplyr::recode(
  Ethn_ADNI$Ethnic_background,
  "5" = "White",
  "1" = "American Indian/Alaskan",
  "4" = "African",
  "2" = "Asian",
  "3" = "Hawaiian/Pacific Islander",
  "7" = "Unknown",
  "6" = "More than 1",
  "-4" = "Unknown"
)

### Compare ethnicities between full samples

In [None]:
# Stack the ADNI and UKB ethnicity tables (ADNI rows first).
Ethnicities_bothsamples <- rbind(Ethn_ADNI, Ethn_UKB)


In [None]:
# Chi-squared test of association between cohort and ethnic background.
with(
  Ethnicities_bothsamples,
  chisq.test(x = Cohort, y = Ethnic_background)
)