# Building null model with GMMAT

Author: Jose Jaime Martinez-Magana

Day: 29 March 2023

This notebook builds the null model for height with GMMAT (`glmmkin`) and then tests the Tractor-Mix score test on chromosome 22.

In [None]:
# move to the working directory; the GRM/PCA .rds is read from here and the
# null models are written here (relative paths are used further down)
cd /vast/palmer/scratch/montalvo-ortiz/jjm262/genomics/yalepenn/tractor_mix/pca_grm

# request computational resources: an interactive bash shell (--pty) with
# 32G of memory on the "interactive" partition
srun --pty --mem=32G -p interactive bash

# load the miniconda module
module load miniconda

# activate the tractor_mix conda environment
conda activate tractor_mix

# start an R session; the remaining lines of this cell are R code
R

# attach GMMAT (provides glmmkin, used below to fit the null model)
library("GMMAT")

# phenotype table, read from CSV
pheno <- read.csv(file = "/vast/palmer/scratch/montalvo-ortiz/jjm262/genomics/yalepenn/tractor_mix/phenofile/tractor_mix_pheno_ht.csv")

# RDS object holding both the PCA results and the GRM; it is accessed later
# in this cell as grm$PCair$vectors and grm$grm_sparse
grm <- readRDS(file = "GWGO_GWCIDR.1kg_phase3_v5.chrmerged.dose.rsids.filtered.nodup.phased.shapeit.gds_prunned_grm_pca_v03292023.rds")

# fitting null model
# example from tractor_mix tutorial -- TEMPLATE ONLY, kept commented out.
# Pheno, Covariates, YourDf and GRM are placeholders that are not defined in
# this session, so evaluating these calls would stop with an error.
#
# continuous:
# Model_Null=glmmkin(fixed=Pheno ~  Covariates,
#                    data=YourDf, id = "ID", kins=GRM,
#                    family=gaussian(link = "identity"))
#
# dichotomous:
# Model_Null=glmmkin(fixed=Pheno ~  Covariates,
#                    data=YourDf, id = "ID", kins=GRM,
#                    family=binomial(link = "logit"))

# fitting null model for height
# take the first 10 columns of grm$PCair$vectors as covariates PC1..PC10
pcs=as.data.frame(grm$PCair$vectors[,1:10])
colnames(pcs)=paste0("PC",1:10)
# add sample ID (taken from the row names of the PC matrix)
pcs$SampleID=rownames(pcs)

# remove FID from pheno file: the greedy pattern strips everything up to and
# including the LAST underscore of FID_IID
# NOTE(review): this assumes the IID part itself contains no underscore -- confirm
pheno$SampleID=gsub(".*_","",pheno$FID_IID)

# merging phenotypes and PCs; merge() keeps only SampleIDs present in both tables
pheno_pcs=merge(pheno,pcs, by=c("SampleID"))

# verify the order of the matrix
pheno_pcs$SampleID

# position of every phenotype row inside the GRM (NA = sample not in the GRM)
grm_pos=match(pheno_pcs$SampleID, rownames(grm$grm_sparse))
in_grm=!is.na(grm_pos)
if (!all(in_grm)) {
  warning(sum(!in_grm), " sample(s) in pheno_pcs are not in the GRM and were dropped")
}

# extract ordering index: rows of pheno_pcs sorted by their position in the GRM.
# Indexing pheno_pcs directly with match(pheno_pcs$SampleID, rownames(GRM))
# would apply the inverse permutation and would create NA rows / lose samples
# whenever the GRM holds more samples than the phenotype table.
reorde_idx=which(in_grm)[order(grm_pos[in_grm])]

# reorder pheno based on index
pheno_pcs_or=pheno_pcs[reorde_idx,]

# fitting null model with ordered phenofile
# (linear mixed model: inverse-normalised height ~ age + sex + 10 PCs, with the
# sparse GRM as the relatedness matrix)
model_null=glmmkin(fixed = hgt_totalm_inv ~ age + sex + PC1 + PC2 + PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10,
                  data=pheno_pcs_or, id="SampleID",kins=grm$grm_sparse,
                  family=gaussian(link = "identity"))


# save linear null model 1 (sample ids are SampleID)
saveRDS(model_null, file="GWGO_GWCIDR.1kg_phase3_v5.chrmerged.dose.rsids.filtered.nodup.phased.shapeit.gds_prunned_ht.null.model.rds")

# modify null model to adjust SampleID for FID_IID
model_null_fid=model_null

# adjust sampleID to FID_IID.
# id_include lists the ids glmmkin actually kept, in the order of the data it
# was fitted on (pheno_pcs_or, minus any rows with missing values). Assigning
# pheno_pcs$FID_IID positionally can therefore attach the wrong FID_IID to a
# sample (different row order and/or different length), so each id is
# translated through a SampleID -> FID_IID lookup instead.
fid_lookup=match(model_null$id_include, pheno_pcs$SampleID)
stopifnot(!anyNA(fid_lookup))
model_null_fid$id_include=pheno_pcs$FID_IID[fid_lookup]

# save linear null model 2 (sample ids are FID_IID)
saveRDS(model_null_fid, file="GWGO_GWCIDR.1kg_phase3_v5.chrmerged.dose.rsids.filtered.nodup.phased.shapeit.gds_prunned_ht_fid.null.model.rds")

# exit R
q()

In [None]:
# testing tractor mix with the haplotype-count (hapcount) files
# (shell command: move to the Tractor-Mix scripts directory)
cd /vast/palmer/scratch/montalvo-ortiz/jjm262/genomics/yalepenn/tractor_mix/scripts/tractor_mix/

# upload tractor mix (R script; TractorMix.score is called below)
source("/vast/palmer/scratch/montalvo-ortiz/jjm262/genomics/yalepenn/tractor_mix/scripts/tractor_mix/TractorMix.score_updated_v03282023.R")

# path to local ancestry tracts
lpath="/vast/palmer/scratch/montalvo-ortiz/jjm262/genomics/yalepenn/tractor_mix/databases/local_ancestry/local_ancestry_tracks"

# set file name (chr22, one hapcount file per ancestry)
f_anc0="GWGO_GWCIDR.1kg_phase3_v5.chr22.dose.rsids.filtered.nodup.phased.shapeit.localancestrytracks.anc0.hapcount.txt"
f_anc1="GWGO_GWCIDR.1kg_phase3_v5.chr22.dose.rsids.filtered.nodup.phased.shapeit.localancestrytracks.anc1.hapcount.txt"

# create file paths files
ipf_anc0=paste0(lpath,"/",f_anc0)
ipf_anc1=paste0(lpath,"/",f_anc1)

# load the null model (the version whose ids are FID_IID)
Model_Null=readRDS("/vast/palmer/scratch/montalvo-ortiz/jjm262/genomics/yalepenn/tractor_mix/pca_grm/GWGO_GWCIDR.1kg_phase3_v5.chrmerged.dose.rsids.filtered.nodup.phased.shapeit.gds_prunned_ht_fid.null.model.rds")

# set output file
# uses a hapcount-specific name: the dosage test further down writes to
# ..._heigth_chr22.tsv, and sharing that name would let one run overwrite the other
outfile="/vast/palmer/scratch/montalvo-ortiz/jjm262/genomics/yalepenn/tractor_mix/tractor_mix/height/yalepenn2_admixed_tractormix_heigth_chr22.hapcount.tsv"

# testing tractor mix
TractorMix.score(obj = Model_Null,
                 infiles = c(ipf_anc0,ipf_anc1),
                 outfiles = outfile)

In [None]:
# testing with ancestry dosages in Grace - HPC
# move to the Tractor-Mix scripts directory
cd /vast/palmer/scratch/montalvo-ortiz/jjm262/genomics/yalepenn/tractor_mix/scripts/tractor_mix/

# request computational resources: an interactive bash shell (--pty) with
# 32G of memory on the "interactive" partition
srun --pty --mem=32G -p interactive bash

# load the miniconda module
module load miniconda

# activate the tractor_mix conda environment
conda activate tractor_mix

# open R; the remaining lines of this cell are R code
R

# read in the Tractor-Mix R script (presumably defines TractorMix.score, called below)
source("/vast/palmer/scratch/montalvo-ortiz/jjm262/genomics/yalepenn/tractor_mix/scripts/tractor_mix/TractorMix.score_updated_v03282023.R")

# directory with the local ancestry tracts
lpath <- "/vast/palmer/scratch/montalvo-ortiz/jjm262/genomics/yalepenn/tractor_mix/databases/local_ancestry/local_ancestry_tracks"

# chr22 dosage files, one per ancestry
f_anc0 <- "GWGO_GWCIDR.1kg_phase3_v5.chr22.dose.rsids.filtered.nodup.phased.shapeit.localancestrytracks.anc0.dosage.txt"
f_anc1 <- "GWGO_GWCIDR.1kg_phase3_v5.chr22.dose.rsids.filtered.nodup.phased.shapeit.localancestrytracks.anc1.dosage.txt"

# full paths to the two input files
ipf_anc0 <- file.path(lpath, f_anc0)
ipf_anc1 <- file.path(lpath, f_anc1)

# null model previously saved with FID_IID sample ids
Model_Null <- readRDS("/vast/palmer/scratch/montalvo-ortiz/jjm262/genomics/yalepenn/tractor_mix/pca_grm/GWGO_GWCIDR.1kg_phase3_v5.chrmerged.dose.rsids.filtered.nodup.phased.shapeit.gds_prunned_ht_fid.null.model.rds")

# where the association results are written
outfile <- "/vast/palmer/scratch/montalvo-ortiz/jjm262/genomics/yalepenn/tractor_mix/tractor_mix/height/yalepenn2_admixed_tractormix_heigth_chr22.tsv"

# run the Tractor-Mix score test on both ancestry files
TractorMix.score(
  obj = Model_Null,
  infiles = c(ipf_anc0, ipf_anc1),
  outfiles = outfile
)


# the call above stops with the following error:
# Error in solve.default(VarScore) :
#  Lapack routine dgesv: system is exactly singular: U[2,2] = 0
# NOTE(review): solve() is being given a singular VarScore matrix. Things to
# check: variants where one ancestry's dosage column is constant, and whether
# Model_Null$id_include lines up with the sample ids/order of the input
# files -- TODO confirm