In [1]:
#-------------------------- arguments ---------------------------#
options(stringsAsFactors=F)
library(glmnet)
library(foreach)
library(genio)
library(Rcpp)


# testing
if (T) {
    args <- c("1", "3")
} 
#args = commandArgs(trailingOnly = TRUE)
chr = as.numeric(args[1])
gene_idx = as.numeric(args[2])
    

#-------------------------- data import ---------------------------#  


genotype_dir <- "/ysm-gpfs/project/wl382/GTEx_v8/genotype/cis_loc/"
gtex_dir <- "/gpfs/loomis/project/zhao/zy92/GTEX/" 
glist = dir(paste0(gtex_dir, "adjusted_expr/chr", chr))
chr_str <- paste0("chr", chr, "/")
gene_vec <- list.files(paste0(genotype_dir, chr_str))
gene_id <- gene_vec[gene_idx]
g <- glist[gene_idx]
dose_path = paste0(genotype_dir, chr_str, gene_id, "/", gene_id)
Yt = dir(paste0(gtex_dir, "adjusted_expr/", chr_str, g, "/"))
P = length(Yt)
output_dir = "/gpfs/loomis/project/zhao/zy92/GTEX/output/" 
ntune <- 5



#-------------------------- functions ---------------------------#  

grad_prep <- function(X, Y){
	## pre-calculate some metrics for gradient
	ll = length(Y)
	P = ncol(X[[1]])
	XY = matrix(0,P,ll)
	for(i in 1:ll){
		XY[,i] = t(X[[i]])%*%Y[[i]]/nrow(X[[i]])
	}
	XY
}

cv_helper <- function(N, fold){
	valid_num = floor(N/fold)
	set.seed(123)
	perm = sample(1:N, size = N)
	idx1 = seq(1,N,valid_num)
	idx2 = c(idx1[-1]-1,N)
	list(perm=perm, idx=cbind(idx1,idx2))
}

minmax_lambda <- function(lst){
	max_lam = max(unlist(lapply(lst, function(x){max(x$lambda)})))
	min_lam = min(unlist(lapply(lst, function(x){min(x$lambda)})))
	c(min_lam, max_lam)
}

elastic_net_mse <- function(lst, X_tune, Y_tune, X_test, Y_test){
	P = length(lst)
	M = ncol(X_tune[[1]])
	lam_V = rep(0, P)
	test_res = list()
	test_beta = matrix(0, M, P)
	for(t in 1:P){
		ncv = length(lst[[t]]$lambda)
		tmp_mse = rep(0, ncv)
		for(k in 1:ncv){
			tmp_mse[k] = mean((Y_tune[[t]] - X_tune[[t]]%*%lst[[t]]$glmnet.fit$beta[,k])^2)
		}
		ss = which.min(tmp_mse)
		test_beta[,t] = lst[[t]]$glmnet.fit$beta[,ss]
		lam_V[t] = lst[[t]]$lambda[ss]
		predicted = X_test[[t]]%*%lst[[t]]$glmnet.fit$beta[,ss]
		test_res[[t]] = cbind(Y_test[[t]], predicted)
	}
	list(lam = lam_V, mse = test_res, est = test_beta)
}

multi_mse <- function(theta_est, X_test, Y_test){
	answer = list()
	P = ncol(theta_est)
	for(t in 1:P){
		predicted = X_test[[t]]%*%theta_est[,t]
		answer[[t]] = cbind(Y_test[[t]], predicted)
	}
	answer
}

avg_perm <- function(mse_lst){
	fd = length(mse_lst)
	P = length(mse_lst[[1]])
	rsq = mse = adj_mse = matrix(0, fd, P)
	for(f in 1:fd){
		for(t in 1:P){
			rsq[f,t] = (cor(mse_lst[[f]][[t]])[1,2])^2
			mse[f,t] = mean((mse_lst[[f]][[t]][,1]-mse_lst[[f]][[t]][,2])^2)
			adj_mse[f,t] = mse[f,t]/var(mse_lst[[f]][[t]][,1])
		}
	}
	cbind(apply(rsq, 2, mean), apply(mse, 2, mean), apply(adj_mse, 2, mean))

	#list(rsq = apply(rsq, 2, mean), mse = apply(mse, 2, mean), adj_mse = apply(adj_mse, 2, mean))
}

glasso <- function(X, Y, X1, Y1, XX, XY, Xnorm, lambda1, lambda2, theta, stepsize = 1e-4, maxiter = 10, eps = 1e-4){
	bgt = Sys.time()
	M = nrow(XY)
	P = length(X)
	NN = unlist(lapply(X, nrow))
	old_objV1 = 0
	for(t in 1:P){
		old_objV1 = old_objV1 + 1/2*mean((Y[[t]]-X[[t]]%*%theta[,t])^2)
	}
	cat(paste0("Training error: ", old_objV1, '\n'))
	old_objV2 = 0
	for(t in 1:P){
		old_objV2 = old_objV2 + 1/2*mean((Y1[[t]]-X1[[t]]%*%theta[,t])^2)
	}
	cat(paste0("Testing error: ", old_objV2, '\n'))
	beta_j_lasso = rep(0, P)
	tmp_XYj = 0
	if(!is.loaded("wrapper")){
		dyn.load("/ysm-gpfs/pi/zhao/from_louise/yh367/GTEX/codes/glasso.so")
	}
    
	for(i in 1:maxiter){
		bgt = Sys.time()
		res = .Call("wrapper", XX, XY, theta, M, P, beta_j_lasso, lambda1, lambda2, Xnorm)
		edt = Sys.time()
		#print(edt-bgt)
		new_objV1 = new_objV2 = 0
		for(t in 1:P){
			new_objV1 = new_objV1 + 1/2*mean((Y[[t]]-X[[t]]%*%theta[,t])^2)
            #IRdisplay::display_html(sum(is.na(X[[t]])))
            #IRdisplay::display_html(sum(is.na(Y[[t]])))
		}
		cat(paste0("Training error: ", new_objV1, '\n'))
		for(t in 1:P){
			new_objV2 = new_objV2 + 1/2*mean((Y1[[t]]-X1[[t]]%*%theta[,t])^2)
           # IRdisplay::display_html(sum(is.na(X[[t]])))
            #IRdisplay::display_html(sum(is.na(Y[[t]])))
		}
		cat(paste0("Testing error: ", new_objV2, '\n'))
		if(new_objV2 > old_objV2|new_objV1 > old_objV1){
			break
		}else{
			old_objV2 = new_objV2
		}
		if(abs(new_objV1-old_objV1) < eps){
			break
		}else{
			old_objV1 = new_objV1
		}
	}
	#edt = Sys.time()
	#print(edt-bgt)
	list(est = theta, tune_err = new_objV2)
}

glasso_no_early_stopping <- function(X, Y, XX, XY, Xnorm, lambda1, lambda2, theta, stepsize = 1e-4, maxiter = 10, eps = 1e-3){
	M = nrow(XY)
	P = length(X)
	NN = unlist(lapply(X, nrow))
	old_objV1 = 0
	for(t in 1:P){
		old_objV1 = old_objV1 + 1/2*mean((Y[[t]]-X[[t]]%*%theta[,t])^2)
	}
	cat(paste0("Training error: ", old_objV1, '\n'))
	beta_j_lasso = rep(0, P)
	tmp_XYj = 0
	if(!is.loaded("wrapper")){
		dyn.load("/ysm-gpfs/pi/zhao/from_louise/yh367/GTEX/codes/glasso.so")
	}
	for(i in 1:maxiter){
		res = .Call("wrapper", XX, XY, theta, M, P, beta_j_lasso, lambda1, lambda2, Xnorm)
		new_objV1 = 0
		for(t in 1:P){
			new_objV1 = new_objV1 + 1/2*mean((Y[[t]]-X[[t]]%*%theta[,t])^2)
		}
		cat(paste0("Training error: ", new_objV1, '\n'))
		if(abs(new_objV1-old_objV1) < eps|new_objV1 > old_objV1){
			break
		}else{
			old_objV1 = new_objV1
		}
	}
	list(est = theta, train_err = new_objV1)
}

extract_genotype <- function(gene_idx, chr_str, genotype_dir, gene_vec) {
    gene_dir <- paste0(genotype_dir, chr_str, gene_vec[gene_idx], "/", gene_vec[gene_idx])
    genotype_info <- read_plink(gene_dir)
    
    print(paste0("INFO: genotype matrix dimension:", dim(genotype_info$X)[1], " * ", dim(genotype_info$X)[2]))
    return(genotype_info)
}


#-------------------------- main ---------------------------#  




# create dirs
dir.create(paste0(output_dir, "chr", chr), showWarnings = FALSE)
dir.create(paste0(output_dir, "chr", chr, "/", gene_id), showWarnings = FALSE)
setwd(paste0(output_dir, "chr", chr, "/", gene_id))

# expr files 
cat("INFO: loading expression files...")
Y = list()
T_idx = c()
for(t in 1:P){
    tmp_exp = read.table(paste0(gtex_dir, "adjusted_expr/chr", chr, "/", g, "/", Yt[t]), header=F)
    # check if the y is constant
    if (!all(tmp_exp[,2] == tmp_exp[1,2])) {
        T_idx = c(T_idx, t)
        Y[[length(T_idx)]] = tmp_exp 
    }
     
}
Yt = Yt[T_idx]
ssize = unlist(lapply(Y, nrow))
T_num = length(Yt)

# genotype files 
cat("INFO: loading genotype files...")	
genotype_info <- read_plink(dose_path)
dose = genotype_info$X
dose_std = dose

# center the dose
for(j in 1:nrow(dose)){
    dose_std[j, is.na(dose[j, ])] <- mean(dose[j, ], na.rm = T)
    dose_std[j, ] <- dose_std[j, ] - mean(dose[j, ], na.rm = T)
}

# covariance matrix 
n_sample = ncol(dose_std)
tmp = t(as.matrix(dose_std))
XX = t(tmp)%*%as.matrix(tmp)/n_sample
Xnorm = diag(XX)
remove(tmp)
remove(XX)
sub_id = colnames(dose_std)

# number of snps
M = nrow(dose_std)
sub_id_map = list()
sub_id_map_exp = list()

# sample matching 
for(t in 1:T_num){
    #tmp = rep(0, nrow(Y[[t]]))
    exp_id <- as.character(sapply(Y[[t]][,1], function(x) substr(x, 1, 10)))
    # index of ids that have matched genotypes 
    # exp_id based on order of exp_id
    match_id <- match(exp_id, sub_id)
    sub_id_map[[t]] <- na.omit(match_id)
    sub_id_map_exp[[t]] <- !is.na(match_id)  
}

# cv                                   
fold = 5
print("INFO: CV preparation")
cv_config = cv_helper(n_sample, fold)
cv_perm = cv_config$perm
cv_idx = cv_config$idx

single_res_test = list()
single_lam = matrix(0,5,T_num)
single_theta_est = list()

multi_res_test = list()
multi_lam = rep(0,5)
multi_theta_est = list()  
                                  
# loading fast matrix operations 
#sourceCpp("/gpfs/project/zhao/zy92/utmost_update/CTIMP/MatrixMtp.cpp") # call the C++ file and we have three functions as armaMatMult，eigenMatMult，eigenMapMatMult.
sourceCpp("/gpfs/loomis/project/zhao/zy92/utmost_update/CTIMP/MatrixMtp.cpp") # call the C++ file and we have three functions as armaMatMult，eigenMatMult，eigenMapMatMult.


                                  
                                  
for(f in 1:fold){
#for(f in 1:1){
    bgt = Sys.time()
    cat(paste0("INFO: fold ", f))
    test_index = cv_perm[cv_idx[f,1]:cv_idx[f,2]]
    test_id = sub_id[test_index]

    # move the tuning idx to another idx block
    tuning_index = cv_perm[cv_idx[f%%fold+1,1]:cv_idx[f%%fold+1,2]]
    tuning_id = sub_id[tuning_index]

    # idx list
    X_test = list()
    Y_test = list()
    X_tune = list()
    Y_tune = list()
    X_train = list()
    Y_train = list()
    for(t in 1:T_num){
        # idx matching  
        id_filter = !(sub_id_map[[t]]%in%c(test_index,tuning_index))
        X_train_tmp = sub_id_map[[t]][id_filter]
        Y_train_tmp = which(sub_id_map_exp[[t]] == T)[id_filter]
        tuning_id_filter = sub_id_map[[t]]%in%tuning_index
        X_tuning_tmp = sub_id_map[[t]][tuning_id_filter]
        Y_tuning_tmp = which(sub_id_map_exp[[t]] == T)[tuning_id_filter]
        testing_id_filter = sub_id_map[[t]]%in%test_index
        X_test_tmp = sub_id_map[[t]][testing_id_filter]
        Y_test_tmp = which(sub_id_map_exp[[t]] == T)[testing_id_filter]
        # training data
        X_train[[t]] = apply(as.matrix(dose_std[,X_train_tmp]),1,as.numeric)
        Y_train[[t]] = Y[[t]][Y_train_tmp, 2]
        X_tune[[t]] = apply(as.matrix(dose_std[,X_tuning_tmp]),1,as.numeric)
        Y_tune[[t]] = Y[[t]][Y_tuning_tmp, 2]
        X_test[[t]] = apply(as.matrix(dose_std[,X_test_tmp]),1,as.numeric)
        Y_test[[t]] = Y[[t]][Y_test_tmp, 2]
        #IRdisplay::display_html(length(X_train_tmp))
        #IRdisplay::display_html(length(Y_train_tmp))
        #IRdisplay::display_html(dim(X_train[[t]]))
        #IRdisplay::display_html(length(Y_train[[t]]))
    }

    ## model training ##	
    ## train elastic net and used average lambda as tuning parameters ##
    single_initial_est = matrix(0, ncol(X_train[[1]]), T_num)
    single_summary = list()
    for(t in 1:T_num) {
        #print(Y_train[[t]])
        #print(sum(is.na(X_train[[t]])))
        if (t %% 5 == 0) {
            cat(paste0("INFO: glmnet cv tissue", t))
        }
        
        
        tt = cv.glmnet(X_train[[t]], Y_train[[t]], alpha = 0.5, nfolds = 5)
        single_summary[[t]] = tt
        single_initial_est[,t] = tt$glmnet.fit$beta[,which.min(tt$cvm)]
    }
    ## performance of Elastic net on tuning and testing data with various tuning parameters
    els_output = elastic_net_mse(single_summary, X_tune, Y_tune, X_test, Y_test)
    single_res_test[[f]] = els_output$mse
    single_lam[f,] = els_output$lam
    single_theta_est[[f]] = els_output$est
    remove(els_output)

    initial_numeric = as.numeric(single_initial_est)
    lam_range = minmax_lambda(single_summary)
    lam_V = seq(lam_range[1], lam_range[2], length.out = ntune)
    #remove(single_summary)
    #remove(single_initial_est)

    ## preparation
    XY = grad_prep(X_train, Y_train)
    #bgt = Sys.time()
    XX_train = lapply(X_train, function(x) { eigenMatMult(t(x), x)/nrow(x)})
    #edt = Sys.time()
    #print(edt-bgt)
    
    res_tune = rep(0, ntune)
    best.lam = 0

    #	bgt = Sys.time()
    for(lam in 1:ntune){
        single_est = matrix(initial_numeric, M, T_num)
        ans = glasso(X=X_train, Y=Y_train, X1=X_tune, Y1=Y_tune, 
                     XX=XX_train, XY=XY, Xnorm=Xnorm, lambda1=lam_V[lam], 
                     lambda2=lam_V[lam], theta=single_est)
        res_tune[lam] = ans$tune_err
        remove(single_est)
        remove(ans)
    }
    #		edt = Sys.time()
    #		print(edt-bgt)

    best.lam = lam_V[which.min(res_tune)]
    single_est = matrix(initial_numeric, M, T_num)
    ans = glasso(X=X_train, Y=Y_train, X1=X_tune, Y1=Y_tune, XX=XX_train, XY=XY, Xnorm=Xnorm, lambda1=best.lam, lambda2=best.lam, theta=single_est)
    multi_res_test[[f]] = multi_mse(ans$est, X_test, Y_test)
    multi_lam[f] = best.lam
    multi_theta_est[[f]] = ans$est
    remove(single_est)
    remove(ans)
    edt = Sys.time()
    print(edt-bgt)
} # end of cv loop
                                  

   

Loading required package: Matrix

Loading required package: foreach

Loaded glmnet 2.0-13




INFO: loading expression files...INFO: loading genotype files...

Reading: /ysm-gpfs/project/wl382/GTEx_v8/genotype/cis_loc/chr1/ENSG00000000938.12/ENSG00000000938.12.bim

Reading: /ysm-gpfs/project/wl382/GTEx_v8/genotype/cis_loc/chr1/ENSG00000000938.12/ENSG00000000938.12.fam

Reading: /ysm-gpfs/project/wl382/GTEx_v8/genotype/cis_loc/chr1/ENSG00000000938.12/ENSG00000000938.12.bed



[1] "INFO: CV preparation"
INFO: fold 1INFO: glmnet cv tissue5INFO: glmnet cv tissue10INFO: glmnet cv tissue15INFO: glmnet cv tissue20INFO: glmnet cv tissue25INFO: glmnet cv tissue30INFO: glmnet cv tissue35INFO: glmnet cv tissue40INFO: glmnet cv tissue45Training error: 1087.53335130054
Testing error: 1306.43982820191
Training error: 1.41513320960738e+33
Testing error: 5.8752219711302e+36
Training error: 1087.53335130054
Testing error: 1306.43982820191
Training error: 1164.86032445502
Testing error: 1289.23082764931
Training error: 1087.53335130054
Testing error: 1306.43982820191
Training error: 1164.86032445502
Testing error: 1289.23082764931
Training error: 1087.53335130054
Testing error: 1306.43982820191
Training error: 1164.86032445502
Testing error: 1289.23082764931
Training error: 1087.53335130054
Testing error: 1306.43982820191
Training error: 1164.86032445502
Testing error: 1289.23082764931
Training error: 1087.53335130054
Testing error: 1306.43982820191
Training error: 1164.860

In [3]:
save.image(file='/gpfs/loomis/project/zhao/zy92/GTEX/output/chr1_ENSG00000000938_env.RData')

In [5]:
getwd()

In [6]:
single_res_test[[1]]

0,1
-1.6327540,-0.7974449
1.3571760,-3.3052240
-0.6940936,0.2286623
-0.1427320,0.2286623
1.9456888,-0.2843913
4.2640251,-0.7974449
7.0567544,0.2286623
4.1287534,-0.2843913
-7.8604529,0.2286623
-1.6007280,0.2286623

0,1
-6.37907695,0.21778999
-1.66584680,0.21778999
0.95509100,-0.30739594
-1.10647471,0.23913805
1.69923015,-2.76964592
2.21115538,0.21778999
2.95726879,0.21778999
3.50819950,0.21778999
15.25603057,0.23913805
-2.64415666,0.21778999

0,1
-1.2114246,-1.08243614
3.5365579,4.54057771
-3.1040617,5.93438769
13.6307821,-1.52081288
0.5384715,-4.93061076
2.3635335,-7.1426743
-0.8743729,-3.00799051
2.5535317,-0.40001848
-2.1903009,0.90325824
-0.356053,7.52417152

0,1
-0.9841025,0
4.8577213,0
-3.6880785,0
-7.4559768,0
-2.3348258,0
-3.2406298,0
5.1872155,0
4.1931957,0
-2.2890839,0
10.6835457,0

0,1
6.5120268,-1.0581558
1.5347455,-1.1216306
6.6422287,0.2257501
2.548226,0.2257501
13.2812711,3.6088361
9.1747441,0.2284001
-5.4736861,0.2257501
7.7079293,-0.592428
13.1492903,-0.592428
-12.09869,-1.1216306

0,1
4.8055097,0.19301167
5.1167541,0.19301167
-9.8776818,0.15516365
0.4561196,0.15516365
10.9442295,0.13515589
-7.3658862,0.15516365
3.2339231,0.15516365
-3.6848971,0.15516365
8.3616792,0.19301167
-1.1803432,0.15516365

0,1
-2.16586072,0
-1.1033059,0
-0.20925626,0
1.96646059,0
1.24671797,0
3.11005263,0
1.7383974,0
-3.80590908,0
0.51847333,0
-1.0553668,0

0,1
-0.72528252,0
1.57566843,0
0.60376822,0
2.68400708,0
3.06313376,0
6.06554292,0
5.05123451,0
5.02269749,0
0.82343972,0
-0.10142317,0

0,1
1.7186442,0.080892415
0.226135,-0.369013709
0.2009644,-0.179545219
2.0249081,-0.13612066
-2.0071925,-1.784265803
4.9874508,-1.055010282
-2.7172731,-0.24997811
1.5263954,-1.029395052
1.0402884,0.03075393
1.6392426,0.939542305

0,1
14.7248383,-0.08975294
1.6218412,-0.08975294
7.7396504,2.43285454
0.4590533,-0.08975294
-6.387,-0.08975294
7.7734058,-0.08975294
-4.9629408,-0.08975294
2.7285389,-0.08975294
5.031235,-0.08975294
-1.5255034,-0.08975294

0,1
4.2923153,0
1.0912444,0
21.5285185,0
15.231325,0
-2.0453947,0
3.9402031,0
3.9858724,0
0.9533885,0
17.1472993,0
-7.3253403,0

0,1
-3.47650038,0.0003739317
-0.95528999,0.0003739317
0.18575091,0.0003739317
4.63227208,0.0003739317
9.26726371,0.0003739317
-10.59567284,0.0003739317
2.59350716,0.0003739317
1.33085796,0.0003739317
2.67834796,0.0003739317
2.6801108,-0.2822673731

0,1
-0.31522098,-0.17365587
0.58931465,0.05537089
2.58616359,1.81519285
5.82483858,0.07506558
-1.12879441,0.3417923
1.85134578,1.32656741
2.29356928,0.12337105
-2.44332832,0.2944202
2.20570981,-0.17620477
4.99359609,3.92292544

0,1
2.78904136,0
-1.93684524,0
1.74596421,0
-2.45575823,0
1.34993737,0
1.8168036,0
10.11952418,0
-8.2006072,0
7.05681279,0
-4.91728141,0

0,1
-6.43152916,0
1.77171101,0
-2.5786258,0
-2.0300617,0
-0.21679763,0
-0.64759016,0
-0.85896147,0
8.6440397,0
-2.24573841,0
6.65361938,0

0,1
-0.03513463,0
-1.32556926,0
1.77650091,0
2.0196681,0
-4.67049049,0
2.78403516,0
1.23327309,0
-1.57102605,0
-1.74332947,0
3.60164158,0

0,1
0.0635175792,-0.14093909
4.4535057886,-0.04071992
-0.0824565351,-0.08415693
0.769211577,-0.04071992
-1.8796927558,-0.04071992
2.1942204331,-0.04071992
-4.6779153396,0.38656713
-1.283560759,0.46199175
0.5460803536,-0.14093909
-1.9780343277,-0.11614454

0,1
-5.4557374,0
-7.4420647,0
2.2224333,0
0.7980998,0
4.7722215,0
-3.0023236,0
-0.1702846,0
-3.5192961,0
5.4420357,0
10.1836684,0

0,1
0.5581374,0.12194044
-4.72447588,0.02971529
-1.04067072,-0.06250986
2.41170709,-0.60084634
-3.3561437,0.02971529
3.14505597,0.12194044
-4.65434217,-0.06250986
-1.33379488,-0.06250986
-2.00384043,0.02971529
-0.93383322,-0.41639605

0,1
-6.87874247,-0.26231812
-0.01252884,-0.36698248
-11.51529409,-0.36785514
4.48922554,-0.54439886
-3.85497649,1.27515342
2.08532766,-2.76010602
1.09902173,-0.52978385
-5.41042073,-0.60737914
-0.27301156,-1.40785184
0.3568789,-2.18530728

0,1
0.254763095,0
-1.025357073,0
-3.541418732,0
2.913289463,0
-2.075657763,0
-0.790367945,0
2.769432044,0
2.556958048,0
0.169182339,0
-0.140078113,0

0,1
-6.505678,-0.6414704
1.540839,4.764965
-3.970471,1.3713167
1.618699,7.0729417
5.033559,-4.5934103
-5.12389,-1.8833666
-11.709083,-1.2212669
2.976307,-2.52

0,1
2.0802909,0.10843909
5.18749432,0.42413689
1.99227138,0.46567318
6.96567847,0.09060516
8.80631267,0.52006363
4.698582,0.27713829
-15.08196115,-0.24422269
9.94941012,-0.08473823
5.39463372,0.46567318
-4.73871979,0.44405845

0,1
-1.421144,0
0.7312004,0
-4.8743576,0
6.2373058,0
7.8796901,0
-5.7063089,0
-2.569557,0
-1.7444828,0
-1.1612242,0
-4.8461399,0

0,1
-2.5780732,0
8.5431313,0
0.238132,0
-8.1272827,0
-1.110188,0
-1.8869352,0
4.3921978,0
5.7414913,0
4.0542019,0
9.5815939,0

0,1
1.8501996,-0.01136847
-1.92472418,-0.22466555
1.10573852,0.02907035
-2.35872639,-0.01136847
-14.50290825,-0.01136847
-0.57974959,-0.01136847
-6.1509678,-0.01136847
1.5434056,0.06670573
3.40179276,-0.01136847
3.44067252,-0.01136847

0,1
0.68703636,-0.130004649
0.97790914,1.387618402
-0.06328854,0.02374317
0.65971778,-0.133909373
-0.08713383,-0.497470266
0.54145587,0.67574089
0.37147635,-0.19646033
4.02981918,-0.101358745
3.26433862,-0.19646033
2.08949355,0.423471337

0,1
-4.81318898,0.0147929
3.70471185,-0.7144099
1.96970215,0.0147929
-1.61695396,0.0147929
-3.65394907,0.0147929
-1.46133268,0.0147929
0.6120761,0.0147929
1.92753159,0.0147929
-2.0078241,0.0147929
-2.420736,0.0147929

0,1
3.3730665,0
0.5699968,0
9.0527917,0
-0.2371706,0
3.4335596,0
-4.2333048,0
-4.8195409,0
-1.2310249,0
2.153598,0
-0.4149284,0

0,1
-0.58560212,-0.33677615
-2.99662929,-0.81930496
0.60058199,0.679748
-5.62450825,-0.54726485
-3.92795221,1.00325463
10.23681807,0.4692593
8.5896257,-1.19529521
-4.37628976,-0.06473604
-1.77281354,0.19721919
2.85596418,-1.19529521

0,1
2.1951864,-0.14448808
5.6229068,-0.07477101
0.6843246,-0.08402064
-6.4452234,-0.03157528
-0.6075075,-0.0725288
1.1192983,-0.13414963
3.4472182,0.1381942
-0.6286987,-0.07477101
-0.9597539,-0.22978112
-0.3689482,-0.20748716

0,1
6.1295227,0
-3.7297613,0
8.1629079,0
-11.5533757,0
-6.2129636,0
-4.9445650,0
10.8117109,0
-0.5294193,0
-6.6974164,0
0.4547511,0

0,1
15.6236609,4.81257196
11.7112929,0.1251255
-4.1936239,-5.01853083
-3.7513296,-0.13256135
-2.0339492,-1.30741696
-10.7177971,-2.59420174
-11.4655497,4.06790606
8.6465497,1.66463888
-0.7363134,3.08078576
-3.1148991,2.74920631

0,1
0.69005071,0
-4.27992964,0
-2.09259340,0
0.30284704,0
-3.30231247,0
5.39123114,0
-0.69114311,0
11.84303839,0
0.50229293,0
4.15616874,0

0,1
0.3646861,0
6.2788110,0
-2.0438972,0
16.4450858,0
14.9513935,0
-1.7899397,0
-9.5505993,0
1.4862909,0
-1.3007714,0
5.4912416,0

0,1
-0.1961773,-3.007599
-13.4797755,1.8199451
-8.1655578,-1.096247
-5.3018045,-1.0247313
11.1135056,-0.8778164
20.7979289,-2.6018285
1.9346151,-3.928033
-8.6881572,-0.0883228
-12.3825881,-3.6683633
-3.0670615,8.351761

0,1
3.4885917,0
-0.36366437,0
4.14492594,0
-0.052302,0
2.23737748,0
1.65137694,0
-1.18282362,0
2.38967237,0
1.71491276,0
1.18812919,0

0,1
-2.8704704,0
13.1478314,0
1.2828835,0
4.3464241,0
3.9303003,0
-2.0753972,0
8.0728004,0
-10.5367498,0
-5.6878846,0
-5.9082112,0

0,1
1.6967632,1.66049456
-12.4123011,6.3563771
7.2726229,0.28290597
7.3823668,-0.36860972
23.5175492,-1.14002684
13.7693534,-0.06405626
11.1621525,-3.50256394
-7.7061591,-2.88033908
-5.2511969,0.01889819
-1.7918451,-1.51415189

0,1
-1.6793178,0.585855841
-3.7887731,0.092617269
0.8663900,0.382818746
-0.6247584,0.092617269
-11.5776940,0.266946033
9.6190363,0.443675627
-2.6101294,0.092617269
4.2552261,-0.833705441
-7.1641386,0.092617269
1.5164831,0.161040032

0,1
3.3554387,0
-4.0092208,0
-1.1526894,0
-3.8308022,0
8.4172049,0
-5.4677127,0
-0.3498589,0
10.2327887,0
4.7060869,0
-0.8727808,0

0,1
-2.6214338,0
2.3271322,0
-12.49474,0
1.8900975,0
-5.3718129,0
-11.0362304,0
8.3583818,0
7.6720571,0
-4.1911892,0
2.0238725,0

0,1
-0.87939282,-0.08614889
2.299626042,-0.08614889
1.817231237,-0.08614889
4.15952143,-0.08614889
3.85937272,-0.08614889
-3.52464837,-0.08614889
-2.634067798,-0.08614889
1.598244396,-0.08614889
3.492267615,-0.08614889
16.142740018,-0.08614889

0,1
-2.5579805,-0.5257164
5.6408505,0.44608443
-3.9138477,0.34452965
-6.2371492,-2.8646762
-1.5387632,0.23981042
7.5995874,-2.15255109
-3.6739206,0.56844105
3.1252266,-0.39305413
23.0515383,-3.35996466
0.72082,2.64518923

0,1
2.68068827,0.05939953
-2.7946217,0.05939953
-1.43464014,0.05939953
0.75510634,-0.82506623
-4.09192742,0.05939953
-0.41669286,0.05939953
4.1540835,0.05939953
1.92138531,0.05939953
0.76530402,0.05939953
3.08520661,0.05939953

0,1
4.66822459,0
-2.63605403,0
-9.04384864,0
-4.62670741,0
9.98736140,0
6.55224284,0
-17.12122131,0
-1.50698960,0
4.41315495,0
0.14205104,0

0,1
24.28588,-0.1059859
-7.935143,-0.1059859
-0.3203668,0.317211
-6.2952393,-0.1059859
27.0208065,0.731655
-7.051989,-0.1059859
2.825969,-0.1059859
47.7161787,-0.1059859
-1.3071618,-0.1059859
-14.2810405,-0.1059859

0,1
5.1851257,-0.1195768
-10.6039683,-0.1195768
-13.162571,-0.1195768
9.4318497,-0.1195768
10.2998595,-0.1195768
-14.0253336,-0.1195768
19.4021807,-0.1195768
12.8862105,-0.1195768
-3.8585713,-0.1195768
2.0812462,-0.1195768

0,1
0.8800748,0
-0.4016776,0
2.3107676,0
2.3432270,0
-0.3312332,0
7.9814260,0
-2.8822231,0
-3.6383308,0
2.3619125,0
3.1690818,0


In [5]:
class(single_res_test)

In [4]:
res_single = avg_perm(single_res_test)

“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the standard deviation is zero”
“the stand

In [None]:
#save(single_res_test, single_lam, single_theta_est, multi_res_test, multi_lam, multi_theta_est, res_tune, rec_lamv, file = paste0(output_dir, '/', gene_id, ".cv.evaluation.RData"))
save(single_res_test, single_lam, single_theta_est, multi_res_test, multi_lam, multi_theta_est, res_tune, file = paste0(output_dir, '/', gene_id, ".cv.evaluation.RData"))
                                  
	res_single = avg_perm(single_res_test)
	res_multi = avg_perm(multi_res_test)
	cat("Elastic net average testing error (all): ", apply(res_single, 2, mean), '\n')
	cat("glasso averge testing error (all): ", apply(res_multi, 2, mean), '\n')
	cat("Number of all zero tissues in elastic net is ", sum(is.na(res_single[,1])), '\n')
	cat("Number of all zero tissues in glasso is ", sum(is.na(res_multi[,1])), '\n')
	cat("Elastic net average testing error (non-zero): ", apply(res_single[!is.na(res_multi[,1]),], 2, mean), '\n')
	cat("glasso averge testing error (non-zero): ", apply(res_multi[!is.na(res_multi[,1]),], 2, mean), '\n')

  #------------ use tuning parameter chosen above to train model on entire dataset -------------#
	## generate an estimate with whole data ##
  cat('training a model on entire data with parameters chosen from cv\n')
	X_all = list()
	Y_all = list()
	for(t in 1:T_num){
		X_all_tmp = sub_id_map[[t]]
        X_all[[t]] = apply(as.matrix(dose_std[,X_all_tmp]),1,as.numeric)
		#X_all[[t]] = apply(as.matrix(dose[X_all_tmp,-c(1)]),2,as.numeric)
		Y_all[[t]] = Y[[t]][which(sub_id_map_exp[[t]] == T), 2]
	}
	# initial values 
	single_initial_est = matrix(0, ncol(X_train[[1]]), T_num)
	for(t in 1:T_num){
		tt = cv.glmnet(X_all[[t]], Y_all[[t]], alpha = 0.5, nfolds = 5)
		single_initial_est[,t] = tt$glmnet.fit$beta[,which.min(tt$cvm)]
	}

	sig_norm = apply(single_initial_est, 1, function(x){sqrt(sum(x^2))})
	sig_norm[sig_norm==0] = rep(min(sig_norm[sig_norm>0]), sum(sig_norm==0))/2
	sig_norm = sig_norm/sum(sig_norm)
	weights2 = 1/sig_norm; weights2 = weights2/sum(weights2);

	tis_norm = apply(single_initial_est, 2, function(x){sum(abs(x))})
	tis_norm[tis_norm==0] = rep(min(tis_norm[tis_norm>0]), sum(tis_norm==0))/2
	tis_norm = tis_norm/sum(tis_norm)
	weights1 = 1/tis_norm; weights1 = weights1/sum(weights1);

	spsz = unlist(lapply(X_all,nrow))
	initial_numeric = as.numeric(single_initial_est)
	#remove(single_initial_est)
	XY = grad_prep(X_all, Y_all)
	XX_all = lapply(X_all, function(x){t(x)%*%x/nrow(x)})
	tmp_res = rep(0, fold)
	for(f in 1:fold){
		ans = glasso_no_early_stopping(X=X_all, Y=Y_all, XX=XX_all, XY=XY, Xnorm=Xnorm, lambda1=multi_lam[f,1]/spsz, lambda2=multi_lam[f,2], theta=matrix(initial_numeric,M,P))#, verbose = if_verbose)
		tmp_res[f] = ans$avg_train_err
	}
	final.lam = multi_lam[which.min(tmp_res),]
	ans = glasso_no_early_stopping(X=X_all, Y=Y_all, XX=XX_all, XY=XY, Xnorm=Xnorm, lambda1=final.lam[1]/spsz, lambda2=final.lam[2], theta=matrix(initial_numeric,M,P))(#, verbose = if_verbose)
	info = read.table(info_path, header=T, sep='\t')
	downstream_est = data.frame(info[,1:3], ans$est)
	multi_all_res = multi_mse(ans$est, X_all, Y_all)
	single_all_res = multi_mse(single_initial_est, X_all, Y_all)
  cat('writing final estimates\n')
	write.table(downstream_est, paste0(output_dir, '/', gene_id, ".est"), quote = F, row.names = F, col.names = c("SNP", "REF.0.", "ALT.1.", Yt))

  cat('saving the prediction on all data for future analysis\n')
	save(multi_all_res, single_all_res, final.lam, ans, file = paste0(output_dir, '/', gene_id, ".prediction_on_all_data.RData"))
  cat('done!\n')
    