/
analysis_template.m
142 lines (105 loc) · 3.69 KB
/
analysis_template.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
% stop the execution temporarily for 1-10 minutes based on the job ID
% this is to avoid license issues when running many jobs in the same time
n_time = abs(mod(case_id, 10))*60;
pause(n_time);
% set up parallel computing environment
pc = parcluster('local');
parpool(pc, str2num(getenv('SLURM_CPUS_ON_NODE'))); %#ok<ST2NM>
% specify file prefix
file_prefix = strcat(gwas_name,'_',net_name,'_',cis_name);
% specify total number of autosome protein-coding genes
data.num_gene = ngene;
% specify GWAS sample size
data.num_sam = nsam;
% turn off RSS program progress bar
options.verbose = false;
% set the convergence tolerance of RSS program
options.tolerance = 1e-3;
% set the maximum wall time for RSS program (unit: seconds)
options.max_walltime = 12*3600;
% create a folder for output files if not exist
out_path = strcat(out_path,file_prefix,'/');
if exist(out_path,'dir') ~= 7
mkdir(out_path);
end
% add search paths
addpath(strcat(src_path,'rss/src_vb/')); % RSS-E codes
addpath(strcat(src_path,'rss-net/src/')); % RSS-NET codes
% specify single-SNP GWAS summary statistics, if they are not specified
if ~isfield(data,'sumstats_file')
data.sumstats_file = strcat(dat_path,gwas_name,'_sumstat.mat');
end
% specify network-related data files, if they are not specified
if ~isfield(data,'snp2net_file')
data.snp2net_file = strcat(dat_path,gwas_name,'_',net_name,'_snp2net.mat');
end
if ~isfield(data,'snp2gene_file')
data.snp2gene_file = strcat(dat_path,gwas_name,'_snp2gene.mat');
end
if ~isfield(data,'gene2gene_file')
data.gene2gene_file = strcat(dat_path,net_name,'_gene2gene.mat');
end
if ~isfield(data,'snp2gene_cis_file')
data.snp2gene_cis_file = strcat(dat_path,gwas_name,'_',cis_name,'_snp2gene_cis.mat');
end
% do not use gene expression in current RSS-NET analysis
data.expression_file = [];
% specify network-related parameters, if they are not specified
if ~isfield(options,'snp2gene_par')
options.snp2gene_par = 'dist_bin';
end
if ~isfield(options,'snp2gene_opt')
options.snp2gene_opt = 1e6;
end
if ~isfield(options,'gene2gene_par')
options.gene2gene_par = 'asis';
end
if ~isfield(options,'gene2gene_opt')
options.gene2gene_opt = [];
end
if ~isfield(options,'snp2gene_cis_par')
options.snp2gene_cis_par = 'asis';
end
if ~isfield(options,'snp2gene_cis_opt')
options.snp2gene_cis_opt = [];
end
% initialize RSS-NET with RSS-E baseline results, if provided
if exist(base_file,'file') == 2
rsse_base = matfile(base_file);
logw0_vec = rsse_base.logw;
alpha0_mat = rsse_base.alpha;
mu0_mat = rsse_base.mu;
data.num_snp = size(mu0_mat,1); % total # of genome-wide SNPs
[~,mi] = max(logw0_vec(:));
alpha0 = alpha0_mat(:,mi);
mu0 = mu0_mat(:,mi);
if (length(alpha0) ~= data.num_snp) || (length(mu0) ~= data.num_snp)
error('Inconsistent number of SNPs in baseline results ...');
end
options.alpha = alpha0;
options.mu = mu0;
clear base_file rsse_base alpha0* mu0* logw0 mi;
end
% specify hyper-parameters for RSS-NET, using `make_hpgrid.m`
param_data = make_hpgrid(hyper_data);
hyper.theta0 = param_data(case_id,1);
hyper.theta = param_data(case_id,2);
hyper.eta = param_data(case_id,3);
hyper.rho = param_data(case_id,4);
clear param_* hyper_*;
% run RSS-NET analysis
tic;
[logw,alpha,mu,s,param] = rss_net(data,hyper,options);
run_time = toc;
clear data hyper options;
% create name of output file for this analysis
out_file = strcat(out_path,file_prefix,'_out_',num2str(case_id),'.mat');
% save RSS-NET analysis output
theta0 = param(1,1);
theta = param(1,2);
sigb = param(1,3);
sige = param(1,4);
save(out_file,'logw','alpha','mu','s','run_time',...
'theta0','theta','sigb','sige');
% terminate existing parallel session
delete(gcp('nocreate'));