-
Notifications
You must be signed in to change notification settings - Fork 6
/
PECA_network_mm10.m
152 lines (146 loc) · 4.5 KB
/
PECA_network_mm10.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
fileID = fopen('openness2.bed');
C = textscan(fileID,'%s %f32 %f32');
fclose(fileID);
Element_name=C{1,1};
Opn=double(C{1,2});
Opn_median=double(C{1,3});
Opn_median(isnan(Opn_median))=1;
load('../../Data/MotifMatch_mouse_rmdup.mat')
load('TFTG_corr.mat')
TF_binding=mfbs(TFName,Element_name,motifName,motifWeight,Match2);
fileID = fopen('../../Input/toreplace.txt');
C = textscan(fileID,'%s %f32');
fclose(fileID);
Symbol=C{1,1};
G=C{1,2};
%%%%CR binding
load('../../Data/CRInfo_mouse.mat')
eita0=-30.4395;
eita1=0.8759;
[d f]=ismember(C_TFName,TFName);
C_TFName=C_TFName(d==1,:);
TFS=TFS(d==1,:);
CR_TF=CR_TF(:,d==1);
TFB=full(TF_binding(f(d==1),:));
C_TFExp=zeros(length(C_TFName),1);
[d f]=ismember(C_TFName,Symbol);
C_TFExp(d==1,:)=log2(1+G(f(d==1),:));
[d1 f1]=ismember(C_TFName,TFName);
TFBO=(repmat(C_TFExp,1,length(Opn)).*repmat(C_TFExp./TFS,1,length(Opn)).*TF_binding(d1==1,:).*repmat(Opn',length(C_TFName),1)).^0.25;
CRB=eita0+eita1*CR_TF*TFBO;
CRB_P=1-exp(CRB')./(1+exp(CRB'));
filename='CR_binding_pval.txt';
fid=fopen(filename,'wt');
for i=1:length(CRName)-1
fprintf(fid, '%s\t',CRName{i,1});
end
fprintf(fid, '%s\n',CRName{i+1,1});
fclose(fid);
dlmwrite('CR_binding_pval.txt',CRB_P,'-append','delimiter','\t')
%%%%%%%%%%%%
alhfa=0.5;
Opn_median=log2(1+Opn_median);
Opn1=log2(1+Opn);
Opn=double(Opn1.*(Opn1./(Opn_median+0.5)));
geneName=intersect(List,Symbol);
[d f]=ismember(geneName,List);
R2=R2(:,f);
Exp_median=Exp_median(f);
[d f]=ismember(geneName,Symbol);
G=G(f);
[d1 f1]=sort(G);
[d2 f2]=sort(Exp_median);
G1(f1,1)=d2;
G=(G1.^(alhfa)).*(G1./(Exp_median+0.5));
[d f]=ismember(TFName,geneName);
TFName=TFName(d==1,:);
TF_binding=TF_binding(d==1,:);
TFExp=G(f(d==1));
R2=R2(d==1,:);
%fileID = fopen('./Enrichment/knownResults_TFrank.txt');
%C = textscan(fileID,'%s %f32');
%fclose(fileID);
%[d f]=ismember(TFName,C{1,1});
%TF_motif=zeros(length(TFName),1);
%TF_motif(d==1)=C{1,2}(f(d==1));
%TFExp=TFExp.*(TF_motif);
fileID = fopen('peak_gene_100k_corr.bed');
C = textscan(fileID,'%s %s %f32 %f32');
fclose(fileID);
[d f]=ismember(C{1,1},Element_name);
[d1 f1]=ismember(C{1,2},geneName);
[f2 ia ic]=unique([f(d.*d1==1) f1(d.*d1==1)],'rows');
c3=accumarray(ic,C{1,3}(d.*d1==1),[],@min);
c4=accumarray(ic,C{1,4}(d.*d1==1),[],@min);
RETG_w=c4;
RETG_name=[Element_name(f2(:,1)) geneName(f2(:,2))];
c4(c4<0.2)=0;
d0=500000;
c=double(exp(-1*c3/d0).*c4);
H1=sparse(f2(:,2),f2(:,1),c,length(geneName),length(Element_name));
TFO=TF_binding.*repmat(Opn',size(TF_binding,1),1);
BOH=TFO*H1';
Score=(TFExp*G').*(2.^abs(R2)).*full(BOH);
Score(isnan(Score))=0;
dlmwrite('TFTG_regulationScore.txt',Score,'\t')
Score_norm=(2.^abs(R2)).*full(BOH);
Score_norm(isnan(Score_norm))=0;
dlmwrite('TFTG_regulationScore_norm.txt',Score_norm,'\t')
filename='TFName.txt';
fid=fopen(filename,'wt');
for i=1:size(TFName,1)
fprintf(fid, '%s\n',TFName{i,1});
end
fclose(fid);
filename='TGName.txt';
fid=fopen(filename,'wt');
for i=1:size(geneName)
fprintf(fid, '%s\n',geneName{i,1});
end
fclose(fid);
%%%%%%%%%
load('../../Data/TFTG_mouse_nagetriveControl.mat')
[d f]=ismember(Back_net(:,1),TFName);
[d1 f1]=ismember(Back_net(:,2),geneName);
f2=[f(d.*d1==1) f1(d.*d1==1)];
Back_score=Score((f2(:,2)-1)*size(Score,1)+f2(:,1));
Cut=prctile(Back_score,99);
[a b]=find((Score>Cut)==1);
c=find((Score>Cut)==1);
c1=full(Score(c));
Net=[TFName(a) geneName(b)];
d=ismember(RETG_name(:,2),unique(Net));
RETG_name=RETG_name(d,:);
RETG_w=RETG_w(d,:);
filename='toreplace_RE_TG.txt';
fid=fopen(filename,'wt');
fprintf(fid, '%s\t','RE');
fprintf(fid, '%s\t','TG');
fprintf(fid, '%s\n','Weight');
for i=1:size(RETG_w,1)
fprintf(fid, '%s\t',RETG_name{i,1});
fprintf(fid, '%s\t',RETG_name{i,2});
fprintf(fid, '%g\n',RETG_w(i,1));
end
fclose(fid);
[a1,a2]=sort(H1','descend');
a1=a1(1:10,:);
a2=a2(1:10,:);
TFTG_RE=arrayfun(@(i) strjoin(Element_name(a2((TFO(a(i),a2(:,b(i)))>0)'.*(a1(:,b(i))>0)==1,b(i)))',';'),[1:length(a)]','UniformOutput',false);
[d f]=sort(c1,'descend');
Net=[Net(f,:) num2cell(d) TFTG_RE(f)];
filename='toreplace_network.txt';
fid=fopen(filename,'wt');
fprintf(fid, '%s\t','TF');
fprintf(fid, '%s\t','TG');
fprintf(fid, '%s\t','Score');
fprintf(fid, '%s\t','FDR');
fprintf(fid, '%s\n','REs');
for i=1:size(Net,1)
fprintf(fid, '%s\t',Net{i,1});
fprintf(fid, '%s\t',Net{i,2});
fprintf(fid, '%g\t',Net{i,3});
fprintf(fid, '%g\t',(sum(Back_score>d(i))+1)/length(Back_score));
fprintf(fid, '%s\n',Net{i,4});
end
fclose(fid);