/
get_ON_probes
executable file
·185 lines (168 loc) · 5.54 KB
/
get_ON_probes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
#!/usr/bin/env /vol/public-pseed/FIGdisk/bin/run_perl
use strict;
use Data::Dumper;
#use SAPserver;
#my $sapO = new SAPserver;
use SeedUtils;
use ExpressionDir;
my $usage = "usage: get_ON_probes ExprDir outProbesOnFile outPegsOnFile";
#my $usage = "usage: get_ON_probes Genome probe.occ.table peg.probe.table raw_data.tab > Probe-PEG-Func Table";
my($genome,$probe_occF,$probe_pegF,$raw_dataF, $expr_dir, $output_probesF, $output_pegsF);
(
($expr_dir = shift @ARGV) &&
($output_probesF = shift @ARGV) &&
($output_pegsF = shift @ARGV)
)
|| die $usage;
my $exprO = ExpressionDir->new($expr_dir);
$genome = $exprO->genome_id;
$probe_occF = $exprO->expr_dir . "/probe.occ.table";
$probe_pegF = $exprO->expr_dir . "/peg.probe.table";
$raw_dataF = $exprO->expr_dir . "/rma_normalized.tab";
my %known_pegs = map { ($_ =~ /^(fig\|\d+\.\d+\.[^\.]+\.\d+)/) ? ($1 => 1) : () } `cut -f1 $raw_dataF`;
#my $hashG = $sapO->all_features( -ids => [$genome], -type => 'peg' );
#my @pegs = grep { $known_pegs{$_} } @{$hashG->{$genome}};
#my $hashF = $sapO->ids_to_functions(-ids => \@pegs);
my @pegs = $exprO->all_features('peg');
my $hashF = $exprO->ids_to_functions(\@pegs);
my %ONfunc = map { chomp; $_ => 1 } <DATA>;
my %ON = map { $ONfunc{$hashF->{$_}} ? ($_ => $hashF->{$_}) : () } keys(%$hashF);
my $good_probes;
if (open(PO, "<", $probe_occF))
{
while (<PO>)
{
if ($_ =~ /^(\S+)\t0\t/)
{
$good_probes->{$1} = 1;
}
}
close(PO);
}
else
{
warn "Cannot open $probe_occF: $!. Treating all probes as good.";
}
my @keep;
my %pegs;
if (open(PP, "<", $probe_pegF))
{
while (<PP>)
{
if (($_ =~ /^(\S+)\t(\S+)/) && $ON{$1} && (!defined($good_probes) || $good_probes->{$2}))
{
push(@keep,"$2\t$1\t$ON{$1}");
$pegs{$1}++;
}
}
close(PP);
open(PFILE, ">", $output_probesF) or die "cannot write $output_probesF: $!";
foreach my $x (sort { my($a1,$a2) = ($a =~ /^(\d+)_(\d+)/);
my($b1,$b2) = ($b =~ /^(\d+)_(\d+)/);
($a1 <=> $b1) or ($a2 <=> $b2) } @keep)
{
print PFILE "$x\n";
}
close(PFILE);
}
else
{
warn "Cannot open $probe_pegF: $!; using all pegs";
$pegs{$_}++ for keys %ON;
}
open(PFILE, ">", $output_pegsF) or die "cannot write $output_pegsF: $!";
for my $peg (sort { SeedUtils::by_fig_id($a, $b) } keys %pegs)
{
if ($known_pegs{$peg})
{
print PFILE "$peg\n";
}
}
close(PFILE);
__DATA__
Alanyl-tRNA synthetase (EC 6.1.1.7)
Arginyl-tRNA synthetase (EC 6.1.1.19)
Asparaginyl-tRNA synthetase (EC 6.1.1.22)
Aspartyl-tRNA synthetase (EC 6.1.1.12)
Cysteinyl-tRNA synthetase (EC 6.1.1.16)
DNA-directed RNA polymerase alpha subunit (EC 2.7.7.6)
DNA-directed RNA polymerase beta subunit (EC 2.7.7.6)
DNA-directed RNA polymerase beta' subunit (EC 2.7.7.6)
DNA-directed RNA polymerase omega subunit (EC 2.7.7.6)
Glutaminyl-tRNA synthetase (EC 6.1.1.18)
Glutamyl-tRNA synthetase (EC 6.1.1.17)
Glycyl-tRNA synthetase alpha chain (EC 6.1.1.14)
Glycyl-tRNA synthetase beta chain (EC 6.1.1.14)
Histidyl-tRNA synthetase (EC 6.1.1.21)
Isoleucyl-tRNA synthetase (EC 6.1.1.5)
LSU ribosomal protein L10p (P0)
LSU ribosomal protein L11p (L12e)
LSU ribosomal protein L13p (L13Ae)
LSU ribosomal protein L14p (L23e)
LSU ribosomal protein L15p (L27Ae)
LSU ribosomal protein L16p (L10e)
LSU ribosomal protein L17p
LSU ribosomal protein L18p (L5e)
LSU ribosomal protein L19p
LSU ribosomal protein L1p (L10Ae)
LSU ribosomal protein L20p
LSU ribosomal protein L21p
LSU ribosomal protein L22p (L17e)
LSU ribosomal protein L23p (L23Ae)
LSU ribosomal protein L24p (L26e)
LSU ribosomal protein L25p
LSU ribosomal protein L27p
LSU ribosomal protein L28p
LSU ribosomal protein L29p (L35e)
LSU ribosomal protein L2p (L8e)
LSU ribosomal protein L30p (L7e)
LSU ribosomal protein L31p
LSU ribosomal protein L32p
LSU ribosomal protein L33p
LSU ribosomal protein L34p
LSU ribosomal protein L35p
LSU ribosomal protein L36p
LSU ribosomal protein L3p (L3e)
LSU ribosomal protein L4p (L1e)
LSU ribosomal protein L5p (L11e)
LSU ribosomal protein L6p (L9e)
LSU ribosomal protein L7/L12 (L23e)
LSU ribosomal protein L9p
Leucyl-tRNA synthetase (EC 6.1.1.4)
Lysyl-tRNA synthetase (class II) (EC 6.1.1.6)
Methionyl-tRNA synthetase (EC 6.1.1.10)
Phenylalanyl-tRNA synthetase alpha chain (EC 6.1.1.20)
Phenylalanyl-tRNA synthetase beta chain (EC 6.1.1.20)
Prolyl-tRNA synthetase (EC 6.1.1.15)
SSU ribosomal protein S10p (S20e)
SSU ribosomal protein S11p (S14e)
SSU ribosomal protein S12p (S23e)
SSU ribosomal protein S13p (S18e)
SSU ribosomal protein S14p (S29e)
SSU ribosomal protein S15p (S13e)
SSU ribosomal protein S16p
SSU ribosomal protein S17p (S11e)
SSU ribosomal protein S18p
SSU ribosomal protein S19p (S15e)
SSU ribosomal protein S1p
SSU ribosomal protein S20p
SSU ribosomal protein S21p
SSU ribosomal protein S2p (SAe)
SSU ribosomal protein S3p (S3e)
SSU ribosomal protein S4p (S9e)
SSU ribosomal protein S5p (S2e)
SSU ribosomal protein S6p
SSU ribosomal protein S7p (S5e)
SSU ribosomal protein S8p (S15Ae)
SSU ribosomal protein S9p (S16e)
Seryl-tRNA synthetase (EC 6.1.1.11)
Threonyl-tRNA synthetase (EC 6.1.1.3)
Tryptophanyl-tRNA synthetase (EC 6.1.1.2) ## proteobacterial type
Tyrosyl-tRNA synthetase (EC 6.1.1.1)
Valyl-tRNA synthetase (EC 6.1.1.9)
LSU ribosomal protein L36p @ LSU ribosomal protein L36p, zinc-independent
LSU ribosomal protein L32p @ LSU ribosomal protein L32p, zinc-independent
SSU ribosomal protein S4p (S9e) @ SSU ribosomal protein S4p (S9e), zinc-independent
LSU ribosomal protein L36p @ LSU ribosomal protein L36p, zinc-dependent
LSU ribosomal protein L28p @ LSU ribosomal protein L28p, zinc-independent
Tryptophanyl-tRNA synthetase (EC 6.1.1.2) ## cluster 1