-
Notifications
You must be signed in to change notification settings - Fork 0
/
RVIS-uORF.pl
executable file
·79 lines (71 loc) · 2.09 KB
/
RVIS-uORF.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/usr/bin/perl
use strict;
use ProgramName;
$|=1;
my $name=ProgramName::get();
die "$name <MIN_COUNT> <homozygotes_only:0/1> <uORFs.txt>\n" unless @ARGV==3;
my ($MIN_COUNT,$HOMOZYGOTES_ONLY,$BROKEN)=@ARGV;
my $RVIS="/home/bmajoros/intolerance/RVIS/RVIS.txt";
#my $RVIS="/home/bmajoros/intolerance/RVIS/ncRVIS-parsed.txt";
#my $BROKEN="/home/bmajoros/1000G/assembly/broken.txt";
my $NAMES="/home/bmajoros/ensembl/gene-names/combined.txt";
my %toEnsembl;
open(IN,$NAMES) || die $NAMES;
while(<IN>) {
chomp; my @fields=split; next unless @fields>=2;
my ($ensembl,$id)=@fields;
$toEnsembl{$id}=$ensembl;
}
close(IN);
my %RVIS; # indexed by ensembl id
open(IN,$RVIS) || die $RVIS;
while(<IN>) {
chomp; my @fields=split; next unless @fields>=3;
my ($id,$rvis,$percentile)=@fields;
my $ensembl=$toEnsembl{$id};
next unless $ensembl;
#$RVIS{$ensembl}=$rvis;
$RVIS{$ensembl}=$percentile;
}
close(IN);
my %alleles;
open(IN,$BROKEN) || die $BROKEN;
while(<IN>) {
chomp; my @fields=split; next unless @fields>=11;
my ($indiv,$allele,$gene,$transcript,$uORFbegin,$uORFend,
$dORFbegin,$dORFend,$L,$status,$reason)=@fields;
#next if $chr eq "chrX" || $chr eq "chrY";
next unless $uORFend<$dORFbegin;
#next unless $uORFend<$dORFend;
if($gene=~/(\S+)\./) { $gene=$1 }
$alleles{$gene}->{$indiv}->{$allele}=1;
}
close(IN);
my %counts;
my @genes=keys %alleles;
foreach my $gene (@genes) {
my $alleles=$alleles{$gene};
my @indivs=keys %$alleles;
foreach my $indiv (@indivs) {
my $hash=$alleles->{$indiv};
my @keys=keys %$hash;
#if($HOMOZYGOTES_ONLY && @keys==2 || !$HOMOZYGOTES_ONLY && @keys>0) {
if($HOMOZYGOTES_ONLY && @keys==2 || !$HOMOZYGOTES_ONLY && @keys==1) {
my $rvis=$RVIS{$gene};
next unless defined $rvis;
++$counts{$gene};
}
}
}
#open(CARSON,">carson.txt");
my @genes=keys %counts;
foreach my $gene (@genes) {
my $count=$counts{$gene};
my $rvis=$RVIS{$gene};
die unless defined $rvis;
next unless $count>=$MIN_COUNT;
next unless $rvis=~/\d/;
print "$count\t$rvis\n";
#if($rvis<20) { print CARSON "$gene\t$count\t$rvis\n" }
}
#close(CARSON);