/
match.pl
112 lines (103 loc) · 3.09 KB
/
match.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
=cut header
running format:
perl matching.pl prediction_complexes known_complexes output_file
function:
This script computes the number of matches between predicted complexes and
known complexes; the statistical summary is appended to output_file.
date:
07,21,2010
=cut
use warnings;
use strict;
# Command-line arguments (all three are required):
#   1. prediction complexes file
#   2. known complexes file
#   3. file to which the statistical summary is appended at the end of the run
my $firstPara = shift; #prediction_complexes file
my $secondPara = shift; #known complexes file
my $outputfile=shift;
if (!defined $firstPara || !defined $secondPara || !defined $outputfile) {
    die("Usage: perl $0 prediction_complexes known_complexes output_file\n");
}

# Three-argument open: the file name is never interpreted as a mode or a pipe.
open(my $prediction, '<', $firstPara) or die("Could not open prediction_complexes file\n$!");

# The pairwise match table always goes to the fixed file outputfile.txt in the
# current directory; it is read back later to build the statistics.
open(my $output, '>', 'outputfile.txt') or die("Could not open outputfile.txt\n$!");

# NOTE: the double tab before "OS" is kept on purpose. It leaves column 5 of
# the header row empty, which is what keeps the statistics pass further down
# from counting the header as a match.
print $output "PComplex\tPSize\tKComplex\tKSize\tOverlap\t\tOS\n";
# ---- Pairwise matching of predicted complexes against known complexes ------
#
# Both input files consist of records of the form
#     Complex <id> <size>
# followed by <size> lines, one member per line. For every predicted/known
# pair the overlap score is
#     OS = overlap**2 / (predicted size * known size)
# and pairs with OS >= 0.1 are written to the match table.

my $count = 0; #the number of prediction complexes

# Reads every complex record from an open handle.
# Returns a list of hashes: { id => <id string>, members => [member lines] }.
# A record cut short by end-of-file keeps only the members actually present.
my $read_complexes = sub {
    my ($fh) = @_;
    my @complexes;
    while (my $line = <$fh>) {
        chomp $line;
        next unless $line =~ m/Complex\s+(\d+)\s+(\d+)/;
        my ($id, $declared_size) = ($1, $2);
        my @members;
        for (1 .. $declared_size) {
            my $member = <$fh>;
            last unless defined $member;    # truncated file
            chomp $member;
            push @members, $member;
        }
        push @complexes, { id => $id, members => \@members };
    }
    return @complexes;
};

# Parse the known complexes once instead of re-reading the file for every
# predicted complex.
open(my $known, '<', $secondPara) or die("Could not open known complexes file\n$!");
my @known_complexes = $read_complexes->($known);
close($known);

# Per known complex: how many times each member occurs. Summing these tallies
# over the predicted members gives the same count as comparing every pair.
for my $reference (@known_complexes) {
    my %tally;
    $tally{$_}++ for @{ $reference->{members} };
    $reference->{tally} = \%tally;
}

for my $predicted ($read_complexes->($prediction)) {
    $count++;
    my $p_size = @{ $predicted->{members} };

    for my $reference (@known_complexes) {
        my $k_size = @{ $reference->{members} };

        # An empty complex cannot match anything, and would otherwise make
        # the score a division by zero.
        next if $p_size == 0 || $k_size == 0;

        #compute the number of proteins shared by a prediction complex and a known complex
        my $number = 0;
        my $tally  = $reference->{tally};
        for my $protein (@{ $predicted->{members} }) {
            $number += $tally->{$protein} if exists $tally->{$protein};
        }

        #compute the matching value, formula_1: M=(i**2)/(a*b)
        my $M1 = ($number**2)/($p_size*$k_size);
        if ($M1 >= 0.1) {
            print $output join("\t",
                "Complex".$predicted->{id}, $p_size,
                "Complex".$reference->{id}, $k_size,
                $number, $M1), "\n";
        }
    }
}

# Buffered write errors only surface at close, so check it.
close($output) or die("Could not finish writing outputfile.txt\n$!");
close($prediction);
# ---- Statistical summary ----------------------------------------------------
#
# For each overlap-score threshold 0.1, 0.2, ..., 1.0 this reports:
#   Pc  - number of predicted complexes
#   MPc - distinct predicted complexes with at least one match >= threshold
#   MKc - distinct known complexes with at least one match >= threshold
#   Sn  = MPc / (MPc + (known complexes - MKc))
#   Sp  = MPc / Pc
#   F   = 2 * Sp * Sn / (Sp + Sn)
my @result;
push @result, "\n\nStatistical Information:\n";
push @result, "OS\t\tPc\t\tMPc\t\tMKc\t\t\tSn\t\t\t\t\t\t\tSp\t\t\t\t\t\t\tF\n";

# Total number of known complexes, counted from the known-complexes file.
# This replaces a hard-coded 408 (presumably the size of the reference set the
# script was first written for -- confirm), so other reference sets also work.
# Member lines are skipped the same way the matching pass consumes them, so a
# member that happens to look like a header is not counted.
my $known_total = 0;
open(my $known_fh, '<', $secondPara) or die("Could not open known complexes file\n$!");
while (my $known_line = <$known_fh>) {
    next unless $known_line =~ m/Complex\s+(\d+)\s+(\d+)/;
    $known_total++;
    my $members_left = $2;
    while ($members_left-- > 0) {
        my $skipped = <$known_fh>;
        last unless defined $skipped;
    }
}
close($known_fh);

# Load the match table once. Each kept row is [predicted label, known label,
# score]. Rows whose score column is not a number (the header row, whose
# column 5 is empty) are skipped.
my @matches;
open(my $matches_fh, '<', 'outputfile.txt') or die("Could not open outputfile.txt\n$!");
while (my $row = <$matches_fh>) {
    chomp $row;
    my @fields = split("\t", $row);
    next unless defined $fields[5]
        && $fields[5] =~ m/\A[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?\z/;
    push @matches, [ @fields[0, 2, 5] ];
}
close($matches_fh);

# Integer steps avoid the drift of repeatedly adding 0.1 to a float; the
# thresholds still print as 0.1, 0.2, ..., 1.
for my $step (1 .. 10) {
    my $threshold = $step / 10;

    my (%matched_pc, %matched_kc);
    for my $match (@matches) {
        my ($pc_label, $kc_label, $score) = @{$match};
        next unless $score >= $threshold;    # numeric, not string, comparison
        $matched_pc{$pc_label} = 1;
        $matched_kc{$kc_label} = 1;
    }
    my $numPc = keys %matched_pc;
    my $numKc = keys %matched_kc;

    # Guard every ratio against an empty denominator (no predicted or no
    # known complexes) instead of dying with "Illegal division by zero".
    my $sn_denominator = $numPc + ($known_total - $numKc);
    my $sn = $sn_denominator ? $numPc / $sn_denominator : 0;
    my $sp = $count ? $numPc / $count : 0;
    my $f  = ($sn + $sp != 0) ? 2*$sp*$sn/($sp+$sn) : 0;

    push @result, "$threshold\t\t$count\t\t$numPc\t\t$numKc\t\t$sn\t\t$sp\t\t$f\n";
}
# Append the statistical summary to the file named by the third command-line
# argument, framed by the prediction file name and a closing rule.
# Three-argument open keeps the file name from being parsed as part of the mode.
open($output, '>>', $outputfile) or die("Could not open $outputfile\n$!");
print $output $firstPara."---------------------------\n";
print $output @result;
print $output "--------------------------------\n";
# Buffered write errors only surface at close, so check it.
close($output) or die("Could not finish writing $outputfile\n$!");