Skip to content

Commit

Permalink
Move parse_results.pl code to the DataCheckTapToJson module
Browse files Browse the repository at this point in the history
  • Loading branch information
vinay-ebi committed Jun 14, 2020
1 parent 494caee commit 7915339
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 86 deletions.
63 changes: 48 additions & 15 deletions lib/Bio/EnsEMBL/DataCheck/Pipeline/DataCheckTapToJson.pm
Original file line number Diff line number Diff line change
Expand Up @@ -34,40 +34,54 @@ use TAP::Parser;


use base ('Bio::EnsEMBL::Hive::Process');

# Hive runnable entry point: convert the TAP output found under the
# 'output_dir' parameter into a JSON report.
#
# Reads:
#   output_dir - pipeline parameter; passed to parse_datachecks() both as
#                the TAP source and as the output location.
#
# The listing of TAP files is left to parse_datachecks(), which builds its
# own list from the path it is given, so no file list is computed here.
sub run {
  my $self = shift;

  my $output_dir = $self->param('output_dir');

  # Flags handed to parse_datachecks(), in its positional order:
  # ($tap, $output_file, $by_species, $passed, $to_json).
  my $by_species = 1;
  my $passed     = 1;
  # With a true $to_json, parse_datachecks() appends
  # '/<basename of output_file>.json' to the output path before writing.
  my $to_json    = 1;

  parse_datachecks($output_dir, $output_dir, $by_species, $passed, $to_json);
}

sub parse_datachecks {

my ($tap, $output_file, $by_species, $passed, $to_json) = @_;

my @tap_files;
if (-d $tap) {
@tap_files = map { $_->stringify } path($tap)->children;
} else {
push @tap_files, $tap;
}

my %results;
my $datacheck;
my $species;
my $test;
my %tests;
my $passed = 1;
my $by_species = 1;

foreach my $tap_file (@tap_files) {
my $tap = path($tap_file)->slurp;
my $parser = TAP::Parser->new( { tap => $tap } );

while (my $result = $parser->next) {
if ($result->is_comment) {
if ($result->as_string =~ /^# Subtest: (.+)/) {
$datacheck = $1;
}
if ($result->as_string =~ /^# Subtest: (.+)/) {
$datacheck = $1;
}
} elsif ($result->is_unknown) {
if ($result->as_string =~ /^\s+# Subtest: (.+)/) {
if ($result->as_string =~ /^\s+# Subtest: (.+)/) {
$species = $1;
%tests = ();
} elsif ($result->as_string =~ /^\s{8}((?:not ok|# No tests run).*)/) {
} elsif ($result->as_string =~ /^\s{8}((?:not ok|# No tests run).*)/) {
$test = $1;
$tests{$test} = [];
} elsif ($result->as_string =~ /^\s{8}((?:ok|.* # SKIP).*)/ && $passed) {
} elsif ($result->as_string =~ /^\s{8}((?:ok|.* # SKIP).*)/ && $passed) {
$test = $1;
$tests{$test} = [];
} elsif ($result->as_string =~ /^\s{8}#\s(\s*.*)/) {
} elsif ($result->as_string =~ /^\s{8}#\s(\s*.*)/) {
if (defined $test) {
push @{$tests{$test}}, $1;
} else {
Expand All @@ -93,12 +107,31 @@ sub run {
}

my $json = JSON->new->canonical->pretty->encode(\%results);
my $basename = path($output_dir)->basename;
my $output_file = $output_dir . '/' . $basename . '.json';
path($output_file)->parent->mkpath;
path($output_file)->spew($json)

if ($output_file) {
if($to_json){
my $basename = path($output_file)->basename;
$output_file = $output_file . '/' . $basename . '.json';
}
path($output_file)->parent->mkpath;
path($output_file)->spew($json)

} else {
print($json);
}











}


1;

6 changes: 5 additions & 1 deletion lib/Bio/EnsEMBL/DataCheck/Pipeline/DbDataChecks_conf.pm
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ sub default_options {
email => undef,
report_per_db => 0,
report_all => 0,

};
}

Expand Down Expand Up @@ -301,14 +302,17 @@ sub pipeline_analyses {
-max_retry_count => 0,
-rc_name => 'default',
-flow_into => {
'1' => ['?table_name=result', 'DataCheckTapToJson'],
'1' => ['?table_name=result', 'DataCheckTapToJson']
},
},
{
-logic_name => 'DataCheckTapToJson',
-module => 'Bio::EnsEMBL::DataCheck::Pipeline::DataCheckTapToJson',
-analysis_capacity => 10,
-max_retry_count => 0,
-parameters => {
output_dir => $self->o('output_dir'),
},
-rc_name => 'default',
},

Expand Down
77 changes: 7 additions & 70 deletions scripts/parse_results.pl
Original file line number Diff line number Diff line change
Expand Up @@ -60,90 +60,27 @@ =head1 OPTIONS
use Path::Tiny;
use Pod::Usage;
use TAP::Parser;
use Bio::EnsEMBL::DataCheck::Pipeline::DataCheckTapToJson;

my ($help, $tap, $output_file, $by_species, $passed);
my ($help, $tap);
my $output_file = '';
my $by_species = 0;
my $passed = 0;

GetOptions(
"help!", \$help,
"tap:s", \$tap,
"output_file:s", \$output_file,
"by_species!", \$by_species,
"passed!", \$passed,
"passed!", \$passed
);

pod2usage(1) if $help;

if (! defined $tap) {
die "Need a source of TAP data";
} elsif (! -e $tap) {
die "TAP source does not exist: $tap";
}

my @tap_files;
if (-d $tap) {
@tap_files = map { $_->stringify } path($tap)->children;
} else {
push @tap_files, $tap;
}

my %results;
my $datacheck;
my $species;
my $test;
my %tests;

foreach my $tap_file (@tap_files) {
my $tap = path($tap_file)->slurp;
my $parser = TAP::Parser->new( { tap => $tap } );

# to-do: extract test number, use that as key with test message and diag messages as lists.
while (my $result = $parser->next) {
if ($result->is_comment) {
# Unindented 'Subtest' comment is the name of the datacheck,
# indented 'Subtest's are the species/database.
if ($result->as_string =~ /^# Subtest: (.+)/) {
$datacheck = $1;
}
} elsif ($result->is_unknown) {
if ($result->as_string =~ /^\s+# Subtest: (.+)/) {
$species = $1;
%tests = ();
} elsif ($result->as_string =~ /^\s{8}((?:not ok|# No tests run).*)/) {
$test = $1;
$tests{$test} = [];
} elsif ($result->as_string =~ /^\s{8}((?:ok|.* # SKIP).*)/ && $passed) {
$test = $1;
$tests{$test} = [];
} elsif ($result->as_string =~ /^\s{8}#\s(\s*.*)/) {
if (defined $test) {
push @{$tests{$test}}, $1;
} else {
warn "Premature diagnostication: diagnostics incomplete ".
"for $species because they cannot be linked to a test";
}
} elsif ($result->as_string =~ /^\s{4}((?:ok|not ok))/) {
my $ok = $1 eq 'ok' ? 1 : 0;
if (!$ok || $passed) {
my %datacheck_tests = %tests;
if ($by_species) {
$results{$species}{$datacheck}{'ok'} = $ok;
$results{$species}{$datacheck}{'tests'} = \%datacheck_tests;
} else {
$results{$datacheck}{$species}{'ok'} = $ok;
$results{$datacheck}{$species}{'tests'} = \%datacheck_tests;
}
}
$test = undef;
}
}
}
}

my $json = JSON->new->canonical->pretty->encode(\%results);
Bio::EnsEMBL::DataCheck::Pipeline::DataCheckTapToJson::parse_datachecks($tap, $output_file, $by_species, $passed, 0);

if ($output_file) {
path($output_file)->parent->mkpath;
path($output_file)->spew($json)
} else {
say $json;
}

0 comments on commit 7915339

Please sign in to comment.