Skip to content

Commit

Permalink
Merge pull request #880 from Clinical-Genomics/feature/read_feature_file
Browse files Browse the repository at this point in the history
Feature/read feature file
  • Loading branch information
henrikstranneheim committed Jul 13, 2019
2 parents d0aaf52 + a33ba6b commit da0f402
Show file tree
Hide file tree
Showing 8 changed files with 697 additions and 348 deletions.
431 changes: 431 additions & 0 deletions lib/MIP/File/Format/Feature_file.pm

Large diffs are not rendered by default.

164 changes: 1 addition & 163 deletions lib/MIP/Vcfparser.pm
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,7 @@ BEGIN {
our $VERSION = 1.00;

# Functions and variables which can be optionally exported
our @EXPORT_OK =
qw{ build_interval_tree define_select_data_headers parse_feature_file_header set_vcf_header_info };
our @EXPORT_OK = qw{ build_interval_tree define_select_data_headers };
}

sub build_interval_tree {
Expand Down Expand Up @@ -147,165 +146,4 @@ q{##INFO=<ID=No_hgnc_symbol,Number=.,Type=String,Description="Clinically relevan
return %select_data;
}

sub set_vcf_header_info {

## Function : Adds arbitrary INFO fields to hash based on supplied header key
## unless header key is already defined
## Returns :
## Arguments: $feature_file_key => Feature file key
## : $feature_file_path => Feature file path
## : $header_key => Header key from feature file
## : $meta_data_href => Hash to store meta_data in {REF}
## : $position => Column position in supplied range file

my ($arg_href) = @_;

## Flatten argument(s)
my $feature_file_key;
my $feature_file_path;
my $header_key;
my $meta_data_href;
my $position;

my $tmpl = {
feature_file_key => {
defined => 1,
required => 1,
store => \$feature_file_key,
strict_type => 1,
},
feature_file_path => {
defined => 1,
required => 1,
store => \$feature_file_path,
strict_type => 1,
},
header_key => {
defined => 1,
required => 1,
store => \$header_key,
strict_type => 1,
},
meta_data_href => {
default => {},
defined => 1,
required => 1,
store => \$meta_data_href,
strict_type => 1,
},
position => {
defined => 1,
required => 1,
store => \$position,
strict_type => 1,
},
};

check( $tmpl, $arg_href, 1 ) or croak q{Could not parse arguments!};

## For not previously defined header keys in feature files definition
my $arbitrary_info_field =
q{##INFO=<ID=}
. $header_key
. q{,Number=.,Type=String,Description="String taken from }
. $feature_file_path . q{">};

## Add INFO from predefined entries
if ( defined $meta_data_href->{$feature_file_key}{$header_key} ) {

$meta_data_href->{present}{$header_key}{info} =
$meta_data_href->{$feature_file_key}{$header_key}{info};
}
else {
## Add arbitrary INFO field using feature file header key

$meta_data_href->{present}{$header_key}{info} = $arbitrary_info_field;
}

## Column position in supplied tsv feature file
$meta_data_href->{present}{$header_key}{column_order} =
$position;

return;
}

sub parse_feature_file_header {

## Function : Get feature file header
## Returns :
## Arguments: $feature_columns_ref => Feature columns to include {REF}
## : $feature_data_href => Feature file hash {REF}
## : $feature_file_key => Feature file key used to distinguish feature file(s) i.e., select or range
## : $feature_file_path => Feature file path
## : $header_line => Header line

my ($arg_href) = @_;

## Flatten argument(s)
my $feature_columns_ref;
my $feature_data_href;
my $feature_file_key;
my $feature_file_path;
my $header_line;

my $tmpl = {
feature_data_href => {
default => {},
defined => 1,
required => 1,
store => \$feature_data_href,
strict_type => 1,
},
feature_columns_ref => {
default => [],
defined => 1,
required => 1,
store => \$feature_columns_ref,
strict_type => 1,
},
feature_file_key => {
defined => 1,
required => 1,
store => \$feature_file_key,
strict_type => 1,
},
feature_file_path => {
defined => 1,
required => 1,
store => \$feature_file_path,
strict_type => 1,
},
header_line => {
defined => 1,
required => 1,
store => \$header_line,
strict_type => 1,
},
};

check( $tmpl, $arg_href, 1 ) or croak q{Could not parse arguments!};

use MIP::Vcfparser qw{ set_vcf_header_info };

## Split headers into array elements
my @headers = split $TAB, $header_line;

## Defines what headers to store from feature file
while ( my ( $feature_index, $feature_position ) = each @{$feature_columns_ref} ) {

## Alias
my $header_key = $headers[$feature_position];

set_vcf_header_info(
{
feature_file_key => $feature_file_key,
feature_file_path => $feature_file_path,
header_key => $header_key,
meta_data_href => $feature_data_href,
position => $feature_index,
}
);
}
return 1;
}
1;
117 changes: 117 additions & 0 deletions t/parse_feature_file_data.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
#!/usr/bin/env perl

use 5.026;
use Carp;
use charnames qw{ :full :short };
use English qw{ -no_match_vars };
use File::Basename qw{ dirname };
use File::Spec::Functions qw{ catdir };
use FindBin qw{ $Bin };
use open qw{ :encoding(UTF-8) :std };
use Params::Check qw{ allow check last_error };
use Test::More;
use utf8;
use warnings qw{ FATAL utf8 };

## CPANM
use autodie qw { :all };
use Modern::Perl qw{ 2017 };
use Readonly;

## MIPs lib/
use lib catdir( dirname($Bin), q{lib} );
use MIP::Constants qw{ $COMMA $SPACE $TAB };
use MIP::Test::Fixtures qw{ test_standard_cli };

my $VERBOSE = 1;
our $VERSION = 1.01;

$VERBOSE = test_standard_cli(
{
verbose => $VERBOSE,
version => $VERSION,
}
);

BEGIN {

use MIP::Test::Fixtures qw{ test_import };

### Check all internal dependency modules and imports
## Modules with import
my %perl_module = (
q{MIP::File::Format::Feature_file} => [qw{ parse_feature_file_data }],
q{MIP::Vcfparser} => [qw{ define_select_data_headers }],
q{MIP::Test::Fixtures} => [qw{ test_standard_cli }],
);

test_import( { perl_module_href => \%perl_module, } );
}

use MIP::File::Format::Feature_file qw{ parse_feature_file_data };
use MIP::Vcfparser qw{ define_select_data_headers };

diag( q{Test parse_feature_file_data from Feature_file.pm v}
. $MIP::File::Format::Feature_file::VERSION
. $COMMA
. $SPACE . q{Perl}
. $SPACE
. $PERL_VERSION
. $SPACE
. $EXECUTABLE_NAME );

## Constants
Readonly my $GENE_START => 1234;
Readonly my $GENE_STOP => 1235;
Readonly my $HGNC_ID_NR => 3;
Readonly my $HGNC_SYMBOL_NR => 4;
Readonly my $MATCHING_COLUMN => 3;
Readonly my $MATCHING_COLUMN_WITH_SPACE => 4;

## Given feature data line
my @data_features = ( 1, $GENE_START, $GENE_STOP, $HGNC_ID_NR, q{a gene} );
my $data_line = join $TAB, @data_features;
my @feature_columns = ( $HGNC_ID_NR, $HGNC_SYMBOL_NR );
my %feature_data = define_select_data_headers();
my $feature_file_type = q{select_file};
my $padding = 1;
my $feature_matching_column = $MATCHING_COLUMN;
my %tree;

my $is_ok = parse_feature_file_data(
{
data_line => $data_line,
feature_columns_ref => \@feature_columns,
feature_data_href => \%feature_data,
feature_file_type => $feature_file_type,
feature_matching_column => $feature_matching_column,
padding => $padding,
tree_href => \%tree,
}
);
my %expected_feature_data = ( $HGNC_ID_NR => $HGNC_ID_NR );

## Then return true if parsed
ok( $is_ok, q{Parsed feature file data } );
is( $feature_data{$HGNC_ID_NR}, $expected_feature_data{$HGNC_ID_NR},
q{Set feature data} );

## Given a data with whitespace
$feature_matching_column = $MATCHING_COLUMN_WITH_SPACE;
parse_feature_file_data(
{
data_line => $data_line,
feature_columns_ref => \@feature_columns,
feature_data_href => \%feature_data,
feature_file_type => $feature_file_type,
feature_matching_column => $feature_matching_column,
padding => $padding,
tree_href => \%tree,
}
);
$expected_feature_data{a_gene} = q{a_gene};

## Then underscore should replace whitespace in feature data
is( $feature_data{a_gene}, $expected_feature_data{a_gene}, q{Set feature data} );

done_testing();
18 changes: 10 additions & 8 deletions t/parse_feature_file_header.t
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ use MIP::Constants qw{ $COMMA $SPACE $TAB };
use MIP::Test::Fixtures qw{ test_standard_cli };

my $VERBOSE = 1;
our $VERSION = 1.00;
our $VERSION = 1.01;

$VERBOSE = test_standard_cli(
{
Expand All @@ -40,17 +40,19 @@ BEGIN {
### Check all internal dependency modules and imports
## Modules with import
my %perl_module = (
q{MIP::Vcfparser} => [qw{ define_select_data_headers parse_feature_file_header }],
q{MIP::Test::Fixtures} => [qw{ test_standard_cli }],
q{MIP::File::Format::Feature_file} => [qw{ parse_feature_file_header }],
q{MIP::Vcfparser} => [qw{ define_select_data_headers }],
q{MIP::Test::Fixtures} => [qw{ test_standard_cli }],
);

test_import( { perl_module_href => \%perl_module, } );
}

use MIP::Vcfparser qw{ define_select_data_headers parse_feature_file_header };
use MIP::File::Format::Feature_file qw{ parse_feature_file_header };
use MIP::Vcfparser qw{ define_select_data_headers };

diag( q{Test parse_feature_file_header from Vcfparser.pm v}
. $MIP::Vcfparser::VERSION
diag( q{Test parse_feature_file_header from Feature_file.pm v}
. $MIP::File::Format::Feature_file::VERSION
. $COMMA
. $SPACE . q{Perl}
. $SPACE
Expand All @@ -65,7 +67,7 @@ Readonly my $HGNC_SYMBOL_NR => 4;
## Given
my @feature_columns = ( $HGNC_ID_NR, $HGNC_SYMBOL_NR );
my %feature_data = define_select_data_headers();
my $feature_file_key = q{select_file};
my $feature_file_type = q{select_file};
my $feature_file_path = q{a_select_file_path};
my @headers = ( q{#chromosome}, qw{ gene_start gene_stop hgnc_id hgnc_symbol } );
my $header_line = join $TAB, @headers;
Expand All @@ -74,7 +76,7 @@ my $is_ok = parse_feature_file_header(
{
feature_columns_ref => \@feature_columns,
feature_data_href => \%feature_data,
feature_file_key => $feature_file_key,
feature_file_type => $feature_file_type,
feature_file_path => $feature_file_path,
header_line => $header_line,
}
Expand Down
Loading

0 comments on commit da0f402

Please sign in to comment.