-
Notifications
You must be signed in to change notification settings - Fork 199
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
mostly rewrite RandomSeq.pm, rename to FakeFasta.pm
contains methods for creating large fasta files (containing specific sequences of specific lengths) for testing.
- Loading branch information
Showing
4 changed files
with
142 additions
and
73 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
use strict; | ||
use warnings; | ||
|
||
use Test::More; | ||
use File::Temp; | ||
|
||
use lib 'tests/perl_tests/lib'; | ||
|
||
use_ok( 'FakeFasta' ); | ||
is( length FakeFasta->random_seq(10), 10 ); | ||
|
||
my $t = File::Temp->new; $t->close; | ||
my $spec = [ { id => 'FooBar1', desc => ' a real fooish bar', length => 20 }, | ||
{ id => 'Sleepy_me', desc => ' real sleepy', length => 500 }, | ||
]; | ||
FakeFasta->fkfa_to_fasta( | ||
out_file => "$t", | ||
spec => $spec, | ||
); | ||
like( slurp($t), | ||
qr/^>FooBar1 a real fooish bar\n[ACTGN]{20}\n>Sleepy_me real sleepy\n[ACTGN]{500}\n$/, | ||
'got the right fasta', | ||
); | ||
|
||
is_deeply( FakeFasta->fasta_to_fkfa( $t ), $spec, 'got the right spec back out' ); | ||
|
||
done_testing; | ||
|
||
########### | ||
sub slurp { open my $f, '<', shift; local $/; <$f> } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
=head1 NAME | ||
FakeFasta - supporting module for making random sequences, and files full of them | ||
=head1 METHODS | ||
=cut | ||
|
||
package FakeFasta; | ||
|
||
use strict; | ||
use warnings; | ||
|
||
use Carp; | ||
use JSON 2 (); | ||
|
||
=head2 random_seq( $length ) | ||
Return a random string of A,C,T,G,N of the given length. | ||
=cut | ||
|
||
sub random_seq { | ||
my ( $self, $length ) = @_; | ||
my $rand = '0' x $length; | ||
$rand =~ s/ . / [qw( A C T G N )]->[ int rand 5 ] /xge; | ||
return $rand; | ||
} | ||
|
||
=head2 fasta_to_fkfa( $file ) | ||
Given a FASTA file, examine it and generate a fkfa (fake FASTA) | ||
description for it, which can be used by fkfa_to_fasta to regenerate | ||
the file, almost the same but with random sequence. | ||
Returns a hashref specification of the fkfa, as: | ||
[ | ||
{ id => 'FooSeq1', length => 1234, desc => 'blah blah' }, | ||
... | ||
] | ||
=cut | ||
|
||
sub fasta_to_fkfa { | ||
my ( $self, $file ) = @_; | ||
|
||
my @spec; | ||
open my $f, '<', $file or die "$! reading $file"; | ||
my $curr_entry; | ||
local $_; #< unlike for, while does not automatically localize $_ | ||
while( <$f> ) { | ||
if( /^\s*>\s*(\S+)(.+)/ ) { | ||
push @spec, $curr_entry = { id => $1, desc => $2, length => 0 }; | ||
chomp $curr_entry->{desc}; | ||
} | ||
else { | ||
s/\s//g; | ||
if( $curr_entry ) { | ||
$curr_entry->{length} += length; | ||
} | ||
else { die 'parse error' } | ||
} | ||
} | ||
|
||
return \@spec; | ||
} | ||
|
||
=head2 fkfa_to_fasta( %args ) | ||
Given a .fkfa (fake FASTA) description, expand it to a full FASTA | ||
file. Returns a subroutine ref that, when called repeatedly, returns | ||
chunks of the FASTA file output. | ||
Example: | ||
fkfa_to_fasta( spec => \@fkfa_spec, out_file => '/path/to/output.fasta' ); | ||
# OR | ||
fkfa_to_fasta( in_file => 'path/to/file.fkfa' ); | ||
=cut | ||
|
||
sub fkfa_to_fasta { | ||
my ( $self, %args ) = @_; | ||
|
||
# slurp and decode the in_file if present | ||
if( $args{in_file} ) { | ||
open my $f, '<', $args{in_file} or die "$! reading '$args{in_file}'"; | ||
local $/; | ||
$args{spec} = JSON::from_json( scalar <$f> ); | ||
} | ||
|
||
croak "must provide a spec argument" unless $args{spec}; | ||
croak "must provide an out_file argument" unless $args{out_file}; | ||
|
||
# now open our output file and make our sequences | ||
open my $out_fh, '>', $args{out_file} | ||
or confess "$! writing '$args{out_file}'"; | ||
|
||
for my $seq ( @{$args{spec}} ) { | ||
$out_fh->print( | ||
'>', | ||
$seq->{id}, | ||
$seq->{desc} || '', | ||
"\n", | ||
$self->random_seq( $seq->{length} ), | ||
"\n" | ||
); | ||
} | ||
} | ||
|
||
1; |
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.