Skip to content

Commit

Permalink
mostly rewrite RandomSeq.pm, rename to FakeFasta.pm
Browse files Browse the repository at this point in the history
contains methods for creating large fasta files (containing specific
sequences of specific lengths) for testing.
  • Loading branch information
rbuels committed Feb 9, 2012
1 parent f01e351 commit ab3467f
Show file tree
Hide file tree
Showing 4 changed files with 142 additions and 73 deletions.
30 changes: 30 additions & 0 deletions tests/perl_tests/fake_fasta.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
use strict;
use warnings;

use Test::More;
use File::Temp;

use lib 'tests/perl_tests/lib';

use_ok( 'FakeFasta' );
is( length FakeFasta->random_seq(10), 10 );

my $t = File::Temp->new; $t->close;
my $spec = [ { id => 'FooBar1', desc => ' a real fooish bar', length => 20 },
{ id => 'Sleepy_me', desc => ' real sleepy', length => 500 },
];
FakeFasta->fkfa_to_fasta(
out_file => "$t",
spec => $spec,
);
like( slurp($t),
qr/^>FooBar1 a real fooish bar\n[ACTGN]{20}\n>Sleepy_me real sleepy\n[ACTGN]{500}\n$/,
'got the right fasta',
);

is_deeply( FakeFasta->fasta_to_fkfa( $t ), $spec, 'got the right spec back out' );

done_testing;

###########
sub slurp { open my $f, '<', shift; local $/; <$f> }
112 changes: 112 additions & 0 deletions tests/perl_tests/lib/FakeFasta.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
=head1 NAME
FakeFasta - supporting module for making random sequences, and files full of them
=head1 METHODS
=cut

package FakeFasta;

use strict;
use warnings;

use Carp;
use JSON 2 ();

=head2 random_seq( $length )
Return a random string of A,C,T,G,N of the given length.
=cut

sub random_seq {
my ( $self, $length ) = @_;
my $rand = '0' x $length;
$rand =~ s/ . / [qw( A C T G N )]->[ int rand 5 ] /xge;
return $rand;
}

=head2 fasta_to_fkfa( $file )
Given a FASTA file, examine it and generate a fkfa (fake FASTA)
description for it, which can be used by fkfa_to_fasta to regenerate
the file, almost the same but with random sequence.
Returns a hashref specification of the fkfa, as:
[
{ id => 'FooSeq1', length => 1234, desc => 'blah blah' },
...
]
=cut

sub fasta_to_fkfa {
my ( $self, $file ) = @_;

my @spec;
open my $f, '<', $file or die "$! reading $file";
my $curr_entry;
local $_; #< unlike for, while does not automatically localize $_
while( <$f> ) {
if( /^\s*>\s*(\S+)(.+)/ ) {
push @spec, $curr_entry = { id => $1, desc => $2, length => 0 };
chomp $curr_entry->{desc};
}
else {
s/\s//g;
if( $curr_entry ) {
$curr_entry->{length} += length;
}
else { die 'parse error' }
}
}

return \@spec;
}

=head2 fkfa_to_fasta( %args )
Given a .fkfa (fake FASTA) description, expand it to a full FASTA
file. Returns a subroutine ref that, when called repeatedly, returns
chunks of the FASTA file output.
Example:
fkfa_to_fasta( spec => \@fkfa_spec, out_file => '/path/to/output.fasta' );
# OR
fkfa_to_fasta( in_file => 'path/to/file.fkfa' );
=cut

sub fkfa_to_fasta {
my ( $self, %args ) = @_;

# slurp and decode the in_file if present
if( $args{in_file} ) {
open my $f, '<', $args{in_file} or die "$! reading '$args{in_file}'";
local $/;
$args{spec} = JSON::from_json( scalar <$f> );
}

croak "must provide a spec argument" unless $args{spec};
croak "must provide an out_file argument" unless $args{out_file};

# now open our output file and make our sequences
open my $out_fh, '>', $args{out_file}
or confess "$! writing '$args{out_file}'";

for my $seq ( @{$args{spec}} ) {
$out_fh->print(
'>',
$seq->{id},
$seq->{desc} || '',
"\n",
$self->random_seq( $seq->{length} ),
"\n"
);
}
}

1;
55 changes: 0 additions & 55 deletions tests/perl_tests/lib/RandomSeq.pm

This file was deleted.

18 changes: 0 additions & 18 deletions tests/perl_tests/random_seq.t

This file was deleted.

0 comments on commit ab3467f

Please sign in to comment.