diff --git a/gdump b/gdump new file mode 100755 index 000000000..067e2f359 --- /dev/null +++ b/gdump @@ -0,0 +1,422 @@ +#!/usr/bin/perl + +use strict; +use lib '../../lib'; +use CGI::Carp qw/fatalsToBrowser/; +use CGI qw/:standard :html3 escape *table *TR *td *pre/; +use Bio::DB::GFF; +use Bio::Graphics::FeatureFile; +use constant CONFIG => '/home4/www/sumo/conf/gbrowse.conf/01.human.conf'; + +my $data = Bio::Graphics::FeatureFile->new(-file=>CONFIG) or die "data $!"; +my @aggregators = split /\s+/,$data->setting(general => 'aggregators'); +my $seqfactory = Bio::DB::GFF->new(-adaptor => 'dbi::mysqlopt', + -user => 'nobody', + -pass => '', + -dsn => 'dbi:mysql:hg12:host=sumo', + -aggregators => \@aggregators, + ); + +# Get info for the possible features +my %fmethod = (); +my @tracks = $data->configured_types; + +for my $track (@tracks) { + next unless $data->setting($track=>'feature') and + $data->setting($track=>'key'); + my @low_level_features = split /\s+/,$data->setting($track=>'feature'); + $fmethod{$data->setting($track=>'key')}=\@low_level_features;; +} +my @results; +if(param()){ + if(param('links') and !param('recurse')){ + + print header('text/html'), + '
',"\n"; + } elsif(!param('recurse')) { print header('text/plain'); } + + @results = dumper($seqfactory, \%fmethod, param('recurse')); + exit unless param('recurse'); +} +my $recurse = join "\n",@results; + +print header(); +print start_html(-style=>{-src=>'/style/default.css'}); +print h1("GDump"); +&print_HTML(\%fmethod); +exit; + +########################### end of program ################################# + +sub print_HTML { +my $fmethod = shift; + +# HTML form +print start_multipart_form, + table({-width=>'100%',-cellspacing=>'1', -cellpadding=>'5'}, + TR({-class=>'searchtitle'}, + th({-width=>'33%'},'1. Sequences to Search'),th({-width =>'33%'},'2. Features'), + th({-width =>'33%'},'3. Options')), + TR({-class=>'searchbody'}, + td({-align=>'center'}, + table({-align =>'center'}, + TR(td({-align=>'LEFT'}, + em('Either'),'choose a pre-defined search:')), + TR(td(scrolling_list(-name=>'prefab', + -size=>5, + -value=>["NONE", + "all chromosomes", + ], + -default=> "NONE", + ),)), + TR(td(br, + em('Or'),'type in a list of sequence or chromosome names:', + br,'e.g.',' ', + '"1',' ','X"')), + TR(td(textarea(-name=>'list', + -rows=>7, + -cols=>21, + -wrap=>'off', + -value=>$recurse, + -force=>1 + ),)), + TR(td( + br,em('Or'), + 'upload a file with sequence or chromosome names:')), + TR(td(filefield(-size=>20, + -name=>'upload' + ),)), + ) #end mini table + ), + + td({-align=>'center'}, + scrolling_list(-name=>'feature', + -size=>21, + -multiple=>1, + -default => ['Gene Models'], + -values=>[sort (keys(%$fmethod))]) + ), + + td( + 'Dump As:', + radio_group(-name=>'dump', + -values=>['FastA','Flatfile'], + -default=>'FastA'),br, + + 'Compare Features Using:', + radio_group(-name=>'logic', + -values=>['AND','OR','XOR','NOT'], + -default=>'OR'),br, + + 'Coordinates Relative to:', + radio_group(-name=>'relative', + -values=>['Query','Chromosome'], + -default=>'Query'),br, + + checkbox_group(-name=>'DNA', + -values=>['Show DNA'], + -default=>['Show DNA']),br + + dd,textfield(-name=>'flank5', + -size=>4, + -maxlength=>4, + -default=>0),'bp 5\' flank', + + dd,checkbox_group(-name=>'flanked', + -values=>['feature'], + -default=>['feature']), + + dd,textfield(-name=>'flank3', + -size=>4,-maxlength=>4, + -default=>0),'bp 3\' flank', + br, + + checkbox_group(-name=>'links', + -values=>['As HTML'], + -default=>['As HTML']),br, + checkbox_group(-name=>'verbose', + -values=>['Verbose'], + -default=>['Verbose']),br, + + 'Match regex: ',textfield(-name=>'grep'),br, + checkbox_group(-name=>'recurse', + -values=>['Paste']), + 'results back into Sequences to Search box' + ) + ), + + TR({-class =>'searchtitle'}, + td(reset()), + td({-align =>'CENTER',},submit("DUMP")), + td(" ") + ), + + ), + endform; +return (\%fmethod); +} + +#****************************************************************************** +sub dumper { + my $seqfac = shift; + my $fmethod = shift; + my $return = shift; + + my $match = 0; + my $logic = param('logic'); + my @features = param('feature'); + + #hack for when 'AND' is selected, but there is only one feature selected + if(scalar(@features) == 1 && $logic eq 'AND'){param(-name=>'logic',-value=>'OR'); dumper($seqfactory); exit;} + + my %features = map{$_=>1} @features; + + #**************************************************************************** + # Get items ...... + my @items; + if (param('prefab') ne "NONE"){ + my $items = parse_prefab(param('prefab')); @items = @$items; + } # => param prefab must be none + + elsif (my $fh = param('upload')){ # Use uploaded file + while(<$fh>){ + @items = split /\s+|\s*,\s*/s, $_; # split on white sp, or comma + } + if (!@items){ + print h2("Error: File must contain sequences that are space or comma delimited."); + exit; + } + } # => param prefab is none, no file uploaded, => check list + + elsif (param('list')) { # get info from list + @items = split /\s+/s, param('list'); + } + + if (!@items){ + print h2("Error: no sequences selected! You must choose one of these:", + br," "x64,"a) select a pre-defined search",br, + " "x64,"b) type a list of sequences",br, + " "x64,"c) upload a file"); + exit; + } + + #**************************************************************************** + my @returns; + foreach my $item (@items){ + # foreach chosen seq, get the sequence from Bio::DB::GFF + my $segment; + my @prev_returns = @returns; + if($item =~ /(\w+):(\d+),(\d+)/){ + ($segment) = $seqfac->segment($1 , $2 => $3); + } else { + ($segment) = $seqfac->segment($item); + } + if (!defined($segment)) + {print h4("Error: No sequence found for \"$item\""); next;}; + + #************************************************************************** + if($logic eq 'OR' or $logic eq 'AND' or $logic eq 'XOR'){ + foreach my $feature (@features){ + my @get_features = @{$fmethod->{$feature}}; + foreach my $get_feature (@get_features){ + my $iterator = $segment->features(-type=>$get_feature,-iterator=>1); + while (my $i = $iterator->next_feature) { + if($logic eq 'OR'){ + $match = (param('dump') eq 'FastA') ? + asFasta($segment,$i,$return) : asTabbed($segment,$i,$return); + push @returns, $match; + } + elsif($logic eq 'AND' or $logic eq 'XOR') { + my $fstring = join " ", $i->features(); + my $show = 1; + my $xor = 0; +# foreach(@get_features){ + foreach(@features){ + unless($fstring =~ m!$_!){$show = 0; $xor++;} + } + if($show and $logic eq 'AND'){ + $match = (param('dump') eq 'FastA') ? + asFasta($segment,$i,$return) : asTabbed($segment,$i,$return); + push @returns, $match; + } + elsif( !$show and $logic eq 'XOR' and ($xor == $#get_features) ){ + $match = (param('dump') eq 'FastA') ? + asFasta($segment,$i,$return) : asTabbed($segment,$i,$return); + push @returns, $match; + } + } # end elsif ($logiv AND or XOR + } # end while + } # end of get_features + } # end foreach $feature + } # end of if ($logic...) + elsif($logic eq 'NOT'){ + my $iterator = $segment->features(-iterator=>1); + while (my $i = $iterator->next_feature) { + next if $features{$i->method}; + $match = (param('dump') eq 'FastA') ? asFasta($segment,$i,$return) + : asTabbed($segment,$i,$return); + push @returns, $match; + } + } + print h4("No data for $item") if @prev_returns ==@returns; + } # end of foreach $item + + #**************************************************************************** + # If there are no hits.... + if (!@returns){ + my $print_features = join ", ", @features; + if ($print_features){ + print h2("Results: No data for any features selected:",$print_features); + } + else {print h2("Error: no features selected!!");} + } + return @returns if $return; +}# end sub + + +############################################################################### +sub parse_prefab{ + my $prefab = shift; + my @items; + if ($prefab eq 'all chromosomes'){ + @items = qw(1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 X Y); + } + elsif ($prefab eq "all genes") { +# @items = $DB->fetch(-query=>'find predicted_gene'); + } + else { # for all confirmed genes and all genet. def. genes +# @items = $prefab eq "all confirmed genes"? $DB->fetch('Gene'=>'*') +# :$DB->fetch(-query=>'find Sequence Confirmed_by'); + } + return \@items; +} # end parse prefab + + +############################################################################### +sub asFasta { + my ($segment,$feature,$return) = @_; + $segment->absolute(1) if param('relative') eq 'Chromosome'; + $feature->absolute(1) if ((param('relative') eq 'Chromosome') or $return); + + my $grep = param('grep') or undef; + my $dna = param('DNA') + ? get_dna($segment,$feature)."\n" + : undef; + my $verbose = param('verbose')? $feature->type : undef; + my $query = $segment->refseq.':'.$feature->start.','.$feature->stop; + my $label = $feature; +# my $label = param('links') ? a({-href=>Object2URL($feature->group->name,$feature->group->class)},$feature) +# : $feature; + + my $header = join ' ',(">$query",$label,$verbose,"\n"); + if($grep and param('links')){ + markup(\$dna,$grep,'SPAN','match'); + } + + if($header.$dna =~ /($grep)/gs){ + next unless $1 eq $grep; #comes out as '/' sometimes... why? + if($return){ + return $query; + } + #this step is SLOW + justify(\$dna,80); + print $header.$dna and return $feature; + } + return undef; +}# end sub fasta + +############################################################################### +sub asTabbed { + my ($segment,$feature,$return) = @_; + $segment->absolute(1) if param('relative') eq 'Chromosome'; + $feature->absolute(1) if ((param('relative') eq 'Chromosome') or $return); + + my $grep = param('grep') or undef; + my $dna = param('DNA') + ? get_dna($segment,$feature) + : undef; + my $verbose = param('verbose')? "\t" . $feature->type : undef; + my $query = $segment->refseq.':'.$feature->start.','.$feature->stop; +# my $label = param('links') ? a({-href=>Object2URL($feature->group->name,$feature->group->class)},$feature) : $feature; + my $label = $feature; + + if($grep and param('links')){ + markup(\$dna,$grep,'SPAN','match'); + } + my $outstr = join "\t",($query,$label,$verbose,$dna),"\n"; + + if($outstr =~ /$grep/gs){ + return $query if $return; + return undef if $return; + print $outstr and return $feature; + } +} # end of sub asTabbed + +############################################################################### +sub justify { + my $dna = shift; + my $col = shift; + +#print b($$dna); + + my ($jdna,$count,$inside); + for my $i(0..length($$dna)){ + + $inside = 1 if(substr($$dna,$i,1) eq '<'); + $inside = 0 if(substr($$dna,$i-1,1) eq '>'); + $count++ unless $inside; + + $jdna .= substr($$dna,$i,1); + $jdna .= "\n" unless ($count % $col or $inside); + } + $$dna = $jdna; +} # end of sub justify + +############################################################################### +sub markup { + my $tmpstr = '---chopped---'; + my ($subject,$grep,$tag,$class) = @_; + my $c = $class ? " CLASS=\"$class\"" : undef; + my ($head,$tail) = ("<$tag$c>","$tag>"); + + return undef unless $$subject; + return undef unless $$subject =~ m!$grep!s; + + #FIND THEM ALL !!!!!!!!!! + my @greppeds = (); + while($$subject =~ s!($grep)!$tmpstr!s){push @greppeds,$1;}; + + #extract tags; + my @postpends; + foreach my $grepped (@greppeds){ + my $postpend = undef; + while($grepped =~ s!(<.+?>)!!s){my $t = $1; $postpend .= $t;} + push @postpends, $postpend; + } + + foreach my $g (@greppeds){ + my $p = shift @postpends; + $$subject =~ s!(.*?)$tmpstr!$1$head$g$tail$p!s; + } + return 1; +}# end of sub markup + +############################################################################### +sub get_dna { + my($segment,$feature) = @_; + my $flank5 = param('flank5') or 0; + my $flank3 = param('flank3') or 0; + + $flank5 -- if $flank5 > 0; + $flank3 -- if $flank3 > 0; + + my $dna5 =$segment->subseq(($feature->start - $flank5), + $feature->start)->dna if $flank5; + $dna5 = ''.$dna5.'' if param('links') and $dna5; + my $dna3 = $segment->subseq($feature->stop, + ($feature->stop + $flank3))->dna if $flank3; + $dna3 = ''.$dna3.'' if param('links') and $dna3; + my $dnaed = param('flanked') ? $feature->dna : '-'; + + return $dna5.$dnaed.$dna3; +}# end of sub get dna