diff --git a/gdump b/gdump new file mode 100755 index 000000000..067e2f359 --- /dev/null +++ b/gdump @@ -0,0 +1,422 @@ +#!/usr/bin/perl + +use strict; +use lib '../../lib'; +use CGI::Carp qw/fatalsToBrowser/; +use CGI qw/:standard :html3 escape *table *TR *td *pre/; +use Bio::DB::GFF; +use Bio::Graphics::FeatureFile; +use constant CONFIG => '/home4/www/sumo/conf/gbrowse.conf/01.human.conf'; + +my $data = Bio::Graphics::FeatureFile->new(-file=>CONFIG) or die "data $!"; +my @aggregators = split /\s+/,$data->setting(general => 'aggregators'); +my $seqfactory = Bio::DB::GFF->new(-adaptor => 'dbi::mysqlopt', + -user => 'nobody', + -pass => '', + -dsn => 'dbi:mysql:hg12:host=sumo', + -aggregators => \@aggregators, + ); + +# Get info for the possible features +my %fmethod = (); +my @tracks = $data->configured_types; + +for my $track (@tracks) { + next unless $data->setting($track=>'feature') and + $data->setting($track=>'key'); + my @low_level_features = split /\s+/,$data->setting($track=>'feature'); + $fmethod{$data->setting($track=>'key')}=\@low_level_features;; +} +my @results; +if(param()){ + if(param('links') and !param('recurse')){ + + print header('text/html'), + '
',"\n";
+  } elsif(!param('recurse')) { print header('text/plain'); }
+
+  @results = dumper($seqfactory, \%fmethod,  param('recurse'));
+  exit unless param('recurse');
+}
+my $recurse = join "\n",@results;
+
+print header();
+print start_html(-style=>{-src=>'/style/default.css'});
+print h1("GDump");
+&print_HTML(\%fmethod);
+exit;
+
+########################### end of program    #################################
+
+sub print_HTML {
+my $fmethod = shift;
+
+# HTML form
+print start_multipart_form,
+  table({-width=>'100%',-cellspacing=>'1', -cellpadding=>'5'},
+        TR({-class=>'searchtitle'},
+	   th({-width=>'33%'},'1. Sequences to Search'),th({-width =>'33%'},'2. Features'),
+	   th({-width =>'33%'},'3. Options')),
+        TR({-class=>'searchbody'},
+	   td({-align=>'center'},
+	      table({-align =>'center'},
+		    TR(td({-align=>'LEFT'}, 
+			  em('Either'),'choose a pre-defined search:')),
+		    TR(td(scrolling_list(-name=>'prefab',
+					 -size=>5,
+					 -value=>["NONE",
+					       "all chromosomes",
+					       ],
+					 -default=> "NONE",
+					),)),
+		    TR(td(br,
+			  em('Or'),'type in a list of sequence or chromosome names:',
+			  br,'e.g.',' ',
+			  '"1',' ','X"')),
+		    TR(td(textarea(-name=>'list',
+				   -rows=>7,
+				   -cols=>21,
+				   -wrap=>'off',
+				   -value=>$recurse,
+				   -force=>1
+				  ),)),
+		    TR(td(
+			  br,em('Or'),
+			  'upload a file with sequence or chromosome names:')),
+		    TR(td(filefield(-size=>20,
+				    -name=>'upload'
+				   ),)),
+		   ) #end mini table
+	     ),
+	
+           td({-align=>'center'},
+	      scrolling_list(-name=>'feature',
+			     -size=>21,
+			     -multiple=>1,
+			     -default => ['Gene Models'],
+                             -values=>[sort (keys(%$fmethod))])
+	     ),
+
+           td(
+	      'Dump As:',
+	      radio_group(-name=>'dump',
+			  -values=>['FastA','Flatfile'],
+			  -default=>'FastA'),br,
+
+	      'Compare Features Using:',
+	      radio_group(-name=>'logic',
+			  -values=>['AND','OR','XOR','NOT'],
+			  -default=>'OR'),br,
+
+              'Coordinates Relative to:',
+	      radio_group(-name=>'relative',
+			  -values=>['Query','Chromosome'],
+			  -default=>'Query'),br,
+	
+              checkbox_group(-name=>'DNA',
+			     -values=>['Show DNA'],
+			     -default=>['Show DNA']),br
+	
+	      dd,textfield(-name=>'flank5',
+			   -size=>4,
+			   -maxlength=>4,
+			   -default=>0),'bp 5\' flank',
+
+	      dd,checkbox_group(-name=>'flanked',
+				-values=>['feature'],
+				-default=>['feature']),
+	
+	      dd,textfield(-name=>'flank3',
+			   -size=>4,-maxlength=>4,
+			   -default=>0),'bp 3\' flank',
+	      br,
+
+              checkbox_group(-name=>'links',
+			     -values=>['As HTML'],
+			     -default=>['As HTML']),br,
+              checkbox_group(-name=>'verbose',
+			     -values=>['Verbose'],
+			     -default=>['Verbose']),br,
+
+	      'Match regex: ',textfield(-name=>'grep'),br,
+	      checkbox_group(-name=>'recurse',
+			     -values=>['Paste']),
+	      'results back into Sequences to Search box'
+	      )
+        ),
+
+        TR({-class =>'searchtitle'},
+	   td(reset()),
+	   td({-align =>'CENTER',},submit("DUMP")),
+	   td(" ")
+	  ),
+
+       ),
+  endform;
+return (\%fmethod);
+}
+
+#******************************************************************************
+sub dumper {
+  my $seqfac  = shift;
+  my $fmethod = shift;
+  my $return  = shift;
+
+  my $match  = 0;
+  my $logic  = param('logic');
+  my @features = param('feature');
+
+  #hack for when 'AND' is selected, but there is only one feature selected
+  if(scalar(@features) == 1 && $logic eq 'AND'){param(-name=>'logic',-value=>'OR'); dumper($seqfactory); exit;}
+
+  my %features = map{$_=>1} @features;
+
+  #****************************************************************************
+  # Get items ......
+  my @items;
+  if (param('prefab') ne "NONE"){
+    my $items = parse_prefab(param('prefab')); @items = @$items;
+  } # => param prefab must be none
+
+  elsif (my $fh = param('upload')){   # Use uploaded file
+    while(<$fh>){
+      @items = split /\s+|\s*,\s*/s, $_;  # split on white sp, or comma
+    }
+    if (!@items){
+      print h2("Error: File must contain sequences that are space or comma delimited.");
+      exit;
+    }
+  } # => param prefab is none, no file uploaded, => check list
+
+  elsif (param('list')) {               # get info from list
+    @items = split /\s+/s, param('list');
+  }
+
+  if (!@items){
+    print h2("Error: no sequences selected! You must choose one of these:",
+	     br," "x64,"a) select a pre-defined search",br,
+	     " "x64,"b) type a list of sequences",br,
+	     " "x64,"c) upload a file");
+    exit;
+  }
+
+  #****************************************************************************
+  my @returns;
+  foreach my $item (@items){
+    # foreach chosen seq, get the sequence from Bio::DB::GFF
+    my $segment;
+    my @prev_returns = @returns;
+    if($item =~ /(\w+):(\d+),(\d+)/){
+      ($segment) = $seqfac->segment($1 , $2 => $3);
+    } else {
+      ($segment) = $seqfac->segment($item);
+    }
+    if (!defined($segment))
+      {print h4("Error:  No sequence found for \"$item\""); next;};
+
+    #**************************************************************************
+    if($logic eq 'OR' or $logic eq 'AND' or $logic eq 'XOR'){
+      foreach my $feature (@features){
+	my @get_features = @{$fmethod->{$feature}};
+	foreach my $get_feature (@get_features){
+	my $iterator = $segment->features(-type=>$get_feature,-iterator=>1);
+	while (my $i = $iterator->next_feature) {
+	  if($logic eq 'OR'){
+	    $match = (param('dump') eq 'FastA') ?
+	      asFasta($segment,$i,$return) : asTabbed($segment,$i,$return);
+	    push @returns, $match;
+	  } 
+	  elsif($logic eq 'AND' or $logic eq 'XOR') {
+	    my $fstring = join " ", $i->features();
+            my $show = 1;
+	    my $xor  = 0;
+#	    foreach(@get_features){
+	    foreach(@features){
+	      unless($fstring =~ m!$_!){$show = 0; $xor++;}
+	    }
+	    if($show and $logic eq 'AND'){
+	      $match = (param('dump') eq 'FastA') ? 
+		asFasta($segment,$i,$return) : asTabbed($segment,$i,$return);
+	      push @returns, $match;
+	    } 
+	    elsif( !$show and $logic eq 'XOR' and ($xor == $#get_features) ){
+	      $match = (param('dump') eq 'FastA') ?
+		asFasta($segment,$i,$return) : asTabbed($segment,$i,$return);
+	      push @returns, $match;
+	    }
+	  } # end elsif ($logiv AND or XOR
+	} # end while
+      } # end of get_features
+      } # end foreach $feature
+    } # end of if ($logic...)
+    elsif($logic eq 'NOT'){
+      my $iterator = $segment->features(-iterator=>1);
+      while (my $i = $iterator->next_feature) {
+	next if $features{$i->method};
+	$match = (param('dump') eq 'FastA') ? asFasta($segment,$i,$return)
+	  : asTabbed($segment,$i,$return);
+	push @returns, $match;
+      }
+    }
+    print h4("No data for $item") if @prev_returns ==@returns;
+  } # end of foreach $item
+
+  #****************************************************************************
+  # If there are no hits....
+  if (!@returns){
+    my $print_features = join ", ", @features;
+    if ($print_features){
+      print h2("Results: No data for any features selected:",$print_features);
+    }
+    else {print h2("Error: no features selected!!");}
+  }
+  return @returns if $return;
+}# end sub
+
+
+###############################################################################
+sub parse_prefab{
+  my $prefab = shift;
+  my @items;
+  if ($prefab eq 'all chromosomes'){
+    @items = qw(1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 X Y);
+  }
+  elsif ($prefab eq "all genes") {
+#    @items = $DB->fetch(-query=>'find predicted_gene');
+  }
+    else {             # for all confirmed genes and all genet. def. genes
+#      @items = $prefab eq "all confirmed genes"? $DB->fetch('Gene'=>'*')
+#	:$DB->fetch(-query=>'find Sequence Confirmed_by');
+    }
+  return \@items;
+} # end parse prefab
+
+
+###############################################################################
+sub asFasta {
+  my ($segment,$feature,$return) = @_;
+  $segment->absolute(1) if param('relative') eq 'Chromosome';
+  $feature->absolute(1) if ((param('relative') eq 'Chromosome') or $return);
+
+  my $grep    = param('grep') or undef;
+  my $dna     = param('DNA')
+                ? get_dna($segment,$feature)."\n"
+		: undef;
+  my $verbose = param('verbose')? $feature->type : undef;
+  my $query   = $segment->refseq.':'.$feature->start.','.$feature->stop;
+  my $label   = $feature;
+#  my $label   = param('links')  ? a({-href=>Object2URL($feature->group->name,$feature->group->class)},$feature)
+#                                : $feature;
+
+  my $header = join ' ',(">$query",$label,$verbose,"\n");
+  if($grep and param('links')){
+    markup(\$dna,$grep,'SPAN','match');
+  }
+
+  if($header.$dna =~ /($grep)/gs){
+    next unless $1 eq $grep;  #comes out as '/' sometimes... why?
+    if($return){
+      return $query;
+    }
+    #this step is SLOW
+    justify(\$dna,80);
+    print $header.$dna and return $feature;
+  }
+  return undef;
+}# end sub fasta
+
+###############################################################################
+sub asTabbed {
+  my ($segment,$feature,$return) = @_;
+  $segment->absolute(1) if param('relative') eq 'Chromosome';
+  $feature->absolute(1) if ((param('relative') eq 'Chromosome') or $return);
+
+  my $grep    = param('grep') or undef;
+  my $dna     = param('DNA')
+                ? get_dna($segment,$feature)
+                : undef;
+  my $verbose = param('verbose')? "\t" . $feature->type : undef;
+  my $query   = $segment->refseq.':'.$feature->start.','.$feature->stop;
+#  my $label   = param('links')  ? a({-href=>Object2URL($feature->group->name,$feature->group->class)},$feature) : $feature;
+  my $label   = $feature;
+
+  if($grep and param('links')){
+    markup(\$dna,$grep,'SPAN','match');
+  }
+  my $outstr = join "\t",($query,$label,$verbose,$dna),"\n";
+
+  if($outstr =~ /$grep/gs){
+    return $query if $return;
+    return undef if $return;
+    print $outstr and return $feature;
+  }
+} # end of sub asTabbed
+
+###############################################################################
+sub justify {
+  my $dna = shift;
+  my $col = shift;
+
+#print b($$dna);
+
+  my ($jdna,$count,$inside);
+  for my $i(0..length($$dna)){
+
+    $inside = 1 if(substr($$dna,$i,1) eq '<');
+    $inside = 0 if(substr($$dna,$i-1,1) eq '>');
+    $count++      unless $inside;
+
+    $jdna .= substr($$dna,$i,1);
+    $jdna .= "\n" unless ($count % $col or $inside);
+  }
+  $$dna = $jdna;
+} # end of sub justify
+
+###############################################################################
+sub markup {
+  my $tmpstr = '---chopped---';
+  my ($subject,$grep,$tag,$class) = @_;
+  my $c = $class ? " CLASS=\"$class\"" : undef;
+  my ($head,$tail) = ("<$tag$c>","");
+
+  return undef unless $$subject;
+  return undef unless $$subject =~ m!$grep!s;
+
+  #FIND THEM ALL !!!!!!!!!!
+  my @greppeds = ();
+  while($$subject =~ s!($grep)!$tmpstr!s){push @greppeds,$1;};
+
+  #extract tags;
+  my @postpends;
+  foreach my $grepped (@greppeds){
+    my $postpend = undef;
+    while($grepped =~ s!(<.+?>)!!s){my $t = $1; $postpend .= $t;}
+    push @postpends, $postpend;
+  }
+
+  foreach my $g (@greppeds){
+    my $p = shift @postpends;
+    $$subject =~ s!(.*?)$tmpstr!$1$head$g$tail$p!s;
+  }
+  return 1;
+}# end of sub markup
+
+###############################################################################
+sub get_dna {
+  my($segment,$feature) = @_;
+  my $flank5 = param('flank5') or 0;
+  my $flank3 = param('flank3') or 0;
+
+  $flank5 -- if $flank5 > 0;
+  $flank3 -- if $flank3 > 0;
+
+  my $dna5 =$segment->subseq(($feature->start - $flank5),
+			     $feature->start)->dna if $flank5;
+  $dna5 = ''.$dna5.'' if param('links') and $dna5;
+  my $dna3 = $segment->subseq($feature->stop, 
+			      ($feature->stop  + $flank3))->dna if $flank3;
+  $dna3 = ''.$dna3.'' if param('links') and $dna3;
+  my $dnaed   = param('flanked') ? $feature->dna : '-';
+  
+  return $dna5.$dnaed.$dna3;
+}# end of sub get dna