Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also .

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also .
  • 15 commits
  • 15 files changed
  • 0 commit comments
  • 4 contributors
Commits on Jun 11, 2015
@brianjohnhaas brianjohnhaas update 92d87d0
@brianjohnhaas brianjohnhaas ok
Merge branch 'master' of https://github.com/PASApipeline/pasapipeline

an updated upstream into a topic branch.
74418d3
Commits on Jun 13, 2015
@brianjohnhaas brianjohnhaas the Error, have 3prime partial protein when 3prime partials werent al…
…lowed! message was due to annotations or transcript structures being imported with coordinates that extend beyond the length of the scaffold
7fe6b78
@brianjohnhaas brianjohnhaas ok 3c8e88a
Commits on Aug 10, 2015
@mimarsh2 mimarsh2 Allow mysql connection through socket
The Perl DBI library allows for connections to the mysql server though a specific socket file by using the mysql_socket=<pathToFile> host name.  When calling the mysql binaries directly, this socket file needs to be passed using the -S argument rather than the -h for the hostname.
7d5dc8a
@mimarsh2 mimarsh2 Socket usage documentation
Add documentation to the configuration template file about using the mysql_socket option
11dff71
Commits on Aug 11, 2015
@brianjohnhaas brianjohnhaas Merge pull request #12 from mimarsh2/mysqlSocket
Mysql socket
7d6e2e6
Commits on Dec 07, 2015
@brianjohnhaas brianjohnhaas update to recognize latest Trinity accession values 69927de
Commits on Jan 20, 2016
@corburn corburn docs: fix Launch_PASA_pipeline.pl -x flag description a30894d
Commits on Oct 17, 2016
@brianjohnhaas brianjohnhaas Merge pull request #15 from corburn/Launch_PASA_pipeline.pl
docs: fix Launch_PASA_pipeline.pl -x flag description
ea25011
Commits on Dec 27, 2016
@yuragal yuragal Compatibility with GMAP versions 2016-05+
In gmap version 2016-05 new arguments [were introduced] (http://research-pub.gene.com/gmap/archive.html)
>Added separate flags in GMAP for controlling intron length of end introns separately from middle introns. Flag names are now --max-intronlength-middle (previously --intronlength) and --max-intronlength-ends

This patch is to evaluate gmap version and to prepare arguments of gmap call accordingly.
440ec0e
Commits on Dec 28, 2016
@brianjohnhaas brianjohnhaas Merge pull request #29 from yuragal/patch-1
Compatibility with GMAP versions 2016-05+
6f6f2e9
Commits on Feb 15, 2017
@brianjohnhaas brianjohnhaas compatible w/ latest mysql 2bf11be
Commits on Apr 21, 2017
@brianjohnhaas brianjohnhaas activate debug option 752ce7f
Commits on Jul 11, 2017
@brianjohnhaas brianjohnhaas more useful error messaging and troubleshooting options 8d31504
@@ -27,9 +27,13 @@ sub create_gene_obj {
## exons_ref should be end5's keyed to end3's for all exons.
my ($gene_struct_mod, $cdna_seq) = &get_cdna_seq ($exons_href, $sequence_ref);
+
+
my $cdna_seq_length = length $cdna_seq;
my $long_orf_obj = new Longest_orf();
+ #print STDERR "CDNA_SEQ: [$cdna_seq], length: $cdna_seq_length\n";
+
# establish long orf finding parameters.
$long_orf_obj->forward_strand_only();
if ($partial_info_href->{"5prime"}) {
@@ -43,24 +47,28 @@ sub create_gene_obj {
$long_orf_obj->get_longest_orf($cdna_seq);
my ($end5, $end3) = $long_orf_obj->get_end5_end3();
- print "CDS: $end5, $end3\n" if $SEE;
+ #print STDERR "*** CDS: $end5, $end3\n";# if $SEE;
my $gene_obj = &create_gene ($gene_struct_mod, $end5, $end3);
+ #print STDERR $gene_obj->toString();
+
$gene_obj->create_all_sequence_types($sequence_ref);
my $protein = $gene_obj->get_protein_sequence();
-
+ my $recons_cds = $gene_obj->get_CDS_sequence();
+ #print STDERR "reconsCDS: $recons_cds\n";
+
## check partiality
if ($protein) { # it is possible that we won't have any cds structure
if ($protein !~ /^M/) {
# this would require that we allowed for 5prime partials
unless ($partial_info_href->{"5prime"}) {
- confess "Error, have 5' partial protein when 5prime partials weren't allowed!\n$protein\n";
+ confess "Error, have 5' partial protein when 5prime partials weren't allowed!\n$protein\n$cdna_seq\n";
}
}
if ($protein !~ /\*$/) {
# this would require that we allowed for 3prime partials
unless ($partial_info_href->{"3prime"}) {
- confess "Error, have 3' partial protein when 3prime partials weren't allowed!\n$protein\n";
+ confess "Error, have 3' partial protein when 3prime partials weren't allowed!\n$protein\n$cdna_seq\n";
}
}
@@ -80,6 +88,9 @@ sub create_gene_obj {
####
sub get_cdna_seq {
my ($gene_struct, $assembly_seq_ref) = @_;
+
+ my $seq_length = length($$assembly_seq_ref);
+
my (@end5s) = sort {$a<=>$b} keys %$gene_struct;
my $strand = "?";
foreach my $end5 (@end5s) {
@@ -90,7 +101,7 @@ sub get_cdna_seq {
}
if ($strand eq "?") {
print Dumper ($gene_struct);
- die "ERROR: I can't determine what orientation the cDNA is in!\n";
+ confess "ERROR: I can't determine what orientation the cDNA is in!\n";
}
print NOTES "strand: $strand\n";
my $cdna_seq;
@@ -99,6 +110,11 @@ sub get_cdna_seq {
foreach my $end5 (@end5s) {
#print $end5;
my $end3 = $gene_struct->{$end5};
+
+ if ($end5 > $seq_length || $end3 > $seq_length) {
+ confess "Error, coords are out of bounds of sequence length: $seq_length:\n" . Dumper(\$gene_struct);
+ }
+
my ($coord1, $coord2) = sort {$a<=>$b} ($end5, $end3);
my $exon_seq = substr ($$assembly_seq_ref, $coord1 - 1, ($coord2 - $coord1 + 1));
$cdna_seq .= $exon_seq;
@@ -114,21 +130,26 @@ sub get_cdna_seq {
####
sub create_gene {
my ($gene_struct_mod, $cds_pointer_lend, $cds_pointer_rend) = @_;
+
+ #use Data::Dumper;
+ #print STDERR Dumper($gene_struct_mod) . "CDS: $cds_pointer_lend, $cds_pointer_rend\n";
+
my $strand = $gene_struct_mod->{strand};
- my @exons = @{$gene_struct_mod->{exons}};
+ my @exons = sort {$a->[0]<=>$b->[0]} @{$gene_struct_mod->{exons}};
if ($strand eq '-') {
@exons = reverse (@exons);
}
my $mRNA_pointer_lend = 1;
my $mRNA_pointer_rend = 0;
my $gene_obj = new Gene_obj();
foreach my $coordset_ref (@exons) {
- my ($coord1, $coord2) = @$coordset_ref;
+ my ($coord1, $coord2) = sort {$a<=>$b} @$coordset_ref;
my ($end5, $end3) = ($strand eq '+') ? ($coord1, $coord2) : ($coord2, $coord1);
my $exon_obj = new mRNA_exon_obj($end5, $end3);
my $exon_length = ($coord2 - $coord1 + 1);
$mRNA_pointer_rend = $mRNA_pointer_lend + $exon_length - 1;
## see if cds is within current cDNA range.
+ #print STDERR "mRNA coords: $mRNA_pointer_lend-$mRNA_pointer_rend\n";
if ( $cds_pointer_rend >= $mRNA_pointer_lend && $cds_pointer_lend <= $mRNA_pointer_rend) { #overlap
my $diff = $cds_pointer_lend - $mRNA_pointer_lend;
my $delta_lend = ($diff >0) ? $diff : 0;
View
@@ -35,8 +35,12 @@ sub index_GTF_gene_objs_from_GTF {
my $gene_id = $gene_obj->{TU_feat_name};
+
if ($seen{$gene_id}) {
- confess "Error, already processed gene: $gene_id\nearlier: " . $seen{$gene_id}->toString() . " but now as: " . $gene_obj->toString();
+ confess "Error, already processed gene: $gene_id\n"
+ . " here: " . $gene_obj->toString() . "\n"
+ . " and earlier: " . $seen{$gene_id}->toString();
+
}
$seen{$gene_id} = $gene_obj;
@@ -71,6 +75,7 @@ sub GTF_to_gene_objs {
my %gene_id_to_name;
my %gene_id_to_seq_name;
+ my %gene_id_to_gene_name;
open (my $fh, $gtf_filename) or die "Error, cannot open $gtf_filename";
while (<$fh>) {
@@ -94,24 +99,25 @@ sub GTF_to_gene_objs {
$gene_id_to_source{$gene_id} = $source;
- my $transcript_id;
- if ($annot =~ /transcript_id \"([^\"]+)\"/) {
- $transcript_id = $1;
- }
- else {
- #print STDERR "\nWARNING: cannot get transcript_id from $annot of line\n$_\n";
- next;
- }
+ $annot =~ /transcript_id \"([^\"]+)\"/ or confess "Error, cannot get transcript_id from $annot of line\n$_";
+ my $transcript_id = $1;
if ($annot =~ /name \"([^\"]+)\"/) {
my $name = $1;
$gene_id_to_name{$gene_id} = $name;
}
+
+ my $gene_name = "";
+ if ($annot =~ /gene_name \"([^\"]+)\"/) {
+ $gene_name = $1;
+ $gene_id_to_gene_name{$gene_id} = $gene_name;
+ }
+
# print "gene_id: $gene_id, transcrpt_id: $transcript_id, $type\n";
if ($type eq 'transcript' || $type eq 'gene') { next; } # capture by exon coordinates
-
+
if ($type eq 'CDS' || $type eq 'stop_codon' || $type eq 'start_codon') {
push (@{$gene_transcript_data{$seqname}->{$gene_id}->{$transcript_id}->{CDS}}, [$end5, $end3] );
@@ -120,11 +126,11 @@ sub GTF_to_gene_objs {
elsif ($type eq "exon" || $type =~ /UTR/) {
push (@{$gene_transcript_data{$seqname}->{$gene_id}->{$transcript_id}->{mRNA}}, [$end5, $end3] );
}
- elsif ($type =~ /rna/i) {
+ else {
## assuming noncoding feature
push (@{$noncoding_features{$seqname}->{$type}->{$gene_id}->{$transcript_id}}, [$end5, $end3] );
}
-
+
}
close $fh;
@@ -188,6 +194,9 @@ sub GTF_to_gene_objs {
else {
$gene_obj->{com_name} = $transcript_id;
}
+ if (my $gene_name = $gene_id_to_gene_name{$gene_id}) {
+ $gene_obj->{gene_name} = $gene_name;
+ }
$gene_obj->{asmbl_id} = $seqname;
$gene_obj->{source} = $source;
@@ -203,7 +212,6 @@ sub GTF_to_gene_objs {
foreach my $other_gene_obj (@gene_objs) {
$template_gene_obj->add_isoform($other_gene_obj);
}
- $template_gene_obj->refine_gene_object();
push (@top_gene_objs, $template_gene_obj);
# print $template_gene_obj->toString();
View
@@ -202,7 +202,10 @@ sub wait_for_all_threads_to_complete {
$self->_add_error_thread($thread);
$status = "ERROR";
}
- $self->report_thread_info($thread_id, $status);
+ $self->{thread_id_timing}->{end}->{$thread_id} = time();
+ if ($THREAD_MONITORING) {
+ $self->report_thread_info($thread_id, $status);
+ }
}
@{$self->{current_threads}} = (); # clear them out.
@@ -24,7 +24,8 @@
foreach my $gene_id (@gene_ids) {
my $gene_obj_ref = $gene_obj_indexer_href->{$gene_id};
-
+
+ print "// $asmbl_id, gene: $gene_id\n";
print $gene_obj_ref->toString();
}
@@ -84,6 +84,8 @@
foreach my $isoform ($gene_obj_ref, $gene_obj_ref->get_additional_isoforms()) {
my $isoform_id = $isoform->{Model_feat_name};
+ my $gene_id = $isoform->{TU_feat_name};
+
my $seq = "";
@@ -105,7 +107,16 @@
my $com_name = $isoform->{com_name} || "";
- print ">$isoform_id\n$seq\n";
+ my $gene_name = $isoform->{gene_name};
+ my $header = ">$isoform_id $gene_id";
+ if ($gene_name) {
+ $header .= " $gene_name ";
+ }
+ if ($com_name) {
+ $gene_name .= " $com_name";
+ }
+
+ print "$header\n$seq\n";
}
}
}
@@ -6,6 +6,7 @@
# server actively running MySQL
MYSQLSERVER=localhost # or server.com
+# Pass socket connections through Perl DBI syntax e.g. MYSQLSERVER=mysql_socket=/tmp/mysql.sock
# read-write username and password
MYSQL_RW_USER=xxxxxx
@@ -20,11 +20,11 @@ USE_PASA_DB_SETUP_HOOK=false
#####################################
# server actively running MySQL
-MYSQLSERVER=bhaas-lx
+MYSQLSERVER=localhost
# read-only username and password
-MYSQL_RO_USER=access
-MYSQL_RO_PASSWORD=access
+MYSQL_RO_USER=pasa_access
+MYSQL_RO_PASSWORD=pasa_access
# read-write username and password
MYSQL_RW_USER=access
@@ -35,7 +35,7 @@ MYSQL_RW_PASSWORD=access
# Web browser navigation settings: #########
############################################
-BASE_PASA_URL=http://bhaas-lx:8080/cgi-bin/
+BASE_PASA_URL=http://localhost:8080/cgi-bin/where_you_installed_pasa_cgi_root
#############################################
View
Binary file not shown.
@@ -110,7 +110,7 @@ CREATE TABLE alt_splice_tokens (
CREATE TABLE annotation_admin (
version_id int(11) NOT NULL auto_increment,
- date datetime NOT NULL default '0000-00-00 00:00:00',
+ date datetime NOT NULL,
PRIMARY KEY (version_id)
) ;
@@ -120,7 +120,7 @@ CREATE TABLE annotation_admin (
CREATE TABLE annotation_compare (
compare_id int(11) NOT NULL auto_increment,
- date datetime NOT NULL default '0000-00-00 00:00:00',
+ date datetime NOT NULL,
annotation_version int(10) unsigned NOT NULL default '0',
PRIMARY KEY (compare_id)
) ;
View
@@ -16,7 +16,8 @@ Using the backticks instead of single quotes in the db name pattern is mandatory
------
create user 'pasa_access' identified by 'pasa_access';
-grant select on *.* to 'pasa_access@%';
-grant all on *.* to 'pasa_write@%';
+create user 'pasa_write' identified by '...';
+
+grant select on *.* to 'pasa_access';
+grant all on *.* to 'pasa_write';
-grant all on *.* to 'pasa_write'@'localhost' identified by '...';
@@ -133,7 +133,7 @@
#
#
# // Jump-starting or prematurely terminating
-# -x flag, print cmds only, don\'t process anything. (useful to get indices for -x or -e opts below)
+# -x flag, print cmds only, don\'t process anything. (useful to get indices for -s or -e opts below)
# -s <int> pipeline index to start running at (avoid rerunning searches).
# -e <int> pipeline index where to stop running, and do not execute this entry.
#
@@ -355,6 +355,9 @@ sub get_TDN_component {
# trinity de novo 2.0.0+ style
$component = $1;
}
+ elsif ($cdna_acc =~ /^(TRINITY\S+_c\d+_g\d+)/) {
+ $component = $1;
+ }
else {
confess "Error, cannot extract component info from trinity de novo assembly: $cdna_acc";
}
@@ -34,6 +34,9 @@ use vars qw ($opt_h $opt_p $DEBUG $opt_M $opt_G $MIN_PERCENT_OVERLAP $MIN_PERCEN
$STOMP_HIGH_PERCENTAGE_OVERLAPPING_GENE $MIN_PERCENT_ALIGN_LENGTH
$MIN_PERCENT_LENGTH_FL_COMPARE $TRUST_FL_STATUS $CLUSTER_ID_ONLY);
+
+my $RESTRICT_SINGLE_CONTIG = undef;
+
my $CPU = 2;
&GetOptions ('M=s' => \$opt_M,
@@ -62,6 +65,8 @@ my $CPU = 2;
## hidden opts for debugging purposes
'CLUSTER_ID_ONLY=i' => \$CLUSTER_ID_ONLY, # the only cluster ID to process.
+ 'RESTRICT_SINGLE_CONTIG=s' => \$RESTRICT_SINGLE_CONTIG,
+
);
@@ -140,6 +145,12 @@ _EOH_
;
+### Hidden options
+###
+## --RESTRICT_SINGLE_CONTIG <string> genomic contig to restrict analysis to
+###
+
+
if ($opt_h) {die $usage;}
@@ -291,7 +302,9 @@ main: {
print "### There are $num_contigs genomic contigs which will be examined.\n";
foreach my $result (@results) {
my $asmbl_id = $result->[0];
-
+
+ if ($RESTRICT_SINGLE_CONTIG && $asmbl_id ne $RESTRICT_SINGLE_CONTIG) { next; }
+
$thread_helper->wait_for_open_thread();
my $thread = threads->create('process_contig', $asmbl_id);
@@ -308,7 +321,17 @@ main: {
if (@failures) {
## examine them... these are the same threads created above, use use the threads api to access info about them
## such as error messages
- die "Error, there were " . scalar(@failures) . " threads (contig jobs) that failed... See error messages above in order to troubleshoot further";
+
+ print STDERR "Error, there were " . scalar(@failures) . " threads (contig jobs) that failed... See error messages above in order to troubleshoot further";
+
+ foreach my $failed_thread (@failures) {
+ my $thread_id = $failed_thread->tid;
+ print STDERR "Failed thread ($thread_id) info:\n";
+ $thread_helper->report_thread_info($thread_id, "FAILED");
+ }
+
+ exit(1);
+
}
else {
## all good!
@@ -1444,7 +1467,7 @@ sub get_annotated_gene_models_on_contig {
my @models;
# get the annotated working models for that chromosome:
- my $query = "select a.gene_id, min(a.lend), max(a.rend), a.orient from annotation_store a where a.annotdb_asmbl_id = ? and annotation_version = ? group by a.gene_id\n";
+ my $query = "select a.gene_id, min(a.lend), max(a.rend), a.orient from annotation_store a where a.annotdb_asmbl_id = ? and annotation_version = ? group by a.gene_id, a.orient\n";
my @results = &Mysql_connect::do_sql_2D($dbproc, $query, $asmbl_id, $annot_version);
foreach my $result (@results) {
Oops, something went wrong.

No commit comments for this range