Permalink
Browse files

AP11-193 Fix for wrong GFF files

  • Loading branch information...
1 parent 487e374 commit 5616faeb849869ffa7ace9f90915bf08a2480d4a @PrzemyslawP PrzemyslawP committed Apr 2, 2013
@@ -1,14 +1,22 @@
package au.org.intersect.samifier.domain;
+import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
import java.util.Map;
import java.util.Set;
+import org.apache.log4j.Logger;
+
+import au.org.intersect.samifier.parser.GenomeFileParsingException;
+
public class Genome {
private Map<String, GeneInfo> genes;
-
+ // Class-wide logger; declared final because nothing in the visible code reassigns it.
+ private static final Logger LOG = Logger.getLogger(Genome.class);
/** Creates a genome whose gene map is initially empty. */
public Genome() {
genes = new HashMap<String, GeneInfo>();
}
@@ -50,4 +58,29 @@ public String toString() {
return out.toString();
}
+ /**
+  * Checks, gene by gene, that the gene's locations follow each other without gaps or overlaps,
+  * starting at the gene's own start position.
+  * <p>
+  * Locations are visited in the order returned by {@code getLocations()}; this method does not
+  * sort them (presumably the collection is already ordered by start position - TODO confirm).
+  * A gap between the expected position and the start of the next location is logged as a warning
+  * and filled with a new {@code GeneSequence} created with {@code false} as its second argument
+  * (presumably the "coding" flag, per the warning text - TODO confirm).
+  *
+  * @throws GenomeFileParsingException if a location starts before the position expected after
+  *         the previous location (or before the gene start for the first location)
+  */
+ public void verify() throws GenomeFileParsingException{
+     for (GeneInfo genInfo : genes.values()) {
+         // Next position that a location is expected to start at.
+         int start = genInfo.getStart();
+         // Gap fillers are collected here and added after the loop, so the collection being
+         // iterated is not modified during iteration.
+         List <GeneSequence> newEntries = new ArrayList<GeneSequence>();
+         for (GeneSequence sequence : genInfo.getLocations()) {
+             if (sequence.getStart() < start) {
+                 // Report the inclusive range lying before the expected start; "start" itself is
+                 // the first position not yet covered, so the range ends at start - 1.
+                 String errorMessage = "Gene " + genInfo.getId() + " in chromosome " + genInfo.getChromosome() + " has overlapping part at position (" + sequence.getStart() +","+ (start - 1) + "). Cannot continue.";
+                 LOG.error(errorMessage);
+                 throw new GenomeFileParsingException(errorMessage);
+             }
+             if (sequence.getStart() != start) {
+                 LOG.warn("Gene " + genInfo.getId() + " in chromosome " + genInfo.getChromosome() + " has missing part at position (" + start +","+(sequence.getStart() - 1)+ "). Assuming non coding sequence.");
+                 GeneSequence seq = new GeneSequence(sequence.getParentId(), false, start, sequence.getStart() - 1, sequence.getDirection());
+                 newEntries.add(seq);
+             }
+             // The following location must begin right after this one ends.
+             start = sequence.getStop() + 1;
+         }
+         for (GeneSequence location : newEntries) {
+             genInfo.addLocation(location);
+         }
+     }
+ }
+
+
}
@@ -10,7 +10,7 @@
private int startIndex;
private int length;
private String direction;
-
+ private String chromosome;
private BigDecimal confidenceScore;
private String frame;
@@ -24,19 +24,20 @@ public ProteinLocation(String name, int startIndex, int length,
/**
 * Creates a location that has a confidence score but no virtual protein name and no chromosome.
 * Delegates to the eight-argument constructor, passing {@code null} for the two missing values.
 */
public ProteinLocation(String name, int startIndex, int length,
String direction, String frame, BigDecimal confidenceScore) {
- this(name, startIndex, length, direction, frame, null, null);
+ // Forward the caller's confidenceScore; passing null here silently discarded it.
+ this(name, startIndex, length, direction, frame, confidenceScore, null, null);
}
public ProteinLocation(String name, int startIndex, int length,
String direction, String frame, BigDecimal confidenceScore,
- String virtualProteinName) {
+ String virtualProteinName, String chromosome) {
this.name = name;
this.direction = direction;
this.frame = frame;
this.confidenceScore = confidenceScore;
this.startIndex = startIndex;
this.length = length;
this.virtualProteinNames = new HashSet<String>();
+ this.chromosome = chromosome;
if (virtualProteinName != null) {
virtualProteinNames.add(virtualProteinName);
}
@@ -93,15 +94,21 @@ public void update(ProteinLocation other) {
virtualProteinNames.addAll(other.getVirtualProteinNames());
}
+ public String getChromosome() { // returns the chromosome field as last set; may be null
+ return chromosome;
+ }
/**
 * Orders locations by ascending start index.
 * Only the start index is examined, while equals() compares the toString() form, so two
 * locations that compare as 0 here are not necessarily equal.
 *
 * @param o the location to compare against
 * @return a negative value, zero, or a positive value when this start index is smaller than,
 *         equal to, or greater than the other one
 */
@Override
public int compareTo(ProteinLocation o) {
// Explicit comparison rather than subtraction, which overflows for operands far apart.
int mine = getStartIndex();
int theirs = o.getStartIndex();
return mine < theirs ? -1 : (mine > theirs ? 1 : 0);
}
/**
 * Two locations are equal when the other object is a ProteinLocation and both produce the
 * same toString() text.
 * NOTE(review): no hashCode() override is visible in this excerpt - confirm one exists and
 * is derived from the same text.
 */
@Override
public boolean equals(Object other) {
- if (!(other instanceof ProteinLocation) ) return false;
- ProteinLocation otherLocation = (ProteinLocation) other;
+ if (!(other instanceof ProteinLocation)) return false; // also rejects null
return this.toString().equals(other.toString()); // equality is defined by the string form
}
+ public void setChromosome(String chromosome) { // replaces the chromosome given at construction
+ this.chromosome = chromosome;
+ }
+
}
@@ -81,6 +81,7 @@ private Genome doParsing(File genomeFile) throws IOException,
reader.close();
}
}
+ genome.verify();
return genome;
}
@@ -28,24 +28,15 @@ public void testParsingMascotPeptideSearchResultsDatFormat()
List<PeptideSearchResult> list = peptideSearchResultsParser.parseResults(mascotFile);
// The parser should find seven results (previously sixteen, the same as the mzid file)
- assertEquals("Parser should find seven ", 16, list.size());
System.out.println(list.toString());
+ assertEquals("Parser should find seven ", 7, list.size());
assertTrue(list.contains(new PeptideSearchResult("q21_p1", "EFGILK", "KPYK1_YEAST", 469, 474 , new BigDecimal("25.95"))));
assertTrue(list.contains(new PeptideSearchResult("q131_p1", "SVIDNAR", "KPYK1_YEAST", 62, 68 , new BigDecimal("40.45"))));
assertTrue(list.contains(new PeptideSearchResult("q217_p1", "INFGIEK", "KPYK1_YEAST", 460, 466 , new BigDecimal("37.51"))));
assertTrue(list.contains(new PeptideSearchResult("q376_p2", "TGIAIGLNK", "RL36B_YEAST", 5, 13 , new BigDecimal("35.86"))));
assertTrue(list.contains(new PeptideSearchResult("q887_p1", "KRNEEEDAK", "RL31A_YEAST", 78, 86 , new BigDecimal("40.09"))));
- assertTrue(list.contains(new PeptideSearchResult("q887_p1", "KRNEEEDAK", "RL31B_YEAST", 78, 86 , new BigDecimal("40.09"))));
assertTrue(list.contains(new PeptideSearchResult("q1009_p2", "EYTINLHKR", "RL31A_YEAST", 11, 19 , new BigDecimal("52.75"))));
- assertTrue(list.contains(new PeptideSearchResult("q1009_p2", "EYTINLHKR", "RL31B_YEAST", 11, 19 , new BigDecimal("52.75"))));
- assertTrue(list.contains(new PeptideSearchResult("q1009_p2", "EYTINLHKR", "RL31_ASHGO", 11, 19 , new BigDecimal("52.75"))));
- assertTrue(list.contains(new PeptideSearchResult("q1009_p2", "EYTINLHKR", "RL31_CYAPA", 18, 26 , new BigDecimal("52.75"))));
- assertTrue(list.contains(new PeptideSearchResult("q1009_p2", "EYTINLHKR", "RL31_DICDI", 10, 18 , new BigDecimal("52.75"))));
- assertTrue(list.contains(new PeptideSearchResult("q1009_p2", "EYTINLHKR", "RL31_NICGU", 16, 24 , new BigDecimal("52.75"))));
- assertTrue(list.contains(new PeptideSearchResult("q1009_p2", "EYTINLHKR", "RL31_PANGI", 16, 24 , new BigDecimal("52.75"))));
- assertTrue(list.contains(new PeptideSearchResult("q1009_p2", "EYTINLHKR", "RL31_PERFR", 17, 25 , new BigDecimal("52.75"))));
assertTrue(list.contains(new PeptideSearchResult("q2365_p1", "NEEEDAKNPLFSYVEPVLVASAK", "RL31A_YEAST", 80, 102 , new BigDecimal("20.69"))));
- assertTrue(list.contains(new PeptideSearchResult("q2365_p1", "NEEEDAKNPLFSYVEPVLVASAK", "RL31B_YEAST", 80, 102 , new BigDecimal("20.69"))));
}
catch(Exception e)
{

0 comments on commit 5616fae

Please sign in to comment.