In [1]:
# Modified from TALON: Technology-Agnostic Long Read Analysis Pipeline. Authored by: Dana Wyman
import pandas as pd 
import numpy as np
import sys
import itertools
from copy import deepcopy
import copy
import os

In [2]:
class Edge(object):
    """Stores information about an edge, including its location
       and the gene/transcript(s) it belongs to.
       Attributes:
           identifier: Accession ID of the edge (stored as a string)
           gene_id: Accession ID of the gene that the edge belongs to
           transcript_ids: Set of transcript accession IDs that the edge 
           belongs to
           chromosome: Chromosome that the edge is located on 
           (format "chr1")
           start: The start position of the edge with respect to the
           forward strand 
           end: The end position of the edge with respect to the
           forward strand
           strand: "+" if the edge is on the forward strand, and "-" if
           it is on the reverse strand
           length: The length of the edge, computed as abs(end - start + 1)
           annotations: Dictionary of GTF attributes for the edge, or None
           when the edge was not created from a GTF entry
           v1, v2: Vertex IDs of the edge endpoints; None until assigned
    """

    def __init__(self, identifier, chromosome, start, end, strand, gene_id,
                 transcript_id, annotations):
        self.chromosome = str(chromosome)
        self.gene_id = gene_id
        self.start = int(start)
        self.end = int(end)
        self.strand = strand
        self.length = abs(self.end - self.start + 1)
        self.annotations = annotations

        self.identifier = str(identifier)
        self.transcript_ids = set()
        if transcript_id is not None:
            self.transcript_ids.add(transcript_id)
        self.v1 = None
        self.v2 = None

    def print_edge(self):
        """ Prints a string representation of the edge"""
        print(self.identifier + ": " + self.chromosome + ":" + \
              str(self.start) + "-" + str(self.end))
        print(self.transcript_ids)
        return
    
    def getExonLineByObject(exonObj):
        """ Formats an edge as one GTF "exon" line (terminated by a newline).

            Defined without self and called on the class, e.g.
            Edge.getExonLineByObject(exon).

            Args:
                exonObj: An Edge whose annotations dictionary holds string
                values and a 'source' entry.
            Returns:
                The eight tab-separated GTF columns followed by every
                annotation rendered as 'key "value"; '.
            Raises:
                TypeError: if 'source' is missing or an annotation value is
                not a string.
        """
        columns = [exonObj.chromosome, exonObj.annotations.get('source'),
                   "exon", str(exonObj.start), str(exonObj.end), ".",
                   exonObj.strand, "."]
        # Build the attribute column in one pass instead of repeated
        # string concatenation.
        attributes = "".join(key + " \"" + value + "\"; "
                             for key, value in exonObj.annotations.items())
        return "\t".join(columns) + "\t" + attributes + "\n"

    def create_edge_from_gtf(edge_info):
        """ Creates an edge object using information from a GTF entry
                Args:
                   edge_info: A list containing fields from a GTF file edge entry.
                   Example:   
                   ['chr1', 'HAVANA', 'exon', '11869', '12227', '.', '+', '.', 
                    'gene_id "ENSG00000223972.5"; transcript_id "ENST00000456328.2"; 
                    gene_type "transcribed_unprocessed_pseudogene"; 
                    gene_status "KNOWN"; gene_name "DDX11L1"; 
                    transcript_type "processed_transcript"; 
                    transcript_status "KNOWN"; transcript_name "DDX11L1-002"; 
                    edge_number 1; edge_id "ENSE00002234944.1"; level 2; 
                    tag "basic"; transcript_support_level "1"; 
                    havana_gene "OTTHUMG00000000961.2"; 
                    havana_transcript "OTTHUMT00000362751.1";'] 
                Returns:
                   An Edge whose identifier is
                   "<chromosome>_<start>_<end>_<strand>_<gene_id>".
        """
        chromosome = edge_info[0]
        start = int(edge_info[3])
        end = int(edge_info[4])
        strand = edge_info[6]

        annotations = extract_edge_annotations_from_GTF(edge_info)
        if "exon_id" not in annotations:
            # Fall back to a location-based exon ID when the GTF has none
            annotations["exon_id"] = "_".join([chromosome, str(start), str(end), strand])

        # The parsed attributes are the single source of truth for the IDs
        # ("NULL" placeholders are supplied by the parser when absent).
        # Re-parsing the raw description by substring could pick up the wrong
        # value, e.g. for unquoted IDs or keys that merely contain "gene_id".
        gene_id = annotations['gene_id']
        transcript_id = annotations['transcript_id']
        edge_id = "_".join([chromosome, str(start), str(end), strand, gene_id])

        edge = Edge(edge_id, chromosome, start, end, strand, gene_id, transcript_id,
                    annotations)
        return edge

def extract_edge_annotations_from_GTF(tab_fields):
    """ Extracts key-value annotations from the GTF description field

        Args:
            tab_fields: The tab-split fields of one GTF line. The last
            element is the attribute column and element 1 is the source.
        Returns:
            A dictionary mapping attribute names to their values with all
            double quotes removed. "gene_id" and "transcript_id" are set to
            "NULL" when absent, and "source" is always set from tab_fields[1].
            When a key occurs more than once, the last value wins.
    """

    attributes = {}

    # Remove trailing newline and split by semicolon
    for entry in tab_fields[-1].strip('\n').split(';'):
        tokens = entry.split()
        # Skip empty or whitespace-only segments (e.g. after the final ';')
        if not tokens:
            continue

        key = tokens[0].replace('"', '')
        attributes[key] = ' '.join(tokens[1:]).replace('"', '')

    # Put in placeholders for important attributes (such as gene_id) if they
    # are absent
    attributes.setdefault("gene_id", "NULL")
    attributes.setdefault("transcript_id", "NULL")

    attributes["source"] = tab_fields[1]

    return attributes

def get_edge_from_db(vertex_info_1, vertex_info_2):
    """ Builds an Edge from the two endpoint (vertex) records of a database
        edge entry.

        Both records must carry the same "edge_id". The smaller "position"
        becomes the edge start and the larger the edge end; chromosome,
        strand and gene_id are taken from the first record. v1 and v2 are
        the stringified "vertex_ID" of the first and second record.

        Raises:
            ValueError: if the two records have different edge IDs.
    """
    first, second = vertex_info_1, vertex_info_2
    if first["edge_id"] != second["edge_id"]:
        raise ValueError('Tried to create edge from endpoints with different IDs')

    db_edge = Edge(
        first["edge_id"],
        first['chromosome'],
        min(first['position'], second['position']),
        max(first['position'], second['position']),
        first['strand'],
        first['gene_id'],
        None,
        None,
    )
    db_edge.v1 = str(first["vertex_ID"])
    db_edge.v2 = str(second["vertex_ID"])
    return db_edge

def create_novel_edge(chromosome, start, end, strand, gene_id, transcript_id, counter):
    """ Creates a novel edge whose identifier is the next value of
        counter["edges"].

        The counter is incremented in place; only the new Edge (created
        without annotations) is returned.
    """
    counter["edges"] += 1
    return Edge(counter["edges"], chromosome, start, end, strand, gene_id,
                transcript_id, None)

In [3]:
class Transcript(object):
    """Stores information about a gene transcript, including its location
       and constitutive exons.
       Attributes:
           identifier: Accession ID of transcript, i.e. an Ensembl ID. Must
           be unique.
           chromosome: Chromosome that the transcript is located on 
           (format "chr1")
           start: The start position of the transcript with respect to the
           forward strand 
           end: The end position of the transcript with respect to the
           forward strand
           strand: "+" if the transcript is on the forward strand, and "-" if
           it is on the reverse strand
           gene_id: unique ID of the gene that this transcript belongs to
           exons: List of exon objects belonging to this transcript, in sorted
           order.
           introns: List of intron edge objects, in sorted order.
           n_exons: Number of exons added through add_exon.
           annotations: Dictionary of GTF attributes, or None.
    """

    def __init__(self, identifier, chromosome, start, end, strand, gene_id, 
                 annotations):

        self.identifier = str(identifier)
        self.gene_id = str(gene_id)

        self.chromosome = str(chromosome)
        self.start = int(start)
        self.end = int(end)
        self.strand = strand
        self.n_exons = 0
        self.exons = []
        self.introns = []
        self.annotations = annotations

    def get_5prime_vertex(self):
        """ Returns ID of 5' end vertex (None for an unrecognized strand) """

        if self.strand == "+":
            return self.exons[0].v1
        if self.strand == "-":
            return self.exons[-1].v2

    def get_3prime_vertex(self):
        """ Returns ID of 3' end vertex (None for an unrecognized strand) """

        if self.strand == "+":
            return self.exons[-1].v2
        if self.strand == "-":
            return self.exons[0].v1

    def get_edge_path(self):
        """ Returns the comma-separated identifiers of all edges (exons
            interleaved with introns) in 5'->3' order, or None if the
            transcript has no edges. """
        edges = self.get_all_edges()
        if not edges:
            return None
        path = [x.identifier for x in edges]

        # Must reverse the path if the transcript is on the '-' strand
        if self.strand == "-":
            path = path[::-1]
        return ",".join(path)

    def get_all_edges(self):
        """ Returns exons and introns interleaved in ascending coordinate
            order: exon 0, intron 0, exon 1, intron 1, ... Introns that have
            not been added are simply left out. """
        all_edges = []
        n_introns = len(self.introns)
        for i in range(0, self.n_exons):
            all_edges.append(self.exons[i])
            # There may be fewer introns than exons (the last exon never has
            # a following intron, and introns may not have been added at all)
            if i < n_introns:
                all_edges.append(self.introns[i])

        return all_edges

    def get_length(self):
        """ Computes the length of the transcript by summing the lengths of
            its exons.
            Raises:
                ValueError: if the transcript has no exons. """

        if len(self.exons) == 0:
            raise ValueError('Cannot compute length: Transcript does not ' + \
                             'have any exons')

        return sum(exon.length for exon in self.exons)

    def get_exon_coords(self):
        """ Returns a flat list [start1, end1, start2, end2, ...] of the exon
            coordinates in order """
        exon_coords = []
        for exon in self.exons:
            exon_coords.append(int(exon.start))
            exon_coords.append(int(exon.end))
        return exon_coords

    def add_exon(self, exon):
        """Adds an exon object to the transcript, keeping exons sorted.

           The exon is only kept if the resulting exon list passes
           check_exon_validity; otherwise the transcript is left unchanged.
           Raises:
               ValueError: if exon.start > exon.end or the exon is invalid
               for this transcript.
        """

        if exon.start > exon.end:
            raise ValueError('Exon start (' + str(exon.start) + ') ' + \
                'is supposed to be before the exon end (' + str(exon.end) + ')')

        # Insert before the first existing exon that lies entirely after the
        # new one; otherwise the new exon goes at the end.
        position = len(self.exons)
        for i, existing_exon in enumerate(self.exons):
            if exon.end < existing_exon.start:
                position = i
                break

        previous_exons = self.exons
        self.exons = previous_exons[:position] + [exon] + previous_exons[position:]
        try:
            self.check_exon_validity()
        except ValueError:
            # Roll back so a rejected exon does not corrupt the transcript
            self.exons = previous_exons
            raise
        self.n_exons += 1
        return

    def add_intron(self, intron):
        """Adds an intron edge object to the transcript, keeping introns
           sorted.
           Raises:
               ValueError: if intron.start > intron.end.
        """

        if intron.start > intron.end:
            raise ValueError('Intron start (' + str(intron.start) + ') ' + \
                'is supposed to be before the intron end (' + str(intron.end) + ')')

        # Check where in the list the intron should be added
        for i, existing_intron in enumerate(self.introns):
            if intron.end < existing_intron.start:
                self.introns.insert(i, intron)
                return
        self.introns.append(intron)
        return
                    
    def check_exon_validity(self):
        """ The transcript's exons are valid if:
            1) Exons are in sorted order (ascending)
            2) Exon bounds do not exceed transcript start and end
            3) Exons are all on the appropriate chromosome
            If these conditions are violated, this function raises an error.
        """
        prev = 0
        for exon in self.exons:
            if exon.chromosome != self.chromosome:
                raise ValueError('Invalid exon in transcript ' + \
                      self.identifier + ': wrong chromosome')
            if exon.start < self.start or exon.end > self.end:
                print("self.start: " + str(self.start))
                print("self.end: " + str(self.end))
                print("exon.start: " + str(exon.start))
                print("exon.end: " + str(exon.end))
                raise ValueError('Invalid exon in transcript ' + \
                      self.identifier + ': (' + str(exon.start) + "-" + \
                      str(exon.end) + \
                      ') is located beyond start or end of transcript')
            if exon.start <= prev:
                # This error would indicate a TALON bug rather than user error,
                # so we shouldn't see it. 
                raise ValueError('Exons of transcript ' + \
                      self.identifier + ' are not stored in ascending order.')
            prev = exon.end
        return

    def get_introns(self):
        """
        Computes introns based on the exon list. Returns a flat list
        [start1, end1, start2, end2, ...] where each intron spans the gap
        between two consecutive exons.
        """
        exon_coords = self.get_exon_coords()
        intron_list = []

        # exon_coords is [s1, e1, s2, e2, ...]; pair each exon end (odd
        # index) with the start of the following exon.
        for i in range(1, len(exon_coords) - 1, 2):
            intron_list.append(exon_coords[i] + 1)
            intron_list.append(exon_coords[i + 1] - 1)

        return intron_list


    def print_transcript(self):
        """ Print a string representation of the Transcript (location and
            exon coordinates). Good for debugging
        """
        print("\tLocation: " + self.chromosome + ":" + str(self.start) + "-" + \
              str(self.end) + "(" + self.strand + ")")

        # Print exons
        print("\tExons: " + "\n".join([str(x.start) + "-" + str(x.end) for x in self.exons]))
        return 
    
    def getTranscriptLineByObject(transcriptObj):
        """ Formats a transcript as one GTF "transcript" line (terminated by
            a newline).

            Defined without self and called on the class, e.g.
            Transcript.getTranscriptLineByObject(transcript).

            Raises:
                TypeError: if the annotations lack 'source' or hold a
                non-string value.
        """
        columns = [transcriptObj.chromosome,
                   transcriptObj.annotations.get('source'), "transcript",
                   str(transcriptObj.start), str(transcriptObj.end), ".",
                   transcriptObj.strand, "."]
        attributes = "".join(key + " \"" + value + "\"; "
                             for key, value in transcriptObj.annotations.items())
        return "\t".join(columns) + "\t" + attributes + "\n"
    

    def get_transcript_from_gtf(transcript_info):
        """ Uses information from a GTF-formatted transcript entry to create a
        Transcript object.
            Args:
                transcript_info: A list containing fields from a GTF file gene 
                entry. Example:

                chr1	HAVANA	transcript	12010	13670	.	+
                .	gene_id "ENSG00000223972.5"; transcript_id "ENST00000450305.2"; 
                gene_type "transcribed_unprocessed_pseudogene"; 
                gene_status "KNOWN"; gene_name "DDX11L1"; 
                transcript_type "transcribed_unprocessed_pseudogene"; 
                transcript_status "KNOWN"; transcript_name "DDX11L1-001"; 
                level 2; ont "PGO:0000005"; ont "PGO:0000019"; tag "basic"; 
                transcript_support_level "NA"; havana_gene "OTTHUMG00000000961.2"; 
                havana_transcript "OTTHUMT00000002844.2";
            Raises:
                ValueError: if the attribute column does not mention
                transcript_id.
        """
        chromosome = transcript_info[0]
        start = int(transcript_info[3])
        end = int(transcript_info[4])
        strand = transcript_info[6]

        if "transcript_id" not in transcript_info[-1]:
            raise ValueError('GTF entry lacks a transcript_id field')
        annotations = extract_transcript_annotations_from_GTF(transcript_info)

        gene_id = annotations['gene_id']
        transcript_id = annotations['transcript_id']

        transcript = Transcript(transcript_id, chromosome, start, end, strand, 
                                gene_id, annotations)

        return transcript

def extract_transcript_annotations_from_GTF(tab_fields):
    """Extracts key-value annotations from the GTF description field

        Args:
            tab_fields: The tab-split fields of one GTF line. The last
            element is the attribute column and element 1 is the source.
        Returns:
            A dictionary mapping attribute names to their values with all
            double quotes removed. "gene_id" is set to "NULL" when absent
            and "source" is always set from tab_fields[1]. When a key occurs
            more than once (e.g. several "tag" entries), the last value wins.
    """

    attributes = {}

    # Remove trailing newline and split by semicolon
    for entry in tab_fields[-1].strip("\n").split(";"):
        tokens = entry.split()
        # Skip empty or whitespace-only segments (e.g. after the final ';')
        if not tokens:
            continue

        key = tokens[0].replace('"', "")
        attributes[key] = " ".join(tokens[1:]).replace('"', "")

    # Put in placeholders for important attributes (such as gene_id) if they
    # are absent
    attributes.setdefault("gene_id", "NULL")

    attributes["source"] = tab_fields[1]

    return attributes


# NOTE: the two functions below used to be indented underneath the `return`
# of extract_transcript_annotations_from_GTF, which made them unreachable.
# They are defined at module level so that they can actually be called.

def get_transcript_from_exon(exon, gene_id, transcript_id):
    """ In rare cases, GTF exons are listed with gene and transcript IDs that
        do not have corresponding entries. In this case, we create a transcript
        for this exon for bookkeeping purposes.

        The transcript takes its chromosome, start, end and strand from the
        exon and is created without annotations (None), like the transcripts
        made by create_novel_transcript.
    """
    transcript = Transcript(transcript_id, exon.chromosome, exon.start,
                            exon.end, exon.strand, gene_id, None)
    return transcript


def create_novel_transcript(chromosome, start, end, strand, gene_id, counter,
                            exons, introns):
    """ Creates a novel transcript whose identifier is the next value of
        counter["transcripts"], then adds the given exons and introns to it.

        The counter is incremented in place; only the new Transcript is
        returned.
    """
    counter["transcripts"] += 1
    transcript_id = str(counter["transcripts"])

    transcript = Transcript(transcript_id, chromosome, start, end, strand, 
                            gene_id, None)

    for exon in exons:
        transcript.add_exon(exon)
    for intron in introns:
        transcript.add_intron(intron)

    return transcript




In [1]:
class Gene(object):
    """ Contains high-level information about a gene, such as its identifiers, 
        genomic location, and transcripts. Does not contain exon information.
        Attributes:
            - identifier: Accession ID of gene, i.e. an Ensembl ID. Required.
            - chromosome: Chromosome that the gene is located on (format "chr1")
            - start: The start position of the gene with respect to the forward 
              strand (int). Should always be less than or equal to end.
            - end: The end position of the gene with respect to the forward strand 
              (int). Should always be greater than or equal to start.
            - strand: "+" if the gene is on the forward strand, "-" if it is on 
              the reverse strand
            - transcripts: dictionary mapping transcript identifiers to the
              Transcript objects assigned to this gene
            - length: end - start + 1
            - annotations: a dictionary of miscellaneous annotation categories
              extracted from a GTF. The human-readable gene name, if any, is
              stored under the 'name' key (see set_name).
            
    """

    def __init__(self, identifier, chromosome, start, end, strand, annotations):
        start = int(start)
        end = int(end)

        if start > end:
            raise ValueError("Plus strand gene start must be less than or "
                             "equal to end.")

        self.identifier = str(identifier)
        self.chromosome = chromosome
        self.start = start
        self.end = end
        self.strand = strand
        self.transcripts = {}
        self.length = end - start + 1
        self.annotations = annotations

    def set_name(self, name):
        """ Sets the name attribute of the Gene to the provided value.
        """
        self.annotations['name'] = name
        return

    def add_transcript(self, transcript):
        """ Adds a key-value pair (transcript identifier -> Transcript oject)
            to the gene's transcript dictionary
            Args:
                transcript: object of type Transcript. Must overlap with the 
                location of the gene.
            Raises:
                ValueError: if the transcript does not overlap the gene or
                its gene_id differs from this gene's identifier.
        """
        if transcript.start >= self.end or transcript.end <= self.start:
            raise ValueError('Transcript must overlap the gene it is assigned to')
 
        # In order to belong to a gene, the transcript gene_id must match
        if transcript.gene_id != self.identifier:
            raise ValueError('Gene ID of transcript must match gene ' + \
                  'in order for assignment to be made.')

        self.transcripts[transcript.identifier] = transcript
        return             


    def print_gene(self):
        """ Print a string representation of the Gene. Good for debugging. """

        # Include name in output only if there is a non-empty one
        name = self.annotations.get('name')
        if name:
            print(self.identifier + " (" + name + "):")
        else:
            print(self.identifier + ":")

        print("\tLocation: " + self.chromosome + ":" + str(self.start) + "-" + \
              str(self.end) + "(" + self.strand + ")")
        
        # Print transcripts in shorthand 
        for transcript in self.transcripts:
            print("\t Transcript: " + transcript)

        return
   
    
    def getGeneLineByObject(gene):
        """ Formats a gene as one GTF "gene" line (terminated by a newline).

            Defined without self and called on the class, e.g.
            Gene.getGeneLineByObject(gene).

            Raises:
                TypeError: if the annotations lack 'source' or hold a
                non-string value.
        """
        columns = [gene.chromosome, gene.annotations.get('source'), "gene",
                   str(gene.start), str(gene.end), ".", gene.strand, "."]
        attributes = "".join(key + " \"" + value + "\"; "
                             for key, value in gene.annotations.items())
        return "\t".join(columns) + "\t" + attributes + "\n"

    def get_gene_from_gtf(gene_info):
        """ Creates a Gene object from a GTF file entry
            Args:
                gene_info: A list containing fields from a GTF file gene entry.
                Example:
                ['chr1', 'HAVANA', 'gene', '11869', '14409', '.', '+', '.',
                'gene_id "ENSG00000223972.5";
                gene_type "transcribed_unprocessed_pseudogene";
                gene_status "KNOWN"; gene_name "DDX11L1"; level 2;
                havana_gene "OTTHUMG00000000961.2";']
            Raises:
                ValueError: if the attribute column does not mention gene_id.
        """
        chromosome = gene_info[0]
        start = int(gene_info[3])
        end = int(gene_info[4])
        strand = gene_info[6]
        if "gene_id" not in gene_info[-1]:
            raise ValueError('GTF entry lacks a gene_id field')
        annotations = extract_gene_annotations_from_GTF(gene_info)
        gene_id = annotations['gene_id']

        gene = Gene(gene_id, chromosome, start, end, strand, annotations)
        return gene

def extract_gene_annotations_from_GTF(tab_fields):
    """Parses the description field of a gene GTF in order to organize the 
       information therein into a dictionary.

        Args:
            tab_fields: The tab-split fields of one GTF line. The last
            element is the attribute column and element 1 is the source.
        Returns:
            A dictionary mapping attribute names to their values with all
            double quotes removed. "source" is always set from
            tab_fields[1]. When a key occurs more than once, the last value
            wins.
    """

    attributes = {}

    # Remove trailing newline and split by semicolon
    for entry in tab_fields[-1].strip('\n').split(';'):
        tokens = entry.split()
        # Skip empty or whitespace-only segments (e.g. after the final ';')
        if not tokens:
            continue

        key = tokens[0].replace('"', '')
        attributes[key] = ' '.join(tokens[1:]).replace('"', '')

    attributes["source"] = tab_fields[1]

    return attributes


# NOTE: the two functions below used to be indented underneath the `return`
# of extract_gene_annotations_from_GTF, which made them unreachable. They
# are defined at module level so that they can actually be called.

def get_gene_from_exon(exon, gene_id):
    """ In rare cases, GTF exons are listed with gene and transcript IDs that
        do not have corresponding entries. In this case, we create a gene
        for this exon for bookkeeping purposes.

        The gene takes its chromosome, start, end and strand from the exon
        and is created without annotations (None), like the genes made by
        create_novel_gene.
    """
    gene = Gene(gene_id, exon.chromosome, exon.start, exon.end, exon.strand,
                None)
    return gene


def create_novel_gene(chromosome, start, end, strand, counter):
    """ Creates a novel gene whose identifier is the next value of
        counter["genes"].

        The counter is incremented in place; only the new Gene (created
        without annotations) is returned.
    """
    gene_id = str(counter["genes"] + 1)
    counter["genes"] += 1
    gene = Gene(gene_id, chromosome, start, end, strand, None)
    return gene

In [5]:
def read_gtf_file(gtf_file):
    """ Reads gene, transcript, and edge information from a GTF file.
        Args:
            gtf_file: Path to the GTF file
        Returns:
            genes: A dictionary mapping gene IDs to corresponding gene objects
            transcripts: A dictionary mapping transcript IDs to corresponding
                   transcript objects
            exons: A dictionary mapping location-based exon IDs
                   (chromosome_start_end_strand_geneID) to corresponding edge
                   objects. When several transcripts share an exon, the entry
                   is the first edge seen and its transcript_ids set collects
                   every transcript that uses it.
        Entries of any other type are ignored. A transcript is attached to
        its gene, and an exon to its transcript, only if the parent entry
        was read earlier in the file.
    """
    genes = {}
    transcripts = {}
    exons = {}

    with open(gtf_file) as gtf:
        for line in gtf:
            line = line.strip()

            # Ignore blank lines and header/comment lines
            if not line or line.startswith("#"):
                continue

            # Split into constitutive fields on tab
            tab_fields = line.split("\t")
            entry_type = tab_fields[2]

            # Entry is a gene
            if entry_type == "gene":
                gene = Gene.get_gene_from_gtf(tab_fields)
                genes[gene.identifier] = gene

            # Entry is a transcript
            elif entry_type == "transcript":
                transcript = Transcript.get_transcript_from_gtf(tab_fields)
                if transcript.gene_id in genes:
                    genes[transcript.gene_id].add_transcript(transcript)
                transcripts[transcript.identifier] = transcript

            # Entry is an edge
            elif entry_type == "exon":
                exon = Edge.create_edge_from_gtf(tab_fields)
                # This ID is used because of a rare GENCODE bug
                location_exon_id = exon.identifier
                # A freshly created edge carries exactly one transcript ID
                transcript_id = next(iter(exon.transcript_ids))

                known_exon = exons.get(location_exon_id)
                if known_exon is None:
                    # Add the new edge to the data structure
                    exons[location_exon_id] = exon
                else:
                    # Update existing exon entry, including its transcript set
                    known_exon.transcript_ids.add(transcript_id)

                # Each transcript keeps the edge built from its own GTF line,
                # because that edge's annotations (e.g. transcript_id) are
                # specific to this transcript.
                if transcript_id in transcripts:
                    transcripts[transcript_id].add_exon(exon)

    return genes, transcripts, exons

In [6]:
def printNumberOfGeneTranscriptsExons(genes, transcripts, exons):
    """Print the number of genes, transcripts and exons, one labelled count
    per line."""
    labelled_collections = (
        ('geneCount', genes),
        ('transcriptCount', transcripts),
        ('exonCount', exons),
    )
    for label, collection in labelled_collections:
        print(label, len(collection))

In [7]:
#########################################################################################################
def getGeneWithTranscriptsAndExonsAsLines(gene):
    """Render a gene, its transcripts and their exons as GTF text.

    The result is a single string: the gene line, then for every transcript
    of the gene (in dictionary order) its transcript line followed by the
    lines of its exons (in the transcript's exon order).
    """
    # Collect the lines and join once instead of growing a string in a loop
    lines = [Gene.getGeneLineByObject(gene)]
    for transcriptObj in gene.transcripts.values():
        lines.append(Transcript.getTranscriptLineByObject(transcriptObj))
        lines.extend(Edge.getExonLineByObject(exonObj)
                     for exonObj in transcriptObj.exons)
    return "".join(lines)

In [8]:
#########################################################################################################    
def writeGeneToTheFile(singleGene,gtfFilePath):
    """Append GTF text to a file.

    Args:
        singleGene: Either one string or a sequence of strings; the content
            is written as-is, without adding separators.
        gtfFilePath: Path of the file to append to (created if missing).
    """
    # The context manager closes the file even if a write fails
    with open(gtfFilePath, "a") as gtfFileObj:
        if isinstance(singleGene, str):
            # Write the text in one call rather than character by character
            gtfFileObj.write(singleGene)
        else:
            gtfFileObj.writelines(singleGene)
    
#########################################################################################################


In [9]:
def validateGene(gene):
    """Check that a gene and everything below it is structurally sound.

    A gene is valid when it has at least one transcript, every transcript
    has at least one exon, no gene/transcript/exon has start > end, and no
    exon has a start or end of zero.

    Returns:
        True when every check passes.
    Raises:
        ValueError: describing the first problem found.
    """
    if len(gene.transcripts)==0:
        raise ValueError("Gene : \t" + str(gene.identifier) + "\t has no Transcripts")
    if gene.start > gene.end:
        raise ValueError("Gene : \t" + str(gene.identifier) + "\t start > end")
    for transcriptObj in gene.transcripts.values():
        if len(transcriptObj.exons)==0:
            raise ValueError("Transcript : \t" + str(transcriptObj.identifier) + "\t has no exons")
        if transcriptObj.start > transcriptObj.end:
            raise ValueError("Transcript : \t" + str(transcriptObj.identifier) + "\t start > end")
        for exon in transcriptObj.exons:
            if exon.start>exon.end:
                raise ValueError("exon : \t" + str(exon.identifier) + "\t start > end")
            if exon.start==0 or exon.end==0:
                raise ValueError("exon : \t" + str(exon.identifier) + "\t start or end is zero")
    return True


def validateAndWriteGTFToFile(CleanedSameStrandGenes,cleanedForSenseAndAntisenseFile):
    """Validate every gene and write it, with its transcripts and exons, to
    a fresh GTF file.

    Any existing file at the output path is removed first. Genes are then
    validated and appended one at a time, so a validation error stops the
    run after the genes before it have been written.

    Args:
        CleanedSameStrandGenes: Dictionary mapping gene IDs to gene objects.
        cleanedForSenseAndAntisenseFile: Path of the output GTF file.
    Raises:
        ValueError: if a gene fails validation.
    """
    if os.path.exists(cleanedForSenseAndAntisenseFile):
        os.remove(cleanedForSenseAndAntisenseFile)
    for gene in CleanedSameStrandGenes.values():
        # validateGene raises on the first problem it finds; the explicit
        # check also covers a falsy return value.
        if not validateGene(gene):
            raise ValueError("Gene : \t" + str(gene.identifier) + "\t has some problems")
        geneWithTranscriptsAndExonsLines=getGeneWithTranscriptsAndExonsAsLines(gene)
        writeGeneToTheFile(geneWithTranscriptsAndExonsLines,cleanedForSenseAndAntisenseFile)