### Object-Oriented Programming Example (Sequence, SequenceRecord, DNAsequence, AAsequence)

* Sequence Class 
    - Each record should be an instance of the Sequence class
    - All sequence records are stored in upper case and contain no digits; each sequence must be longer than one character
* Protein Sequence Class:
    - Each protein sequence record contains only the 20 standard amino acids and inherits the properties of Sequence
* DNA Sequence Class: 
    - Each DNA sequence record contains only the four bases [A, T, G, C] and inherits the properties of Sequence

In [28]:
from functools import total_ordering
@total_ordering
class Sequence():
    """Generic sequence stored in upper case (no GC-content logic here).

    Instances are compared and ordered by length only: two sequences with
    the same number of characters compare equal whatever their letters are.
    ``total_ordering`` derives ``<=``, ``>`` and ``>=`` from ``__eq__`` and
    ``__lt__``.

    NOTE(review): only the length is validated; digits or other
    non-letter characters are not rejected by this class.
    """

    # Defining __eq__ already makes instances unhashable; say so explicitly.
    __hash__ = None

    def __init__(self, sequence):
        """Store ``sequence`` upper-cased.

        Raises:
            AssertionError: if the sequence is not longer than one character.
        """
        self.sequence = sequence.upper()
        # Raised explicitly (rather than via ``assert``) so the check is not
        # stripped when Python runs with -O; type and message are unchanged.
        if len(self.sequence) <= 1:
            raise AssertionError("not a sequence")

    @staticmethod
    def _other_length(other):
        """Return ``len(other.sequence)``, or None if ``other`` has no sequence."""
        other_sequence = getattr(other, "sequence", None)
        if other_sequence is None:
            return None
        return len(other_sequence)

    def __str__(self):
        """Return the short one-line form."""
        return f'sequence:{self.sequence}'

    def __repr__(self):
        """Return the verbose form: the sequence plus its length."""
        return f'sequence: {self.sequence}\nlength : {len(self.sequence)}'

    def __eq__(self, other):
        """Return True when both sequences have the same length."""
        other_length = self._other_length(other)
        if other_length is None:
            # Let Python fall back to its default handling for foreign types
            # instead of failing with AttributeError.
            return NotImplemented
        return len(self.sequence) == other_length

    def __lt__(self, other):
        """Return True when this sequence is shorter than ``other``."""
        other_length = self._other_length(other)
        if other_length is None:
            return NotImplemented
        return len(self.sequence) < other_length

    def __len__(self):
        """Return the number of characters in the sequence."""
        return len(self.sequence)
    
class SequenceRecord():
    """Pair a ``Sequence`` object with a text label."""

    def __init__(self, sequence, label):
        """Store the sequence object and its label.

        Raises:
            AssertionError: if ``sequence`` is not a ``Sequence`` instance.
        """
        # Raised explicitly (rather than via ``assert``) so the type check is
        # not stripped when Python runs with -O; type and message unchanged.
        if not isinstance(sequence, Sequence):
            raise AssertionError("not a Sequence object")
        self.sequence = sequence
        self.label = label

    def __str__(self):
        """Return the label, a newline, then ``str()`` of the sequence."""
        return f'{self.label}\n{self.sequence}'

    def __repr__(self):
        """Return a '>'-prefixed label line followed by the sequence."""
        return f'>{self.label} \n {self.sequence}'
class DNAsequence(Sequence):
    """Labelled DNA sequence restricted to the bases A, T, G and C."""

    # Codon -> one-letter amino acid ('*' marks a stop codon). Built once at
    # class creation by inverting the amino-acid -> codons listing.
    _CODON_TABLE = {
        codon: amino_acid
        for amino_acid, codons in {
            'M': ['ATG'],
            'F': ['TTT', 'TTC'],
            'L': ['TTA', 'TTG', 'CTT', 'CTC', 'CTA', 'CTG'],
            'C': ['TGT', 'TGC'],
            'Y': ['TAC', 'TAT'],
            'W': ['TGG'],
            'P': ['CCT', 'CCC', 'CCA', 'CCG'],
            'H': ['CAT', 'CAC'],
            'Q': ['CAA', 'CAG'],
            'R': ['CGT', 'CGC', 'CGA', 'CGG', 'AGA', 'AGG'],
            'I': ['ATT', 'ATC', 'ATA'],
            'T': ['ACT', 'ACC', 'ACA', 'ACG'],
            'N': ['AAT', 'AAC'],
            'K': ['AAA', 'AAG'],
            'S': ['AGT', 'AGC', 'TCT', 'TCC', 'TCA', 'TCG'],
            'V': ['GTT', 'GTC', 'GTA', 'GTG'],
            'A': ['GCT', 'GCC', 'GCA', 'GCG'],
            'D': ['GAT', 'GAC'],
            'E': ['GAA', 'GAG'],
            'G': ['GGT', 'GGC', 'GGA', 'GGG'],
            '*': ['TAA', 'TAG', 'TGA'],
        }.items()
        for codon in codons
    }

    def __init__(self, sequence, label):
        """Validate and store a DNA sequence together with its label.

        The base-class initializer upper-cases the sequence and checks its
        length; the upper-cased value is kept (it is no longer overwritten
        with the raw input), so lower-case DNA is accepted.

        Raises:
            AssertionError: from the base class, if the sequence is too short.
            TypeError: if any character is not one of A, T, G, C.
        """
        super().__init__(sequence)
        self.label = label
        if not set(self.sequence) <= set("ATGC"):
            raise TypeError("Not a DNA Sequence")

    def GC(self):
        """Return the fraction of bases that are G or C.

        The result is only returned, not stored on ``self.GC``: storing it
        replaced this method on the instance and broke any second call.
        """
        gc_count = self.sequence.count("G") + self.sequence.count("C")
        return gc_count / len(self.sequence)

    def translate(self):
        """Translate the sequence codon by codon from its first base.

        Returns:
            A string of one-letter amino-acid codes, with '*' for stop
            codons. Translation does not halt at a stop codon, and a
            trailing partial codon (one or two bases) is ignored.
        """
        bases = self.sequence
        # The range stops early enough that only complete triplets are read.
        triplets = (bases[start:start + 3] for start in range(0, len(bases) - 2, 3))
        # Unknown triplets contribute nothing to the result.
        return "".join(self._CODON_TABLE.get(triplet, "") for triplet in triplets)

    def __add__(self, other):
        """Concatenate two labelled sequences.

        Returns:
            A ``(sequence, label)`` tuple -- not a new ``DNAsequence`` --
            where the label is the two labels joined with '+'.
        """
        combined_sequence = str(self.sequence) + str(other.sequence)
        return (combined_sequence, f"{self.label}+{other.label}")

    def to_fasta(self):
        """Write the record to ``<label>.fasta`` and return a status message.

        The file holds a ``>label`` header line followed by the sequence
        (no trailing newline). The label is used verbatim as the file name,
        relative to the current working directory.
        """
        with open(f'{self.label}.fasta', "w") as fasta_file:
            fasta_file.write(f">{self.label}\n")
            fasta_file.write(f'{self.sequence}')
        return f'fasta_file written to {fasta_file.name}'
    
    
class AAsequence(Sequence):
    """Labelled protein sequence restricted to the 20 standard amino acids."""

    # One-letter codes of the 20 standard amino acids. 'Y' (tyrosine) was
    # missing from the original check, so valid proteins were rejected.
    _AMINO_ACIDS = frozenset("ACDEFGHIKLMNPQRSTVWY")
    # Residues this class counts as aliphatic.
    _ALIPHATIC = frozenset("AVMGPIL")

    def __init__(self, sequence, label):
        """Validate and store a protein sequence together with its label.

        The base-class initializer upper-cases the sequence and checks its
        length; the upper-cased value is kept (it is no longer overwritten
        with the raw input), so lower-case input is accepted.

        Raises:
            AssertionError: from the base class, if the sequence is too short.
            TypeError: if any character is not a standard amino-acid code.
        """
        super().__init__(sequence)
        self.label = label
        if not set(self.sequence) <= self._AMINO_ACIDS:
            raise TypeError("Not an AA Sequence")

    def aliphatic_residues(self):
        """Return how many residues are aliphatic (A, V, M, G, P, I or L)."""
        return sum(1 for residue in self.sequence if residue in self._ALIPHATIC)
        
    
        

            
        

        

In [31]:
seq1 = Sequence("AACCATA")
seq2 = DNAsequence("AACATACACAAAAGGG", "shh gene")

# str() form of each object
print(str(seq1))
print(str(seq2))
# repr() form: more verbose than str(), followed by a blank line
print(repr(seq1), end="\n\n")
print(repr(seq2), end="\n\n")
# Demonstration of polymorphism (a pillar of object-oriented programming),
# which follows from inheritance: a DNAsequence is also a Sequence.
print(isinstance(seq2, DNAsequence))
print(isinstance(seq2, Sequence))



fasta_file written to shh gene.fasta
sequence:AACCATA
sequence:AACATACACAAAAGGG
sequence: AACCATA
length : 7

sequence: AACATACACAAAAGGG
length : 16

True
True


### Additional Features of Classes 

In [32]:
seq1 = Sequence("AACCATA")
seq2 = DNAsequence("ATTAAAAAACA", "DNA_BINDING")
seq3 = AAsequence("KTKLLKAKAKLLKVALKW", "Chemokine")

print("Sequence 2 (DNA)")
print(repr(seq2))
# DNA -> amino-acid sequence
print(seq2.translate())
# GC content of the sequence
print(seq2.GC())
print("-" * 40)

print("Sequence 3 (Protein)")
print(repr(seq3))
# Number of aliphatic residues in the sequence
print(seq3.aliphatic_residues())

# A DNAsequence object can be written to a FASTA file
print(seq2.to_fasta())


Sequence 2 (DNA)
sequence: ATTAAAAAACA
length : 11
IKK
0.09090909090909091
----------------------------------------
Sequence 3 (Protein)
sequence: KTKLLKAKAKLLKVALKW
length : 18
9
fasta_file written to DNA_BINDING.fasta
