In [2]:
#With Biotite you can create your own custom alphabet
#Load the Biotite and matplotlib packages
import biotite
import biotite.sequence as seq
import matplotlib.pyplot as plt

#Similar to Biopython

#List the names exposed by the top-level package, then by biotite.sequence
for module in (biotite, biotite.sequence):
    print(dir(module))

#Build the example DNA sequence used throughout the rest of the script
dna = seq.NucleotideSequence("ATCTGCATG")

#Show the symbols of the sequence alphabet, once through the attribute
#and once through the getter
for alphabet in (dna.alphabet, dna.get_alphabet()):
    print(alphabet)

#List the names available on the NucleotideSequence class
print(dir(seq.NucleotideSequence))

#Count how often each nucleotide occurs in the sequence
symbol_counts = dna.get_symbol_frequency()
print(symbol_counts)

#Complement strand, then reverse complement strand
#Biotite has no single reverse-complement method (unlike Biopython);
#chaining reverse() and complement() gives the same result
complement_strand = dna.complement()
print(complement_strand)
reverse_complement_strand = dna.reverse().complement()
print(reverse_complement_strand)
#Custom reverse complement function
def reverse_complement(dna):
    """Return the reverse complement of a nucleotide sequence.

    Parameters
    ----------
    dna : seq.NucleotideSequence or str
        The sequence to process. Anything that is not already a
        ``NucleotideSequence`` is converted through its string form,
        as the previous implementation did for every input.

    Returns
    -------
    seq.NucleotideSequence
        The reversed and complemented sequence.
    """
    #Keep accepting plain strings, which the string-based version allowed
    if not isinstance(dna, seq.NucleotideSequence):
        dna = seq.NucleotideSequence(str(dna))
    #Work on the sequence object directly instead of round-tripping it
    #through a Python string and re-encoding it
    return dna.reverse().complement()


print(reverse_complement(dna))

#Create protein sequence
protein_seq = seq.ProteinSequence("MIT")
print(protein_seq)

#Get the count of each amino acid symbol in the sequence
print(protein_seq.get_symbol_frequency())

#Convert each residue from its one-letter to its three-letter code
#convert_letter_1to3 takes only the symbol, so it is called on the class
#instead of building an empty ProteinSequence on every iteration
for aa in protein_seq:
    print(aa, seq.ProteinSequence.convert_letter_1to3(aa))

#Convert three-letter amino acid codes to one-letter codes
#The codes must be passed as separate items: a plain string such as
#"MetIleThr" is read one character at a time, so convert_letter_3to1
#would receive single letters like "M" and raise KeyError: 'M'
#NOTE(review): upper-case codes are used here so the lookup does not
#depend on case handling in the installed Biotite version
three_letter_codes = ["MET", "ILE", "THR"]
protein_seq2 = seq.ProteinSequence(three_letter_codes)
print(protein_seq2)
for code in three_letter_codes:
    print(code, seq.ProteinSequence.convert_letter_3to1(code))

#Sequence manipulation
#String-style queries (find/count) need the plain-text form of the sequence
dna_text = str(dna)

#Length of the DNA sequence
print(len(dna))
#Index of the first "C" in the sequence
print(dna_text.find("C"))
#Number of times "T" occurs in the sequence
print(dna_text.count("T"))

#Search for a subsequence
sub_seq = seq.NucleotideSequence("CAT")
print(seq.find_subsequence(dna, sub_seq))

#Positions of "C" in the sequence: all occurrences, then the first
#occurrence, then the last occurrence
for finder in (seq.find_symbol, seq.find_symbol_first, seq.find_symbol_last):
    print(finder(dna, "C"))

#Plot of frequency of nucleotides
freq = dna.get_symbol_frequency()
#Pass real lists rather than dict views so the call does not depend on
#how the installed matplotlib version handles view objects
plt.bar(list(freq.keys()), list(freq.values()))
plt.xlabel("Nucleotide")
plt.ylabel("Count")
plt.show()


#Protein Synthesis

#Translate the DNA sequence directly into an amino acid sequence
#No separate transcription step (DNA to mRNA) is performed here
#NOTE(review): per the Biotite docs, complete=True translates the whole
#sequence as a single reading frame and expects a length that is a
#multiple of 3; without it, translate() searches for open reading frames
#and returns lists, which may be empty - confirm for the installed version
protein = dna.translate(complete=True)
print(protein)

#Codon tables map codons to amino acids; tables for other species are
#available as well
#Passing 1 loads the NCBI table with that ID
standard_table = seq.CodonTable.load(1)
print(standard_table)

#Biotite's default codon table, also used for the lookups below
table = seq.CodonTable.default_table()
print(table)

#Look up in both directions: a codon key ("GAA") gives the amino acid,
#an amino acid key ("E") gives its codons
for key in ("GAA", "E"):
    print(table[key])

#Codon table selected by number (11, used here for bacterial genomes)
bact_table = seq.CodonTable.load(11)
print(bact_table)

#Codon table selected by name (yeast mitochondrial code)
yeast_table = seq.CodonTable.load("Yeast Mitochondrial")
print(yeast_table)

['AdaptiveFancyArrow', 'Copyable', 'File', 'InvalidFileError', 'TextFile', '__author__', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__path__', '__spec__', '__version__', 'colors', 'copyable', 'file', 'sequence', 'set_font_size_in_coord', 'temp', 'temp_dir', 'temp_file', 'visualize']
['Alphabet', 'AlphabetError', 'AlphabetMapper', 'AnnotatedSequence', 'Annotation', 'CodonTable', 'Feature', 'GeneralSequence', 'LetterAlphabet', 'Location', 'NucleotideSequence', 'ProteinSequence', 'Sequence', '__author__', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__path__', '__spec__', 'alphabet', 'annotation', 'codec', 'codon', 'find_subsequence', 'find_symbol', 'find_symbol_first', 'find_symbol_last', 'search', 'seqtypes', 'sequence']
['A', 'C', 'G', 'T']
['A', 'C', 'G', 'T']
['__abstractmethods__', '__add__', '__class__', '__copy_create__', '__copy_fill__', '__delattr__', '__dict__', '__dir__', '__d