In [1]:
from skbio import DNA,RNA,Sequence
import skbio.io
import io

# Reading a FASTA sequence (SARS-CoV-2 reference genome).

# Method 1: skbio.io.read gives back a generator object that can be looped
# through. It is not iterated in this cell; to visit each record use e.g.
#     for record in seq_file:
#         print(record)
seq_file = skbio.io.read("sequence.fasta", format="fasta")

# Method 2: read the file directly as a generic Sequence. The format is named
# explicitly, matching Method 1, instead of being left to auto-detection.
seq = Sequence.read("sequence.fasta", format="fasta")

# A bare last expression displays the object's summary repr (metadata, stats
# and a preview of the sequence); use print(seq) for plain-text output instead.
seq

Sequence
-----------------------------------------------------------------------
Metadata:
    'description': 'Severe acute respiratory syndrome coronavirus 2
                    isolate Wuhan-Hu-1, complete genome'
    'id': 'MN908947.3'
Stats:
    length: 29903
-----------------------------------------------------------------------
0     ATTAAAGGTT TATACCTTCC CAGGTAACAA ACCAACCAAC TTTCGATCTC TTGTAGATCT
60    GTTCTCTAAA CGAACTTTAA AATCTGTGTG GCTGTCACTC GGCTGCATGC TTAGTGCACT
...
29820 TTTAGTAGTG CTATCCCCAT GTGATTTTAA TAGCTTCTTA GGAGAATGAC AAAAAAAAAA
29880 AAAAAAAAAA AAAAAAAAAA AAA

In [2]:
# Work with a GenBank file, reading it straight into a DNA sequence.
# The format is named explicitly so the call does not depend on format
# auto-detection of "sequence.gb".
seq2 = DNA.read("sequence.gb", format="genbank")

# Bare last expression: show the DNA summary repr (metadata, interval
# metadata count, stats and a sequence preview).
seq2

DNA
-----------------------------------------------------------------------
Metadata:
    'ACCESSION': 'MN908947'
    'COMMENT': <class 'str'>
    'DEFINITION': 'Severe acute respiratory syndrome coronavirus 2
                   isolate Wuhan-Hu-1, complete genome.'
    'KEYWORDS': '.'
    'LOCUS': <class 'dict'>
    'REFERENCE': <class 'list'>
    'SOURCE': <class 'dict'>
    'VERSION': 'MN908947.3'
Interval metadata:
    23 interval features
Stats:
    length: 29903
    has gaps: False
    has degenerates: False
    has definites: True
    GC-content: 37.97%
-----------------------------------------------------------------------
0     ATTAAAGGTT TATACCTTCC CAGGTAACAA ACCAACCAAC TTTCGATCTC TTGTAGATCT
60    GTTCTCTAAA CGAACTTTAA AATCTGTGTG GCTGTCACTC GGCTGCATGC TTAGTGCACT
...
29820 TTTAGTAGTG CTATCCCCAT GTGATTTTAA TAGCTTCTTA GGAGAATGAC AAAAAAAAAA
29880 AAAAAAAAAA AAAAAAAAAA AAA

In [3]:
# Writing sequences: serialise `seq` into an in-memory text buffer and print
# the buffer's contents. The buffer is closed when the `with` block exits.
with io.StringIO() as buffer:
    # write() hands back the file object it wrote into, so its contents can
    # be read from the returned handle.
    written = seq.write(buffer)
    print(written.getvalue())

>MN908947.3 Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1, complete genome
ATTAAAGGTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAGTAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTCGTCCGGGTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTTACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAGATGGCACTTGTGGCTTAGTAGAAGTTGAAAAAGGCGTTTTGCCTCAACTTGAACAGCCCTATGTGTTCATCAAACGTTCGGATGCTCGAACTGCACCTCATGGTCATGTTATGGTTGAGCTGGTAGCAGAACTCGAAGGCATTCAGTACGGTCGTAGTGGTGAGACACTTGGTGTCCTTGTCCCTCATGTGGGCGAAATACCAGTGGCTTACCGCAAGGTTCTTCTTCGTAAGAACGGTAATAAAGGAGCTGGTGGCCATAGTTACGGCGCCGATCTAAAGTCATTTGACTTAGGCGACGAGCTTGGCACTGATCCTTATGAAGATTTTCAAGAAAACTGGAACACTAAACATAGCAGTGGTGTTACCCGTGAACTCATGCGTGAGCTTAACGGAGGGGCATACACTCGCTATGTCGATAACAACTTCTGTGGCCCTGATGGCTACCCTCTTGAGTGCATTAAAGACCTTCTAGCACGTGCTGGTAAAGCTTCATGC

In [5]:
# Look up a genetic code by its NCBI translation-table ID; no sequence file is
# needed for this step. from_ncbi expects a table ID (here 21), not a sequence
# accession such as "MN908947.3".
species_mito = skbio.sequence.GeneticCode.from_ncbi(21)

# Printing the genetic code shows its amino-acid/start-codon table.
print(species_mito)

print("\nMitochondria Species Name:",species_mito.name)
print("Reading frame for Mitochondria species:",species_mito.reading_frames)

# Translate seq2 with this genetic code. transcribe() already yields an RNA
# sequence, so it is passed to translate() directly without re-wrapping.
# NOTE(review): table 21 is a trematode mitochondrial code, while seq2 is a
# viral genome -- confirm this table is the intended one for this translation.
print(species_mito.translate(seq2.transcribe()))

  AAs  = FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG
Starts = -----------------------------------M---------------M------------
Base1  = UUUUUUUUUUUUUUUUCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
Base2  = UUUUCCCCAAAAGGGGUUUUCCCCAAAAGGGGUUUUCCCCAAAAGGGGUUUUCCCCAAAAGGGG
Base3  = UCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAG

Mitochondria Species Name: Trematode Mitochondrial
Reading frame for Mitochondria species: [1, 2, 3, -1, -2, -3]
INGLYLPS*QTNQLSISCSSVL*TNFNICVAVTRLHA*CTHAV*LMTNYCRWQDTSNSSIFCSLLTVSSVLQPIISTSSFRPGVTES*DGEPCPWFQRENTRPTQFACFTGSRRARTWLWSLRGGGLISGTSTS*SWHLWLSSSWNSRFASTWTALCVHQTFGCSNCTSWSCYGWAGSSTRSHSVRS*WWDTWCPCPSCGRNTSGLPQGSSS*ER**SSWWP*LRRRSNVIWLSRRAWHWSLWSFSSNLEH*T*QWCYPWTHAWA*RSGMHSLCR*QLLWPWWLPSWVH*SPSSTCW*SFMHFVRTTGLYWH*EGCMLLPWTWAWNCLVHGTFWNELWIADTFWN*IGNEIWHLQWGMSNFCISLNFHNQDYSTKGWNENAWWLYG*NSICLSSCVTNWMQPNVPFNSHEVWSLWWNFMADGRFC*SHLRILWHWEFD*SSCHYLWLLTPNCCC*NLLSSMSQFSSSTWA*SCRMP*WIWLENHSS*GWSHYCLWSLCVLLCWLP*QVCLLGSTC*