# Covid analysis
Covid fasta file: https://www.ncbi.nlm.nih.gov/nuccore/MN908947.3?report=fasta

In [7]:
#reading fasta file
from Bio import SeqIO

In [8]:
# Iterate over every record stored in the FASTA file and print its identifier
for record in SeqIO.parse('covid.fasta', format='fasta'):
    print(record.id)

MN908947.3


In [9]:
# Load the FASTA file as one SeqRecord (the id listing above shows a single record)
covid_rec = SeqIO.read("covid.fasta", format="fasta")

In [15]:
# Print the record's text summary followed by its identifier, separated by a space
print(f"{covid_rec!s} {covid_rec.id!s}")

ID: MN908947.3
Name: MN908947.3
Description: MN908947.3 Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1, complete genome
Number of features: 0
Seq('ATTAAAGGTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGT...AAA', SingleLetterAlphabet()) MN908947.3


In [16]:
# Length of the record, i.e. the number of nucleotides in its sequence
len(covid_rec)

29903

# Protein synthesis
DNA => mRNA => protein

In [32]:
# Pull the nucleotide sequence (a Seq object) out of the record.
# The bare name on the last line makes the notebook display it.
dna = covid_rec.seq
dna

Seq('ATTAAAGGTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGT...AAA', SingleLetterAlphabet())

In [33]:
# Transcribe DNA => mRNA; in the displayed result every T has become U
mrna = dna.transcribe()
mrna

Seq('AUUAAAGGUUUAUACCUUCCCAGGUAACAAACCAACCAACUUUCGAUCUCUUGU...AAA', RNAAlphabet())

In [34]:
# Translate mRNA => protein.
# The whole sequence is translated as a single reading frame starting at the
# first base; stop codons appear as '*' in the resulting sequence.
# NOTE(review): the sequence length (29903) is not a multiple of 3, so the last
# two bases cannot form a codon - Biopython may warn about a partial codon; confirm.
protein = mrna.translate()
protein

Seq('IKGLYLPR*QTNQLSISCRSVL*TNFKICVAVTRLHA*CTHAV*LITNYCR*QD...KKK', HasStopCodon(ExtendedIUPACProtein(), '*'))

In [37]:
# Split the translated sequence at every stop codon ('*').
# Each element is a stretch of amino acids lying between two stop codons
# (a candidate peptide chain), not an individual amino acid, despite the name.
amino_acids = protein.split('*')


In [39]:
import pandas as pd

In [43]:
# Turn every fragment in the list into a plain Python string
aa = list(map(str, amino_acids))

In [44]:
# One row per fragment, stored in a single 'amino_acids' column
df = pd.DataFrame(data={'amino_acids': aa})

In [47]:
# Add the length (number of residues) of each fragment as a 'count' column.
# The vectorised .str accessor is the idiomatic pandas way to get string
# lengths and avoids calling len() row by row through .apply.
df['count'] = df['amino_acids'].str.len()

In [49]:
# Preview the first rows: each fragment alongside its length
df.head()

Unnamed: 0,amino_acids,count
0,IKGLYLPR,8
1,QTNQLSISCRSVL,13
2,TNFKICVAVTRLHA,14
3,CTHAV,5
4,LITNYCR,7


In [51]:
# Show the 20 longest fragments, ordered by their 'count' (length) value
df.nlargest(n=20, columns='count')

Unnamed: 0,amino_acids,count
548,CTIVFKRVCGVSAARLTPCGTGTSTDVVYRAFDIYNDKVAGFAKFL...,2701
694,ASAQRSQITLHINELMDLFMRIFTIGTVTLKQGEIKDATPSDFVRA...,290
719,TNMKIILFLALITLATCELYHYQECVRGTTVLLKEPCSSGTYEGNS...,123
695,AQADEYELMYSFVSEETGTLIVNSVLLFLAFVVFLLVTLAILTALR...,83
718,QQMFHLVDFQVTIAEILLIIMRTFKVSIWNLDYIINLIIKNLSKSL...,63
6,DGEPCPWFQRENTRPTQFACFTGSRRARTWLWRLRGGGLIRGTSTS,46
464,TMLRCYFPKCSEKNNQGYTPLVVTHNFDFTFSFSPEYSMVFVLFFV,46
539,DVVYTHWYWSGNNSYTGSQYGSRILWWCIVLSVLPLPHRSSKS,43
758,LQTLAANCTICPQRFSVLRNVAHWHGSHTFGNVVDLHRCHQIG,43
771,KSHHIFTEATRSTIECTVNNARESCLYGRALMCKINFSSAIPM,43


# Covid 3D visualization

## PDB file link: https://www.ncbi.nlm.nih.gov/Structure/pdb/6LU7
PDB => Protein Data Bank

In [52]:
#parsing pdb
from Bio.PDB import PDBParser

In [65]:
# Parse the local PDB file into a Structure object labelled "6LU7";
# the trailing bare name displays the parsed structure.
parser = PDBParser()
structure = parser.get_structure(id="6LU7", file="6lu7.pdb")
structure

<Structure id=6LU7>

In [66]:
# Take the first model of the structure and list the chains it contains
model = structure[0]
for chain in model:
    print(chain)
    

<Chain id=A>
<Chain id=C>


In [67]:
# Walk the hierarchy structure -> model -> chain -> residue -> atom,
# printing each model, each chain and every atom along the way.
for model in structure.get_models():
    print(model)
    for chain in model.get_chains():
        print(chain)
        for residue in chain.get_residues():
            for atom in residue.get_atoms():
                print(atom)

tom CG>
<Atom CD>
<Atom NE>
<Atom CZ>
<Atom NH1>
<Atom NH2>
<Atom N>
<Atom CA>
<Atom C>
<Atom O>
<Atom CB>
<Atom CG>
<Atom CD>
<Atom CE>
<Atom NZ>
<Atom N>
<Atom CA>
<Atom C>
<Atom O>
<Atom CB>
<Atom OG>
<Atom N>
<Atom CA>
<Atom C>
<Atom O>
<Atom CB>
<Atom CG>
<Atom OD1>
<Atom ND2>
<Atom N>
<Atom CA>
<Atom C>
<Atom O>
<Atom CB>
<Atom CG>
<Atom ND1>
<Atom CD2>
<Atom CE1>
<Atom NE2>
<Atom N>
<Atom CA>
<Atom C>
<Atom O>
<Atom CB>
<Atom CG>
<Atom OD1>
<Atom ND2>
<Atom N>
<Atom CA>
<Atom C>
<Atom O>
<Atom CB>
<Atom CG>
<Atom CD1>
<Atom CD2>
<Atom CE1>
<Atom CE2>
<Atom CZ>
<Atom N>
<Atom CA>
<Atom C>
<Atom O>
<Atom CB>
<Atom CG>
<Atom CD1>
<Atom CD2>
<Atom N>
<Atom CA>
<Atom C>
<Atom O>
<Atom CB>
<Atom CG1>
<Atom CG2>
<Atom N>
<Atom CA>
<Atom C>
<Atom O>
<Atom CB>
<Atom CG>
<Atom CD>
<Atom OE1>
<Atom NE2>
<Atom N>
<Atom CA>
<Atom C>
<Atom O>
<Atom CB>
<Atom N>
<Atom CA>
<Atom C>
<Atom O>
<Atom N>
<Atom CA>
<Atom C>
<Atom O>
<Atom CB>
<Atom CG>
<Atom OD1>
<Atom ND2>
<Atom N>
<Atom CA>
<Atom C

In [68]:
# 3D visualization
import nglview as nv

In [90]:
# Smoke test: show nglview's built-in demo widget to confirm the widget renders
nv.demo() #to check if working

NGLWidget()

In [104]:
# Build an nglview widget from the Biopython structure parsed earlier
view =  nv.show_biopython(structure)

In [105]:
# Display the nglview widget for the Biopython structure
view

NGLWidget()

In [64]:
#using py3Dmol
import py3Dmol

In [94]:
# Create a py3Dmol viewer for PDB entry 6LU7, identified by the query string.
# NOTE(review): presumably this is fetched remotely rather than read from the
# local 6lu7.pdb file - confirm.
view2 = py3Dmol.view(query='pdb:6LU7')

In [95]:
# Style the viewer: cartoon representation coloured with the 'spectrum' scheme
view2.setStyle({'cartoon':{'color':'spectrum'}})


<py3Dmol.view at 0x7fde37ea6a90>

In [96]:
# Render the py3Dmol viewer in the notebook.
# render_image() belongs to nglview widgets; a py3Dmol view forwards unknown
# method names to the JavaScript viewer, which has no such method, so the
# original call displayed nothing useful. show() is py3Dmol's display call.
view2.show()

<py3Dmol.view at 0x7fde37ea6a90>

In [97]:
import pytraj as pt

In [98]:
# Load the local 6lu7.pdb file with pytraj
covid_tj = pt.load('6lu7.pdb')

In [99]:
# Build an nglview widget from the pytraj object
view3 = nv.show_pytraj(covid_tj)

In [100]:
# Display the nglview widget for the pytraj-loaded structure
view3

NGLWidget()

In [101]:
# Request a static image of the nglview widget.
# NOTE(review): the recorded output is an Image widget with an empty value (b'');
# presumably the image data is filled in asynchronously after the call - confirm
# against the nglview documentation.
view3.render_image()

Image(value=b'', width='99%')