In [None]:
#Similar to Biopython
import biotite.sequence.io.fasta as fasta
import biotite.sequence.io.genbank as gb
#print(dir(fasta))
#print(dir(gb))

# Read the FASTA file.
# In current Biotite releases `read()` is a classmethod that RETURNS a new,
# populated file object. Creating an empty instance and calling `read()` on it
# without keeping the return value leaves that instance empty.
file = fasta.FastaFile.read("ebola_sequence.fasta")

# Display full contents
#print(file)

# Best if only 1 sequence is present
ebola_sequence = fasta.get_sequence(file)
print(ebola_sequence)

# If the file holds multiple sequences, use the get_sequences method.
# Keep and show the result: a bare expression in the middle of a cell is
# neither displayed nor stored.
ebola_sequences = fasta.get_sequences(file)
print(ebola_sequences)

# Read the GenBank file (same classmethod pattern as for the FASTA file)
g_file = gb.GenBankFile.read("ebola_sequence.gb")
print(g_file)
#print(dir(g_file))

# Get sequence only
ebola_seq = gb.get_sequence(g_file)
print(ebola_seq)

In [9]:
#Reading/downloading without a physical file in directory
import biotite.database.entrez as entrez
import biotite.sequence.io.fasta as fasta
#print(dir(entrez))

# Look up the E-utilities database name for a human-readable name
print(entrez.get_database_name("Nucleotide"))

# Same lookup for the protein database
print(entrez.get_database_name("Protein"))


# Workflow: Query (simple/composite) ==> Search ==> IDs ==> Fetch

# Build the query
myquery = entrez.SimpleQuery("Covid19")

# Search the Nucleotide database with the query, asking for 10 results
myquery_ID = entrez.search(myquery,"Nucleotide",number=10)
print(myquery_ID)

# 1st method: download using an ID returned by the search
# fetch => multiple files or fetch_single_file => only 1 file
# Arguments: the first ID from myquery_ID, the target directory, the file
# suffix, the nuccore (nucleotide) database and the fasta return type
file_path = entrez.fetch(myquery_ID[0],"mypdb_dir",suffix="fa",db_name="nuccore",ret_type="fasta")
print(file_path)

# 2nd method: same call as above, but with a known accession instead of a
# search result
# ID KU182909.1
ID_file_path = entrez.fetch("KU182909.1","mypdb_dir",suffix="fa",db_name="nuccore",ret_type="fasta")
print(ID_file_path)

# Read the file that was downloaded by accession.
# `read()` is a classmethod in current Biotite releases and returns the
# populated file; calling it on an empty instance and ignoring the return
# value would leave `file2` empty.
file2 = fasta.FastaFile.read(ID_file_path)
for header,string in file2.items():
    print(header,"\n")
    print(string)

nuccore
protein
['1848663758', '1848590735', '1848554851', '1848549603', '1848549589', '1848548974', '1848548961', '1848548948', '1848548930', '1848548917']
mypdb_dir/1848663758.fa
mypdb_dir/KU182909.1.fa
KU182909.1 Ebola virus isolate Ebola virus/H. sapiens-tc/COD/1995/Kikwit-9510622, complete genome 

CGGACACACAAAAAGAAAGAAGAATTTTTAGGATCTTTTGTGTGCGAATAACTATGAGGAAGATTAATAATTTTCCTCTCATTGAAATTTATATCGGAATTTAAATTGAAATTGTTACTGTAATCACACCTGGTTTGTTTCAGAGCCACATCACAAAGATAGAGAACAACCTAGGTCTCTGAAGGGAGCAAGGGCATCAGTGTGCTCAGTTGAAAATCCCTTGTCAACATCTAGGTCTTATCACATCACAAGTCCCACCTCAGACTCTGCAGGGTGATCCAACAACCTTAATAGAAACATTATTGTTAAAGGACAGCATTAGTTCACAGTCAAACAAGCAAGATTGAGAATTAACCTTGGTTTTGAACTTGAATACTTAGAGGATTGGAGATTCAACAACCCTAAAGCTTGGGGTAAAACATTGGAAATAGTTAAAAGACAAATTGCTCGGAATCACAACATTCCGAGTATGGATTCTCGTCCTCAGAAAGTCTGGATGACGCCGAGTCTCACTGAATCTGACATGGATTACCACAAGATCTTGACAGCAGGTCTGTCCGTTCAACAGGGGATTGTTCGGCAAAGAGTCATCCCAGTGTATCAAGTAAACAATCTTGAGGAGATTTGCCAACTTATCATACAGGCCTTTGAAGCAGGTGTTGATTTTCAAGAGAGTGCGGACAGTTTCCTTCTCA

In [10]:
#.pdb, .cif: fetching and working with the files
import biotite.database.rcsb as rcsb # rcsb.org/search
import biotite.structure.io.pdb as pdb #There are other file types too like .cif, .npz and .xtc etc
#print(dir(rcsb))
#print(dir(pdb))

# Use a PDB ID to download the entry from RCSB over the internet
# Can use biotite.temp_dir() or create own directory like below
pdb_file_path = rcsb.fetch("4ZS6","pdb","mypdb_dir")
print(pdb_file_path)

# Passing a list of IDs fetches multiple files in one call
pdb_file_path2 = rcsb.fetch(["4ZS6","6LU7"],"pdb","mypdb_dir")
print(pdb_file_path2)

# Read the 3D PDB file that was just fetched.
# Use the path returned by fetch instead of re-typing it, and use the
# classmethod `read()`, which returns the populated file object in current
# Biotite releases (calling it on an empty instance and ignoring the return
# value would leave the instance empty).
file_reader = pdb.PDBFile.read(pdb_file_path)
#print(file_reader)

# Methods
#print(dir(file_reader))

# Only the structure is returned; other details of the file are omitted
protein_structure = file_reader.get_structure()
#print(protein_structure)

# Visualising 3D structures using py3DMol or nglview as seen in Biopython
# Coordinates and shape
print("Shape",protein_structure.shape)
print("Coordinates",protein_structure.coord)

mypdb_dir/4ZS6.pdb
['mypdb_dir/4ZS6.pdb', 'mypdb_dir/6LU7.pdb']
Shape (1, 9842)
Coordinates [[[-18.11   24.359 641.863]
  [-17.678  23.037 641.408]
  [-16.857  22.298 642.484]
  ...
  [-28.943  39.391 574.093]
  [-31.744  37.804 575.741]
  [-26.276  43.09  573.911]]]
