# Download GenBank Files for Each Accession Number

Use Biopython's Entrez module to batch-download the GenBank files for all accession numbers listed in the `Accession` column of the `metadata.csv` file.

### Import required libraries

In [1]:
from Bio import Entrez, SeqIO
import pandas as pd
import time

### Entrez access for NCBI

In [2]:
# Contact email configured on Biopython's Entrez module; set once here, before
# the Entrez.efetch requests issued later in this notebook.
Entrez.email = "achshah.rm@iu-study.org"

### Load metadata

In [3]:
# Read the metadata table from disk.
df = pd.read_csv(
    "/Users/achshahrm/Documents/dengue_mutation_prediction/data/metadata.csv"
)
# Distinct accession IDs in order of first appearance, with missing entries removed.
accessions = pd.unique(df["Accession"].dropna())

In [4]:
# Preview the first five metadata rows (head() defaults to n=5).
df.head()

Unnamed: 0,Accession,GenBank_RefSeq,Organism_Name,Length,Country,Host,Tissue_Specimen_Source,Submitters,Organization,Org_location,Publications,Collection_Date
0,PV554916,GenBank,dengue virus type 2,10488,India,Homo sapiens,blood,"Singh,P., Negi,S., Sharma,K., Khare,R., Bharga...","Department of Microbiology, All India Institut...",India,,2024-09-30
1,PV554917,GenBank,dengue virus type 2,10488,India,Homo sapiens,blood,"Singh,P., Negi,S., Sharma,K., Khare,R., Bharga...","Department of Microbiology, All India Institut...",India,,2024-09-30
2,PV554918,GenBank,dengue virus type 2,10512,India,Homo sapiens,blood,"Singh,P., Negi,S., Sharma,K., Khare,R., Bharga...","Department of Microbiology, All India Institut...",India,,2024-10-01
3,PV554919,GenBank,dengue virus type 3,10482,India,Homo sapiens,blood,"Singh,P., Negi,S., Sharma,K., Khare,R., Bharga...","Department of Microbiology, All India Institut...",India,,2024-08-30
4,PV554920,GenBank,dengue virus type 2,10488,India,Homo sapiens,blood,"Singh,P., Negi,S., Sharma,K., Khare,R., Bharga...","Department of Microbiology, All India Institut...",India,,2024-08-30


### Download and save GenBank files

In [5]:
output_path = "/Users/achshahrm/Documents/dengue_mutation_prediction/data/indian_denv_genomes.gb"

# Accessions whose download failed, collected so they can be inspected or
# retried after the loop instead of being buried in the progress log.
failed_accessions = []

# Fetch one GenBank text record per accession and concatenate them all into a
# single file. NOTE: mode "w" truncates any existing file at output_path, so
# re-running this cell starts the download from scratch.
with open(output_path, "w") as outfile:
    for i, acc in enumerate(accessions):
        try:
            print(f"Fetching {acc} ({i+1}/{len(accessions)})")
            handle = Entrez.efetch(db="nucleotide", id=acc, rettype="gb", retmode="text")
            try:
                gb_record = handle.read()
            finally:
                # Release the response even when read() raises part-way through.
                handle.close()
            # Blank lines separate consecutive records in the combined file.
            outfile.write(gb_record + "\n\n")
        except Exception as e:
            # Best effort: report the failure, remember it, and move on to the
            # next accession rather than aborting the whole run.
            print(f"Error fetching {acc}: {e}")
            failed_accessions.append(acc)
        finally:
            # Pause after every attempt, successful or not, so a run of errors
            # does not turn into back-to-back requests.
            time.sleep(0.5)

# Summarise failures once at the end so they are easy to spot and retry.
if failed_accessions:
    print(
        f"Failed to fetch {len(failed_accessions)} of {len(accessions)} accessions: "
        + ", ".join(str(acc) for acc in failed_accessions)
    )

Fetching PV554916 (1/411)
Fetching PV554917 (2/411)
Fetching PV554918 (3/411)
Fetching PV554919 (4/411)
Fetching PV554920 (5/411)
Fetching PV554921 (6/411)
Fetching PV554922 (7/411)
Fetching PV554923 (8/411)
Fetching PV554924 (9/411)
Fetching PV554925 (10/411)
Fetching PV554926 (11/411)
Fetching PV554927 (12/411)
Fetching PV554928 (13/411)
Fetching PV554929 (14/411)
Fetching PV554930 (15/411)
Fetching PV554931 (16/411)
Fetching PV554932 (17/411)
Fetching PV554933 (18/411)
Fetching PV344212 (19/411)
Fetching PV344214 (20/411)
Fetching PV344215 (21/411)
Fetching PV344225 (22/411)
Fetching PV344227 (23/411)
Fetching PV290916 (24/411)
Fetching PV138434 (25/411)
Fetching PV138435 (26/411)
Fetching PV138436 (27/411)
Fetching PQ795758 (28/411)
Fetching PQ795759 (29/411)
Fetching PQ795760 (30/411)
Fetching PQ795768 (31/411)
Fetching PQ795769 (32/411)
Fetching PQ795770 (33/411)
Fetching PQ795771 (34/411)
Fetching PQ795774 (35/411)
Fetching PQ795775 (36/411)
Fetching PQ795776 (37/411)
Fetching P