In [3]:
pip install biopython


Collecting biopython
  Downloading biopython-1.85-cp312-cp312-win_amd64.whl.metadata (13 kB)
Downloading biopython-1.85-cp312-cp312-win_amd64.whl (2.8 MB)
   ---------------------------------------- 0.0/2.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/2.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/2.8 MB 165.2 kB/s eta 0:00:17
   ---------------------------------------- 0.0/2.8 MB 220.2 kB/s eta 0:00:13
   ---------------------------------------- 0.0/2.8 MB 220.2 kB/s eta 0:00:13
    --------------------------------------- 0.1/2.8 MB 299.4 kB/s eta 0:00:10
   - -------------------------------------- 0.1/2.8 MB 327.7 kB/s eta 0:00:09
   - -------------------------------------- 0.1/2.8 MB 327.7 kB/s eta 0:00:09
   - -------------------------------------- 0.1/2.8 MB 281.0 kB/s eta 0:00:10
   - -------------------------------------- 0.1/2.8 MB 313.8 kB/s eta 0:00:09
   -- ------------------------------------- 0.2/2.8 MB 446.4 kB/s eta 0:00:06
   --

In [5]:
pip install requests




In [9]:
import os
import time
import tkinter as tk
from tkinter import filedialog, messagebox
from Bio import Entrez
import requests

# -------- Configuration --------
# Contact address sent with every Entrez request. Set the ENTREZ_EMAIL
# environment variable to use your own address without editing this cell;
# when it is unset or empty the address below is used.
Entrez.email = os.environ.get("ENTREZ_EMAIL") or "m.i.2002.pk@gmail.com"  # Replace with your actual email

# -------- Data Fetching Functions --------

def fetch_ncbi_sequence(accession):
    """Fetch a nucleotide record from NCBI as FASTA text.

    Args:
        accession: Identifier passed to ``Entrez.efetch`` as ``id``.

    Returns:
        The FASTA text when the response starts with ``">"``; ``None`` when
        the response does not look like FASTA or when any error occurs while
        fetching or reading.
    """
    try:
        handle = Entrez.efetch(db="nucleotide", id=accession, rettype="fasta", retmode="text")
        try:
            data = handle.read()
        finally:
            # Always release the handle, even if read() fails part-way.
            handle.close()
        return data if data.startswith(">") else None
    except Exception:
        # Deliberate best-effort: callers treat None as "fetch failed".
        return None

def fetch_uniprot_protein(accession, timeout=30):
    """Fetch a protein record from the UniProt REST API as FASTA text.

    Args:
        accession: Identifier inserted into the UniProtKB FASTA URL.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the caller forever.

    Returns:
        The FASTA text when the server answers 200 and the body starts with
        ``">"``; ``None`` otherwise, including on any request error.
    """
    try:
        url = f"https://rest.uniprot.org/uniprotkb/{accession}.fasta"
        response = requests.get(url, timeout=timeout)
        if response.status_code == 200 and response.text.startswith(">"):
            return response.text
        return None
    except Exception:
        # Deliberate best-effort: callers treat None as "fetch failed".
        return None

# -------- File Handling --------

def load_accession_list(filepath):
    """Read accession identifiers from a text file, one per line.

    Args:
        filepath: Path to the text file.

    Returns:
        A list of the non-blank lines with surrounding whitespace removed, in
        file order. Returns an empty list when ``filepath`` is not an existing
        regular file (for example when it is missing or is a directory).
    """
    if not os.path.isfile(filepath):
        return []
    # "utf-8-sig" drops a leading byte-order mark so it cannot stick to the
    # first accession; errors="replace" keeps a stray undecodable byte from
    # aborting the whole load.
    with open(filepath, 'r', encoding="utf-8-sig", errors="replace") as f:
        stripped_lines = (line.strip() for line in f)
        return [entry for entry in stripped_lines if entry]

def validate_accessions(accessions):
    """Keep only the accessions that start with ``NM_``, ``NP_`` or ``NR_``.

    The check is case-sensitive and the input order is preserved.
    """
    accepted_prefixes = ("NM_", "NP_", "NR_")
    kept = []
    for candidate in accessions:
        if candidate.startswith(accepted_prefixes):
            kept.append(candidate)
    return kept

def create_output_dirs(base_dir):
    """Ensure the DNA, RNA and Protein sub-folders exist under ``base_dir``.

    Folders that already exist are left untouched.
    """
    for category in ("DNA", "RNA", "Protein"):
        category_dir = os.path.join(base_dir, category)
        os.makedirs(category_dir, exist_ok=True)

def save_fasta(acc, data, data_type, base_dir):
    """Write ``data`` to ``<base_dir>/<data_type>/<acc>.fasta``.

    The target folder must already exist; an existing file is overwritten.
    """
    category_dir = os.path.join(base_dir, data_type)
    fasta_path = os.path.join(category_dir, f"{acc}.fasta")
    with open(fasta_path, 'w') as out_file:
        out_file.write(data)

# -------- BioFetch Process --------

def _fetch_refseq_protein(accession):
    """Fetch a protein record from the NCBI ``protein`` database as FASTA, or None on failure."""
    try:
        handle = Entrez.efetch(db="protein", id=accession, rettype="fasta", retmode="text")
        try:
            data = handle.read()
        finally:
            handle.close()
        return data if data.startswith(">") else None
    except Exception:
        # Best-effort, matching the other fetch helpers: None means "failed".
        return None


def run_biofetch(input_file, output_dir, status_label):
    """Download every valid accession listed in ``input_file`` into ``output_dir``.

    Accessions are routed by prefix: ``NP_`` is saved under ``Protein``,
    ``NR_`` under ``RNA`` and everything else that passed validation under
    ``DNA``. One line per accession is written to ``fetch_log.txt`` in
    ``output_dir`` and the outcome is shown on ``status_label``.

    Args:
        input_file: Path of the text file listing accessions.
        output_dir: Folder that receives the sub-folders, FASTA files and log.
        status_label: Widget whose ``config(text=..., fg=...)`` is called to
            report progress.
    """
    accessions = load_accession_list(input_file)
    accessions = validate_accessions(accessions)

    if not accessions:
        status_label.config(text="⚠ No valid accession numbers found.", fg="orange")
        return

    create_output_dirs(output_dir)
    log = []

    for acc in accessions:
        if acc.startswith("NP_"):
            # NP_ is a RefSeq protein accession. The UniProt endpoint is keyed
            # by UniProt accessions, so that lookup is expected to miss for
            # these IDs; fall back to the NCBI protein database when it does.
            data = fetch_uniprot_protein(acc) or _fetch_refseq_protein(acc)
            data_type = "Protein"
        elif acc.startswith("NR_"):
            data = fetch_ncbi_sequence(acc)
            data_type = "RNA"
        else:
            data = fetch_ncbi_sequence(acc)
            data_type = "DNA"

        if data:
            save_fasta(acc, data, data_type, output_dir)
            log.append(f"SUCCESS: {acc} -> {data_type}")
        else:
            log.append(f"ERROR: Failed to fetch {acc}")

    log_path = os.path.join(output_dir, "fetch_log.txt")
    with open(log_path, 'w') as f:
        f.write("\n".join(log))

    status_label.config(text=f"✅ Done! Log saved at: {log_path}", fg="green")

# -------- GUI Setup --------

def select_input_file():
    """Ask the user for the accession file and put its path in the input entry.

    If the dialog is cancelled (it returns an empty value) the entry keeps
    whatever path it already showed.
    """
    file_path = filedialog.askopenfilename(filetypes=[("All Files", "*.*")])
    if not file_path:
        # Cancelled: do not wipe a previously chosen path.
        return
    input_entry.delete(0, tk.END)
    input_entry.insert(0, file_path)

def select_output_dir():
    """Ask the user for the output folder and put its path in the output entry.

    If the dialog is cancelled (it returns an empty value) the entry keeps
    whatever path it already showed.
    """
    folder_path = filedialog.askdirectory()
    if not folder_path:
        # Cancelled: do not wipe a previously chosen folder.
        return
    output_entry.delete(0, tk.END)
    output_entry.insert(0, folder_path)

def start_process():
    """Validate the chosen paths and schedule the download run.

    Reads both paths from the entry widgets. The run is scheduled only when
    the input path is an existing file and the output folder exists or can be
    created; otherwise the problem is shown on the status label.
    """
    input_path = input_entry.get()
    output_path = output_entry.get()

    # isfile (rather than exists) also rejects a directory given as input.
    if not os.path.isfile(input_path):
        status_label.config(text="❌ Input file does not exist.", fg="red")
        return
    if not output_path:
        # os.makedirs("") raises, so an empty path is reported instead.
        status_label.config(text="❌ Please choose an output folder.", fg="red")
        return
    try:
        os.makedirs(output_path, exist_ok=True)
    except OSError as exc:
        status_label.config(text=f"❌ Cannot create output folder: {exc}", fg="red")
        return

    status_label.config(text="🔄 Fetching data...", fg="blue")
    # Deferred by 100 ms via the Tk event loop.
    root.after(100, lambda: run_biofetch(input_path, output_path, status_label))

# -------- GUI Layout --------

# Shared look-and-feel values used by the widgets below.
_WINDOW_BG = "#f1f1f1"
_FONT_FAMILY = "Segoe UI"
_BROWSE_BG = "#4CAF50"


def _add_path_row(parent, row, caption, on_browse, caption_pady):
    """Build one "label / entry / Browse button" grid row and return its entry."""
    tk.Label(parent, text=caption, bg=_WINDOW_BG,
             font=(_FONT_FAMILY, 10)).grid(row=row, column=0, sticky="w", pady=caption_pady)
    entry = tk.Entry(parent, width=50, font=(_FONT_FAMILY, 10))
    entry.grid(row=row, column=1, padx=5)
    tk.Button(parent, text="Browse", command=on_browse,
              bg=_BROWSE_BG, fg="white", font=(_FONT_FAMILY, 9)).grid(row=row, column=2)
    return entry


# Main window.
root = tk.Tk()
root.title("🧬 BioFetch Tool (Live API Version)")
root.geometry("600x320")
root.configure(bg=_WINDOW_BG)

# Heading shown at the top of the window.
title = tk.Label(
    root,
    text="BioFetch – Live Gene & Protein Sequence Extractor",
    font=(_FONT_FAMILY, 14, "bold"),
    bg=_WINDOW_BG,
    fg="#333",
)
title.pack(pady=10)

# Grid container holding the two path-selection rows.
frame = tk.Frame(root, bg=_WINDOW_BG)
frame.pack(pady=5)

# These entries are read and written by the callbacks defined earlier.
input_entry = _add_path_row(frame, 0, "Input Accession File:", select_input_file, 5)
output_entry = _add_path_row(frame, 1, "Output Folder:", select_output_dir, 10)

start_button = tk.Button(root, text="Start Fetching", command=start_process,
                         bg="#2196F3", fg="white", font=(_FONT_FAMILY, 11))
start_button.pack(pady=15)

# Status line updated by start_process and run_biofetch.
status_label = tk.Label(root, text="", font=(_FONT_FAMILY, 10, "italic"), bg=_WINDOW_BG)
status_label.pack()

# Hand control to the Tk event loop; this call blocks until the window closes.
root.mainloop()
