In [None]:
To install the dependencies, run this in a command line: py -m pip install biopython pandas

In [None]:
import pandas as pd
from Bio import Entrez, SeqIO

# Set your email (required by NCBI). The address below is a placeholder:
# replace it with your own before running any Entrez request.
Entrez.email = "your.email@example.com"

def fetch_gene_product(accession: str, start: int, end: int) -> str:
    """Return the product label of the CDS that covers position ``start``.

    Fetches the GenBank record for the ``start``..``end`` region of
    ``accession`` from the NCBI ``nuccore`` database and looks for the first
    CDS feature that contains ``start``.

    Args:
        accession: Nucleotide identifier passed to ``Entrez.efetch`` as ``id``.
        start: First position of the region, sent as ``seq_start``
            (1-based, per the E-utilities convention).
        end: Last position of the region, sent as ``seq_stop``.

    Returns:
        The first ``product`` qualifier of the matching CDS, ``"Unknown"`` if
        that CDS has no ``product`` qualifier, ``"No matching CDS"`` if no CDS
        contains ``start``, or ``"Error: <message>"`` if any step raised.
    """
    try:
        # Fetch only the requested slice of the record from NCBI.
        handle = Entrez.efetch(db="nuccore", id=accession, seq_start=start, seq_stop=end, rettype="gb", retmode="text")
        try:
            record = SeqIO.read(handle, "genbank")
        finally:
            # Release the connection even when parsing fails.
            handle.close()

        # When seq_start/seq_stop are given, NCBI renumbers the features
        # relative to the returned slice (the slice's first base is 1), so
        # feature locations must be shifted back by this offset before they
        # can be compared with the absolute ``start`` coordinate.
        offset = start - 1
        for feature in record.features:
            if feature.type != "CDS":
                continue
            # Biopython locations are 0-based, half-open; convert them to
            # 1-based, inclusive absolute coordinates.
            first_base = offset + int(feature.location.start) + 1
            last_base = offset + int(feature.location.end)
            if first_base <= start <= last_base:
                return feature.qualifiers.get('product', ['Unknown'])[0]

    except Exception as e:
        # Best-effort annotation: report the failure in the cell value rather
        # than aborting the whole table.
        return f"Error: {str(e)}"

    return "No matching CDS"

# Load the hit table that will be annotated
input_file = "./Data/Alignment-HitTable AY insertions.csv"  # Replace with your input file name
output_file = "output.csv"  # Name of the output file
df = pd.read_csv(input_file)


def _label_row(row):
    """Look up the gene product label for a single hit-table row."""
    return fetch_gene_product(row['accession'], row['start'], row['end'])


# Add one gene product label per row, using the row's accession/start/end
df['gene_product'] = df.apply(_label_row, axis=1)

# Write the annotated table (without the index column) and report where it went
df.to_csv(output_file, index=False)
print(f"Gene product labels saved to {output_file}")