In [None]:
from Bio import SeqIO
# Parse the GenBank file into a single record object.
# SeqIO.read is the single-record entry point: it expects the file to hold exactly one record.
record = SeqIO.read("sequence.gb", format="genbank")

In [3]:
# Gather the values to report before printing anything.
organism = record.annotations["organism"]  # organism name stored in the record's annotations
sequence = record.seq  # the record's sequence

# Report the organism, a 100-base preview of the sequence, and its total length.
print("Organism:", organism)
print("Sequence (first 100 bases):", sequence[:100])
print("Sequence Length:", len(sequence))

Organism: Pseudoterranova decipiens
Sequence (first 100 bases): GGAACCATTATGCACTCTTCAATAGTTTTGGCCACCGTGCTCTTTGTAGCGATTGCTTCAGCATCAAAAAGTAAGTTTCACCTCCCTCCCCCCCCCCTTT
Sequence Length: 2765


In [4]:
# Report every coding-sequence (CDS) feature annotated on the record.
for feature in record.features:
    # Guard clause: anything that is not a CDS is skipped.
    if feature.type != "CDS":
        continue

    qualifiers = feature.qualifiers

    # Qualifier values are indexed as lists; take the first entry,
    # falling back to "-" when the key is absent.
    gene = qualifiers.get("gene", ["-"])[0]
    product = qualifiers.get("product", ["-"])[0]

    # Location object describing where the feature sits on the sequence.
    location = feature.location

    # One three-line summary per CDS, followed by a blank line.
    print(f"Gene: {gene}\n  Product: {product}\n  Location: {location}\n")

Gene: -
  Product: hemoglobin
  Location: join{[9:70](+), [255:343](+), [508:606](+), [803:937](+), [1192:1319](+), [1593:1681](+), [2017:2423](+)}



In [6]:
import csv

# Export one row per CDS feature (gene, product, start, end) to cds_features.csv.
# The encoding is pinned to UTF-8 so the file does not depend on the platform's
# default locale encoding (which could fail or differ on non-ASCII product names).
# newline="" lets the csv module control line endings itself.
with open("cds_features.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(["Gene", "Product", "Start", "End"])

    for feature in record.features:
        # Only coding sequences are exported.
        if feature.type != "CDS":
            continue

        # Qualifier values are indexed as lists; use the first entry, or "-" if missing.
        gene = feature.qualifiers.get("gene", ["-"])[0]
        product = feature.qualifiers.get("product", ["-"])[0]

        # NOTE(review): these are Biopython positions (0-based start, exclusive end),
        # and for a multi-part "join" location they are only the outer bounds, not
        # the individual parts -- confirm this is what consumers of the CSV expect.
        start = int(feature.location.start)
        end = int(feature.location.end)

        writer.writerow([gene, product, start, end])