<a href="https://colab.research.google.com/github/Chandan0731/bioinformatics_lab/blob/main/exp_5(primer_design).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install biopython

Collecting biopython
  Downloading biopython-1.86-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl.metadata (13 kB)
Downloading biopython-1.86-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl (3.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.2/3.2 MB[0m [31m33.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: biopython
Successfully installed biopython-1.86


In [4]:
from Bio import Entrez
import os

# 1. Identity
Entrez.email = "chandanksshetty@gmail.com"
print("Searching NCBI for Cytochrome Oxidase Accession Number...")

search_term = "Homo sapiens[Organism] AND COX1[Gene] AND RefSeq"
handle = Entrez.esearch(db="nucleotide", term=search_term, retmax=1, sort="relevance")
record = Entrez.read(handle)
handle.close()

# 3. Extract the Accession ID automatically
if len(record["IdList"]) > 0:
    target_id = record["IdList"][0]
    print(f"✅ Success! Found Accession ID: {target_id}")

Searching NCBI for Cytochrome Oxidase Accession Number...
✅ Success! Found Accession ID: 2194973615


In [5]:
# Block 1B: Fetch and Save Sequence
import os

# Ensure target_id exists from the previous block
if 'target_id' in locals():
    print(f"Downloading sequence for ID: {target_id}...")

    try:
        # Fetch data using the ID we found
        net_handle = Entrez.efetch(db="nucleotide", id=target_id, rettype="fasta", retmode="text")
        seq_data = net_handle.read()
        net_handle.close()

        # Save to file
        filename = "cytochrome_oxidase.fasta"
        with open(filename, "w") as f:
            f.write(seq_data)

        print(f"✅ Sequence saved to '{filename}'. You can now run Block 2.")
    except Exception as e:
        print(f"❌ Error fetching data: {e}")
else:
    print("❌ Error: 'target_id' is missing. Please re-run Block 1A.")

Downloading sequence for ID: 2194973615...
✅ Sequence saved to 'cytochrome_oxidase.fasta'. You can now run Block 2.


In [6]:
# Block 2: Load and Verify Sequence
def read_fasta(filename):
    with open(filename, 'r') as f:
        descriptor = f.readline().rstrip()
        seq = ''
        for line in f:
            seq += line.rstrip()
    return descriptor, seq

if os.path.exists("cytochrome_oxidase.fasta"):
    descriptor, sequence_str = read_fasta("cytochrome_oxidase.fasta")
    print(f"Description: {descriptor}")
    print(f"Total Length: {len(sequence_str)} base pairs")
else:
    print("File not found. Please re-run Block 1B.")

Description: >NC_060933.1 Homo sapiens isolate CHM13 chromosome 9, alternate assembly T2T-CHM13v2.0
Total Length: 150617247 base pairs


In [9]:
# Block 3: Restriction Mapping with Neat Table Output
import re
import pandas as pd  # Library for creating nice tables

def find_restriction_sites(seq, recognition_seq):
    sites = []
    for match in re.finditer(recognition_seq, seq):
        sites.append(match.start() + 1)
    return sites

print("Analyzing Cytochrome Oxidase for Restriction Sites...\n")

enzymes = {
    'HindIII': 'AAGCTT',
    'EcoRI':   'GAATTC',
    'KpnI':    'GGTACC',
    'BamHI':   'GGATCC',
    'NotI':    'GCGGCCGC',
    'XhoI':    'CTCGAG'
}

# 1. Collect Data
results = []
for name, pattern in enzymes.items():
    cuts = find_restriction_sites(sequence_str, pattern)

    # Check if it cuts or not
    status = "Cuts Found" if len(cuts) > 0 else "No Cuts"

    # Format the positions nicely (e.g., "254, 890")
    if len(cuts) > 0:
        positions_str = ", ".join(map(str, cuts))
    else:
        positions_str = "-"

    # Add row to results
    results.append({
        "Enzyme": name,
        "Recognition Seq": pattern,
        "Status": status,
        "Number of Cuts": len(cuts),
        "Cut Positions (bp)": positions_str
    })

# 2. Create DataFrame (Table)
df = pd.DataFrame(results)

# 3. Display the Table
# We use 'display()' which works beautifully in Colab
from IPython.display import display
display(df)

# Optional: Save table to CSV if you want to download it
df.to_csv("restriction_map_results.csv", index=False)
print("\n✅ Table generated and saved as 'restriction_map_results.csv'")

Analyzing Cytochrome Oxidase for Restriction Sites...



Unnamed: 0,Enzyme,Recognition Seq,Status,Number of Cuts,Cut Positions (bp)
0,HindIII,AAGCTT,Cuts Found,34630,"9922, 18485, 21895, 24345, 26071, 26692, 31670..."
1,EcoRI,GAATTC,Cuts Found,35632,"21888, 23805, 26130, 30859, 42680, 47272, 6054..."
2,KpnI,GGTACC,Cuts Found,12675,"4053, 31173, 39789, 52486, 59599, 68615, 87121..."
3,BamHI,GGATCC,Cuts Found,15251,"7479, 11867, 17528, 26107, 32263, 49943, 50373..."
4,NotI,GCGGCCGC,Cuts Found,479,"23087, 89381, 342053, 971980, 978656, 983019, ..."
5,XhoI,CTCGAG,Cuts Found,9143,"14146, 41607, 42647, 68292, 87393, 124879, 135..."



✅ Table generated and saved as 'restriction_map_results.csv'
