<a href="https://colab.research.google.com/github/Chandan0731/bioinformatics_lab/blob/main/Experiment_6_Restriction_Mapping.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
!pip install biopython matplotlib



In [4]:
from Bio import Entrez
import re

# 1. Setup Identity
# Contact e-mail sent along with the Entrez requests below.
Entrez.email = "chandanksshetty@gmail.com"

# Recognition sequences of the enzymes to map, keyed by enzyme name.
enzymes = {
    'HindIII': 'AAGCTT',
    'EcoRI':   'GAATTC',
    'KpnI':    'GGTACC',
    'BamHI':   'GGATCC',
    'XhoI':    'CTCGAG'
}

# Maximum number of sites reported per enzyme (keeps output and plot readable).
MAX_CUTS_PER_ENZYME = 25

# Sanity threshold (bp): anything longer is unlikely to be a single gene.
MAX_EXPECTED_GENE_LENGTH = 100_000


def find_cuts(seq, pattern, max_cuts=MAX_CUTS_PER_ENZYME):
    """Locate occurrences of a recognition site in a sequence.

    Args:
        seq: Sequence string to scan.
        pattern: Regular expression (here a plain recognition sequence).
        max_cuts: Stop after this many matches; ``None`` means no limit.

    Returns:
        A list of 1-based positions of the first base of each
        non-overlapping match, in ascending order. Note that this is where
        the recognition site starts, not the exact bond the enzyme cleaves.
    """
    cuts = []
    if max_cuts is not None and max_cuts <= 0:
        return cuts
    # Iterate lazily and stop early so that a very long sequence is not
    # scanned to the end just to keep the first few hits.
    for match in re.finditer(pattern, seq):
        cuts.append(match.start() + 1)
        if max_cuts is not None and len(cuts) >= max_cuts:
            break
    return cuts


# Results consumed by Block 2. Defined up front so both names exist (and are
# empty) on every failure path instead of holding stale or missing values.
sequence_str = ""
map_data = {}

# 2. AUTOMATED SEARCH
print("Searching NCBI for Cytochrome Oxidase (COX1) Accession ID...")
# NOTE(review): the recorded run of this query returned a 150,617,247 bp
# record, which looks like a chromosome-scale sequence rather than the COX1
# gene. Consider restricting the query (e.g. by sequence length) or fetching
# a known accession directly - confirm against NCBI before changing.
search_term = "Homo sapiens[Organism] AND COX1[Gene] AND RefSeq"
handle = Entrez.esearch(db="nucleotide", term=search_term, retmax=1, sort="relevance")
try:
    record = Entrez.read(handle)
finally:
    # Always release the connection, even if parsing the response fails.
    handle.close()

# 3. Download and Analyze
if len(record["IdList"]) > 0:
    # NOTE(review): esearch IdList entries appear to be NCBI UIDs rather than
    # accession.version strings, despite the wording of the message below.
    target_id = record["IdList"][0]
    print(f"✅ Found Accession ID: {target_id}")

    print("Downloading sequence...")
    try:
        net_handle = Entrez.efetch(db="nucleotide", id=target_id, rettype="fasta", retmode="text")
        try:
            seq_data = net_handle.read()
        finally:
            net_handle.close()

        # Clean sequence: drop the FASTA header line, strip line endings
        # (including any '\r') and upper-case so the upper-case recognition
        # sequences also match lower-case residues.
        fasta_lines = seq_data.splitlines()
        sequence_str = "".join(line.strip() for line in fasta_lines[1:]).upper()
        if not sequence_str:
            raise ValueError("NCBI returned an empty sequence")
        print(f"✅ Sequence Loaded Successfully: {len(sequence_str)} base pairs.")

        if len(sequence_str) > MAX_EXPECTED_GENE_LENGTH:
            print(
                f"⚠️ Sequence is longer than {MAX_EXPECTED_GENE_LENGTH} bp; "
                "the search may have matched a chromosome-scale record rather than the COX1 gene."
            )

        # --- PERFORM ANALYSIS ---
        # Store results for Block 2
        print("\n--- Analysis Results (Restricted to first 25 cuts) ---")
        for name, pattern in enzymes.items():
            cuts = find_cuts(sequence_str, pattern)
            map_data[name] = cuts

            if cuts:
                print(f"✂️ {name}: {len(cuts)} cut(s) found. Positions: {cuts}")
            else:
                print(f"• {name} does not cut.")

        print("\n✅ Data ready for plotting. Run Block 2.")

    except Exception as e:
        print(f"❌ Error fetching sequence: {e}")
        # Reset both outputs so Block 2 sees a consistent "no data" state.
        sequence_str = ""
        map_data = {}
else:
    print("❌ Auto-search failed to find an ID.")

Searching NCBI for Cytochrome Oxidase (COX1) Accession ID...
✅ Found Accession ID: 2194973615
Downloading sequence...
✅ Sequence Loaded Successfully: 150617247 base pairs.

--- Analysis Results (Restricted to first 25 cuts) ---
✂️ HindIII: 25 cut(s) found. Positions: [9922, 18485, 21895, 24345, 26071, 26692, 31670, 32287, 32709, 33173, 43813, 47524, 47702, 51558, 54341, 56562, 58027, 59826, 62117, 71597, 78297, 80258, 81451, 85371, 93835]
✂️ EcoRI: 25 cut(s) found. Positions: [21888, 23805, 26130, 30859, 42680, 47272, 60544, 66060, 73311, 75050, 75883, 88284, 91809, 94210, 97749, 106736, 113445, 115487, 116953, 120435, 121738, 122021, 122782, 129109, 137429]
✂️ KpnI: 25 cut(s) found. Positions: [4053, 31173, 39789, 52486, 59599, 68615, 87121, 93264, 117763, 120406, 121821, 134076, 135896, 137458, 138663, 154581, 178720, 180297, 184329, 206740, 215078, 218071, 225303, 226501, 229981]
✂️ BamHI: 25 cut(s) found. Positions: [7479, 11867, 17528, 26107, 32263, 49943, 50373, 55945, 65019, 666

In [10]:
# Block 2: Modern Interactive Restriction Map (Plotly)
import plotly.graph_objects as go

def plot_interactive_map(sequence_length, enzymes_data,
                         title="<b> Restriction Map: Cytochrome Oxidase (COX1)</b>"):
    """Render an interactive restriction map with Plotly.

    The sequence is drawn as a horizontal black line at y=0. Each enzyme that
    has at least one site gets its own horizontal "lane" above the backbone,
    with a marker per site and a dotted drop line down to the backbone.

    Args:
        sequence_length: Length of the sequence; sets the backbone extent.
        enzymes_data: Mapping of enzyme name -> list of site positions.
            Enzymes with an empty list are skipped (their lane stays blank).
        title: Figure title (HTML allowed by Plotly). Defaults to the
            original COX1 title.

    Returns:
        None. The figure is displayed via ``fig.show()``.
    """
    fig = go.Figure()

    # 1. Draw the DNA Backbone (The main line)
    fig.add_trace(go.Scatter(
        x=[0, sequence_length], y=[0, 0],
        mode='lines',
        line=dict(color='black', width=4),
        name='DNA Sequence',
        hoverinfo='skip'
    ))

    # 2. Add Enzyme Cuts
    colors = ['#FF5733', '#33FF57', '#3357FF', '#FF33A1', '#FF8F33'] # Modern neon palette
    lane_base, lane_step = 0.5, 0.3
    top_lane = 0.0

    for i, (enzyme_name, cut_sites) in enumerate(enzymes_data.items()):
        if not cut_sites:
            continue

        color = colors[i % len(colors)]

        # Create Y-axis offsets so enzymes have their own "lanes"
        y_lane = lane_base + (i * lane_step)
        top_lane = max(top_lane, y_lane)

        # Drop lines connecting each marker to the DNA backbone. They are
        # drawn as ONE line trace per enzyme (segments separated by None)
        # instead of one layout shape per site, and share the enzyme's
        # legend group so they hide/show together with the markers when the
        # legend entry is clicked.
        drop_x, drop_y = [], []
        for site in cut_sites:
            drop_x.extend((site, site, None))
            drop_y.extend((0, y_lane, None))
        fig.add_trace(go.Scatter(
            x=drop_x,
            y=drop_y,
            mode='lines',
            line=dict(color=color, width=1, dash="dot"),
            legendgroup=enzyme_name,
            showlegend=False,
            hoverinfo='skip'
        ))

        # Add the vertical cut markers (added after the drop lines so they
        # are drawn on top of them).
        fig.add_trace(go.Scatter(
            x=cut_sites,
            y=[y_lane] * len(cut_sites),
            mode='markers+text',
            name=enzyme_name,
            legendgroup=enzyme_name,
            marker=dict(symbol='line-ns-open', size=25, color=color, line=dict(width=3)),
            text=[f"{enzyme_name}" for _ in cut_sites],
            textposition="top center",
            hovertemplate=f"<b>{enzyme_name}</b><br>Position: %{{x}}<extra></extra>"
        ))

    # Keep the original 2.5 ceiling for up to five lanes, but grow it when
    # more enzymes are supplied so the upper lanes and labels are not clipped.
    y_max = max(2.5, top_lane + 0.8)

    # 3. Modern Layout Settings
    fig.update_layout(
        title=dict(text=title, font=dict(size=20)),
        xaxis_title="Base Pair Position",
        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False, range=[-0.5, y_max]),
        xaxis=dict(showgrid=True, gridcolor='#f0f0f0'),
        plot_bgcolor='white',
        height=600,
        hovermode="closest",
        legend=dict(title="Enzymes (Click to Toggle)"),
    )

    fig.show()

# Entry point for Block 2: plot only when Block 1 left a non-empty sequence
# behind in the notebook namespace; otherwise tell the user what to run first.
if 'sequence_str' not in locals() or len(sequence_str) == 0:
    print("❌ No sequence data found. Please run Block 1 first.")
else:
    print("Generating Interactive Map... (Hover over points!)")
    plot_interactive_map(len(sequence_str), map_data)

Generating Interactive Map... (Hover over points!)
