<a href="https://colab.research.google.com/github/Chanchreek/DAA-AIML-Hons.-B1-B2-B3/blob/main/PRAD.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# 🧬 MarkoMutate: Real-Time DNA Mutation Simulator

# Step 1: Install and import libraries
!pip install biopython imageio

import os
import random
from Bio import SeqIO
from collections import defaultdict
import matplotlib.pyplot as plt
import imageio
from IPython.display import Video

# Step 2: Load DNA sequences from FASTA (or use sample)
def load_fasta(file_path):
    """Return every record of a FASTA file as a cleaned sequence string.

    Each record's sequence is converted to ``str``, upper-cased, and has all
    ``N`` characters removed.

    Args:
        file_path: Path (or handle) passed straight to ``SeqIO.parse``.

    Returns:
        A list with one string per FASTA record, in file order.
    """
    cleaned = []
    for record in SeqIO.parse(file_path, "fasta"):
        bases = str(record.seq).upper()
        cleaned.append(bases.replace("N", ""))
    return cleaned

# Built-in fallback used when no FASTA is uploaded. It is wrapped in a list so
# it has the same shape as the value returned by load_fasta().
sample_sequence = ["ATGCGTACGTTAGCGTAGCTAGCGTACGTAGCGTACGTTAGCGTACGTAGCGTACGTAGCGTACGTAG"]

# Step 3: Build a k-th order Markov Chain
def build_markov_chain(sequences, order=1):
    """Estimate k-th order transition probabilities from a set of sequences.

    Args:
        sequences: Iterable of strings to learn from.
        order: Length of the context (number of preceding characters).

    Returns:
        A dict mapping each observed context of length ``order`` to a dict of
        ``{next_character: probability}``; probabilities per context sum to 1.
        Sequences no longer than ``order`` contribute nothing.
    """
    counts = {}
    for sequence in sequences:
        last_start = len(sequence) - order
        for start in range(last_start):
            stop = start + order
            context = sequence[start:stop]
            successor = sequence[stop]
            bucket = counts.setdefault(context, {})
            bucket[successor] = bucket.get(successor, 0) + 1

    # Turn raw successor counts into relative frequencies per context.
    prob_chain = {}
    for context, bucket in counts.items():
        seen = sum(bucket.values())
        prob_chain[context] = {base: hits / seen for base, hits in bucket.items()}
    return prob_chain

# Step 4: Simulate mutation over time
def mutate_sequence(chain, initial_seq, steps=50, mutation_rate=0.01, order=2):
    """Simulate point mutations over several generations.

    In every step each position is independently selected for mutation with
    probability ``mutation_rate``. A selected position is redrawn from the
    Markov chain using the ``order`` bases that precede it in the *previous*
    generation. Positions too close to the start to have a full context use
    the first ``order`` bases of the sequence instead. Contexts missing from
    ``chain`` fall back to a uniform draw over A, C, G and T. A redraw may
    yield the same base that was already there.

    Args:
        chain: Mapping ``context -> {base: probability}``.
        initial_seq: Starting sequence (string).
        steps: Number of generations to simulate.
        mutation_rate: Per-position probability of a redraw in each step.
        order: Context length; should match the order ``chain`` was built with.

    Returns:
        A list of ``steps + 1`` strings: the initial sequence followed by one
        sequence per generation.
    """
    uniform = {'A': 0.25, 'C': 0.25, 'G': 0.25, 'T': 0.25}
    sequences = [initial_seq]
    curr_seq = initial_seq

    for _ in range(steps):
        # Collect bases in a list and join once; repeated ``str +=`` copies
        # the growing string on every position.
        mutated = []
        for i, base in enumerate(curr_seq):
            if random.random() < mutation_rate:
                context = curr_seq[max(i - order, 0):i]
                if len(context) < order:
                    context = curr_seq[:order]
                next_probs = chain.get(context, uniform)
                base = random.choices(
                    list(next_probs), weights=list(next_probs.values())
                )[0]
            mutated.append(base)
        curr_seq = "".join(mutated)
        sequences.append(curr_seq)
    return sequences

# Step 5: Visualize each sequence as colored bar frame
def create_frame(seq, frame_num, out_dir="frames"):
    """Render one sequence as a strip of coloured bars and save it as a PNG.

    Args:
        seq: Sequence to draw; one bar per character. Characters other than
            A, T, G and C are drawn in gray.
        frame_num: Frame index, used for the zero-padded file name.
        out_dir: Directory the image is written to (created if missing).

    The file is saved as ``<out_dir>/frame_<frame_num:03d>.png``.
    """
    os.makedirs(out_dir, exist_ok=True)
    color_map = {'A': 'green', 'T': 'red', 'G': 'blue', 'C': 'orange'}
    n_bases = len(seq)

    # Keep a non-zero width so an empty sequence still gets a usable canvas.
    fig, ax = plt.subplots(figsize=(max(n_bases, 1) / 10, 1))
    if n_bases:
        # One vectorised call instead of one bar() call per base.
        colors = [color_map.get(base, 'gray') for base in seq]
        ax.bar(range(n_bases), [1] * n_bases, color=colors, edgecolor='black')
        # Bars are centred on their index, so the view has to start half a
        # unit left of 0; starting at 0 would cut the first bar in half.
        ax.set_xlim(-0.5, n_bases - 0.5)
    ax.axis('off')
    fig.tight_layout()
    fig.savefig(f"{out_dir}/frame_{frame_num:03d}.png")
    # Close this specific figure so frames do not accumulate in memory.
    plt.close(fig)

# Step 6: Generate video from frames
def generate_video_from_frames(frame_dir="frames", output_video="mutation_sim.mp4", fps=5):
    """Combine the PNG frames of a directory into a video file.

    Frames are ordered by file name, so names must sort in playback order
    (zero-padded indices do).

    Args:
        frame_dir: Directory containing the ``.png`` frames.
        output_video: Path of the video to write.
        fps: Frames per second of the resulting video.

    Raises:
        FileNotFoundError: If ``frame_dir`` contains no ``.png`` files.
    """
    frame_files = sorted(name for name in os.listdir(frame_dir) if name.endswith('.png'))
    if not frame_files:
        raise FileNotFoundError(f"No .png frames found in {frame_dir!r}")

    # The top-level imageio.imread emits a DeprecationWarning on recent
    # releases; prefer the explicit v2 API when the installed version has it.
    reader = getattr(imageio, "v2", imageio)
    frames = [reader.imread(os.path.join(frame_dir, name)) for name in frame_files]
    imageio.mimsave(output_video, frames, fps=fps)

# Step 7: Upload your own FASTA file (Optional)
import shutil
from google.colab import files
uploaded = files.upload()  # Upload .fasta if you have it

if uploaded:
    # Only the first uploaded file is used.
    fasta_file = next(iter(uploaded))
    sequences = load_fasta(fasta_file)
else:
    sequences = []

# A FASTA file may contain no records, or records that are empty once 'N' is
# stripped; fall back to the sample instead of failing on sequences[0] below.
sequences = [s for s in sequences if s] or sample_sequence

# Step 8: Run the simulation
initial_seq = sequences[0][:100]  # Trim for visualization
order = 2
mutation_steps = 50
mutation_rate = 0.03

chain = build_markov_chain(sequences, order=order)
mutation_series = mutate_sequence(chain, initial_seq, steps=mutation_steps, mutation_rate=mutation_rate, order=order)

# Start from an empty frame directory: PNGs left over from an earlier run
# (possibly with more steps or a different figure size) would otherwise be
# picked up by generate_video_from_frames and end up in the video.
shutil.rmtree("frames", ignore_errors=True)

# Generate image frames
for i, seq in enumerate(mutation_series):
    create_frame(seq, i, out_dir="frames")

# Generate mutation video
generate_video_from_frames(output_video="mutation_sim.mp4", fps=5)

# Display the video inline
Video("mutation_sim.mp4", embed=True)


Collecting biopython
  Downloading biopython-1.85-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Downloading biopython-1.85-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m25.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: biopython
Successfully installed biopython-1.85


  image = imageio.imread(os.path.join(frame_dir, file))


In [None]:
# 🧬 MarkoMutate+: Real‐Time DNA Mutation Simulator with Double‑Helix Animation

# Step 0️⃣: Install dependencies
!pip install biopython imageio matplotlib

# Step 1️⃣: Imports
import os, random
import numpy as np
import matplotlib.pyplot as plt
import imageio
from Bio import SeqIO, Entrez
from collections import defaultdict
from IPython.display import Video

# Step 2️⃣: Fetch a real DNA sequence from NCBI (Human mito.)
# Contact address sent along with Entrez requests.
Entrez.email = "chanchreekjainsanmati@gmail.com"  # ← change to your email
# Download accession NC_012920 from the "nuccore" database as FASTA text.
with Entrez.efetch(db="nuccore", id="NC_012920", rettype="fasta", retmode="text") as handle:
    # Only the first record of the response is used.
    record = next(SeqIO.parse(handle, "fasta"))
# Normalise to upper case and drop every 'N' character.
full_seq = str(record.seq).upper().replace("N", "")
print(f"Fetched {len(full_seq)} bp from {record.id}")

# Only the first 300 bases are visualized; full_seq keeps the whole sequence.
initial_seq = full_seq[:300]

# Step 3️⃣: Build a k‑order (k=2) Markov Chain
def build_markov_chain(seqs, order=2):
    """Build a k-th order Markov chain of successor probabilities.

    Args:
        seqs: Iterable of strings used as training data.
        order: Number of preceding characters that form a context.

    Returns:
        Dict mapping each observed context to ``{next_char: probability}``.
    """
    tallies = defaultdict(lambda: defaultdict(int))
    for s in seqs:
        # The successor of the context starting at `start` is the
        # `start`-th character of s[order:].
        for start, successor in enumerate(s[order:]):
            tallies[s[start:start + order]][successor] += 1

    # Convert counts to relative frequencies.
    chain = {}
    for context, followers in tallies.items():
        denom = sum(followers.values())
        chain[context] = {base: hits / denom for base, hits in followers.items()}
    return chain

# Train the chain on the complete fetched sequence, not just the visualized slice.
chain = build_markov_chain([full_seq], order=2)

# Step 4️⃣: Mutation simulation (100 steps, μ=0.005)
def mutate_over_time(chain, seq, steps=100, μ=0.005, order=2):
    """Generate a time series of mutated sequences.

    Each step visits every position of the previous sequence; with
    probability ``μ`` the base is redrawn from ``chain`` using the ``order``
    preceding bases (or the first ``order`` bases when fewer precede it).
    Unknown contexts fall back to a uniform draw over A, C, G, T.

    Args:
        chain: Mapping ``context -> {base: probability}``.
        seq: Initial sequence.
        steps: Number of generations.
        μ: Per-position redraw probability per step.
        order: Context length used for lookups in ``chain``.

    Returns:
        List of ``steps + 1`` strings, starting with ``seq``.
    """
    uniform = {'A':.25,'C':.25,'G':.25,'T':.25}

    def redraw(source, pos):
        # Pick a replacement base for `pos` based on its context in `source`.
        window = source[max(0, pos - order):pos]
        if len(window) < order:
            window = source[:order]
        table = chain.get(window, uniform)
        return random.choices(list(table), weights=table.values())[0]

    history = [seq]
    for _ in range(steps):
        previous = history[-1]
        history.append("".join(
            redraw(previous, pos) if random.random() < μ else base
            for pos, base in enumerate(previous)
        ))
    return history

# Simulate 100 generations of the 300-base slice at a 0.5% per-base rate.
mutation_series = mutate_over_time(chain, initial_seq, steps=100, μ=0.005, order=2)

# Step 5️⃣: Double‑helix frame generator
# Rung colour for each base.
base_color = {'A':'#2ca02c', 'T':'#d62728', 'G':'#1f77b4', 'C':'#ff7f0e'}
# Base-pairing table. NOTE(review): not referenced by the code visible in this cell.
comp = {'A':'T','T':'A','G':'C','C':'G'}
helix_pitch = 10  # bases per full turn

def create_double_helix_frame(seq, idx, out="frames"):
    """Draw a sequence as a flat double helix and save the image as a PNG.

    The two strands are mirrored sine curves with one full turn every
    ``helix_pitch`` bases; each base is a horizontal rung between them,
    coloured via ``base_color`` (gray for unknown characters).

    Args:
        seq: Sequence to draw, one rung per character.
        idx: Frame index used for the zero-padded file name.
        out: Output directory (created if missing).
    """
    os.makedirs(out, exist_ok=True)

    rows = np.arange(len(seq))
    strand_a = np.sin(2*np.pi*rows/helix_pitch)
    strand_b = -strand_a

    fig, ax = plt.subplots(figsize=(4,8))
    # Backbone curves.
    for strand in (strand_a, strand_b):
        ax.plot(strand, rows, color='black', lw=1)
    # One coloured rung per base, spanning from one strand to the other.
    for base, left, right, row in zip(seq, strand_a, strand_b, rows):
        ax.plot([left, right], [row, row],
                color=base_color.get(base,'gray'), lw=2)

    ax.set_xlim(-1.2, 1.2)
    ax.set_ylim(0, len(seq))
    ax.axis('off')
    plt.tight_layout()
    plt.savefig(f"{out}/frame_{idx:03d}.png", dpi=100)
    plt.close()

# Render one PNG per generation into the default "frames" directory.
for i, seq in enumerate(mutation_series):
    create_double_helix_frame(seq, i)

# Step 6️⃣: Stitch into video
# Every .png in "frames" is included, ordered by file name; the zero-padded
# frame index in the names keeps that order equal to the generation order.
frames = sorted([f for f in os.listdir("frames") if f.endswith(".png")])
imgs = [imageio.imread(os.path.join("frames",f)) for f in frames]
imageio.mimsave("mutation_sim.mp4", imgs, fps=10)

# Display inline (last expression of the cell, so the notebook renders it).
Video("mutation_sim.mp4", embed=True, width=400, height=800)


Fetched 16568 bp from NC_012920.1


  imgs = [imageio.imread(os.path.join("frames",f)) for f in frames]


In [None]:
# 🧬 MarkoMutate Realistic 3D DNA Mutation Simulator with Explanatory Visualization

# Step 0: Install dependencies
!pip install biopython imageio

# Step 1: Imports
import os
import random
import numpy as np
import imageio
import matplotlib.pyplot as plt
from Bio import Entrez, SeqIO
from collections import defaultdict
from mpl_toolkits.mplot3d import Axes3D
from IPython.display import Video

# Step 2: Fetch real DNA (human mitochondrial genome) from NCBI
# Contact address sent along with Entrez requests.
Entrez.email = "chanchreekjainsanmati@gmail.com"  # replace with your email
# Download accession NC_012920 from the "nuccore" database as FASTA text.
with Entrez.efetch(db="nuccore", id="NC_012920", rettype="fasta", retmode="text") as handle:
    # Only the first record of the response is used.
    record = next(SeqIO.parse(handle, "fasta"))
# Normalise to upper case and drop every 'N' character.
full_seq = str(record.seq).upper().replace("N", "")
print(f"Fetched {len(full_seq)} bp from {record.id}")

# Only the first 150 bases are visualized; full_seq keeps the whole sequence.
initial_seq = full_seq[:150]  # 150 bp for 3D clarity

# Step 3: Build 2nd-order Markov Chain
def build_markov_chain(seqs, order=2):
    """Learn successor probabilities for every context of length ``order``.

    Args:
        seqs: Iterable of training strings.
        order: Context length in characters.

    Returns:
        Dict of ``context -> {next_char: probability}`` covering every context
        that is followed by at least one character in the training data.
    """
    counts = defaultdict(lambda: defaultdict(int))
    for s in seqs:
        # Pair each context with the character that follows it.
        contexts = (s[k:k + order] for k in range(len(s) - order))
        for context, successor in zip(contexts, s[order:]):
            counts[context][successor] += 1

    chain = {}
    for context, followers in counts.items():
        observed = sum(followers.values())
        chain[context] = {base: hits / observed for base, hits in followers.items()}
    return chain

# Train the chain on the complete fetched sequence, not just the visualized slice.
chain = build_markov_chain([full_seq], order=2)

# Step 4: Simulate mutations over time
def simulate_mutations(chain, seq, steps=100, mutation_rate=0.005, order=2):
    """Produce successive generations of a sequence under random mutation.

    For each generation every position of the parent sequence is redrawn with
    probability ``mutation_rate``. The replacement is sampled from ``chain``
    using the ``order`` bases preceding the position in the parent (or the
    parent's first ``order`` bases when fewer precede it). Contexts that are
    not in ``chain`` use a uniform distribution over A, C, G, T.

    Args:
        chain: Mapping ``context -> {base: probability}``.
        seq: Initial sequence.
        steps: Number of generations to produce.
        mutation_rate: Per-position redraw probability per generation.
        order: Context length used for lookups in ``chain``.

    Returns:
        List of ``steps + 1`` strings, starting with ``seq``.
    """
    fallback = {'A':0.25,'C':0.25,'G':0.25,'T':0.25}
    timeline = [seq]
    for _ in range(steps):
        parent = timeline[-1]
        child = list(parent)
        for pos in range(len(parent)):
            if not random.random() < mutation_rate:
                continue  # position survives unchanged
            context = parent[max(0, pos - order):pos]
            if len(context) < order:
                context = parent[:order]
            table = chain.get(context, fallback)
            options = list(table.keys())
            child[pos] = random.choices(options, weights=table.values())[0]
        timeline.append("".join(child))
    return timeline

# Simulate 100 generations of the 150-base slice at a 0.5% per-base rate
# (order is left at the function default).
mutation_series = simulate_mutations(chain, initial_seq, steps=100, mutation_rate=0.005)

# Step 5: Create realistic 3D double-helix frames with explanatory text
# Rung colour for each base.
base_color = {'A':'#76c043', 'T':'#e63946', 'G':'#457b9d', 'C':'#f4a261'}
# Number of full turns the helix makes over the whole sequence.
helix_turns = 4
# Helix geometry is sampled once, with one point per base of initial_seq.
n = len(initial_seq)
t = np.linspace(0, helix_turns * 2 * np.pi, n)  # rotation angle per base
z = np.linspace(0, 1, n)                        # height per base, 0..1

def create_3d_frame(seq, idx, out_dir="frames3d"):
    """Render a sequence as an annotated 3D double helix and save it as a PNG.

    Geometry comes from the module-level arrays ``t`` (angle) and ``z``
    (height): position ``i`` of ``seq`` is drawn at index ``i`` of those
    arrays, so ``seq`` must not be longer than they are.

    Args:
        seq: Sequence to draw; each base becomes a coloured connector between
            the two strands (gray for characters missing from ``base_color``).
        idx: Frame index; shown in the caption and used in the file name.
        out_dir: Output directory (created if missing).
    """
    os.makedirs(out_dir, exist_ok=True)
    fig = plt.figure(figsize=(6,6))
    ax = fig.add_subplot(111, projection='3d')

    # The second strand is the first one rotated by half a turn.
    xa, ya = np.cos(t), np.sin(t)
    xb, yb = np.cos(t + np.pi), np.sin(t + np.pi)
    for xs, ys in ((xa, ya), (xb, yb)):
        ax.plot(xs, ys, z, color='black', lw=1)

    # Base-pair connectors at constant height.
    for pos, base in enumerate(seq):
        ax.plot([xa[pos], xb[pos]], [ya[pos], yb[pos]], [z[pos], z[pos]],
                color=base_color.get(base, 'gray'), lw=2)

    # Hide the axes and use a fixed camera so frames line up in the video.
    ax.set_axis_off()
    ax.view_init(elev=30, azim=45)

    # Caption in the lower-left corner of the figure.
    caption = "\n".join([
        f"Step {idx}: Real-time mutation simulation",
        "Small base changes accumulate to drive variation.",
        "Important for understanding evolution, disease,",
        "and biotech innovation.",
    ])
    fig.text(0.02, 0.02, caption, fontsize=10, color='black',
             bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))
    plt.savefig(f"{out_dir}/frame_{idx:03d}.png", dpi=100)
    plt.close()

# Render one PNG per generation into the default "frames3d" directory.
for idx, seq in enumerate(mutation_series):
    create_3d_frame(seq, idx)

# Step 6: Stitch frames into video
# Every .png in "frames3d" is included, ordered by file name; the zero-padded
# frame index in the names keeps that order equal to the generation order.
frames = sorted([f for f in os.listdir("frames3d") if f.endswith(".png")])
video_frames = [imageio.imread(os.path.join("frames3d", f)) for f in frames]
imageio.mimsave("mutation_sim_3d.mp4", video_frames, fps=10)

# Display the video inline (last expression of the cell, so the notebook renders it).
Video("mutation_sim_3d.mp4", embed=True, width=500, height=500)


Fetched 16568 bp from NC_012920.1


  video_frames = [imageio.imread(os.path.join("frames3d", f)) for f in frames]


In [10]:
# Updated: Interactive DNA Mutation Simulator with Proper Event Handling and μ Display

# Step 0: Install dependencies
!pip install biopython imageio ipywidgets

# Step 1: Imports
import os
import shutil
import random
import numpy as np
import imageio
import matplotlib.pyplot as plt
from Bio import Entrez, SeqIO
from Bio.Seq import Seq
from collections import defaultdict
from mpl_toolkits.mplot3d import Axes3D
from IPython.display import Video, clear_output
import ipywidgets as widgets

# Step 2: Fetch real DNA (human mitochondrial genome) from NCBI
# Contact address sent along with Entrez requests.
Entrez.email = "chanchreekjainsanmati@gmail.com"
# Download accession NC_012920 from the "nuccore" database as FASTA text.
with Entrez.efetch(db="nuccore", id="NC_012920", rettype="fasta", retmode="text") as handle:
    # Only the first record of the response is used.
    record = next(SeqIO.parse(handle, "fasta"))
# Normalise to upper case and drop every 'N' character.
full_seq = str(record.seq).upper().replace("N", "")
# Default content of the sequence text box: the first 200 bases.
initial_seq = full_seq[:200]  # 200 bp for clarity

# Step 3: Build 2nd-order Markov Chain once
def build_chain(seq, order=2):
    """Build a k-th order Markov chain from a single sequence.

    Args:
        seq: Training string.
        order: Context length in characters.

    Returns:
        Dict mapping each observed context to ``{next_char: probability}``.
    """
    counts = defaultdict(lambda: defaultdict(int))
    for start in range(len(seq) - order):
        stop = start + order
        counts[seq[start:stop]][seq[stop]] += 1

    # Normalise each context's successor counts into probabilities.
    chain = {}
    for context, followers in counts.items():
        total = sum(followers.values())
        chain[context] = {base: hits / total for base, hits in followers.items()}
    return chain

# Built once from the complete fetched sequence and reused for every simulation run.
chain = build_chain(full_seq, order=2)

# Step 4: Mutation simulator
def simulate(chain, seq, steps, μ, order=2):
    """Run the mutation process and return every intermediate sequence.

    In each step every position of the current sequence is redrawn with
    probability ``μ``; the new base is sampled from ``chain`` for the
    ``order`` bases preceding the position (the first ``order`` bases of the
    sequence when fewer precede it), or uniformly from A, C, G, T when that
    context is not in ``chain``.

    Args:
        chain: Mapping ``context -> {base: probability}``.
        seq: Initial sequence.
        steps: Number of steps to simulate.
        μ: Per-position redraw probability per step.
        order: Context length used for lookups in ``chain``.

    Returns:
        List of ``steps + 1`` strings, starting with ``seq``.
    """
    uniform = {'A':.25,'C':.25,'G':.25,'T':.25}
    series = [seq]
    for _ in range(steps):
        source = series[-1]
        bases = []
        for pos, original in enumerate(source):
            if random.random() < μ:
                context = source[max(0, pos - order):pos]
                if len(context) < order:
                    context = source[:order]
                table = chain.get(context, uniform)
                picked = random.choices(list(table), weights=table.values())[0]
            else:
                picked = original
            bases.append(picked)
        series.append("".join(bases))
    return series

# Step 5: 3D double-helix frame
# Rung colour for each base.
base_color = {'A':'#76c043','T':'#e63946','G':'#457b9d','C':'#f4a261'}
# Number of sample points; equal to the length of the 200-base default sequence.
n = 200
# Number of full turns the helix makes.
turns = 5
t = np.linspace(0, turns * 2 * np.pi, n)  # rotation angle per sample point
z = np.linspace(0, 1, n)                  # height per sample point, 0..1

def frame3d(seq, idx, out="frames_i"):
    """Render a sequence as a 3D double helix and save it as a PNG.

    The helix is sized to ``seq`` itself (one sample point per base, making
    ``turns`` full turns), so sequences of any length can be drawn. The
    figure size is fixed, so every frame has the same pixel dimensions.

    Args:
        seq: Sequence to draw; each base becomes a coloured connector between
            the two strands. Characters missing from ``base_color`` are drawn
            in gray instead of raising ``KeyError``.
        idx: Frame index used for the zero-padded file name.
        out: Output directory (created if missing).
    """
    os.makedirs(out, exist_ok=True)

    # Sample the helix per call: the sequence comes from user input and need
    # not have the length of any precomputed geometry.
    n_bases = len(seq)
    angle = np.linspace(0, turns * 2 * np.pi, n_bases)
    height = np.linspace(0, 1, n_bases)
    x1, y1 = np.cos(angle), np.sin(angle)
    # Second strand: the first one rotated by half a turn.
    x2, y2 = np.cos(angle + np.pi), np.sin(angle + np.pi)

    fig = plt.figure(figsize=(4,4))
    ax = fig.add_subplot(111, projection='3d')
    ax.plot(x1, y1, height, color='black', lw=1)
    ax.plot(x2, y2, height, color='black', lw=1)
    for i, b in enumerate(seq):
        ax.plot([x1[i], x2[i]], [y1[i], y2[i]], [height[i], height[i]],
                color=base_color.get(b, 'gray'), lw=2)
    ax.set_axis_off()
    ax.view_init(elev=25, azim=60)
    fig.tight_layout()
    fig.savefig(f"{out}/f{idx:03d}.png", dpi=80)
    # Close this specific figure so frames do not accumulate in memory.
    plt.close(fig)

# Step 6: Analytics plots
def plot_stats(series):
    """Plot GC content and per-step mutation counts for a simulation run.

    Args:
        series: List of equally long, non-empty sequences, one per step
            (including the initial sequence).

    Shows a figure with two panels: GC percentage of every sequence, and the
    number of positions that differ between each pair of consecutive
    sequences.
    """
    gc_percent = []
    for s in series:
        gc_bases = s.count('G') + s.count('C')
        gc_percent.append(100 * gc_bases / len(s))

    # Hamming distance between each sequence and its successor.
    changes = [
        sum(a != b for a, b in zip(before, after))
        for before, after in zip(series, series[1:])
    ]

    fig, (gc_ax, mut_ax) = plt.subplots(1, 2, figsize=(8, 3))
    gc_ax.plot(gc_percent, marker='o')
    gc_ax.set(title='GC% Over Time', xlabel='Step', ylabel='GC%')
    mut_ax.bar(range(len(changes)), changes)
    mut_ax.set(title='Mutations per Step', xlabel='Step', ylabel='Count')
    plt.tight_layout()
    plt.show()

# Step 7: Protein impact check
def protein_impact(orig, mutated):
    """List amino-acid differences between two DNA sequences.

    Both sequences are translated from their first base with Biopython's
    default settings (stop codons are translated, not treated as the end).
    Trailing bases that do not form a complete codon are ignored.

    Args:
        orig: Original DNA sequence (string).
        mutated: Mutated DNA sequence (string).

    Returns:
        Up to ten ``(position, original_residue, mutated_residue)`` tuples,
        in order of position, for residues that differ.
    """
    def _translate(dna):
        # Trim to a whole number of codons before translating; Biopython
        # warns about partial codons (e.g. the default 200-base input).
        usable = len(dna) - len(dna) % 3
        return str(Seq(dna[:usable]).translate(to_stop=False))

    prot0 = _translate(orig)
    prot1 = _translate(mutated)
    # zip stops at the shorter protein, like the original min(len, len) bound.
    diffs = [(i, a, b) for i, (a, b) in enumerate(zip(prot0, prot1)) if a != b]
    return diffs[:10]

# Step 8: Interactive UI with Output widget
# Mutation rate per base and step: 0 to 0.02 in steps of 0.001.
rate_slider = widgets.FloatSlider(value=0.005, min=0, max=0.02, step=0.001, description='μ:')
# Number of simulation steps: 10 to 200 in steps of 10.
steps_slider = widgets.IntSlider(value=50, min=10, max=200, step=10, description='Steps:')
# Editable start sequence, pre-filled with the fetched 200-base slice.
seq_text = widgets.Textarea(value=initial_seq, description='Sequence:', layout=widgets.Layout(width='60%', height='80px'))
run_btn = widgets.Button(description='Run Simulation', button_style='success')
# Output area that the click handler writes its text, plots and video into.
output = widgets.Output()

def on_run(b):
    """Handle a click on the Run button: simulate, render and report.

    Reads the sequence, mutation rate and step count from the widgets, runs
    the simulation, renders one frame per step into ``frames_i``, stitches
    the frames into ``sim_int.mp4`` and shows statistics, protein-level
    differences and the video inside the ``output`` widget.

    Input that is empty or contains characters other than A, C, G and T is
    rejected with a message instead of failing part-way through rendering.

    Args:
        b: The clicked button, passed in by ipywidgets (unused).
    """
    with output:
        clear_output(wait=True)

        # Remove all whitespace, not only at the ends, so sequences pasted
        # over several lines are accepted.
        seq = "".join(seq_text.value.split()).upper()
        if not seq:
            print("⚠️ Please enter a DNA sequence before running the simulation.")
            return
        unsupported = ", ".join(sorted(set(seq) - set("ACGT")))
        if unsupported:
            print(f"⚠️ Unsupported characters in sequence: {unsupported} (only A, C, G and T are allowed).")
            return

        # Clear old frames
        if os.path.exists("frames_i"):
            shutil.rmtree("frames_i")
        print(f"Running simulation with mutation rate μ = {rate_slider.value} and steps = {steps_slider.value}\n")
        series = simulate(chain, seq, steps_slider.value, rate_slider.value)
        # Create frames
        for i, s in enumerate(series):
            frame3d(s, i)
        # Make video (file names are zero-padded, so sorting gives step order)
        files = sorted(os.listdir("frames_i"))
        vid = [imageio.imread(os.path.join("frames_i", f)) for f in files]
        imageio.mimsave("sim_int.mp4", vid, fps=8)
        # Display results
        print("🔬 Mutation Simulator Results:")
        plot_stats(series)
        diffs = protein_impact(seq, series[-1])
        if diffs:
            print("⚠️ Protein changes (pos, orig→mut):", diffs)
        else:
            print("✅ No protein-level changes detected.")
        display(Video("sim_int.mp4", embed=True, width=400, height=400))

# Run the simulation whenever the button is clicked.
run_btn.on_click(on_run)
# Layout: both sliders side by side, then the sequence box, then the button.
ui = widgets.VBox([widgets.HBox([rate_slider, steps_slider]), seq_text, run_btn])
# Show the controls followed by the area that receives the results.
display(ui, output)




VBox(children=(HBox(children=(FloatSlider(value=0.005, description='μ:', max=0.02, step=0.001), IntSlider(valu…

Output()