# 🧬 End-to-End Workflow: Genome Analysis for One Isolate
This notebook runs a complete analysis pipeline on a single clinical isolate of *Proteus mirabilis*, from raw FASTQ reads to AMR gene prediction.

Make sure to activate your Conda environment (`proteus_env`) before launching this notebook.


In [None]:
# 🔧 Step 1: Sample ID, input reads, and per-tool output locations
import os

# Every path below is derived from this one identifier.
sample_id = "isolate01"

# Paired-end reads: data/<sample_id>_R1.fastq.gz and data/<sample_id>_R2.fastq.gz
read1 = "data/{}_R1.fastq.gz".format(sample_id)
read2 = "data/{}_R2.fastq.gz".format(sample_id)

# Assembly and annotation get a per-sample directory; AMR results are a
# single TSV file inside the shared amr/ directory.
assembly_dir = "assembly/{}".format(sample_id)
annotation_dir = "annotation/{}".format(sample_id)
amr_output = "amr/{}_amrfinder.tsv".format(sample_id)

# Make sure every output directory exists (no error if it is already there).
for _out_dir in (assembly_dir, annotation_dir, "amr"):
    os.makedirs(_out_dir, exist_ok=True)

In [None]:
%%bash -s "$read1" "$read2" "$assembly_dir"
# 🧬 Step 2: Run SPAdes for genome assembly
#
# The %%bash magic has to be the very first line of the cell; with a comment
# above it IPython parses the cell as Python and the magic is never applied.
# The Python variables from Step 1 are expanded on the magic line and arrive
# here as positional arguments:
#   $1 = forward reads, $2 = reverse reads, $3 = assembly output directory
set -euo pipefail  # stop on the first failing command or unset argument

spades.py -1 "$1" -2 "$2" -o "$3"

In [None]:
%%bash -s "$annotation_dir" "$sample_id" "$assembly_dir"
# 🧬 Step 3: Annotate genome using Prokka
#
# The %%bash magic has to be the very first line of the cell; with a comment
# above it IPython parses the cell as Python and the magic is never applied.
# The Python variables from Step 1 are expanded on the magic line and arrive
# here as positional arguments:
#   $1 = annotation output directory, $2 = sample ID (used as file prefix),
#   $3 = assembly directory containing contigs.fasta
set -euo pipefail  # stop on the first failing command or unset argument

# --force is required: Step 1 already created the output directory, and
# Prokka refuses to write into an existing --outdir without it.
prokka --force --outdir "$1" --prefix "$2" "$3/contigs.fasta"

In [None]:
%%bash -s "$assembly_dir"
# 🧬 Step 4: Run MLST typing
#
# The %%bash magic has to be the very first line of the cell; with a comment
# above it IPython parses the cell as Python and the magic is never applied.
# The Python variable from Step 1 is expanded on the magic line and arrives
# here as a positional argument:
#   $1 = assembly directory containing contigs.fasta
set -euo pipefail  # stop on the first failing command or unset argument

# The typing result is printed to the cell output; nothing is written to disk.
mlst "$1/contigs.fasta"

In [None]:
%%bash -s "$assembly_dir" "$amr_output"
# 🧬 Step 5: AMR gene detection using AMRFinderPlus
#
# The %%bash magic has to be the very first line of the cell; with a comment
# above it IPython parses the cell as Python and the magic is never applied.
# The Python variables from Step 1 are expanded on the magic line and arrive
# here as positional arguments:
#   $1 = assembly directory containing contigs.fasta, $2 = output TSV path
set -euo pipefail  # stop on the first failing command or unset argument

# NOTE(review): confirm that Proteus_mirabilis is listed by
# `amrfinder --list_organisms` for the installed database version; if it is
# not, AMRFinderPlus rejects the value and --organism should be dropped.
amrfinder -n "$1/contigs.fasta" -o "$2" --organism Proteus_mirabilis

In [None]:
# 📊 Step 6: Preview AMR gene output
import pandas as pd

# Read the table from the path defined in Step 1 (amr_output) rather than a
# hard-coded file name, so the preview follows whatever sample_id is set to.
# AMRFinderPlus writes tab-separated output.
amr_df = pd.read_csv(amr_output, sep="\t")

# Show only the first rows instead of dumping the whole table.
amr_df.head()