# Xist Exon vs Intron Comparative Analysis Pipeline (Multi-Rep)

This notebook orchestrates the analysis of three Nanopore amplicon datasets (Xist Exon Rep1, Exon Rep2, and Intron) and compares their allele-specific expression trends.

## 1. Setup and Paths

In [None]:
import os

# Root folder that holds both the raw sequencing runs and the results tree.
BASE_DIR = "/Volumes/guttman/users/gmgao/Data_seq"

# Raw input folders: one per sequencing run, each directly under BASE_DIR.
DATA_EXON1, DATA_EXON2, DATA_INTRON = (
    os.path.join(BASE_DIR, run_name)
    for run_name in (
        "20251222-DoxSeqRep1-XistEx-Rep1",
        "20251222-DoxSeqRep1-XistEx-Rep2",
        "20251222-DoxSeqRep1-XistIn",
    )
)

# Output folders: one per dataset plus one for the cross-dataset comparison,
# all grouped under a single results root.
RESULTS_ROOT = os.path.join(BASE_DIR, "results")
RESULTS_EXON1, RESULTS_EXON2, RESULTS_INTRON, RESULTS_COMP = (
    os.path.join(RESULTS_ROOT, subdir)
    for subdir in ("XistEx_Rep1", "XistEx_Rep2", "XistIn", "comparison")
)

print(f"Results will be saved to: {RESULTS_ROOT}")

## 2. Process Xist Exon Rep1

In [None]:
!python scripts/initialize_reference.py --results_dir {RESULTS_EXON1} --f_primer AC_XistExAmp_5SNPs-F --r_primer AC_XistExAmp_5SNPs-R
!python scripts/fastq_qc.py --data_dir {DATA_EXON1} --results_dir {RESULTS_EXON1}
!python scripts/align_reads.py --data_dir {DATA_EXON1} --results_dir {RESULTS_EXON1}
!python scripts/alignment_stats.py --results_dir {RESULTS_EXON1}
!python scripts/quantify_alleles.py --results_dir {RESULTS_EXON1}
!python scripts/analyze_stoichiometry.py --results_dir {RESULTS_EXON1}
!python scripts/generate_reports.py --results_dir {RESULTS_EXON1}

## 3. Process Xist Exon Rep2

In [None]:
!python scripts/initialize_reference.py --results_dir {RESULTS_EXON2} --f_primer AC_XistExAmp_5SNPs-F --r_primer AC_XistExAmp_5SNPs-R
!python scripts/fastq_qc.py --data_dir {DATA_EXON2} --results_dir {RESULTS_EXON2}
!python scripts/align_reads.py --data_dir {DATA_EXON2} --results_dir {RESULTS_EXON2}
!python scripts/alignment_stats.py --results_dir {RESULTS_EXON2}
!python scripts/quantify_alleles.py --results_dir {RESULTS_EXON2}
!python scripts/analyze_stoichiometry.py --results_dir {RESULTS_EXON2}
!python scripts/generate_reports.py --results_dir {RESULTS_EXON2}

## 4. Process Xist Intron (with SNP Omission)

In [None]:
!python scripts/initialize_reference.py --results_dir {RESULTS_INTRON} --f_primer GG_XistInAmp1_4SNPs-F --r_primer GG_XistInAmp1_4SNPs-R
!python scripts/fastq_qc.py --data_dir {DATA_INTRON} --results_dir {RESULTS_INTRON}
!python scripts/align_reads.py --data_dir {DATA_INTRON} --results_dir {RESULTS_INTRON}
!python scripts/alignment_stats.py --results_dir {RESULTS_INTRON}
!python scripts/quantify_alleles.py --results_dir {RESULTS_INTRON} --omit_snps 2 --min_matches 3
!python scripts/analyze_stoichiometry.py --results_dir {RESULTS_INTRON} --omit_snps 2
!python scripts/generate_reports.py --results_dir {RESULTS_INTRON} --omit_snps 2

## 5. Compare Datasets
Aggregate the per-dataset results in this order: Exon Rep1, Exon Rep2, Intron. The `--labels` values are listed in the same order as `--results_dirs`.

In [None]:
!python scripts/compare_datasets.py --results_dirs {RESULTS_EXON1},{RESULTS_EXON2},{RESULTS_INTRON} --labels ExonRep1,ExonRep2,Intron --output_dir {RESULTS_COMP}

print(f"\nFull comparative analysis complete! Final report at: {os.path.join(RESULTS_COMP, 'Comparative_Analysis_Report.md')}")