# GenomicsToolkit Complete Tutorial

**Quick guide to sequence analysis, variant calling, and visualization**

In [None]:
# Import everything
from genomics_toolkit import SequenceAnalyzer, VariantCaller, Visualizer
from genomics_toolkit.utils import FileHandler
import matplotlib.pyplot as plt
from pathlib import Path

# Build one instance of each toolkit component up front; `analyzer`, `caller`
# and `viz` are reused by the later cells of this tutorial.
analyzer, caller, viz, fh = (
    SequenceAnalyzer(),
    VariantCaller(),
    Visualizer(),
    FileHandler(),
)

print("✅ Ready to go!")

## 1. Basic Sequence Analysis

In [None]:
# Run the three basic analyses on a short hard-coded DNA string
seq = "ATGAAAAAATTTAAAGGGCCCTATGGGGATCTAG"
stats = analyzer.basic_stats(seq)
orfs = analyzer.find_orfs(seq, min_length=9)
protein = analyzer.translate(seq)

# Collect the report lines first, then emit them in order
summary_lines = (
    f"Sequence: {seq}",
    f"Length: {stats['length']} bp, GC: {stats['gc_content']:.1f}%",
    f"ORFs found: {len(orfs)}",
    f"Protein: {protein}",
)
for line in summary_lines:
    print(line)

## 2. FASTA File Processing

In [None]:
# Load and analyze FASTA
fasta_file = Path("../data/sample_sequences.fasta")

# Always (re)bind `sequences` so that a missing file leaves an empty list
# rather than an undefined name or a stale value from an earlier run of this
# cell. Later cells guard on `sequences` being non-empty, so [] means
# "nothing loaded".
sequences = []

if fasta_file.exists():
    sequences = list(analyzer.parse_fasta(fasta_file))
    print(f"Loaded {len(sequences)} sequences")

    # Summarize only the first three records to keep the output short
    for i, (header, seq) in enumerate(sequences[:3], start=1):
        stats = analyzer.basic_stats(seq)
        orfs = analyzer.find_orfs(seq, min_length=50)
        print(f"Seq {i}: {len(seq)} bp, GC: {stats['gc_content']:.1f}%, ORFs: {len(orfs)}")
else:
    # Say so explicitly: otherwise every cell that depends on the loaded
    # sequences is skipped without any hint as to why.
    print(f"Warning: FASTA file not found: {fasta_file} (cells that need the loaded sequences will be skipped)")

## 3. Variant Calling

In [None]:
# Variant-calling demo on the first loaded sequence.
# The whole cell is skipped when `sequences` is undefined or empty.
if locals().get('sequences'):
    first_record = sequences[0]
    ref_seq = first_record[1]  # second element of the record is the sequence string
    variants = caller.simulate_variants(ref_seq, num_snps=20, num_indels=5)

    # Apply the caller's filter with min_quality=25, then annotate the result
    filtered = caller.filter_variants(min_quality=25)
    annotated = caller.annotate_variants(filtered)

    print(f"Total variants: {len(variants)}")
    print(f"After filtering: {len(filtered)}")

    # Preview at most three annotated variants
    preview = annotated[:3]
    for i, var in enumerate(preview):
        print(f"Variant {i+1}: {var.chrom}:{var.pos} {var.ref}>{var.alt} (Q={var.quality:.1f})")

## 4. Visualization

In [None]:
# Composition plot for the first loaded sequence, using a 100-wide window.
# Skipped when `sequences` is undefined or empty.
if locals().get('sequences'):
    seq = sequences[0][1]
    composition = analyzer.composition_analysis(seq, window_size=100)
    fig = viz.plot_sequence_composition(composition)
    plt.show()

# Variant statistics plot; runs only if an earlier cell defined `annotated`
# (the check is for existence, not for being non-empty).
if 'annotated' in locals():
    fig = viz.plot_variant_statistics(annotated)
    plt.show()

## 5. CLI Usage Examples

```bash
# Analyze sequences (the data/ paths below are relative to the directory you run the command from)
genomics-toolkit analyze-sequence data/sample_sequences.fasta --report --plots

# Call variants
genomics-toolkit call-variants data/reference_genome.fasta --simulate --report

# Run complete pipeline
genomics-toolkit pipeline data/sample_reads.fastq --reference data/reference_genome.fasta
```

## 6. Export Results

In [None]:
# Create a simple report. Runs only if an earlier cell defined `annotated`
# (the variant-calling cell), which in turn implies `sequences` was loaded.
if 'annotated' in locals():
    results = {
        'analysis_type': 'Tutorial Demo',
        # Stats for at most the first three loaded sequences
        'sequence_stats': [analyzer.basic_stats(seq) for _, seq in sequences[:3]],
        'variant_stats': caller.calculate_population_stats(annotated),
    }

    # Nothing earlier in this notebook creates the output directory, so make
    # sure it exists before asking the visualizer to write into it.
    report_file = '../output/tutorial_report.html'
    report_path = Path(report_file)
    report_path.parent.mkdir(parents=True, exist_ok=True)

    # Generate HTML report (same string path argument as before)
    viz.create_html_report(results, report_file)
    # Print the resolved location: the file is written to ../output relative
    # to the working directory, not to an output/ folder beneath it.
    print(f"📄 Report saved to {report_path.resolve()}")

print("🎉 Tutorial complete! You've learned the basics of GenomicsToolkit.")