## Setup and Imports

In [None]:
import sys
import os
from pathlib import Path

# Add parent directory to path
# The project modules imported below are resolved through sys.path, so the
# notebook's parent directory is placed first. Any existing copy is removed
# beforehand so that re-running this cell keeps exactly one entry instead of
# growing sys.path by one duplicate per execution.
_project_root = str(Path.cwd().parent)
while _project_root in sys.path:
    sys.path.remove(_project_root)
sys.path.insert(0, _project_root)

# Import our modules
from voice_enrollment import VoiceEnrollment, create_voice_database
from diarization_processor import DiarizationProcessor
from transcription import TranscriptionProcessor
from main import process_meeting_audio, quick_diarize, diarize_and_transcribe

print("✓ Imports successful")

## Configuration

Set your file paths here:

In [None]:
# ==== CONFIGURE THESE PATHS ====
# Every value below is a placeholder; edit them before running the later cells.

# Recording of the meeting that the later cells process
MEETING_AUDIO = "path/to/your/meeting.wav"

# Enrollment material: speaker name -> list of reference recordings
SPEAKER_SAMPLES = {
    "John": [
        "path/to/john_sample1.wav",
        "path/to/john_sample2.wav",
    ],
    "Jane": [
        "path/to/jane_sample.wav",
    ],
    "Ali": [
        "path/to/ali_sample.wav",
    ],
}

# File in which the speaker database is stored
SPEAKER_DATABASE = "speakers_database.json"

# Custom Whisper model file, e.g. "path/to/whisper_medium.pt"; None when not used
WHISPER_MODEL_PATH = None

# Language code such as "en", "fa" (Persian) or "ar"; None means auto-detect
LANGUAGE = "en"

# Directory that receives the generated output
OUTPUT_DIR = "diarization_output"

print("✓ Configuration set")

## Step 1: Create Speaker Database

Enroll speakers by providing reference audio samples.

In [None]:
# Build the speaker database from the configured reference samples
print("Creating speaker database...")

enrollment = create_voice_database(
    database_path=SPEAKER_DATABASE,
    speaker_samples=SPEAKER_SAMPLES,
)

# Fetch the enrolled names once and reuse them for both report lines
enrolled_names = enrollment.get_all_speakers()
print(f"\n✓ Database created with {len(enrolled_names)} speakers")
print(f"Enrolled speakers: {enrolled_names}")

## Step 2: Quick Diarization (No Transcription)

Perform speaker diarization to detect "who spoke when".

In [None]:
# Diarization only (no transcription) on the configured meeting recording
print("Performing diarization...\n")

result = quick_diarize(
    audio_path=MEETING_AUDIO,
    database_path=SPEAKER_DATABASE,
    output_dir=OUTPUT_DIR,
)

# Report the headline numbers, then list every generated output file
banner = "=" * 60
print(f"\n{banner}")
print("RESULTS")
print(banner)
print(f"Number of speakers detected: {result['num_speakers']}")
print(f"Identified speakers: {result['identified_speakers']}")
print(f"Total segments: {len(result['segments'])}")
print("\nOutput files:")
for label, location in result['output_files'].items():
    print(f"  {label}: {location}")

## Step 3: View Diarization Segments

In [None]:
# Preview the first 10 segments produced by the quick diarization cell
print("Diarization Segments:")
print("=" * 60 + "\n")

all_segments = result['segments']
for position, segment in enumerate(all_segments[:10], start=1):
    time_span = f"[{segment['start']:6.2f}s - {segment['end']:6.2f}s]"
    who = segment['speaker']
    # "✓" marks segments flagged as identified; a missing flag counts as "?"
    marker = "✓" if segment.get('identified', False) else "?"

    print(f"{position}. {time_span} {marker} {who}")

# Mention how many segments were left out of the preview
hidden_count = len(all_segments) - 10
if hidden_count > 0:
    print(f"\n... and {hidden_count} more segments")

## Step 4: Full Pipeline with Transcription

Run the complete pipeline: speaker diarization plus transcription.

In [None]:
# Diarization and transcription in one call
print("Running full pipeline with transcription...\n")

# Separate output directory so the quick-diarization output is not overwritten
transcript_output_dir = OUTPUT_DIR + "_with_transcript"

result_full = process_meeting_audio(
    meeting_audio_path=MEETING_AUDIO,
    voice_embeddings_database_path=SPEAKER_DATABASE,
    expected_language=LANGUAGE,
    output_transcriptions=True,
    transcriptor_model_path=WHISPER_MODEL_PATH,
    output_dir=transcript_output_dir,
)

banner = "=" * 60
print(f"\n{banner}")
print("COMPLETE RESULTS")
print(banner)
print(f"Speakers detected: {result_full['num_speakers']}")
print(f"Identified: {result_full['identified_speakers']}")
print(f"Segments: {len(result_full['segments'])}")

## Step 5: View Transcript with Speakers

In [None]:
# Print the speaker-labelled transcript from the full pipeline run
if 'transcription' not in result_full:
    print("No transcription available (transcription was disabled)")
else:
    banner = "=" * 60
    print("TRANSCRIPT WITH SPEAKER LABELS")
    print(f"{banner}\n")

    # Preview: only the first 10 segments, skipping any without text
    labelled_segments = result_full['segments']
    for segment in labelled_segments[:10]:
        if 'text' not in segment:
            continue
        print(f"[{segment['start']:6.2f}s - {segment['end']:6.2f}s]")
        print(f"{segment['speaker']}: {segment['text']}")
        print()

    remaining = len(labelled_segments) - 10
    if remaining > 0:
        print(f"... and {remaining} more segments\n")

    # Full transcript
    print(f"\n{banner}")
    print("FULL TRANSCRIPT")
    print(f"{banner}\n")
    print(result_full['transcription'])

## Step 6: Alternative - Using Direct Function Call

In [None]:
# Same task through diarize_and_transcribe, passing every option explicitly
alternative_options = {
    "audio_path": MEETING_AUDIO,
    "database_path": SPEAKER_DATABASE,
    "language": LANGUAGE,
    "whisper_model": WHISPER_MODEL_PATH,
    "output_dir": "diarization_alternative",
}
result_alt = diarize_and_transcribe(**alternative_options)

print("✓ Processing complete")
print(f"Results saved to: {result_alt['output_dir']}")

## Step 7: Save Results Summary

In [None]:
import json

# Collect the headline fields of the full pipeline run into one dict
summary = {
    "audio_file": MEETING_AUDIO,
    "num_speakers": result_full['num_speakers'],
    "identified_speakers": result_full['identified_speakers'],
    "enrolled_speakers": result_full['enrolled_speakers'],
    "total_segments": len(result_full['segments']),
    "output_files": result_full['output_files']
}

# Serialize before touching the file, so a serialization error cannot leave a
# truncated summary.json behind. ensure_ascii=False keeps non-ASCII speaker
# names and paths (e.g. Persian) readable instead of \uXXXX escapes.
summary_json = json.dumps(summary, indent=2, ensure_ascii=False)

# Save summary as UTF-8 regardless of the platform's default encoding
summary_path = Path(result_full['output_dir']) / "summary.json"
# The pipeline presumably created this directory already; creating it here is
# a no-op in that case and avoids a FileNotFoundError otherwise.
summary_path.parent.mkdir(parents=True, exist_ok=True)
summary_path.write_text(summary_json, encoding="utf-8")

print(f"✓ Summary saved to: {summary_path}")
print("\nSummary:")
print(summary_json)

## Advanced: Manual Testing of Components

Test each component separately.

In [None]:
# Test 1: Voice Enrollment
# Smoke test: construct a VoiceEnrollment pointed at a throwaway database
# file. With the lines below left commented out, this cell only shows that
# the constructor runs without raising.
print("Testing Voice Enrollment...")
test_enrollment = VoiceEnrollment("test_database.json")

# Optional: enroll a test speaker and save the database.
# Uncomment and replace "test_audio.wav" with an actual audio path.
# test_enrollment.enroll_speaker("TestSpeaker", "test_audio.wav")
# test_enrollment.save_database()

print("✓ Voice Enrollment working")

In [None]:
# Test 2: Diarization Processor
# Smoke test: construct a DiarizationProcessor with its default arguments.
# With the lines below left commented out, this cell only shows that the
# constructor runs without raising.
print("Testing Diarization Processor...")
test_diarizer = DiarizationProcessor()

# Optional: run diarization and report the segment count.
# Uncomment and replace "test_meeting.wav" with an actual audio path.
# test_segments = test_diarizer.perform_diarization("test_meeting.wav")
# print(f"Found {len(test_segments)} segments")

print("✓ Diarization Processor working")

In [None]:
# Test 3: Transcription Processor
# Smoke test: construct a TranscriptionProcessor with model_name="base"
# (presumably the Whisper "base" model; confirm in transcription.py).
# With the lines below left commented out, this cell only shows that the
# constructor runs without raising.
print("Testing Transcription Processor...")
test_transcriptor = TranscriptionProcessor(model_name="base")

# Optional: transcribe a file and report the segment count.
# Uncomment and replace "test_audio.wav" with an actual audio path.
# test_result = test_transcriptor.transcribe_audio("test_audio.wav", language="en")
# print(f"Transcribed {len(test_result['segments'])} segments")

print("✓ Transcription Processor working")

## Troubleshooting

Common issues and solutions:

1. **GPU Memory Error**: Use a smaller Whisper model or switch to CPU
2. **Poor Identification**: Enroll speakers with longer or multiple samples
3. **Wrong Speaker Count**: Specify the `num_speakers` parameter
4. **Language Issues**: Explicitly set the `expected_language` parameter

In [None]:
# Report the PyTorch version and, when CUDA is usable, the first GPU's details
import torch

print("System Information:")
print(f"PyTorch version: {torch.__version__}")

cuda_ready = torch.cuda.is_available()
print(f"CUDA available: {cuda_ready}")
if cuda_ready:
    # Device index 0 is the only GPU inspected
    print(f"CUDA device: {torch.cuda.get_device_name(0)}")
    total_bytes = torch.cuda.get_device_properties(0).total_memory
    print(f"GPU memory: {total_bytes / 1e9:.2f} GB")

## Complete!

Your diarization system is now ready to use. Check the output directory for all generated files.