# ITU WebTV Processing Pipeline - Standalone Notebook

A simplified version of the complete processing pipeline that can run independently.
Processes video/audio content into structured transcripts with AI-powered speaker identification.

## Setup and Dependencies

In [None]:
import torch
# Environment check: report whether PyTorch can see a CUDA device and, when it
# can, print the name of device 0.
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")

In [None]:
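# Install required packages (run once). FFmpeg must also be installed and on
# PATH: it is used for MP3 extraction and for converting local audio files.
# !pip install yt-dlp openai-whisper google-generativeai python-docx torch requests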

import os
import re
import json
import time
import requests
import subprocess
from pathlib import Path
from datetime import timedelta, datetime
import random
import math

# Optional imports with graceful fallbacks
try:
    import yt_dlp
    print("‚úÖ yt-dlp available")
except ImportError:
    print("‚ùå yt-dlp not available - video processing disabled")
    yt_dlp = None

try:
    import whisper
    import torch
    print(f"‚úÖ Whisper available - GPU: {torch.cuda.is_available()}")
except ImportError:
    print("‚ùå Whisper not available - transcription disabled")
    whisper = None

try:
    import google.generativeai as genai
    print("‚úÖ Google Gemini available")
except ImportError:
    print("‚ùå Google Gemini not available - AI features disabled")
    genai = None

try:
    from docx import Document
    from docx.shared import Inches, Pt, RGBColor
    from docx.enum.text import WD_ALIGN_PARAGRAPH
    print("‚úÖ python-docx available")
except ImportError:
    print("‚ùå python-docx not available - document generation disabled")
    Document = None

## Configuration

In [None]:
# Configuration
# The Gemini API key is taken from the GEMINI_API_KEY environment variable when
# it is set, so the secret does not have to be stored in the notebook. When the
# variable is unset or empty, the placeholder below is used; setup_gemini_api()
# treats that placeholder as "no key configured".
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY") or "your-gemini-api-key-here"  # Or replace with your API key
OUTPUT_DIR = "output"  # Directory for processed files
MODEL_NAME = "gemini-2.5-flash-lite-preview-06-17"
WHISPER_MODEL = "medium.en"  # or "base.en" for faster processing

# Create output directory
os.makedirs(OUTPUT_DIR, exist_ok=True)
print(f"üìÅ Output directory: {OUTPUT_DIR}")

## Step 1: Audio Download Functions

In [None]:
def download_audio_from_url(url, output_dir):
    """Download the audio track of a video URL as ``audio.mp3`` using yt-dlp.

    Args:
        url: Video page URL passed to yt-dlp.
        output_dir: Directory that receives ``audio.mp3`` (created if missing).

    Returns:
        Tuple ``(output_path, metadata)``: ``output_path`` is the ``Path`` of
        the MP3 file and ``metadata`` is ``{'title': ..., 'duration': ...}``
        read from the yt-dlp info dict ('Unknown' / 0 when absent).

    Raises:
        Exception: If yt-dlp is not installed, or if the download fails. In the
            latter case the original error is chained as ``__cause__``.
    """
    if not yt_dlp:
        raise Exception("yt-dlp not available")

    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    output_path = output_dir / 'audio.mp3'

    ydl_opts = {
        'format': 'bestaudio[ext=m4a]/bestaudio[ext=webm]/bestaudio/best',
        # No extension here: the FFmpegExtractAudio post-processor appends ".mp3".
        'outtmpl': str(output_path.with_suffix('')),
        'extractaudio': True,
        'audioformat': 'mp3',
        'audioquality': '192k',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }],
        'no_warnings': False,
        'quiet': False
    }

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            print(f"üì• Downloading audio from: {url}")
            info = ydl.extract_info(url, download=False) or {}
            # `or` (rather than a .get default) also covers keys that are
            # present but set to None.
            title = info.get('title') or 'Unknown'
            duration = info.get('duration') or 0

            ydl.download([url])

            # Ensure MP3 file exists: if the expected name is missing, fall back
            # to whatever audio file landed in the directory, preferring a real
            # MP3 over other containers.
            if not output_path.exists():
                candidates = sorted(
                    (
                        entry for entry in output_dir.iterdir()
                        if entry.suffix.lower() in ('.mp3', '.m4a', '.wav')
                    ),
                    key=lambda entry: (entry.suffix.lower() != '.mp3', entry.name),
                )
                if candidates:
                    candidates[0].rename(output_path)

            if not output_path.exists():
                raise FileNotFoundError("Audio file not found after download")

            print(f"‚úÖ Audio downloaded: {output_path}")
            return output_path, {'title': title, 'duration': duration}

    except Exception as e:
        # Keep the historical generic exception type, but preserve the cause.
        raise Exception(f"Failed to download audio: {str(e)}") from e

def use_local_audio_file(file_path, output_dir):
    """Place a local audio file in ``output_dir`` as ``audio.mp3``.

    MP3 inputs are copied as-is. Other formats are converted with FFmpeg; if
    the conversion fails, or the ``ffmpeg`` executable cannot be started, the
    file is copied unchanged as a best-effort fallback.

    Args:
        file_path: Path of the existing audio file.
        output_dir: Existing directory that receives ``audio.mp3``.

    Returns:
        Tuple ``(output_path, metadata)`` where ``metadata`` is
        ``{'title': <file stem>, 'duration': 0}``.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
    """
    import shutil

    file_path = Path(file_path)
    if not file_path.exists():
        raise FileNotFoundError(f"Audio file not found: {file_path}")

    output_path = Path(output_dir) / 'audio.mp3'

    needs_copy = True
    if file_path.suffix.lower() != '.mp3':
        # Use FFmpeg to convert to MP3
        cmd = [
            'ffmpeg', '-i', str(file_path),
            '-acodec', 'mp3', '-ab', '192k',
            '-y', str(output_path)
        ]
        try:
            subprocess.run(cmd, check=True, capture_output=True)
            needs_copy = False
        except (subprocess.CalledProcessError, OSError) as exc:
            # OSError covers a missing or non-executable ffmpeg binary, which
            # subprocess reports as FileNotFoundError / PermissionError.
            print(f"FFmpeg conversion failed ({exc.__class__.__name__}); copying the file unchanged")

    if needs_copy:
        try:
            shutil.copy2(file_path, output_path)
        except shutil.SameFileError:
            # The source already is the target file; nothing to do.
            pass

    print(f"‚úÖ Audio file ready: {output_path}")
    return output_path, {'title': file_path.stem, 'duration': 0}

## Step 2: Audio Transcription

In [None]:
def transcribe_audio(audio_path, output_dir, model_size="medium.en"):
    """Run Whisper on an audio file and write plain-text and SRT transcripts.

    Args:
        audio_path: Path of the audio file to transcribe.
        output_dir: Directory that receives ``transcript.txt`` and
            ``transcript.srt``.
        model_size: Whisper model name passed to ``whisper.load_model``.

    Returns:
        Tuple ``(transcript_path, srt_path, segments)`` where ``segments`` is
        the ``'segments'`` list from the Whisper result.

    Raises:
        Exception: If the Whisper package is not available.
    """
    if not whisper:
        raise Exception("Whisper not available")

    target_dir = Path(output_dir)

    # Run on the GPU whenever PyTorch reports a CUDA device, otherwise the CPU.
    if torch and torch.cuda.is_available():
        device = "cuda"
    else:
        device = "cpu"
    print(f"üé§ Loading Whisper model '{model_size}' on {device.upper()}")

    asr_model = whisper.load_model(model_size, device=device)

    print(f"üéôÔ∏è Transcribing audio: {audio_path}")
    result = asr_model.transcribe(str(audio_path), language="en", verbose=False)

    # Plain-text transcript: the full recognised text in one piece.
    transcript_path = target_dir / 'transcript.txt'
    with open(transcript_path, 'w', encoding='utf-8') as text_file:
        text_file.write(result['text'].strip())

    # SRT subtitles: one numbered cue per Whisper segment.
    srt_path = target_dir / 'transcript.srt'
    with open(srt_path, 'w', encoding='utf-8') as srt_file:
        for cue_number, seg in enumerate(result['segments'], 1):
            cue_start = format_srt_time(seg['start'])
            cue_end = format_srt_time(seg['end'])
            cue_text = seg['text'].strip()
            srt_file.write(f"{cue_number}\n")
            srt_file.write(f"{cue_start} --> {cue_end}\n")
            srt_file.write(f"{cue_text}\n\n")

    print(f"‚úÖ Transcription complete: {len(result['text'])} chars, {len(result['segments'])} segments")
    return transcript_path, srt_path, result['segments']

def format_srt_time(seconds):
    """Convert a duration in seconds to the SRT timestamp format.

    The value is rounded to the nearest millisecond and then split with integer
    arithmetic, which avoids float truncation artefacts (for example 2.3 s
    being rendered as ``00:00:02,299``). Negative values are clamped to zero.

    Args:
        seconds: Offset in seconds (int or float).

    Returns:
        A string of the form ``HH:MM:SS,mmm``.
    """
    total_ms = max(0, int(round(float(seconds) * 1000)))
    hours, remainder_ms = divmod(total_ms, 3600 * 1000)
    minutes, remainder_ms = divmod(remainder_ms, 60 * 1000)
    whole_seconds, milliseconds = divmod(remainder_ms, 1000)
    return f"{hours:02d}:{minutes:02d}:{whole_seconds:02d},{milliseconds:03d}"

## Step 3: SRT to JSON Conversion

In [None]:
def srt_to_json(srt_path):
    """Parse an SRT file into a list of cue dictionaries for AI processing.

    Each cue has the keys ``index`` (int), ``start`` and ``end`` (timestamps
    with the comma replaced by a dot), ``speaker`` (always an empty string
    here) and ``text`` (cue lines joined with single spaces).

    Args:
        srt_path: Path of the UTF-8 encoded SRT file.

    Returns:
        List of cue dictionaries in file order.
    """
    # index line, "start --> end" line, then text up to the next cue or the end.
    cue_regex = re.compile(
        r'(\d+)\n([\d:,]+) --> ([\d:,]+)\n(.*?)(?=\n\n\d+\n|$)',
        re.DOTALL,
    )

    with open(srt_path, 'r', encoding='utf-8') as srt_file:
        raw_srt = srt_file.read()

    cues = [
        {
            "index": int(number),
            "start": begin.replace(',', '.'),
            "end": finish.replace(',', '.'),
            "speaker": "",
            "text": body.strip().replace('\n', ' '),
        }
        for number, begin, finish, body in (
            found.groups() for found in cue_regex.finditer(raw_srt)
        )
    ]

    print(f"üìù Converted SRT to JSON: {len(cues)} segments")
    return cues

## Step 4: AI Speaker Identification

In [None]:
def setup_gemini_api():
    """Configure the Gemini client and return a model handle.

    Returns:
        A ``genai.GenerativeModel`` for ``MODEL_NAME``, or ``None`` when the
        Gemini package is missing, the API key is empty, or the key is still
        the placeholder value.
    """
    key_is_placeholder = GEMINI_API_KEY == "your-gemini-api-key-here"
    if genai and GEMINI_API_KEY and not key_is_placeholder:
        genai.configure(api_key=GEMINI_API_KEY)
        return genai.GenerativeModel(MODEL_NAME)
    return None

def extract_speaker_context(transcript_text):
    """Extract speaker information from a transcript using Gemini.

    The model is asked for a JSON object with a ``speakers`` list. The reply is
    stripped of any Markdown code fence (with or without a language tag), and
    if it still does not parse, the span from the first ``{`` to the last ``}``
    is tried. The result is normalised so that ``speakers`` is always a list of
    dictionaries, which is what ``create_speaker_context_prompt`` iterates over.

    Args:
        transcript_text: Full transcript text inserted into the prompt.

    Returns:
        A dict with a ``speakers`` list. ``{"speakers": []}`` is returned when
        Gemini is unavailable or when the request or parsing fails.
    """
    model = setup_gemini_api()
    if not model:
        print("‚ö†Ô∏è Gemini API not available, skipping speaker context extraction")
        return {"speakers": []}

    prompt = f"""
You are an expert in transcript analysis and speaker identification.

Analyze this transcript and extract information about all speakers mentioned.

Identify:
1. Speaker names (when they introduce themselves or are introduced)
2. Their positions/titles
3. Organizations they represent
4. Countries they represent (if applicable)

Return as JSON:
{{
    "speakers": [
        {{
            "name": "Speaker Name",
            "title": "Their title/position",
            "organization": "Organization",
            "country": "Country (if applicable)",
            "description": "Brief description"
        }}
    ]
}}

Transcript:
{transcript_text}

Return ONLY the JSON object:
"""

    try:
        print("üß† Extracting speaker context with Gemini AI...")
        response = model.generate_content(prompt)

        # Clean response: drop an opening ``` / ```json fence and a closing ```.
        cleaned = re.sub(r'^```[A-Za-z]*\s*', '', response.text.strip())
        cleaned = re.sub(r'\s*```$', '', cleaned).strip()

        try:
            parsed = json.loads(cleaned)
        except json.JSONDecodeError:
            # The model sometimes wraps the JSON in prose; retry on the
            # outermost brace-delimited span before giving up.
            first_brace = cleaned.find('{')
            last_brace = cleaned.rfind('}')
            if first_brace == -1 or last_brace <= first_brace:
                raise
            parsed = json.loads(cleaned[first_brace:last_brace + 1])

        # Normalise the shape so downstream code can rely on it.
        if isinstance(parsed, list):
            parsed = {"speakers": parsed}
        if not isinstance(parsed, dict):
            raise ValueError("expected a JSON object with a 'speakers' list")
        speakers = parsed.get('speakers')
        if not isinstance(speakers, list):
            speakers = []
        parsed['speakers'] = [entry for entry in speakers if isinstance(entry, dict)]
        speaker_info = parsed

        print(f"‚úÖ Found {len(speaker_info.get('speakers', []))} speakers in context")
        return speaker_info

    except Exception as e:
        # Best effort: speaker context is optional for the rest of the pipeline.
        print(f"‚ùå Error extracting speaker context: {e}")
        return {"speakers": []}

def create_speaker_context_prompt(speaker_info):
    """Render the known-speakers list as a text block for the diarization prompt.

    Args:
        speaker_info: Dict with a ``speakers`` list of dicts; the keys ``name``,
            ``title``, ``organization`` and ``country`` are read from each.

    Returns:
        The formatted context block, or an empty string when there are no
        speakers.
    """
    known_speakers = speaker_info.get('speakers')
    if not known_speakers:
        return ""

    divider = "=" * 50 + "\n"
    pieces = ["\n\nKNOWN SPEAKERS IN THIS TRANSCRIPT:\n", divider]

    for entry in known_speakers:
        # Read all four fields up front, then append only the non-empty ones.
        who = entry.get('name', 'Unknown')
        role = entry.get('title', '')
        employer = entry.get('organization', '')
        nation = entry.get('country', '')

        line = f"‚Ä¢ {who}"
        if role:
            line += f" - {role}"
        if employer:
            line += f" at {employer}"
        if nation:
            line += f" (representing {nation})"
        pieces.append(line + "\n")

    pieces.append(divider)
    pieces.append("Use these EXACT speaker names when you recognize them.\n\n")
    return "".join(pieces)

## Step 5: Speaker Diarization

In [None]:
def fill_speakers_in_transcript(transcript_data, speaker_context=""):
    """Use Gemini AI to identify the speaker of each transcript segment.

    Segments are sent in batches. For each batch the reply must be a JSON array
    of objects with exactly one entry per input segment; only the ``speaker``
    value of each entry is taken and merged into a copy of the original
    segment, so the model cannot alter text or timestamps. A batch whose reply
    is missing, malformed or of the wrong length is kept unchanged.

    Args:
        transcript_data: List of segment dicts (as produced by ``srt_to_json``).
        speaker_context: Optional known-speakers text inserted into the prompt.

    Returns:
        A list with one segment per input segment, in the same order. When
        Gemini is unavailable the input list itself is returned.
    """
    model = setup_gemini_api()
    if not model:
        print("‚ö†Ô∏è Gemini API not available, returning transcript without speaker identification")
        return transcript_data

    # Process in batches to handle token limits
    batch_size = 50  # Adjust based on token limits
    total_batches = math.ceil(len(transcript_data) / batch_size)
    filled_segments = []

    for batch_num, i in enumerate(range(0, len(transcript_data), batch_size), 1):
        batch = transcript_data[i:i + batch_size]

        print(f"üß† Processing speaker identification batch {batch_num}/{total_batches}...")

        batch_string = json.dumps(batch, indent=2)

        prompt = f"""
You are an expert in transcript analysis and speaker diarization.
Analyze this transcript batch and identify who is speaking in each segment.

{speaker_context}

Instructions:
1. Use the EXACT speaker names from the known speakers list when you recognize them
2. For unknown speakers, use descriptive labels like 'Moderator', 'Participant 1', etc.
3. Base identification on speech patterns, content, and context clues

Return the complete JSON with "speaker" field filled for every segment:

{batch_string}

Return ONLY the filled JSON array:
"""

        try:
            response = model.generate_content(prompt)

            # Clean response: drop an opening ``` / ```json fence and a closing ```.
            cleaned = re.sub(r'^```[A-Za-z]*\s*', '', response.text.strip())
            cleaned = re.sub(r'\s*```$', '', cleaned)

            filled_batch = json.loads(cleaned.strip())

            # Reject replies that would drop, duplicate or corrupt segments.
            if (
                not isinstance(filled_batch, list)
                or len(filled_batch) != len(batch)
                or not all(isinstance(item, dict) for item in filled_batch)
            ):
                raise ValueError("model reply does not match the batch (expected one object per segment)")

            filled_segments.extend(
                {**original, "speaker": str(returned.get("speaker") or "")}
                for original, returned in zip(batch, filled_batch)
            )

        except Exception as e:
            print(f"‚ùå Error processing batch {batch_num}: {e}")
            # Use original batch if AI processing fails
            filled_segments.extend(batch)

        # Pause between requests to avoid rate limiting. This also runs after a
        # failed batch, and is skipped after the final one.
        if batch_num < total_batches:
            time.sleep(1)

    print(f"‚úÖ Speaker identification complete: {len(filled_segments)} segments")
    return filled_segments

## Step 6: Generate Speaker-Separated Transcript

In [None]:
def parse_speaker_info(speaker_name):
    """Split a speaker label into a name and the organization/role it carries.

    Recognised layouts, tried in this order:

    * ``Name (Organization)``
    * ``Name - Organization`` (hyphen, en dash or em dash with whitespace on
      both sides, so hyphenated names such as "Jean-Pierre" stay intact)
    * ``Name, Title, Organization`` (everything after the first comma)

    Args:
        speaker_name: Speaker label; empty or ``None`` means unknown.

    Returns:
        Tuple ``(name, affiliation)``. ``("Unknown Speaker", "Unknown")`` for an
        empty label; the affiliation is ``"Not specified"`` when the label
        carries none.
    """
    if not speaker_name:
        return "Unknown Speaker", "Unknown"

    # Labels come from model output, so tolerate non-string values.
    speaker_name = str(speaker_name).strip()
    if not speaker_name:
        return "Unknown Speaker", "Unknown"

    # Pattern: "Name (Organization)"
    paren_match = re.match(r'^(.+?)\s*\((.+?)\)$', speaker_name)
    if paren_match:
        return paren_match.group(1).strip(), paren_match.group(2).strip()

    # Pattern: "Name - Organization". The dash class is written with escapes so
    # it does not depend on the file's encoding.
    dash_match = re.match(r'^(.+?)\s+[\u2013\u2014-]\s+(.+)$', speaker_name)
    if dash_match:
        return dash_match.group(1).strip(), dash_match.group(2).strip()

    # Pattern: "Name, Title, Organization"
    comma_parts = [part.strip() for part in speaker_name.split(',')]
    if len(comma_parts) >= 2:
        affiliation = ', '.join(part for part in comma_parts[1:] if part)
        return comma_parts[0], affiliation or "Not specified"

    return speaker_name, "Not specified"

def time_to_seconds(time_str):
    """Convert a timestamp to seconds.

    Accepts ``HH:MM:SS.mmm``, ``MM:SS`` and plain seconds, with either a dot or
    a comma as the decimal separator. Numeric input is returned as a float.

    Args:
        time_str: Timestamp string, or a number of seconds.

    Returns:
        The time in seconds as a float, or ``0.0`` when the value cannot be
        parsed.
    """
    # Callers may pass an already-converted number (e.g. as a fallback value).
    if isinstance(time_str, (int, float)) and not isinstance(time_str, bool):
        return float(time_str)

    try:
        parts = str(time_str).strip().replace(',', '.').split(':')
        if len(parts) > 3:
            return 0.0
        # Left-pad with zeros so "SS" and "MM:SS" share the HH:MM:SS formula.
        values = [0.0] * (3 - len(parts)) + [float(part) for part in parts]
    except (TypeError, ValueError):
        return 0.0

    hours, minutes, seconds = values
    return hours * 3600 + minutes * 60 + seconds

def group_consecutive_segments(transcript_data):
    """Merge runs of consecutive segments spoken by the same speaker.

    Args:
        transcript_data: List of segment dicts; the keys ``speaker``, ``text``,
            ``start`` and ``end`` are read, each with a default when missing.

    Returns:
        List of group dicts with the keys ``speaker``, ``text_parts``,
        ``start_time`` and ``end_time`` (seconds, via ``time_to_seconds``) and
        ``combined_text`` (the parts joined with single spaces). Empty input
        yields an empty list.
    """
    if not transcript_data:
        return []

    grouped_segments = []
    current_group = None

    for segment in transcript_data:
        speaker = segment.get('speaker', 'Unknown')

        if current_group is not None and speaker == current_group['speaker']:
            # Same speaker, add to current group
            current_group['text_parts'].append(segment.get('text', ''))
            # Only move the end time when this segment actually has one; a
            # segment without 'end' keeps the end time reached so far.
            segment_end = segment.get('end')
            if segment_end is not None:
                current_group['end_time'] = time_to_seconds(segment_end)
            continue

        # Different speaker (or first segment): close the open group, start anew.
        if current_group is not None:
            current_group['combined_text'] = ' '.join(current_group['text_parts'])
            grouped_segments.append(current_group)

        current_group = {
            'speaker': speaker,
            'text_parts': [segment.get('text', '')],
            'start_time': time_to_seconds(segment.get('start', '0')),
            'end_time': time_to_seconds(segment.get('end', '0')),
        }

    # Don't forget the last group
    current_group['combined_text'] = ' '.join(current_group['text_parts'])
    grouped_segments.append(current_group)

    return grouped_segments

def create_speaker_separated_transcript(filled_transcript, output_dir, title="Meeting"):
    """Write a readable transcript with one block per speaker turn.

    Consecutive segments by the same speaker are merged, and each turn is
    written to ``transcript_speakers.txt`` as a bracketed header line (speaker,
    optional affiliation, ``MM:SS - MM:SS`` timing) followed by the text.

    Args:
        filled_transcript: Segment dicts with speaker labels filled in.
        output_dir: Directory that receives ``transcript_speakers.txt``.
        title: Title written in the first line of the file.

    Returns:
        Tuple ``(speakers_path, grouped)`` where ``grouped`` is the list of
        merged speaker turns.
    """
    destination = Path(output_dir)

    # Merge consecutive segments into speaker turns.
    grouped = group_consecutive_segments(filled_transcript)

    speakers_path = destination / 'transcript_speakers.txt'
    with open(speakers_path, 'w', encoding='utf-8') as out:
        out.write(f"# Speaker-separated transcript: {title}\n\n")

        for turn in grouped:
            clean_speaker, representing = parse_speaker_info(turn['speaker'])
            spoken_text = turn['combined_text']
            began = turn['start_time']
            finished = turn['end_time']

            # Header label: include the affiliation only when one is known.
            label = clean_speaker
            if representing and representing != "Not specified":
                label = f"{clean_speaker}, {representing}"

            # Timing as minutes:seconds for both ends of the turn.
            began_min, began_sec = divmod(began, 60)
            finished_min, finished_sec = divmod(finished, 60)
            timing = (
                f" ({int(began_min):02d}:{int(began_sec):02d}"
                f" - {int(finished_min):02d}:{int(finished_sec):02d})"
            )

            out.write(f"[{label}]{timing}\n")
            out.write(f"{spoken_text}\n\n")

    print(f"‚úÖ Speaker-separated transcript created: {speakers_path}")
    return speakers_path, grouped

## Step 7: Generate ITU Summary

In [None]:
def generate_itu_summary(transcript_content):
    """Ask Gemini for a short summary of the transcript from ITU's perspective.

    Args:
        transcript_content: Transcript text inserted into the prompt.

    Returns:
        The stripped summary text, or ``None`` when Gemini is unavailable or
        the request fails.
    """
    gemini = setup_gemini_api()
    if not gemini:
        print("‚ö†Ô∏è Gemini API not available, skipping ITU summary")
        return None

    summary_prompt = f"""
You are an ITU staff member writing a brief internal summary for colleagues.

Analyze this meeting transcript and write a concise summary focusing ONLY on what matters to ITU's work.

ITU FOCUS AREAS (prioritize what's most relevant):
‚Ä¢ Standards & Technical work (ITU-T, ITU-R)
‚Ä¢ Digital inclusion & development (ITU-D)
‚Ä¢ Emerging tech (AI, 5G/6G, IoT)
‚Ä¢ Cybersecurity & trust
‚Ä¢ Spectrum management
‚Ä¢ Digital transformation initiatives
‚Ä¢ ICT capacity building

FORMAT:
**Key ITU-Relevant Points:**
‚Ä¢ [Most important point for ITU]
‚Ä¢ [Second priority point]

**Potential ITU Actions/Opportunities:**
‚Ä¢ [What ITU could/should do based on this meeting]

Maximum 150 words total. If minimal ICT content, write "Limited relevance to ITU mandate - primarily [topic]"

TRANSCRIPT:
{transcript_content}

Provide your ITU-focused summary:
"""

    # Any failure in the request is reported and turned into a None result.
    try:
        print("üè¢ Generating ITU-focused summary...")
        reply = gemini.generate_content(summary_prompt)
        summary_text = reply.text.strip()
        print(f"‚úÖ ITU summary generated ({len(summary_text)} characters)")
    except Exception as err:
        print(f"‚ùå Error generating ITU summary: {err}")
        return None
    else:
        return summary_text

## Step 8: Generate Professional Meeting Notes

In [None]:
def generate_meeting_notes(transcript_content):
    """Ask Gemini for structured meeting notes in ITU/UN diplomatic style.

    Args:
        transcript_content: Transcript text inserted into the prompt.

    Returns:
        The stripped notes text, or ``None`` when Gemini is unavailable or the
        request fails.
    """
    gemini = setup_gemini_api()
    if not gemini:
        print("‚ö†Ô∏è Gemini API not available, skipping meeting notes")
        return None

    notes_prompt = f"""
Create professional meeting notes in ITU/UN diplomatic style.

Use this EXACT structure:

**MEETING OVERVIEW**
Brief purpose, key participants, main themes (2-3 sentences only)

**KEY DISCUSSIONS**
Main topics with speaker attribution. Format: "[Speaker Name, Organization] emphasized that..."

**POSITIONS & RECOMMENDATIONS**
Member state positions and organizational viewpoints

**DECISIONS & ACTION ITEMS**
‚Ä¢ Specific decisions made
‚Ä¢ Action items with responsible parties
‚Ä¢ Timelines and next steps

**TECHNICAL MATTERS** (only if significant technical content)
Standards, specifications, implementation issues

STYLE: Formal UN/ITU language, third person, speaker attribution, concise sections.

TRANSCRIPT:
{transcript_content}

Generate professional meeting notes:
"""

    # Any failure in the request is reported and turned into a None result.
    try:
        print("üìù Generating professional meeting notes...")
        reply = gemini.generate_content(notes_prompt)
        notes_text = reply.text.strip()
        print(f"‚úÖ Meeting notes generated ({len(notes_text)} characters)")
    except Exception as err:
        print(f"‚ùå Error generating meeting notes: {err}")
        return None
    else:
        return notes_text

def create_word_document(notes_content, title, output_dir):
    """Create a formatted Word document from generated meeting notes.

    Lines of ``notes_content`` are mapped as follows: a line wrapped in ``**``
    becomes a level-2 heading, a line starting with a bullet marker becomes a
    bulleted paragraph (with the marker removed), anything else becomes a plain
    paragraph. When python-docx is not available the notes are written to a
    plain-text file instead.

    Args:
        notes_content: Meeting notes text.
        title: Meeting title, shown in the document and used in the file name.
        output_dir: Directory that receives the file.

    Returns:
        ``Path`` of the created ``.docx`` file, or of the ``.txt`` fallback.
    """
    from datetime import timezone

    # Take the timestamps once so every date in the document agrees.
    created_local = datetime.now()
    created_utc = datetime.now(timezone.utc)

    if not Document:
        print("‚ö†Ô∏è python-docx not available, saving as text file")
        text_path = Path(output_dir) / f"meeting_notes_{created_local.strftime('%Y%m%d')}.txt"
        with open(text_path, 'w', encoding='utf-8') as f:
            f.write(f"MEETING NOTES\n{'='*50}\n\n")
            f.write(f"Title: {title}\n")
            f.write(f"Date: {created_local.strftime('%B %d, %Y')}\n\n")
            f.write(notes_content)
        return text_path

    # Create Word document
    doc = Document()

    # Set margins
    for section in doc.sections:
        section.top_margin = Inches(1)
        section.bottom_margin = Inches(1)
        section.left_margin = Inches(1)
        section.right_margin = Inches(1)

    # Add header
    header = doc.sections[0].header
    header_para = header.paragraphs[0]
    header_para.text = "International Telecommunication Union"
    header_para.alignment = WD_ALIGN_PARAGRAPH.CENTER

    # Document title
    title_heading = doc.add_heading('MEETING NOTES', 0)
    title_heading.alignment = WD_ALIGN_PARAGRAPH.CENTER

    # Meeting title
    meeting_title = doc.add_heading(title, 1)
    meeting_title.alignment = WD_ALIGN_PARAGRAPH.CENTER

    # Date
    date_para = doc.add_paragraph()
    date_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
    date_run = date_para.add_run(f"Date: {created_local.strftime('%B %d, %Y')}")
    date_run.italic = True

    # Separator
    doc.add_paragraph("_" * 80).alignment = WD_ALIGN_PARAGRAPH.CENTER
    doc.add_paragraph()

    # Bullet markers recognised at the start of a line. The whole marker is
    # removed, whatever its length. "* " (with the space) is the Markdown
    # bullet; the space keeps "**bold** text" lines from being treated as one.
    bullet_markers = ('‚Ä¢', '\u2022', '* ', '-')

    # Add content
    for line in notes_content.split('\n'):
        line = line.strip()
        if not line:
            continue

        # Section headers (at least one character between the ** pairs)
        if len(line) > 4 and line.startswith('**') and line.endswith('**'):
            section_title = line[2:-2].strip()
            doc.add_heading(section_title, 2)
            continue

        # Bullet points
        marker = next((m for m in bullet_markers if line.startswith(m)), None)
        if marker is not None:
            doc.add_paragraph(line[len(marker):].strip(), style='List Bullet')
        # Regular paragraphs
        else:
            doc.add_paragraph(line)

    # Footer: the timestamp is labelled UTC, so it is taken from a UTC clock.
    footer = doc.sections[0].footer
    footer_para = footer.paragraphs[0]
    footer_para.text = f"Generated on {created_utc.strftime('%B %d, %Y at %H:%M UTC')} | ITU Processing System"
    footer_para.alignment = WD_ALIGN_PARAGRAPH.CENTER

    # Save document under a file-system-safe, length-limited version of the title
    safe_title = "".join(c for c in title if c.isalnum() or c in (' ', '-', '_')).strip()
    safe_title = safe_title.replace(' ', '_')[:30]
    doc_path = Path(output_dir) / f"Meeting_Notes_{safe_title}_{created_local.strftime('%Y%m%d')}.docx"
    doc.save(str(doc_path))

    print(f"‚úÖ Word document created: {doc_path}")
    return doc_path

## Main Processing Pipeline

In [None]:
def process_meeting(input_source, title="Meeting", is_url=True):
    """Run the complete pipeline on one recording.

    Steps: obtain audio (download or local file), transcribe with Whisper,
    convert the SRT to JSON cues, extract speaker context, label speakers,
    write the speaker-separated transcript, then generate the ITU summary and
    the meeting notes. All outputs go into a new timestamped session directory
    under ``OUTPUT_DIR``.

    Args:
        input_source: Video URL (``is_url=True``) or local audio file path.
        title: Meeting title. For URLs, an empty title or the default
            ``"Meeting"`` is replaced by the title from the download metadata.
        is_url: Whether ``input_source`` is a URL.

    Returns:
        Dict with the keys ``session_dir``, ``audio_path``, ``transcript_path``,
        ``srt_path``, ``speakers_path``, ``segments``, ``itu_summary``,
        ``meeting_notes`` and ``notes_document_path`` (``None`` when no notes
        were generated). ``None`` is returned when any step fails; the error
        and traceback are printed.
    """
    print(f"üöÄ Starting processing pipeline for: {title}")
    print("=" * 60)

    session_dir = Path(OUTPUT_DIR) / f"meeting_{datetime.now().strftime('%Y%m%d_%H%M%S')}"

    try:
        # Create session directory. parents=True keeps this working even if
        # OUTPUT_DIR was removed after the configuration cell ran; doing it
        # inside the try block reports a failure like any other step.
        session_dir.mkdir(parents=True, exist_ok=True)

        # Step 1: Get audio
        if is_url:
            audio_path, metadata = download_audio_from_url(input_source, session_dir)
            if not title or title == "Meeting":
                title = metadata.get('title', 'Meeting')
        else:
            audio_path, metadata = use_local_audio_file(input_source, session_dir)

        # Step 2: Transcribe
        transcript_path, srt_path, _raw_segments = transcribe_audio(audio_path, session_dir, WHISPER_MODEL)

        # Step 3: Convert to JSON for AI processing
        json_segments = srt_to_json(srt_path)

        # Step 4: Extract speaker context
        with open(transcript_path, 'r', encoding='utf-8') as f:
            full_transcript = f.read()

        speaker_info = extract_speaker_context(full_transcript)
        speaker_context = create_speaker_context_prompt(speaker_info)

        # Step 5: Fill speakers using AI
        filled_transcript = fill_speakers_in_transcript(json_segments, speaker_context)

        # Step 6: Create speaker-separated transcript
        speakers_path, structured_segments = create_speaker_separated_transcript(
            filled_transcript, session_dir, title
        )

        # Step 7: Generate ITU summary
        itu_summary = generate_itu_summary(full_transcript)
        if itu_summary:
            summary_path = session_dir / 'itu_summary.txt'
            with open(summary_path, 'w', encoding='utf-8') as f:
                f.write(itu_summary)

        # Step 8: Generate meeting notes
        # NOTE(review): the notes prompt asks for speaker attribution, but the
        # input here is the raw transcript without speaker labels; consider
        # passing the speaker-separated transcript instead - confirm intent.
        meeting_notes = generate_meeting_notes(full_transcript)
        doc_path = None
        if meeting_notes:
            # Save as text
            notes_text_path = session_dir / 'meeting_notes.txt'
            with open(notes_text_path, 'w', encoding='utf-8') as f:
                f.write(meeting_notes)

            # Save as Word document (or its plain-text fallback)
            doc_path = create_word_document(meeting_notes, title, session_dir)

        # Summary
        print("\n" + "=" * 60)
        print("‚úÖ PROCESSING COMPLETE!")
        print(f"üìÅ Output directory: {session_dir}")
        print(f"üéµ Audio: {audio_path.name}")
        print(f"üìù Transcript: {transcript_path.name}")
        print(f"üé¨ Subtitles: {srt_path.name}")
        print(f"üë• Speaker transcript: {speakers_path.name}")
        if itu_summary:
            print(f"üè¢ ITU summary: itu_summary.txt")
        if meeting_notes:
            print(f"üìÑ Meeting notes: meeting_notes.txt")
            if Document:
                print(f"üìé Word document: {doc_path.name}")

        return {
            'session_dir': session_dir,
            'audio_path': audio_path,
            'transcript_path': transcript_path,
            'srt_path': srt_path,
            'speakers_path': speakers_path,
            'segments': structured_segments,
            'itu_summary': itu_summary,
            'meeting_notes': meeting_notes,
            'notes_document_path': doc_path,
        }

    except Exception as e:
        # Top-level boundary for notebook use: report the failure and return
        # None instead of raising into the calling cell.
        print(f"‚ùå Processing failed: {str(e)}")
        import traceback
        traceback.print_exc()
        return None

## Usage Examples

Run the cells below to process your content:

In [None]:
# Example 1: Process a YouTube video
# Replace the placeholder below with a real video URL, then uncomment the
# process_meeting(...) line to run the pipeline on it.
video_url = "https://www.youtube.com/watch?v=your-video-id"
# result = process_meeting(video_url, "Sample YouTube Video", is_url=True)

In [None]:
# Example 2: Process a local audio file
# Replace the placeholder below with the path of an existing audio file, then
# uncomment the process_meeting(...) line; is_url=False selects the local-file path.
audio_file = "path/to/your/audio/file.mp3"
# result = process_meeting(audio_file, "Local Audio File", is_url=False)

In [None]:
# Example 3: Process UN WebTV content
# Replace the placeholder below with an actual UN WebTV asset URL, then
# uncomment the process_meeting(...) line to run the pipeline on it.
webtv_url = "https://webtv.un.org/asset/your-asset-id"
# result = process_meeting(webtv_url, "UN WebTV Meeting", is_url=True)

## Simple Processing Function

For quick processing without all the setup:

In [None]:
def quick_process(input_source, title=None):
    """Run ``process_meeting`` after detecting whether the input is a URL or a file.

    Args:
        input_source: ``http(s)://`` URL, or the path of a local audio file
            (string or path-like object).
        title: Optional meeting title. When omitted, a local file uses its
            stem; a URL uses the default ``"Meeting"``, which
            ``process_meeting`` replaces with the title from the download
            metadata.

    Returns:
        Whatever ``process_meeting`` returns: the result dict, or ``None`` when
        processing fails.
    """
    # Accept path-like objects as well as strings.
    source = str(input_source)

    # Auto-detect if input is URL or file (URL schemes are case-insensitive)
    is_url = source.lower().startswith(('http://', 'https://'))

    if not title:
        # "Meeting" is the value process_meeting treats as "no title given";
        # any other placeholder would hide the real title of a downloaded video.
        title = "Meeting" if is_url else Path(source).stem

    return process_meeting(source, title, is_url)

# Usage:
# result = quick_process("https://youtube.com/watch?v=abc123")
# result = quick_process("/path/to/audio.mp3")