# GPU-Accelerated Chunked Audio Transcription
### For AI Learning Marathon - USA Task

**Why chunking?** The full 7h50min audio is too large to process at once (memory crash).

**Solution:** Process in 10-minute chunks with GPU → Fast & memory-efficient

**Instructions:**
1. Upload audio_task_43.mp3 to your Google Drive
2. Enable GPU: Runtime → Change runtime type → GPU → T4 GPU
3. Run all cells

**Estimated time:** 45-90 minutes

In [None]:
# Install dependencies
# openai-whisper provides the `whisper` module that the configuration cell
# imports. The leading `!` hands the line to the notebook's shell, and `-q`
# keeps pip's output quiet.
!pip install -q openai-whisper

In [None]:
# Check GPU
# Reports whether PyTorch can see a CUDA device and, when it can, prints the
# device name and its total memory so the user can confirm the GPU runtime is
# active before starting the long transcription run.
import torch
print(f"GPU available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    # total_memory is reported in bytes; show it in decimal gigabytes.
    print(f"Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")
else:
    # The warning sign and arrows had been stored as mojibake (UTF-8 bytes
    # decoded as cp1252); they are restored to the intended characters here.
    print("⚠ No GPU! Enable it: Runtime → Change runtime type → GPU")

In [None]:
# Mount Google Drive and locate the audio file.
# Leaves `audio_file` holding the path that the processing cell passes to
# ffmpeg. If the file is neither at the default path nor found by the fallback
# search, the cell stops with FileNotFoundError instead of letting a later
# cell fail on a missing input.
from google.colab import drive
import os
import subprocess

drive.mount('/content/drive')

# Update this path if your file is in a different location
audio_file = "/content/drive/MyDrive/audio_task_43.mp3"

if os.path.exists(audio_file):
    print(f"✓ Found: {audio_file}")
    print(f"  Size: {os.path.getsize(audio_file) / (1024**2):.1f} MB")
else:
    print("⚠ Not found. Searching...")
    try:
        # Fallback: look for the file by name anywhere under MyDrive.
        result = subprocess.run(['find', '/content/drive/MyDrive', '-name', 'audio_task_43.mp3'],
                              capture_output=True, text=True, timeout=60)
        files = [f for f in result.stdout.strip().split('\n') if f]
    except subprocess.TimeoutExpired:
        # A search that exceeds the 60 s limit is treated as "not found".
        files = []
    if files:
        # Use the first match when several copies exist.
        audio_file = files[0]
        print(f"✓ Found at: {audio_file}")
    else:
        raise FileNotFoundError(
            "audio_task_43.mp3 was not found under /content/drive/MyDrive. "
            "Upload it to Google Drive or update `audio_file` above."
        )

In [None]:
# Configuration
# Chunk length and total audio length (both in minutes) drive the chunk loop
# in the processing cell.
CHUNK_DURATION_MIN = 10  # Process 10 minutes at a time
TOTAL_DURATION_MIN = 470  # 7h50min

import whisper
import subprocess
import re
import json
from pathlib import Path

# Load model once so every chunk reuses the same weights.
print("Loading Whisper 'base' model (fast on GPU)...")
model = whisper.load_model("base")
# Report where the weights actually ended up by inspecting a model parameter.
print(f"✓ Model loaded on {'GPU' if next(model.parameters()).is_cuda else 'CPU'}")

# Ceiling division, matching how the processing cell computes num_chunks, so
# the announced count stays right when the total is not an exact multiple of
# the chunk length (floor division would under-report by one).
planned_chunks = (TOTAL_DURATION_MIN + CHUNK_DURATION_MIN - 1) // CHUNK_DURATION_MIN
print(f"\nWill process {planned_chunks} chunks of {CHUNK_DURATION_MIN} min each")

In [None]:
# Process audio in chunks
# For each chunk: cut it out of the source file with ffmpeg, transcribe it
# with the loaded Whisper model, and scan the transcript for sentences of the
# form "The <N>th letter in/of the keyword is <L>, <Word>".
# Results accumulate in:
#   all_keywords      - {position: uppercase letter}
#   keyword_locations - {position: {'letter', 'phonetic', 'chunk', 'time_min'}}
import time

# Extra seconds of audio appended to every chunk so that a keyword sentence
# straddling a chunk boundary is still heard in full by one transcription.
# A sentence caught twice simply overwrites the same position.
CHUNK_OVERLAP_SEC = 15

# Compiled once; the pattern does not change between chunks.
KEYWORD_PATTERN = re.compile(
    r"[Tt]he\s+(\d+)(?:st|nd|rd|th)?\s+letter\s+(?:in|of)\s+(?:the\s+)?keyword\s+is\s+([A-Za-z])[,\s]+([A-Za-z]+)"
)

all_keywords = {}
keyword_locations = {}

# Ceiling division: a trailing partial chunk still gets processed.
num_chunks = (TOTAL_DURATION_MIN + CHUNK_DURATION_MIN - 1) // CHUNK_DURATION_MIN
duration_sec = CHUNK_DURATION_MIN * 60
print("="*70)
print(f"Processing {num_chunks} chunks...")
print("="*70)

start_time = time.time()

for i in range(num_chunks):
    start_min = i * CHUNK_DURATION_MIN
    # Clamp so the last chunk's label never runs past the end of the audio.
    end_min = min(start_min + CHUNK_DURATION_MIN, TOTAL_DURATION_MIN)
    start_sec = start_min * 60

    chunk_file = f"chunk_{i:03d}.mp3"

    print(f"\n[{i+1}/{num_chunks}] Minutes {start_min}-{end_min}")

    try:
        # Extract chunk with ffmpeg. Always re-extract (-y overwrites): a file
        # left behind by an interrupted run may be truncated and must not be
        # reused as if it were complete.
        cmd = ['ffmpeg', '-y', '-v', 'quiet', '-ss', str(start_sec),
               '-i', audio_file, '-t', str(duration_sec + CHUNK_OVERLAP_SEC),
               '-acodec', 'libmp3lame', chunk_file]
        subprocess.run(cmd, check=True)

        # Transcribe chunk (half precision only when CUDA is available)
        result = model.transcribe(chunk_file, verbose=False, fp16=torch.cuda.is_available())
    finally:
        # Clean up chunk file to save space, even if extraction or
        # transcription failed.
        if os.path.exists(chunk_file):
            os.remove(chunk_file)

    transcript = result["text"]

    # Search for keywords
    matches = KEYWORD_PATTERN.findall(transcript)

    if matches:
        print(f"  ✓✓✓ FOUND {len(matches)} KEYWORD(S)!")
        for pos, letter, phonetic in matches:
            pos_num = int(pos)
            all_keywords[pos_num] = letter.upper()
            keyword_locations[pos_num] = {
                'letter': letter.upper(),
                'phonetic': phonetic,
                'chunk': i,
                'time_min': start_min
            }
            print(f"      Position {pos_num}: {letter.upper()} ({phonetic})")
    else:
        snippet = transcript[:60].replace('\n', ' ')
        print(f"  - No keywords (sample: {snippet}...)")

    # Progress update every 5 chunks, with a remaining-time estimate based on
    # the average time per chunk so far.
    if (i + 1) % 5 == 0:
        elapsed = (time.time() - start_time) / 60
        avg_time = elapsed / (i + 1)
        remaining = avg_time * (num_chunks - i - 1)
        print(f"\n  Progress: {i+1}/{num_chunks} ({elapsed:.1f} min elapsed, ~{remaining:.1f} min remaining)")
        print(f"  Keywords found so far: {len(all_keywords)}")

total_time = (time.time() - start_time) / 60
print(f"\n{'='*70}")
print(f"✓ Completed in {total_time:.1f} minutes")
print(f"{'='*70}")

In [None]:
# Display results
# Prints every keyword letter found by the processing cell, assembles the
# keyword (unknown positions shown as "_"), writes a summary to SOLUTION.txt
# and triggers a browser download of that file through Colab.
print("\n" + "="*70)
print("FINAL RESULTS")
print("="*70)

if all_keywords:
    print(f"\n✓ Found {len(all_keywords)} keyword letters:\n")

    for pos in sorted(all_keywords):
        loc = keyword_locations[pos]
        print(f"  Position {pos}: {loc['letter']} ({loc['phonetic']}) - at {loc['time_min']} min")

    # Build keyword: positions 1..max_pos, "_" where no letter was heard.
    max_pos = max(all_keywords)
    keyword = "".join(all_keywords.get(i, "_") for i in range(1, max_pos + 1))

    print(f"\n{'='*70}")
    print(f"🚩 KEYWORD: {keyword}")
    print(f"{'='*70}")

    # Check completeness. This only detects gaps below the highest position
    # found; letters beyond max_pos cannot be known to be missing.
    missing = [i for i in range(1, max_pos + 1) if i not in all_keywords]
    if missing:
        print(f"\n⚠ Missing positions: {missing}")
    else:
        print("\n✓✓✓ COMPLETE! ✓✓✓")

    # Save solution (explicit encoding so the file does not depend on the
    # platform's default).
    with open('SOLUTION.txt', 'w', encoding='utf-8') as f:
        f.write(f"KEYWORD: {keyword}\n\n")
        f.write("Letters found:\n")
        for pos in sorted(all_keywords):
            loc = keyword_locations[pos]
            f.write(f"  Position {pos}: {loc['letter']} ({loc['phonetic']}) at {loc['time_min']} min\n")
        if missing:
            f.write(f"\nMissing positions: {missing}\n")

    print("\n✓ Solution saved to SOLUTION.txt")

    # Download
    from google.colab import files
    files.download('SOLUTION.txt')
else:
    print("\n⚠ No keywords found!")