# GPU Audio Transcription - FIXED VERSION
### Handles both "first" and "1st" keyword formats

**Instructions:**
1. Upload audio_task_43.mp3 to your Google Drive
2. Enable GPU: Runtime → Change runtime type → T4 GPU
3. Run all cells

**Time:** ~60-90 minutes for full transcription

In [None]:
# Install Whisper
!pip install -q openai-whisper

In [None]:
# Check GPU
# Reports whether PyTorch can see a CUDA device; when it can, also prints the
# name and total memory of device 0, otherwise a reminder to enable the GPU.
import torch

gpu_ready = torch.cuda.is_available()
print(f"GPU available: {gpu_ready}")
if not gpu_ready:
    print("‚ö† WARNING: Enable GPU in Runtime ‚Üí Change runtime type")
else:
    device_props = torch.cuda.get_device_properties(0)
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    # total_memory is divided by 1e9 to print it in GB
    print(f"Memory: {device_props.total_memory / 1e9:.1f} GB")

In [None]:
# Mount Google Drive
# Mounts Drive at /content/drive and resolves `audio_file`, the path every
# later cell reads from.  If the default path does not exist, Drive is searched
# for the file by name; if that also fails the cell stops with an error rather
# than leaving `audio_file` pointing at a file that is not there.
from google.colab import drive
import os

drive.mount('/content/drive')

# Update this path if needed
audio_file = "/content/drive/MyDrive/audio_task_43.mp3"

if os.path.exists(audio_file):
    print(f"‚úì Found: {audio_file}")
    print(f"  Size: {os.path.getsize(audio_file) / (1024**2):.1f} MB")
else:
    print(f"‚ö† Searching for audio_task_43.mp3...")
    import subprocess
    try:
        result = subprocess.run(['find', '/content/drive/MyDrive', '-name', 'audio_task_43.mp3'],
                                capture_output=True, text=True, timeout=60)
    except subprocess.TimeoutExpired as exc:
        # A slow Drive listing should read as "not found", not as a bare timeout.
        raise FileNotFoundError(
            "Searching Google Drive for audio_task_43.mp3 timed out after 60 s; "
            "set audio_file to the correct path and re-run this cell."
        ) from exc
    files = [line for line in result.stdout.splitlines() if line.strip()]
    if not files:
        # Fail here, with a clear message, instead of much later inside ffmpeg
        # (which runs with '-v quiet' and would not say why it failed).
        raise FileNotFoundError(
            "audio_task_43.mp3 was not found anywhere under /content/drive/MyDrive; "
            "upload it or set audio_file to the correct path and re-run this cell."
        )
    audio_file = files[0]
    print(f"‚úì Found at: {audio_file}")

In [None]:
# Configuration
# Defines the chunking constants, creates the output directories, loads the
# Whisper model and derives `num_chunks`.  All names assigned here are globals
# read by the later cells.
CHUNK_DURATION_MIN = 10  # 10-minute chunks
TOTAL_DURATION_MIN = 470  # 7h50min (hard-coded; not read from the audio file)

import whisper
import subprocess
import re
import time

# Create output directories
# NOTE: `os` is not imported in this cell; it comes from the Drive-mount cell.
os.makedirs('audio_chunks', exist_ok=True)
os.makedirs('transcriptions', exist_ok=True)
print("‚úì Created output directories: audio_chunks/ and transcriptions/")

# Load model
print("Loading Whisper 'base' model...")
model = whisper.load_model("base")
# Reports GPU/CPU based on where the model's first parameter tensor lives.
print(f"‚úì Model on {'GPU' if next(model.parameters()).is_cuda else 'CPU'}")

# Ceiling division: a trailing partial chunk still counts as one chunk.
num_chunks = (TOTAL_DURATION_MIN + CHUNK_DURATION_MIN - 1) // CHUNK_DURATION_MIN
print(f"\nWill process {num_chunks} chunks of {CHUNK_DURATION_MIN} minutes each")

In [None]:
# Ordinal word mapping (FIXED!)
# Spelled-out ordinals recognised in the transcript, mapped to their position.
ordinal_map = {
    'first': 1, 'second': 2, 'third': 3, 'fourth': 4, 'fifth': 5,
    'sixth': 6, 'seventh': 7, 'eighth': 8, 'ninth': 9, 'tenth': 10,
    'eleventh': 11, 'twelfth': 12, 'thirteenth': 13, 'fourteenth': 14,
    'fifteenth': 15, 'sixteenth': 16, 'seventeenth': 17, 'eighteenth': 18,
    'nineteenth': 19, 'twentieth': 20
}

# Compiled once instead of on every find_keywords() call.  The word
# alternatives are generated from ordinal_map so the dict and the regex cannot
# drift apart.  Matches, case-insensitively:
#   "the <ordinal> letter in|of [the] keyword is <letter><sep><phonetic>"
# where <ordinal> is a word from ordinal_map or digits with an optional
# st/nd/rd/th suffix, and <sep> is any run of commas, periods or whitespace.
#   * The leading \b stops "the" from matching inside a longer word ("bathe").
#   * The phonetic word may contain internal hyphens so "X-ray" is kept whole.
_KEYWORD_PATTERN = re.compile(
    r"\bthe\s+("
    + "|".join(re.escape(word) for word in ordinal_map)
    + r"|\d+(?:st|nd|rd|th)?)"
    r"\s+letter\s+(?:in|of)\s+(?:the\s+)?keyword\s+is\s+"
    r"([A-Za-z])[,.\s]+([A-Za-z]+(?:-[A-Za-z]+)*)",
    re.IGNORECASE,
)


def find_keywords(text: str) -> list:
    """Find keyword-letter announcements - handles both 'first' and '1st' formats.

    Args:
        text: Transcript text to scan.

    Returns:
        A list of ``(position, letter, phonetic)`` tuples in order of
        appearance.  ``position`` is an int, ``letter`` is upper-cased and
        ``phonetic`` is the word that follows the letter, as transcribed.
        Empty when nothing matches.
    """
    results = []
    for ordinal, letter, phonetic in _KEYWORD_PATTERN.findall(text):
        pos_num = ordinal_map.get(ordinal.lower())
        if pos_num is None:
            # Numeric ordinal (1st, 2nd, 12, ...): keep only the digits.
            pos_num = int(re.sub(r'\D', '', ordinal))
        results.append((pos_num, letter.upper(), phonetic))
    return results


print("‚úì Keyword search function ready (handles both formats)")

In [None]:
# Process all chunks
#
# For every CHUNK_DURATION_MIN-minute window of `audio_file` this cell:
#   1. cuts the window out with ffmpeg (skipped when the chunk already exists),
#   2. transcribes it with the loaded Whisper `model`,
#   3. writes the transcript to transcriptions/chunk_NNN.txt, and
#   4. scans the transcript with find_keywords().
# Results accumulate in two dicts keyed by letter position:
#   all_keywords      -> letter
#   keyword_locations -> {'letter', 'phonetic', 'chunk', 'time_min'}
all_keywords = {}
keyword_locations = {}

print("="*70)
print(f"Processing {num_chunks} chunks with FIXED keyword detection")
print("="*70)

start_time = time.time()
duration_sec = CHUNK_DURATION_MIN * 60  # same for every chunk

for i in range(num_chunks):
    start_min = i * CHUNK_DURATION_MIN
    start_sec = start_min * 60

    chunk_file = f"audio_chunks/chunk_{i:03d}.mp3"
    transcript_file = f"transcriptions/chunk_{i:03d}.txt"

    print(f"\n[{i+1}/{num_chunks}] Minutes {start_min}-{start_min+CHUNK_DURATION_MIN}")

    # Extract chunk
    # ffmpeg writes to a temporary name that is renamed only after it exits
    # successfully.  An interrupted run therefore cannot leave a truncated
    # chunk_NNN.mp3 behind that the exists() check would reuse on the next run.
    if not os.path.exists(chunk_file):
        partial_file = f"audio_chunks/chunk_{i:03d}.partial.mp3"
        cmd = ['ffmpeg', '-y', '-v', 'quiet', '-ss', str(start_sec),
               '-i', audio_file, '-t', str(duration_sec),
               '-acodec', 'libmp3lame', partial_file]
        subprocess.run(cmd, check=True)
        os.replace(partial_file, chunk_file)

    # Transcribe (half precision only when a CUDA device is available)
    result = model.transcribe(chunk_file, verbose=False, fp16=torch.cuda.is_available())
    transcript = result["text"]

    # Save transcription to file
    with open(transcript_file, 'w', encoding='utf-8') as f:
        f.write(f"Chunk {i} - Minutes {start_min}-{start_min+CHUNK_DURATION_MIN}\n")
        f.write("="*70 + "\n\n")
        f.write(transcript)

    # Search for keywords with FIXED pattern
    matches = find_keywords(transcript)

    if matches:
        print(f"  ‚úì‚úì‚úì FOUND {len(matches)} KEYWORD(S)! ‚úì‚úì‚úì")
        for pos_num, letter, phonetic in matches:
            # The latest detection still wins, but a disagreement with an
            # earlier one is reported instead of being overwritten silently.
            previous = keyword_locations.get(pos_num)
            if previous is not None and previous['letter'] != letter:
                print(f"      ! Position {pos_num}: {previous['letter']} "
                      f"(chunk {previous['chunk']}) overwritten by {letter}")
            all_keywords[pos_num] = letter
            keyword_locations[pos_num] = {
                'letter': letter,
                'phonetic': phonetic,
                'chunk': i,
                'time_min': start_min
            }
            print(f"      Position {pos_num}: {letter} ({phonetic})")
    else:
        snippet = transcript[:60].replace('\n', ' ')
        print(f"  - No keywords (sample: {snippet}...)")

    print(f"  üíæ Saved: {chunk_file} and {transcript_file}")

    # Progress every 5 chunks
    if (i + 1) % 5 == 0:
        elapsed = (time.time() - start_time) / 60
        avg_time = elapsed / (i + 1)
        remaining = avg_time * (num_chunks - i - 1)
        print(f"\n  ‚è± Progress: {i+1}/{num_chunks} | {elapsed:.1f}m elapsed | ~{remaining:.1f}m remaining")
        print(f"  üîë Keywords found: {len(all_keywords)}")

total_time = (time.time() - start_time) / 60
print(f"\n{'='*70}")
print(f"‚úì Completed in {total_time:.1f} minutes")
print(f"‚úì Audio chunks saved to: audio_chunks/")
print(f"‚úì Transcriptions saved to: transcriptions/")
print(f"{'='*70}")

In [None]:
# Display final results
# Prints every detected letter, assembles the keyword (using "_" for positions
# that were never detected), writes the summary to SOLUTION.txt and triggers a
# browser download of that file.
print("\n" + "="*70)
print("FINAL RESULTS")
print("="*70)

if not all_keywords:
    print("\n‚ö† No keywords found. Check the audio file and pattern.")
else:
    found_positions = sorted(all_keywords)
    print(f"\n‚úì Found {len(all_keywords)} keyword letters:\n")

    for pos in found_positions:
        info = keyword_locations[pos]
        print(f"  Position {pos}: {info['letter']} ({info['phonetic']}) - at {info['time_min']} min")

    # Build keyword: one character per position from 1 up to the highest found
    max_pos = found_positions[-1]
    slots = range(1, max_pos + 1)
    keyword = "".join(all_keywords.get(slot, "_") for slot in slots)

    print(f"\n{'='*70}")
    print(f"üö© KEYWORD: {keyword}")
    print(f"{'='*70}")

    # Check completeness
    missing = [slot for slot in slots if slot not in all_keywords]
    if not missing:
        print(f"\n‚úì‚úì‚úì COMPLETE KEYWORD! ‚úì‚úì‚úì")
    else:
        print(f"\n‚ö† Missing positions: {missing}")
        print("You may need to manually search for these in the audio.")

    # Save solution
    report = [f"KEYWORD: {keyword}\n\n", "Letters found:\n"]
    for pos in found_positions:
        info = keyword_locations[pos]
        report.append(f"  Position {pos}: {info['letter']} ({info['phonetic']}) at {info['time_min']} min\n")
    if missing:
        report.append(f"\nMissing positions: {missing}\n")
    with open('SOLUTION.txt', 'w') as f:
        f.writelines(report)

    print("\n‚úì Solution saved to SOLUTION.txt")

    # Download result
    from google.colab import files
    files.download('SOLUTION.txt')
    print("\n‚úì Downloaded SOLUTION.txt to your computer!")