# Fast Audio Transcription with GPU
### For AI Learning Marathon - USA Task

**Instructions:**
1. Upload audio_task_43.mp3 to the top level of your Google Drive ("My Drive"), not inside a subfolder
2. Go to Google Colab: https://colab.research.google.com/
3. Upload this notebook
4. Enable GPU: Runtime → Change runtime type → GPU → T4 GPU
5. Run all cells (it will ask to mount Drive - allow it)
6. If the file is in a subfolder, update the `audio_file` path in the "Mount Google Drive" cell and re-run that cell

**Estimated time with GPU:** 30-60 minutes (vs 4-5 hours on CPU)

In [None]:
# Install Whisper: the leading "!" runs pip as a shell command from the notebook;
# -q keeps pip's output quiet. Installs the `openai-whisper` package imported later as `whisper`.
!pip install -q openai-whisper

In [None]:
# Report whether PyTorch can see a CUDA GPU and, if so, which device it is.
import torch

gpu_ready = torch.cuda.is_available()
print(f"GPU available: {gpu_ready}")
if not gpu_ready:
    # No CUDA device: point the user at the Colab runtime setting.
    print("âš  WARNING: No GPU! Go to Runtime â†’ Change runtime type â†’ GPU")
else:
    print(f"GPU name: {torch.cuda.get_device_name(0)}")

In [None]:
# Mount Google Drive and locate audio file.
# Leaves the resolved path in `audio_file` for the transcription cell.
from google.colab import drive
import os
import subprocess

# Mount Drive
print("Mounting Google Drive...")
drive.mount('/content/drive')
print("âœ“ Drive mounted!\n")

# Specify file path (update this if your file is in a subfolder)
audio_file = "/content/drive/MyDrive/audio_task_43.mp3"

# Check if file exists
if os.path.exists(audio_file):
    print(f"âœ“ Found: {audio_file}")
    print(f"  Size: {os.path.getsize(audio_file) / (1024**2):.1f} MB")
else:
    print(f"âš  File not found at: {audio_file}")
    print("\nSearching your Drive for audio_task_43.mp3...")

    # Fall back to scanning the whole Drive for the file name.
    # The scan is capped at 60 s; a timeout is reported instead of raising,
    # so the cell still ends with the "not found" guidance below.
    try:
        result = subprocess.run(
            ['find', '/content/drive/MyDrive', '-name', 'audio_task_43.mp3', '-type', 'f'],
            capture_output=True, text=True, timeout=60
        )
        found_files = [line for line in result.stdout.splitlines() if line]
    except subprocess.TimeoutExpired:
        print("\nSearch timed out after 60 seconds.")
        found_files = []

    if found_files:
        # Use the first hit; later cells read `audio_file`.
        audio_file = found_files[0]
        print(f"\nâœ“ Found at: {audio_file}")
        print(f"  Size: {os.path.getsize(audio_file) / (1024**2):.1f} MB")
    else:
        print("\nâš  File not found in Drive!")
        print("Please upload audio_task_43.mp3 to your Google Drive and update the path above.")

In [None]:
# Load the Whisper 'small' model and report which device it ended up on.
import whisper

print("Loading Whisper 'small' model (better accuracy)...")
model = whisper.load_model("small")
# Inspect the first parameter tensor to see whether the weights live on a CUDA device.
model_on_gpu = next(model.parameters()).is_cuda
print("âœ“ Model loaded on GPU!" if model_on_gpu else "âœ“ Model loaded (CPU)")

# Optional: unmount Drive for privacy.
# NOTE: loading the model does NOT read the audio file. The transcription cell
# passes the `audio_file` path (on the mounted Drive) to model.transcribe(), so
# only run the lines below AFTER transcription has finished; unmounting earlier
# makes the file unreachable.
# from google.colab import drive
# drive.flush_and_unmount()
# print("\nDrive unmounted")

In [None]:
# Transcribe entire audio (GPU-accelerated) and save the text to full_transcript.txt.
# Reads `model`, `audio_file` and `torch` from earlier cells; leaves the raw Whisper
# output in `result` and the plain text in `full_transcript` for later cells.
import time

print("="*70)
print("Transcribing entire audio...")
print("This will take 30-60 minutes with GPU")
print("="*70)

# perf_counter is monotonic, so the elapsed time is not affected by clock adjustments.
start_time = time.perf_counter()

# Transcribe with verbose output to see progress
result = model.transcribe(
    audio_file,
    language=None,  # Auto-detect (handles multilingual)
    verbose=True,    # Show progress
    fp16=torch.cuda.is_available()  # Use FP16 on GPU for speed
)

elapsed = time.perf_counter() - start_time
print(f"\nâœ“ Transcription complete in {elapsed/60:.1f} minutes")

# Save full transcript. The language is auto-detected, so the text may contain
# non-ASCII characters; write UTF-8 explicitly instead of the platform default.
full_transcript = result["text"]
with open('full_transcript.txt', 'w', encoding='utf-8') as f:
    f.write(full_transcript)

print(f"âœ“ Saved to full_transcript.txt ({len(full_transcript)} characters)")

In [None]:
# Search the transcript for spoken keyword letters, assemble the keyword,
# and write the result to SOLUTION.txt. Reads `full_transcript` from the
# transcription cell.
import re

print("="*70)
print("Searching for keyword letters...")
print("="*70)

# Pattern: "The Nth letter in keyword is X, [phonetic]"
# Groups: (1) position digits, (2) the single letter, (3) the word that follows it.
pattern = r"[Tt]he\s+(\d+)(?:st|nd|rd|th)?\s+letter\s+(?:in|of)\s+(?:the\s+)?keyword\s+is\s+([A-Za-z])[,\s]+([A-Za-z]+)"
# IGNORECASE: speech-to-text capitalisation is inconsistent ("Letter", "Keyword", ...),
# and a case-sensitive match would silently drop those announcements.
matches = re.findall(pattern, full_transcript, flags=re.IGNORECASE)

if matches:
    print(f"\nâœ“âœ“âœ“ FOUND {len(matches)} KEYWORD LETTERS! âœ“âœ“âœ“\n")

    # position -> upper-cased letter; a later announcement of a position replaces an earlier one.
    keyword_dict = {}
    for pos, letter, phonetic in matches:
        pos_num = int(pos)
        upper_letter = letter.upper()
        earlier = keyword_dict.get(pos_num)
        if earlier is not None and earlier != upper_letter:
            # Make conflicting repeats visible instead of overwriting silently.
            print(f"  NOTE: position {pos_num} was {earlier}, now {upper_letter} (keeping the later one)")
        keyword_dict[pos_num] = upper_letter
        print(f"  Position {pos_num}: {upper_letter} ({phonetic})")

    # Build keyword: positions 1..max, with "_" marking any position never announced.
    max_pos = max(keyword_dict)
    keyword = "".join(keyword_dict.get(i, "_") for i in range(1, max_pos + 1))

    print(f"\n{'='*70}")
    print(f"ðŸš© KEYWORD: {keyword}")
    print(f"{'='*70}")

    # Check completeness
    missing = [i for i in range(1, max_pos + 1) if i not in keyword_dict]
    if missing:
        print(f"\nâš  Missing positions: {missing}")
    else:
        print("\nâœ“âœ“âœ“ COMPLETE! âœ“âœ“âœ“")

    # Save solution (every match, ordered by position)
    with open('SOLUTION.txt', 'w', encoding='utf-8') as f:
        f.write(f"KEYWORD: {keyword}\n\n")
        f.write("Letters found:\n")
        for pos, letter, phonetic in sorted(matches, key=lambda x: int(x[0])):
            f.write(f"  Position {pos}: {letter.upper()} ({phonetic})\n")
        if missing:
            f.write(f"\nMissing: {missing}\n")

    print("\nâœ“ Solution saved to SOLUTION.txt")
else:
    print("\nâš  No keyword letters found!")
    print("Check full_transcript.txt manually")

In [None]:
# Send the result files to the browser as downloads.
# Relies on `os` having been imported by the Drive cell.
from google.colab import files

print("Downloading results...")
files.download('full_transcript.txt')
# SOLUTION.txt only exists when the keyword search found at least one letter.
solution_written = os.path.exists('SOLUTION.txt')
if solution_written:
    files.download('SOLUTION.txt')
print("\nâœ“ Done! Check your downloads folder.")

## Manual Search (if needed)
If the automatic pattern doesn't find all letters, search manually:

In [None]:
# Print the opening 2000 characters of the transcript between two rules,
# followed by the transcript's total length.
divider = "=" * 70
preview_text = full_transcript[:2000]

print("First 2000 characters of transcript:")
print(divider)
print(preview_text)
print(divider)
print(f"\nTotal length: {len(full_transcript)} characters")

In [None]:
# Search the transcript for a term and show each hit with surrounding context.
# Reads `full_transcript` from the transcription cell.
import re

search_term = "keyword"  # Change this to search for other patterns
context_chars = 50       # Characters of context shown on each side of a hit

# Case-insensitive search with context.
# Find the term itself, then slice the context around each hit. Requiring
# exactly 50 characters on both sides inside the regex would drop hits near
# the start or end of the transcript and hits close to a previous one.
pattern = re.compile(re.escape(search_term), re.IGNORECASE)
if search_term:
    matches = [
        full_transcript[max(0, hit.start() - context_chars):hit.end() + context_chars]
        for hit in pattern.finditer(full_transcript)
    ]
else:
    # An empty term would match at every position; treat it as "nothing to search".
    matches = []

print(f"Found {len(matches)} occurrences of '{search_term}':")
print("="*70)
for i, match in enumerate(matches[:20], 1):  # Show first 20
    print(f"{i}. ...{match}...")
    print()