<a href="https://colab.research.google.com/github/TheVoidMonarch/transcribe-me/blob/main/Untitled0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# 🎙️ Audio Transcription with Whisper

This notebook uses OpenAI's Whisper model to transcribe audio files to SRT format.

## 🔧 Features
- Automatic language detection
- GPU acceleration (when available)
- Multiple audio format support
- Progress tracking

## 📝 Instructions
1. Click the "Copy to Drive" button at the top
2. Return to your Drive and open the notebook there
3. Run all cells and upload an audio file when prompted — the automation handles the rest!

## ⚠️ Note
This notebook is designed to work with the automated transcription system.
Please don't modify the cells unless you know what you're doing.

In [None]:
# Install dependencies
# openai-whisper is pinned to a specific release; torch and tqdm are unpinned.
# -q reduces pip's output.
!pip install -q openai-whisper==20231117 torch tqdm

# Install ffmpeg if needed
# `which` succeeds when an ffmpeg binary is already on PATH, in which case the
# apt command after `||` is skipped. Standard output of both commands is
# discarded.
!which ffmpeg > /dev/null || apt install -y ffmpeg > /dev/null

# NOTE(review): this message is printed unconditionally — the exit status of
# the commands above is not checked.
print("✅ Dependencies installed successfully!")

In [None]:
import torch
import whisper
from pathlib import Path
from IPython.display import display, HTML

# Report whether PyTorch can see a CUDA device, and which one
print(f"CUDA: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")

# Choose the device the model is loaded onto: GPU when present, else CPU
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# Load the "base" Whisper checkpoint onto the chosen device
print("\n📚 Loading Whisper model...")
model = whisper.load_model("base", device=DEVICE)
print("✅ Model loaded successfully!")

In [None]:
def format_timestamp(seconds: float) -> str:
    """Convert a time offset in seconds to an SRT timestamp (``HH:MM:SS,mmm``).

    The offset is rounded to the nearest whole millisecond first and then
    split with integer arithmetic. Truncating the fractional part of a float
    instead can lose a millisecond to binary rounding error (``1.001`` would
    come out as ``00:00:01,000``).

    Args:
        seconds: Offset from the start of the audio, in seconds. Negative
            values are clamped to zero, since SRT has no negative timestamps.

    Returns:
        The formatted timestamp. The hours field is zero-padded to two digits
        and grows wider for offsets of 100 hours or more.
    """
    # Work in integer milliseconds from here on so no further float error
    # can creep into the individual fields.
    total_ms = max(0, int(round(seconds * 1000.0)))
    total_secs, msecs = divmod(total_ms, 1000)
    total_mins, secs = divmod(total_secs, 60)
    hours, minutes = divmod(total_mins, 60)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{msecs:03d}"

def save_as_srt(segments: list, output_file: str) -> None:
    """Write transcription segments to ``output_file`` as SRT cues.

    Each segment is a mapping with ``'start'`` and ``'end'`` offsets (passed
    to ``format_timestamp``) and a ``'text'`` string. Cues are numbered from
    1 in the order given, and each cue is followed by a blank line.

    Args:
        segments: Segment mappings to write, in display order.
        output_file: Destination path; it is created or overwritten and
            encoded as UTF-8.
    """
    with open(output_file, 'w', encoding='utf-8') as srt:
        for index, segment in enumerate(segments, start=1):
            # "<start> --> <end>" line of the cue
            time_range = (
                f"{format_timestamp(segment['start'])} --> "
                f"{format_timestamp(segment['end'])}"
            )
            # Surrounding whitespace is removed from the caption text
            caption = segment['text'].strip()
            srt.write(f"{index}\n{time_range}\n{caption}\n\n")

def display_progress(text: str, color: str = 'blue') -> None:
    """Display a progress message as a colored HTML paragraph.

    Both arguments are HTML-escaped before being placed in the markup.
    Callers pass uploaded file names and exception messages here, and a
    ``<``, ``&`` or quote in those would otherwise corrupt the rendered
    output or inject markup into the notebook.

    Args:
        text: Message to show; rendered literally, not as HTML.
        color: CSS color value for the paragraph text.
    """
    # Function-scope import keeps this cell self-contained.
    import html

    safe_text = html.escape(str(text), quote=False)
    # The color sits inside a single-quoted attribute, so quotes must be
    # escaped there as well.
    safe_color = html.escape(str(color), quote=True)
    display(HTML(f"<p style='color: {safe_color}'>{safe_text}</p>"))

In [None]:
def transcribe_audio(audio_path: str) -> str:
    """Transcribe one audio file with the loaded model and save it as SRT.

    The output path is the input path with its final suffix removed and
    ``_transcribed.srt`` appended. Progress and the outcome are reported
    through ``display_progress``.

    Args:
        audio_path: Path of the audio file to transcribe.

    Returns:
        Path of the SRT file that was written.

    Raises:
        Exception: Any error raised while transcribing or saving is reported
            via ``display_progress`` and then re-raised unchanged.
    """
    try:
        display_progress("🎯 Transcribing audio...")
        # Half precision is requested only when a CUDA device is available
        use_half_precision = torch.cuda.is_available()
        transcription = model.transcribe(
            audio_path,
            verbose=True,
            language=None,  # None lets the model detect the language
            task="transcribe",
            fp16=use_half_precision,
        )

        # Derive the SRT path from the audio path
        path_without_suffix = Path(audio_path).with_suffix('')
        srt_path = str(path_without_suffix) + "_transcribed.srt"
        display_progress("💾 Saving transcription...")
        save_as_srt(transcription["segments"], srt_path)

        # Report where the file went and, when present, the language
        display_progress(f"✅ Transcription saved to: {srt_path}", 'green')
        detected_language = transcription.get("language")
        if detected_language:
            display_progress(f"🌍 Detected language: {detected_language}", 'green')

        return srt_path

    except Exception as exc:
        display_progress(f"❌ Error during transcription: {exc!s}", 'red')
        raise

# Upload cell: prompt for audio files, transcribe each one, and download
# the resulting SRT file
from google.colab import files

display_progress("📤 Please upload an audio file...")
uploaded = files.upload()

# The keys of `uploaded` are used as the file names to transcribe
for filename in uploaded:
    display_progress(f"\n🎵 Processing: {filename}")
    output_file = transcribe_audio(filename)
    display_progress("\n📥 Downloading transcription...")
    files.download(output_file)