<a href="https://colab.research.google.com/github/JoshuaStorm1017/FreeTranscriber/blob/main/FreeTrasnscribe.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#@title 🎙️ Easy Audio/Video Transcriber (with URL support) 📝
#@markdown 1. Run this cell (click the "play" button or press Shift+Enter).
#@markdown 2. It will install necessary tools (may take a minute or two the first time).
#@markdown 3. Choose your **Input Method**: "Upload File" or "Enter URL".
#@markdown 4.  - If "Upload File": Click "Choose File" to upload your file.
#@markdown     - If "Enter URL": Paste the direct link to your audio/video file in the "File URL" box.
#@markdown 5. Select the **Media Type** (Audio or Video).
#@markdown 6. Click the "Transcribe" button.
#@markdown 7. Wait for the magic! The transcript will appear below, and a download link for a .txt file will be provided.

# --- 1. Install necessary libraries ---
# -q for "quiet" installation
print("Installing necessary libraries... This might take a minute or two the first time.")
!pip install -q openai-whisper moviepy requests
!apt-get -qq install ffmpeg # Ensure ffmpeg is available for moviepy

import whisper
import moviepy.editor as mp
import ipywidgets as widgets
from IPython.display import display, clear_output, HTML
from google.colab import files
import os
import datetime
import requests # For downloading from URL
import urllib.parse # For parsing URLs
import torch # To check for GPU availability for fp16

# --- 2. Global variables and helper functions ---
# Module-level state shared by the widget callbacks below. The callbacks
# mutate these through `global` declarations, so they must exist before
# any handler fires.

# Name passed to whisper.load_model() by load_whisper_model().
MODEL_NAME = "small" # "tiny", "base", "small", "medium", "large". "small" is a good balance.
# Entry taken from the FileUpload widget's value; the handlers read its
# 'metadata' (including 'name') and 'content' keys. Empty dict means "no file".
UPLOADED_FILE_INFO = {} # Store uploaded file data from FileUpload widget
# Path under /content of the most recent URL download, or None when there is none.
DOWNLOADED_FILE_PATH_FROM_URL = None # Store path of file downloaded from URL
# Text of the latest transcription; the download handler refuses to run while it is empty.
TRANSCRIPT_TEXT = ""
# Cached Whisper model instance; populated lazily by load_whisper_model().
model = None # To load the model only once

def load_whisper_model():
    """Return the shared Whisper model, loading it on first use.

    The loaded model is cached in the module-level ``model`` variable so that
    later calls return immediately. If loading fails, troubleshooting hints are
    printed, the cache is left as ``None`` and ``None`` is returned, so a later
    call will attempt the load again.
    """
    global model

    # Fast path: a previous call already loaded the model.
    if model is not None:
        return model

    print(f"Loading Whisper model ('{MODEL_NAME}')... This can take some time, especially for larger models.")
    try:
        model = whisper.load_model(MODEL_NAME)
    except Exception as e:
        print(f"Error loading Whisper model: {e}")
        print("This might be due to insufficient RAM/GPU memory. Try a smaller model (e.g., 'tiny' or 'base')")
        print("Or, go to Runtime > Change runtime type and ensure a GPU is selected (if available).")
        model = None
    else:
        print(f"Whisper model '{MODEL_NAME}' loaded successfully!")
    return model

def get_formatted_timestamp():
    """Return the current local time as a compact ``YYYYMMDD_HHMMSS`` string."""
    current_time = datetime.datetime.now()
    return f"{current_time:%Y%m%d_%H%M%S}"

def get_filename_from_url(url):
    """Derive a local filename for the media file located at ``url``.

    The last component of the URL path is used after percent-decoding, so
    ``.../my%20talk.mp3`` yields ``my talk.mp3``. Query strings and fragments
    are ignored. The decoded path is reduced to its basename a second time so
    that an encoded separator (``%2F``) can never introduce a directory
    component into the result.

    When the path has no usable final component (empty, ``.`` or ``..``), a
    ``HEAD`` request is issued and a generic timestamped name is built with an
    extension guessed from the top-level content type.

    Args:
        url: The URL the user entered.

    Returns:
        A bare filename without directory components. If anything goes wrong
        (malformed URL, network failure during the ``HEAD`` probe), a generic
        timestamped name without extension is returned instead of raising.
    """
    try:
        parsed_url = urllib.parse.urlparse(url)
        # Decode first, then take the basename, so "%2F" cannot smuggle a path in.
        decoded_path = urllib.parse.unquote(parsed_url.path)
        filename = os.path.basename(decoded_path).replace('\x00', '').strip()
        if filename in ("", ".", ".."): # URL ends with / or has no clear filename
            # requests.head() does not follow redirects unless asked to; follow
            # them so the content type describes the resource that a later GET
            # would actually download.
            head_response = requests.head(url, timeout=10, allow_redirects=True)
            content_type = head_response.headers.get('content-type', '').split('/')[0].strip().lower()
            extension_map = {'audio': 'mp3', 'video': 'mp4', 'application': 'bin'} # very basic map
            ext = extension_map.get(content_type, 'dat')
            filename = f"downloaded_media_{get_formatted_timestamp()}.{ext}"
        return filename
    except Exception:
        # Deliberate best effort: naming must never abort the caller.
        return f"downloaded_media_{get_formatted_timestamp()}"


# --- 3. Widget Setup ---
# Selector for where the media comes from; drives which input widget is shown.
input_method_radio = widgets.RadioButtons(
    description='Input Method:',
    options=['Upload File', 'Enter URL'],
    value='Upload File',
    disabled=False,
)

# Tells the transcribe handler whether audio must first be extracted from video.
file_type_dropdown = widgets.Dropdown(
    description='Media Type:',
    options=['Audio', 'Video'],
    value='Audio', # Default, user should pick based on their file
    disabled=False,
)

# Upload control; visible at start because 'Upload File' is the default method.
file_uploader = widgets.FileUpload(
    description='Choose File',
    accept='.mp3,.wav,.m4a,.ogg,.flac,.mp4,.mov,.avi,.mkv',
    multiple=False,
    layout=widgets.Layout(display='flex'),
)

# URL entry box; hidden until the user switches to 'Enter URL'.
url_input_text = widgets.Text(
    description='File URL:',
    placeholder='e.g., https://example.com/audio.mp3',
    value='',
    disabled=False,
    layout=widgets.Layout(width='95%', display='none'),
)

# Starts the transcription; stays disabled until an input has been provided.
transcribe_button = widgets.Button(
    description='Transcribe',
    icon='microphone',
    button_style='info',
    tooltip='Upload a file or enter a URL, then click to transcribe',
    disabled=True,
)

# Offers the transcript as a .txt file; enabled once a transcript exists.
download_button = widgets.Button(
    description='Download Transcript (.txt)',
    icon='download',
    button_style='success',
    tooltip='Download the transcript as a text file',
    disabled=True,
)

# Status/progress messages from the handlers are printed into this area.
output_area = widgets.Output()

# Read-only box that shows the finished transcript.
transcription_display = widgets.Textarea(
    description='Transcript:',
    placeholder='Transcription will appear here...',
    value='',
    disabled=True,
    layout=widgets.Layout(height='200px', width='95%'),
)

# --- 4. Event Handlers ---
def on_input_method_change(change):
    """Show/hide the input widgets for the selected input method and reset results.

    Args:
        change: Change notification from ``input_method_radio``; ``change.new``
            is the newly selected option label.

    Switching to 'Enter URL' clears ``UPLOADED_FILE_INFO``. Switching back to
    'Upload File' restores it from the file the upload widget still holds, so
    the user does not have to pick the same file again. In both cases any
    previous transcript display is cleared and the download button disabled.
    """
    global UPLOADED_FILE_INFO, DOWNLOADED_FILE_PATH_FROM_URL

    if change.new == 'Upload File':
        file_uploader.layout.display = 'flex'
        url_input_text.layout.display = 'none'
        DOWNLOADED_FILE_PATH_FROM_URL = None # Clear any downloaded file path info
        if url_input_text.value: # Clear the text if user switches back
            url_input_text.value = ""

        # Our state was wiped when the user visited URL mode, but the widget may
        # still hold the earlier selection. Asking the user to re-select is
        # fragile: choosing the very same file again may leave the widget's
        # value unchanged, in which case no change event would fire and the
        # Transcribe button would stay disabled. Restore from the widget instead.
        if not UPLOADED_FILE_INFO and file_uploader.value:
            held_uploads = file_uploader.value
            # The rest of this notebook treats the value as {filename: entry};
            # only restore when it really has that shape.
            previous_entries = list(held_uploads.values()) if isinstance(held_uploads, dict) else []
            with output_area:
                clear_output(wait=True) # Clear other messages from output_area
                if previous_entries and 'metadata' in previous_entries[0]:
                    UPLOADED_FILE_INFO = previous_entries[0]
                    restored_name = UPLOADED_FILE_INFO['metadata'].get('name', '')
                    print(f"Switched to 'Upload File' mode. Using previously selected file '{restored_name}'.")
                else:
                    print("Switched to 'Upload File' mode. If a file name is shown from a previous selection, please re-select it or choose a new file to activate the 'Transcribe' button.")

        transcribe_button.disabled = not bool(UPLOADED_FILE_INFO)

    elif change.new == 'Enter URL':
        file_uploader.layout.display = 'none'
        url_input_text.layout.display = 'flex'
        UPLOADED_FILE_INFO = {} # Clear our program's state for any previously uploaded file.
        # The FileUpload widget's visual might not clear, but our logic relies on UPLOADED_FILE_INFO.
        transcribe_button.disabled = not url_input_text.value.strip()

    # Whatever was transcribed before belongs to the previous input.
    download_button.disabled = True
    transcription_display.value = ""

def on_file_upload_change(change):
    """React to a change of the upload widget's value.

    Records the first uploaded entry in ``UPLOADED_FILE_INFO`` (or empties it
    when nothing is selected), reports the outcome in ``output_area`` and, when
    the 'Upload File' method is active, enables or disables the Transcribe
    button accordingly. Any earlier transcript display is cleared.
    """
    global UPLOADED_FILE_INFO

    def upload_mode_active():
        return input_method_radio.value == 'Upload File'

    with output_area:
        clear_output(wait=True) # Drop messages left over from earlier events
        # The widget value is treated as a dict: {'filename': {metadata..., content...}}
        selection = file_uploader.value
        entries = list(selection.values()) if selection else []

        if entries:
            UPLOADED_FILE_INFO = entries[0]
            chosen_name = UPLOADED_FILE_INFO['metadata']['name']
            print(f"File '{chosen_name}' selected.")
            if upload_mode_active():
                transcribe_button.disabled = False
        else:
            UPLOADED_FILE_INFO = {}
            if upload_mode_active():
                transcribe_button.disabled = True
            if selection:
                # Truthy value that yielded no entries; should be rare.
                print("File selection event, but no file data processed. Please try again.")
            else:
                # Selection cleared in the browser, or initial state.
                print("No file selected, or selection cleared.")

    # A new selection invalidates whatever was transcribed before.
    transcription_display.value = ""
    download_button.disabled = True

def on_url_input_change(change):
    """Keep the Transcribe button in sync with the URL box while in 'Enter URL' mode.

    The button is enabled only when the new text is non-blank. Regardless of
    mode, the previous transcript display is cleared and downloading disabled.
    """
    url_mode_active = input_method_radio.value == 'Enter URL'
    if url_mode_active:
        has_url = bool(change.new.strip())
        transcribe_button.disabled = not has_url

    # Editing the URL invalidates any earlier result.
    transcription_display.value = ""
    download_button.disabled = True


def _remove_file_quietly(path, label=None):
    """Delete ``path`` if it exists; report it when ``label`` is given. Never raises."""
    if not path or not os.path.exists(path):
        return
    try:
        os.remove(path)
        if label:
            print(f"Cleaned up {label}: {path}")
    except OSError as e:
        print(f"Warning: could not remove '{path}': {e}")


def _save_uploaded_file():
    """Write the bytes held in ``UPLOADED_FILE_INFO`` to /content and return the path."""
    # basename() guards against a client-supplied name carrying directory parts.
    filename = os.path.basename(UPLOADED_FILE_INFO['metadata']['name'])
    saved_path = f"/content/{filename}"
    with open(saved_path, 'wb') as f:
        f.write(UPLOADED_FILE_INFO['content'])
    print(f"Processing uploaded file: {filename}")
    return saved_path


def _download_media_from_url(url):
    """Stream ``url`` into /content, printing progress.

    Returns the local path on success. On any failure an error is printed, a
    partially written file is removed, and ``None`` is returned.
    """
    global DOWNLOADED_FILE_PATH_FROM_URL

    print(f"Attempting to download from URL: {url}")
    destination = f"/content/{get_filename_from_url(url)}"
    try:
        # The context manager releases the streamed connection even on errors.
        with requests.get(url, stream=True, timeout=180) as response: # 180s timeout
            response.raise_for_status()
            total_size = int(response.headers.get('content-length', 0))
            downloaded_size = 0

            with open(destination, 'wb') as f:
                for chunk in response.iter_content(chunk_size=81920):
                    f.write(chunk)
                    downloaded_size += len(chunk)
                    if total_size > 0:
                        progress = (downloaded_size / total_size) * 100
                        print(f"Downloading... {downloaded_size / (1024*1024):.2f}MB / {total_size / (1024*1024):.2f}MB ({progress:.1f}%)", end='\r')
                    else:
                        print(f"Downloading... {downloaded_size / (1024*1024):.2f}MB (total size unknown)", end='\r')
        print(f"\nDownloaded successfully to: {destination}   ") # Spaces to clear line
    except requests.exceptions.Timeout:
        print(f"\nError: The download from {url} timed out. Check the URL and your connection.")
    except requests.exceptions.RequestException as e:
        print(f"\nError downloading from URL {url}: {e}")
    except Exception as e:
        print(f"\nAn unexpected error occurred during download: {e}")
    else:
        DOWNLOADED_FILE_PATH_FROM_URL = destination
        return destination

    # Failure path: do not leave a truncated file behind.
    _remove_file_quietly(destination)
    DOWNLOADED_FILE_PATH_FROM_URL = None
    return None


def _extract_audio_track(video_path):
    """Write the audio track of ``video_path`` to a WAV file under /content.

    Returns the WAV path, or ``None`` (after printing an error) when the video
    has no audio track. The clip is always closed; a partially written WAV is
    removed if extraction raises.
    """
    print("Extracting audio from video...")
    wav_path = None
    video_clip = mp.VideoFileClip(video_path)
    try:
        if not video_clip.audio:
            print("Error: The video file does not seem to contain an audio track.")
            return None

        # Timestamped name so repeated runs on the same video never collide.
        base_vid_name = os.path.splitext(os.path.basename(video_path))[0]
        wav_path = f"/content/extracted_audio_{base_vid_name}_{get_formatted_timestamp()}.wav"
        video_clip.audio.write_audiofile(wav_path, codec='pcm_s16le')
    except Exception:
        _remove_file_quietly(wav_path)
        raise
    finally:
        video_clip.close()

    print(f"Audio extracted to: {wav_path}")
    return wav_path


def on_transcribe_button_clicked(b):
    """Transcribe the selected upload or URL and show the result.

    Steps: lock the inputs, obtain the media file (save the upload or download
    the URL), extract audio when the media type is 'Video', run Whisper, and
    publish the text to ``TRANSCRIPT_TEXT`` and the transcript box.

    Whatever happens, the ``finally`` block removes the temporary files, unlocks
    the inputs, and enables the download button only if this run produced text.

    Args:
        b: The clicked button (unused).
    """
    global TRANSCRIPT_TEXT, DOWNLOADED_FILE_PATH_FROM_URL

    # Disable inputs during processing
    for control in (transcribe_button, download_button, file_uploader,
                    url_input_text, file_type_dropdown, input_method_radio):
        control.disabled = True

    # Forget the previous result up front: otherwise a failed run would leave an
    # older transcript in TRANSCRIPT_TEXT and the download button would offer it
    # as if it belonged to the current input.
    TRANSCRIPT_TEXT = ""
    transcription_display.value = ""

    source_path = None           # uploaded/downloaded file, removed afterwards
    extracted_audio_path = None  # WAV extracted from a video, removed afterwards

    with output_area:
        clear_output(wait=True)
        try:
            # --- Determine input source and prepare file ---
            selected_method = input_method_radio.value
            if selected_method == 'Upload File':
                if not UPLOADED_FILE_INFO:
                    print("Error: No file has been selected for upload, or selection was cleared. Please select a file.")
                    return
                source_path = _save_uploaded_file()
            elif selected_method == 'Enter URL':
                url = url_input_text.value.strip()
                if not url:
                    print("Error: No URL provided!")
                    return
                source_path = _download_media_from_url(url)
                if source_path is None:
                    return # the helper already reported the failure
            else:
                print("Error: Invalid input method selected.")
                return

            if not os.path.exists(source_path):
                print(f"Error: File to process ('{source_path}') is not available or could not be prepared.")
                return

            print(f"\nStarting transcription for: {os.path.basename(source_path)}")
            print("Please be patient, this may take a while...")

            current_whisper_model = load_whisper_model()
            if not current_whisper_model:
                print("Transcription cannot proceed: Whisper model failed to load.")
                return

            # --- Extract audio first when the input is a video ---
            audio_path_to_transcribe = source_path
            if file_type_dropdown.value == 'Video':
                extracted_audio_path = _extract_audio_track(source_path)
                if extracted_audio_path is None:
                    return
                audio_path_to_transcribe = extracted_audio_path

            # --- Transcribe ---
            print(f"Transcribing '{os.path.basename(audio_path_to_transcribe)}' with Whisper model '{MODEL_NAME}'...")
            use_fp16 = torch.cuda.is_available()
            if use_fp16:
                print("GPU detected. Using FP16 for faster transcription.")
            else:
                print("No GPU detected or not using CUDA. Using FP32. This might be slower.")

            result = current_whisper_model.transcribe(audio_path_to_transcribe, fp16=use_fp16)
            TRANSCRIPT_TEXT = result["text"]

            print("\n--- Transcription Complete! ---")
            transcription_display.value = TRANSCRIPT_TEXT

        except Exception as e:
            print(f"\n--- An error occurred during transcription or audio extraction: ---")
            print(e)
            import traceback
            traceback.print_exc() # Print full traceback for debugging
        finally:
            # Runs for success, handled errors and the early returns above, so
            # temporary files never accumulate and the UI is never left locked.
            _remove_file_quietly(extracted_audio_path, "temporary audio file")
            _remove_file_quietly(source_path, "processed file")
            if source_path == DOWNLOADED_FILE_PATH_FROM_URL:
                DOWNLOADED_FILE_PATH_FROM_URL = None # The downloaded file is gone now
            _reenable_inputs(keep_transcribe_disabled_if_no_input=True)
            # download_button should be enabled only if this run produced text
            download_button.disabled = not bool(TRANSCRIPT_TEXT)


def _reenable_inputs(keep_transcribe_disabled_if_no_input=False):
    """Unlock the input widgets after processing has finished or failed.

    Args:
        keep_transcribe_disabled_if_no_input: When true, the Transcribe button
            is enabled only if the active input method currently has an input
            (an uploaded file, or non-blank URL text). When false, the button
            is enabled unconditionally.
    """
    for control in (file_uploader, url_input_text, file_type_dropdown, input_method_radio):
        control.disabled = False

    if not keep_transcribe_disabled_if_no_input:
        transcribe_button.disabled = False
        return

    active_method = input_method_radio.value
    if active_method == 'Upload File':
        input_missing = not bool(UPLOADED_FILE_INFO)
    elif active_method == 'Enter URL':
        input_missing = not url_input_text.value.strip()
    else:
        # Unknown method; should not happen, so stay on the safe side.
        input_missing = True
    transcribe_button.disabled = input_missing


def on_download_button_clicked(b):
    """Write the current transcript to a .txt file and offer it for download.

    The file name is ``<base>_transcript_<timestamp>.txt`` where ``<base>`` is
    taken from the uploaded file's name or from the URL, falling back to
    ``transcription``. Status and error messages go to ``output_area`` like all
    other messages of this UI. The written file is kept on disk for the rest of
    the session.

    Args:
        b: The clicked button (unused).
    """
    if not TRANSCRIPT_TEXT:
        with output_area:
            print("No transcript available to download.")
        return

    # Try to get a base filename from the original input
    base_filename_for_dl = ""
    entered_url = url_input_text.value.strip()
    if input_method_radio.value == 'Upload File' and UPLOADED_FILE_INFO and 'metadata' in UPLOADED_FILE_INFO:
        # basename() keeps a client-supplied name from pointing into another directory.
        uploaded_name = os.path.basename(UPLOADED_FILE_INFO['metadata']['name'])
        base_filename_for_dl = os.path.splitext(uploaded_name)[0]
    elif input_method_radio.value == 'Enter URL' and entered_url:
        parsed_url_filename = get_filename_from_url(entered_url)
        base_filename_for_dl = os.path.splitext(parsed_url_filename)[0]
    if not base_filename_for_dl:
        base_filename_for_dl = "transcription"

    transcript_filename = f"{base_filename_for_dl}_transcript_{get_formatted_timestamp()}.txt"
    try:
        with open(transcript_filename, "w", encoding="utf-8") as f:
            f.write(TRANSCRIPT_TEXT)
        with output_area:
            print(f"Transcript prepared as '{transcript_filename}'. Offering for download...")
        # Called outside output_area on purpose, exactly where the original
        # code invoked it, so the way Colab delivers the download is unchanged.
        files.download(transcript_filename)
    except Exception as e:
        with output_area:
            print(f"Error preparing transcript for download: {e}")


# --- 5. Link event handlers to widgets ---
# The three observers fire whenever the widget's `value` trait changes;
# the two buttons call their handler on every click.
input_method_radio.observe(on_input_method_change, names='value')
file_uploader.observe(on_file_upload_change, names='value')
url_input_text.observe(on_url_input_change, names='value')
transcribe_button.on_click(on_transcribe_button_clicked)
download_button.on_click(on_download_button_clicked)

# --- 6. Display the UI ---
clear_output(wait=True) # Clear installation messages before showing UI

# Static heading and usage instructions rendered above the widgets.
display(HTML("<h2>🎙️ Easy Audio/Video Transcriber (with URL support) 📝</h2>"))
display(HTML("""
<p><b>Instructions:</b></p>
<ol>
    <li>Select your <b>Input Method</b>: "Upload File" or "Enter URL".</li>
    <li>If "Upload File": Click <b>'Choose File'</b> to upload your media.</li>
    <li>If "Enter URL": Paste the direct link to your audio/video file in the <b>'File URL'</b> box.
        (e.g., <code>https://example.com/my_podcast_episode.mp3</code>).
        Make sure it's a direct link to the file, not a webpage containing the file.</li>
    <li>Select the <b>Media Type</b> (Audio or Video) appropriate for your file.</li>
    <li>Click the <b>'Transcribe'</b> button.</li>
    <li>The transcription will appear in the text box below. You can then click <b>'Download Transcript (.txt)'</b>.</li>
</ol>
<p><b>Note:</b> The first time you transcribe, the AI model needs to be downloaded (takes a few minutes). Using a GPU (Runtime > Change runtime type) is highly recommended for speed. Large files or long URLs might take significant time to download and process.</p>
"""))

# Vertical stack in display order: inputs, action buttons, status messages, transcript.
# Both input widgets are always part of the layout; the input-method handler
# toggles their `layout.display` to show one and hide the other.
ui_layout = widgets.VBox([
    input_method_radio,
    file_type_dropdown,
    file_uploader, # Will be shown/hidden by logic
    url_input_text, # Will be shown/hidden by logic
    widgets.HBox([transcribe_button, download_button]),
    output_area,
    transcription_display
])

display(ui_layout)

# Pre-load the model after UI is displayed, messages will go into output_area
# `model` is still None on a first run, so this triggers the initial load;
# re-running the cell resets `model` to None above and therefore reloads it.
if not model:
    with output_area: # Show loading message in the output area
        load_whisper_model()

VBox(children=(RadioButtons(description='Input Method:', options=('Upload File', 'Enter URL'), value='Upload F…

Transcript prepared as 'espplus102_2025_transcript_20250514_162138.txt'. Offering for download...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>