<a href="https://colab.research.google.com/github/ShawneilRodrigues/ChunkSeek/blob/main/vidoe_extraction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install required libraries
!pip install yt-dlp moviepy requests

import os
import yt_dlp
from moviepy.editor import *
import requests
from google.colab import userdata

# Step 1 & 2: Download YouTube video and extract audio
def download_youtube_audio(url, output_path):
    """Download the audio of ``url`` with yt-dlp and return the resulting WAV path.

    yt-dlp is asked for the best audio format and configured with the
    ``FFmpegExtractAudio`` post-processor using ``wav`` as preferred codec.
    Files are named after the video title inside ``output_path``.

    Args:
        url: Address of the video to download.
        output_path: Directory that receives the downloaded file.

    Returns:
        The file name yt-dlp prepared for the download, with its extension
        replaced by ``.wav``.
    """
    wav_extractor = {'key': 'FFmpegExtractAudio', 'preferredcodec': 'wav'}
    name_template = os.path.join(output_path, '%(title)s.%(ext)s')
    options = dict(
        format='bestaudio/best',
        postprocessors=[wav_extractor],
        outtmpl=name_template,
    )

    with yt_dlp.YoutubeDL(options) as downloader:
        video_info = downloader.extract_info(url, download=True)
        downloaded_name = downloader.prepare_filename(video_info)

    # prepare_filename() reports the pre-conversion extension; swap it for .wav
    stem, _original_ext = os.path.splitext(downloaded_name)
    return stem + '.wav'

# Step 3: Convert audio to text using Deepgram API
def audio_to_text(audio_path, deepgram_api_key):
    """Send a WAV file to Deepgram's ``/v1/listen`` endpoint and return the transcript.

    Args:
        audio_path: Path of the WAV file to upload.
        deepgram_api_key: Deepgram API key, sent as a ``Token`` authorization header.

    Returns:
        The transcript of the first alternative of the first channel in the
        JSON response. No check is made that it is non-empty.

    Raises:
        Exception: The API answered with a status code other than 200.
    """
    endpoint = "https://api.deepgram.com/v1/listen"
    request_headers = {
        "Authorization": f"Token {deepgram_api_key}",
        "Content-Type": "audio/wav",
    }

    # The open file handle itself is passed as the request body
    with open(audio_path, 'rb') as wav_file:
        reply = requests.post(endpoint, headers=request_headers, data=wav_file)

    if reply.status_code == 200:
        first_channel = reply.json()['results']['channels'][0]
        return first_channel['alternatives'][0]['transcript']

    raise Exception(f"Deepgram API error: {reply.status_code}, {reply.text}")

# Step 4: Generate audio from text using Eleven Labs API
def text_to_speech(text, eleven_labs_api_key, voice_id):
    """Request synthesized speech for ``text`` from the Eleven Labs text-to-speech API.

    Args:
        text: Text to be spoken.
        eleven_labs_api_key: API key, sent in the ``xi-api-key`` header.
        voice_id: Voice identifier appended to the endpoint URL.

    Returns:
        The raw response body (the request asks for ``audio/mpeg``).

    Raises:
        Exception: The API answered with a status code other than 200.
    """
    payload = {
        "text": text,
        "model_id": "eleven_monolingual_v1",
        "voice_settings": {"stability": 0.5, "similarity_boost": 0.5},
    }

    reply = requests.post(
        f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}",
        json=payload,
        headers={
            "Accept": "audio/mpeg",
            "Content-Type": "application/json",
            "xi-api-key": eleven_labs_api_key,
        },
    )

    if reply.status_code == 200:
        return reply.content

    raise Exception(f"Eleven Labs API error: {reply.status_code}, {reply.text}")

# Main execution
def main():
    """Run the pipeline: download audio, transcribe it, and re-synthesize the transcript.

    API keys are read from the Colab ``userdata`` store under the names
    ``'deepgram'`` and ``'Eleven'``. The synthesized speech is written to
    ``/content/final_audio.mp3``.

    Any exception raised by a pipeline step is caught and printed instead of
    propagating, so a failure never aborts the notebook cell. An empty
    transcript is treated as a failure: it is reported and no speech is
    generated.
    """
    # Replace with your actual API keys and voice ID
    DEEPGRAM_API_KEY = userdata.get('deepgram')
    ELEVEN_LABS_API_KEY = userdata.get('Eleven')
    VOICE_ID = 'pNInz6obpgDQGcFmaJgB'

    # YouTube video URL
    video_url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ"  # Replace with your video URL

    # Output paths (the extracted WAV path is decided by download_youtube_audio)
    output_dir = "/content"
    final_audio_path = os.path.join(output_dir, "final_audio.mp3")

    try:
        # Step 1 & 2: Download video and extract audio
        print("Downloading video and extracting audio...")
        audio_path = download_youtube_audio(video_url, output_dir)
        print(f"Audio saved to: {audio_path}")

        # Step 3: Convert audio to text
        print("Converting audio to text...")
        transcript = audio_to_text(audio_path, DEEPGRAM_API_KEY)
        print(f"Transcript: {transcript}")

        # Without this guard an empty transcript is sent to the TTS API and
        # the run is reported as a success.
        if not transcript or not transcript.strip():
            raise ValueError("Transcript is empty; skipping speech generation")

        # Step 4: Generate new audio from text
        print("Generating new audio...")
        audio_content = text_to_speech(transcript, ELEVEN_LABS_API_KEY, VOICE_ID)

        with open(final_audio_path, "wb") as audio_file:
            audio_file.write(audio_content)

        print(f"Process completed. Final audio saved to {final_audio_path}")

    except Exception as e:
        # Deliberate best-effort: report the failure without raising
        print(f"An error occurred: {str(e)}")

if __name__ == "__main__":
    main()

Collecting yt-dlp
  Downloading yt_dlp-2024.10.7-py3-none-any.whl.metadata (171 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/171.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━[0m [32m163.8/171.3 kB[0m [31m5.4 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m171.3/171.3 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
Collecting brotli (from yt-dlp)
  Downloading Brotli-1.1.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl.metadata (5.5 kB)
Collecting mutagen (from yt-dlp)
  Downloading mutagen-1.47.0-py3-none-any.whl.metadata (1.7 kB)
Collecting pycryptodomex (from yt-dlp)
  Downloading pycryptodomex-3.21.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.4 kB)
Collecting websockets>=13.0 (from yt-dlp)
  Downloading websockets-13.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x8

  if event.key is 'enter':



Downloading video and extracting audio...
[youtube] Extracting URL: https://www.youtube.com/watch?v=dQw4w9WgXcQ
[youtube] dQw4w9WgXcQ: Downloading webpage
[youtube] dQw4w9WgXcQ: Downloading ios player API JSON
[youtube] dQw4w9WgXcQ: Downloading mweb player API JSON
[youtube] dQw4w9WgXcQ: Downloading player a4d559ec
[youtube] dQw4w9WgXcQ: Downloading m3u8 information
[info] dQw4w9WgXcQ: Downloading 1 format(s): 251
[download] Destination: /content/Rick Astley - Never Gonna Give You Up (Official Music Video).webm
[download] 100% of    3.28MiB in 00:00:00 at 17.91MiB/s  
[ExtractAudio] Destination: /content/Rick Astley - Never Gonna Give You Up (Official Music Video).wav
Deleting original file /content/Rick Astley - Never Gonna Give You Up (Official Music Video).webm (pass -k to keep)
Audio saved to: /content/Rick Astley - Never Gonna Give You Up (Official Music Video).wav
Converting audio to text...
Transcript: 
Generating new audio...
Process completed. Final audio saved to /content/fin

In [None]:
import os
import yt_dlp
from moviepy.editor import *
import requests
from google.colab import userdata

# Step 1 & 2: Download YouTube video and extract audio
def download_youtube_audio(url, output_path):
    """Download the audio of ``url`` with yt-dlp and return the verified WAV path.

    yt-dlp is asked for the best audio format and configured with the
    ``FFmpegExtractAudio`` post-processor using ``wav`` as preferred codec.
    Files are named after the video title inside ``output_path``.

    Args:
        url: Address of the video to download.
        output_path: Directory that receives the downloaded file.

    Returns:
        Path of the WAV file, which is known to exist and be non-empty.

    Raises:
        Exception: The expected WAV file is missing or has zero size.
    """
    options = {
        'format': 'bestaudio/best',
        'postprocessors': [dict(key='FFmpegExtractAudio', preferredcodec='wav')],
        'outtmpl': os.path.join(output_path, '%(title)s.%(ext)s'),
    }

    with yt_dlp.YoutubeDL(options) as downloader:
        video_info = downloader.extract_info(url, download=True)
        # prepare_filename() reports the pre-conversion extension; swap it for .wav
        stem, _original_ext = os.path.splitext(downloader.prepare_filename(video_info))
        wav_path = stem + '.wav'

        # Refuse to hand back a path that holds no audio
        if not os.path.exists(wav_path):
            raise Exception(f"Audio file not created: {wav_path}")
        if os.path.getsize(wav_path) == 0:
            raise Exception(f"Audio file is empty: {wav_path}")

        return wav_path

# Step 3: Convert audio to text using Deepgram API
def audio_to_text(audio_path, deepgram_api_key):
    """Send a WAV file to Deepgram's ``/v1/listen`` endpoint and return the transcript.

    Args:
        audio_path: Path of the WAV file to upload.
        deepgram_api_key: Deepgram API key, sent as a ``Token`` authorization header.

    Returns:
        The non-empty transcript of the first alternative of the first channel.

    Raises:
        Exception: The API answered with a status code other than 200, or the
            transcript in the response is empty.
    """
    endpoint = "https://api.deepgram.com/v1/listen"
    request_headers = {
        "Authorization": f"Token {deepgram_api_key}",
        "Content-Type": "audio/wav",
    }

    # The open file handle itself is passed as the request body
    with open(audio_path, 'rb') as wav_file:
        reply = requests.post(endpoint, headers=request_headers, data=wav_file)

    if reply.status_code != 200:
        raise Exception(f"Deepgram API error: {reply.status_code}, {reply.text}")

    first_channel = reply.json()['results']['channels'][0]
    spoken_text = first_channel['alternatives'][0]['transcript']
    if spoken_text:
        return spoken_text

    raise Exception("Deepgram returned an empty transcript")

# Step 4: Generate audio from text using Eleven Labs API
def text_to_speech(text, eleven_labs_api_key, voice_id):
    """Request synthesized speech for ``text`` from the Eleven Labs text-to-speech API.

    Args:
        text: Text to be spoken.
        eleven_labs_api_key: API key, sent in the ``xi-api-key`` header.
        voice_id: Voice identifier appended to the endpoint URL.

    Returns:
        The non-empty raw response body (the request asks for ``audio/mpeg``).

    Raises:
        Exception: The API answered with a status code other than 200, or the
            response body is empty.
    """
    payload = {
        "text": text,
        "model_id": "eleven_monolingual_v1",
        "voice_settings": {"stability": 0.5, "similarity_boost": 0.5},
    }

    reply = requests.post(
        f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}",
        json=payload,
        headers={
            "Accept": "audio/mpeg",
            "Content-Type": "application/json",
            "xi-api-key": eleven_labs_api_key,
        },
    )

    if reply.status_code != 200:
        raise Exception(f"Eleven Labs API error: {reply.status_code}, {reply.text}")

    audio_bytes = reply.content
    if len(audio_bytes) == 0:
        raise Exception("Eleven Labs returned empty audio content")

    return audio_bytes

# Main execution
def main():
    """Run the pipeline: download audio, transcribe it, and re-synthesize the transcript.

    API keys are read from the Colab ``userdata`` store under the names
    ``'deepgram'`` and ``'Eleven'``. The synthesized speech is written to
    ``/content/final_audio.mp3`` and the written file is checked for existence
    and non-zero size.

    Any exception raised along the way is caught and printed rather than
    propagated.
    """
    # Credentials and voice selection (replace with your own secrets / voice ID)
    deepgram_key = userdata.get('deepgram')
    eleven_labs_key = userdata.get('Eleven')
    voice = 'pNInz6obpgDQGcFmaJgB'

    # Source video and destination of the generated speech
    source_url = "https://www.youtube.com/watch?v=dQw4w9WgXcQ"  # Replace with your video URL
    work_dir = "/content"
    result_path = os.path.join(work_dir, "final_audio.mp3")

    try:
        # Steps 1 & 2: fetch the video and pull out its audio track
        print("Downloading video and extracting audio...")
        wav_path = download_youtube_audio(source_url, work_dir)
        print(f"Audio saved to: {wav_path}")

        # Step 3: speech -> text
        print("Converting audio to text...")
        transcript = audio_to_text(wav_path, deepgram_key)
        print(f"Transcript: {transcript}")

        # Step 4: text -> speech, persisted to disk
        print("Generating new audio...")
        speech_bytes = text_to_speech(transcript, eleven_labs_key, voice)
        with open(result_path, "wb") as out_file:
            out_file.write(speech_bytes)

        # Make sure something was actually written
        if not os.path.exists(result_path):
            raise Exception(f"Final audio file not created: {result_path}")
        if os.path.getsize(result_path) == 0:
            raise Exception(f"Final audio file is empty: {result_path}")

        print(f"Process completed. Final audio saved to {result_path}")

    except Exception as failure:
        print(f"An error occurred: {str(failure)}")

if __name__ == "__main__":
    main()

Downloading video and extracting audio...
[youtube] Extracting URL: https://www.youtube.com/watch?v=dQw4w9WgXcQ
[youtube] dQw4w9WgXcQ: Downloading webpage
[youtube] dQw4w9WgXcQ: Downloading ios player API JSON
[youtube] dQw4w9WgXcQ: Downloading mweb player API JSON
[youtube] dQw4w9WgXcQ: Downloading m3u8 information
[info] dQw4w9WgXcQ: Downloading 1 format(s): 251
[download] Destination: /content/Rick Astley - Never Gonna Give You Up (Official Music Video).webm
[download] 100% of    3.28MiB in 00:00:00 at 21.06MiB/s  
[ExtractAudio] Destination: /content/Rick Astley - Never Gonna Give You Up (Official Music Video).wav
Deleting original file /content/Rick Astley - Never Gonna Give You Up (Official Music Video).webm (pass -k to keep)
Audio saved to: /content/Rick Astley - Never Gonna Give You Up (Official Music Video).wav
Converting audio to text...
An error occurred: Deepgram returned an empty transcript


In [None]:
import os
import requests
import json

def audio_to_text(audio_path, deepgram_api_key):
    """Transcribe a WAV file with Deepgram's ``/v1/listen`` endpoint, logging failures.

    Args:
        audio_path: Path of the WAV file to upload.
        deepgram_api_key: Deepgram API key, sent as a ``Token`` authorization header.

    Returns:
        The transcript of the first alternative of the first channel. It may
        be an empty string, in which case a warning is printed.

    Raises:
        requests.exceptions.RequestException: The request could not be sent or
            the API answered with an HTTP error status.
        json.JSONDecodeError: The response body was not valid JSON.
        KeyError: The JSON body lacks the expected results/channels structure.
        Exception: Any other failure; it is printed and re-raised unchanged.
    """
    url = "https://api.deepgram.com/v1/listen"

    # Bound up front: when requests.post() itself raises (DNS failure, refused
    # connection, timeout) no response is ever assigned, and reading it in a
    # handler would replace the real error with UnboundLocalError.
    response = None
    result = None

    with open(audio_path, 'rb') as audio:
        try:
            response = requests.post(
                url,
                headers={
                    "Authorization": f"Token {deepgram_api_key}",
                    "Content-Type": "audio/wav"
                },
                data=audio
            )

            response.raise_for_status()  # Raises an HTTPError for bad responses

            result = response.json()

            # Check if the expected keys exist in the response
            if 'results' not in result or 'channels' not in result['results'] or len(result['results']['channels']) == 0:
                raise KeyError("Unexpected response structure from Deepgram API")

            transcript = result['results']['channels'][0]['alternatives'][0]['transcript']

            if not transcript:
                print("Warning: Deepgram returned an empty transcript")

            return transcript

        except requests.exceptions.RequestException as e:
            print(f"HTTP Request failed: {e}")
            # Only an HTTP-level failure has a response body worth showing
            if response is not None and response.text:
                print(f"Response content: {response.text}")
            raise
        except json.JSONDecodeError:
            print("Failed to decode JSON response from Deepgram API")
            if response is not None:
                print(f"Response content: {response.text}")
            raise
        except KeyError as e:
            print(f"KeyError: {e}")
            print(f"Actual response structure: {json.dumps(result, indent=2)}")
            raise
        except Exception as e:
            print(f"Unexpected error: {e}")
            raise

# The rest of your script remains the same
from google.colab import userdata
from IPython.display import Audio, display

# `audio_path` and the Deepgram key exist only as locals inside main() in the
# cells above, so they are defined here to let this cell run on a fresh kernel.
audio_path = '/content/Rick Astley - Never Gonna Give You Up (Official Music Video).wav'
deepgram_api_key = userdata.get('deepgram')

# After extracting the audio
print("Playing extracted audio:")
display(Audio(audio_path))

# After converting to text
print("Converting audio to text:")
transcript = audio_to_text(audio_path, deepgram_api_key)
print(f"Transcript: {transcript}")

Playing extracted audio:
Buffered data was truncated after reaching the output size limit.