# Install the necessary libraries mentioned in Cell 1.

In [None]:
# Cell 1
!pip install yt-dlp
!pip install pydub
!pip install openai
!pip install git+https://github.com/openai/whisper.git
!pip install ipywidgets
!pip install srt
!apt-get update
!apt-get install -y ffmpeg
!pip install concurrent.futures

# The following cell imports the required libraries and defines functions to:


1.   Create the required directory to store the files being generated.
2.   Download the audio from YouTube, or download the video from Google Drive and extract its audio. (Ensure the Drive link has edit access for all.)
3.  Once the audio file has been downloaded or extracted and stored on the system, the program uses Whisper to generate a transcript of it.
4.   Once the transcript is generated, the `srt` library generates the captions file.
5.   After this, the transcript is sent to the GPT model via the API to generate chapters for the YouTube description; the transcript, together with the blog's title and keywords, is also sent to the GPT model to generate the blog file.
6. GPT Model: gpt-4-turbo, prompt can be adjusted as per requirement.






In [2]:
# Cell 2
import yt_dlp
from pydub import AudioSegment
import whisper
import torch
import os
import shutil
import srt
from datetime import timedelta
from IPython.display import display, HTML, clear_output
import ipywidgets as widgets
from openai import OpenAI
import concurrent.futures

# Create the working directory that every download and generated file in this
# notebook is written to; exist_ok=True makes re-running this cell a no-op.
os.makedirs('sample_data', exist_ok=True)

def download_youtube_audio(url):
    """Download a video's audio track with yt-dlp and convert it to MP3.

    The FFmpegExtractAudio post-processor is configured for the 'mp3' codec
    at quality '192'. The output template pins the file's base name, so the
    result lands at ``sample_data/audio.<ext>``.

    Args:
        url: Address of the video whose audio should be downloaded.
    """
    mp3_extractor = {
        'key': 'FFmpegExtractAudio',
        'preferredcodec': 'mp3',
        'preferredquality': '192',
    }
    options = {
        'format': 'bestaudio/best',
        'postprocessors': [mp3_extractor],
        # Fixed base name so later steps know where to find the audio.
        'outtmpl': 'sample_data/audio.%(ext)s',
    }

    with yt_dlp.YoutubeDL(options) as downloader:
        downloader.download([url])
    print("Downloaded YouTube audio.")

def download_drive_video(file_id):
    """Download a publicly accessible Google Drive video and extract its audio.

    The video is fetched with ``wget`` into ``sample_data/video.mp4`` and its
    audio is then exported to ``sample_data/audio.mp3`` via pydub.

    Args:
        file_id: Google Drive file ID, inserted into the direct-download URL.

    Raises:
        subprocess.CalledProcessError: If ``wget`` exits with a non-zero status.
    """
    import subprocess  # local import: only this function needs it

    # Using direct download URL for publicly accessible Google Drive files
    download_url = f"https://drive.google.com/uc?id={file_id}&export=download"

    # Run wget without a shell and with list arguments so a crafted file_id
    # (it originates from user input) cannot inject shell commands.
    # check=True turns a failed download into an exception instead of silently
    # continuing with an empty or missing video file.
    subprocess.run(
        ['wget', '-O', 'sample_data/video.mp4', download_url],
        check=True,
    )
    print("Downloaded Google Drive video.")

    audio = AudioSegment.from_file('sample_data/video.mp4')
    audio.export('sample_data/audio.mp3', format='mp3')
    print("Extracted audio from video.")

def save_file(content, filename):
    """Write text to a file, replacing any existing content.

    Args:
        content: The text to write.
        filename: Path of the file to create or overwrite.
    """
    # Explicit UTF-8: transcripts and model output routinely contain non-ASCII
    # characters, and the platform default encoding may not be able to encode them.
    with open(filename, 'w', encoding='utf-8') as file:
        file.write(content)
    print(f"Saved file: {filename}")

def generate_transcript(file_path):
    """Transcribe an audio file with Whisper's "base" model.

    The model is placed on CUDA when torch reports it as available, otherwise
    on the CPU. The full transcript text is also written to
    ``sample_data/Transcripts.txt``.

    Args:
        file_path: Path of the audio file to transcribe.

    Returns:
        A ``(text, segments)`` tuple taken from the transcription result's
        ``'text'`` and ``'segments'`` entries.
    """
    if torch.cuda.is_available():
        device = "cuda"
    else:
        device = "cpu"

    whisper_model = whisper.load_model("base", device=device)
    transcription = whisper_model.transcribe(file_path)

    full_text = transcription['text']
    save_file(full_text, 'sample_data/Transcripts.txt')
    return full_text, transcription['segments']

def generate_captions(segments):
    """Turn transcription segments into an SRT captions file.

    Each segment supplies ``'start'`` and ``'end'`` (in seconds) and ``'text'``.
    The composed SRT text is saved to ``sample_data/Captions.srt``.

    Args:
        segments: Iterable of segment mappings with the keys described above.

    Returns:
        The composed SRT document as a string.
    """
    cues = [
        srt.Subtitle(
            index=position,
            start=timedelta(seconds=piece['start']),
            end=timedelta(seconds=piece['end']),
            content=piece['text'],
        )
        for position, piece in enumerate(segments)
    ]
    rendered = srt.compose(cues)
    save_file(rendered, 'sample_data/Captions.srt')
    return rendered

def generate_youtube_description(transcript, api_key):
    """Ask the chat model for a chaptered YouTube description of a transcript.

    The stripped reply is saved to ``sample_data/Youtube_Description.txt``.

    Args:
        transcript: Full transcript text embedded in the prompt.
        api_key: OpenAI API key used to build the client.

    Returns:
        The model's reply with surrounding whitespace removed.
    """
    prompt = f"Based on the following transcript, identify the main topics and create a YouTube description with chapters:\n\nTranscript:\n{transcript}"
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]

    completion = OpenAI(api_key=api_key).chat.completions.create(
        model="gpt-4-turbo",
        messages=conversation,
        max_tokens=500,
    )

    reply = completion.choices[0].message.content
    description_content = reply.strip()
    save_file(description_content, 'sample_data/Youtube_Description.txt')
    return description_content

def generate_blog(transcript, keywords, title, api_key):
    """Generate an SEO-oriented blog post from a transcript via the chat model.

    The stripped reply is saved to ``sample_data/Blog.txt``.

    Args:
        transcript: Full transcript text embedded in the prompt.
        keywords: Keywords the post should use (passed through verbatim).
        title: Title of the blog post.
        api_key: OpenAI API key used to build the client.

    Returns:
        The model's reply with surrounding whitespace removed (an empty
        string if the model returned no text).
    """
    client = OpenAI(api_key=api_key)
    prompt = f"Title: {title}\nKeywords: {keywords}\n\nTranscript:\n{transcript}\n\nPlease write an SEO-optimized blog post based on the above transcript, using the provided title and keywords. The blog post should be around 5000 words and should not include timestamps."

    response = client.chat.completions.create(
        model="gpt-4-turbo",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt}
        ],
        # The prompt asks for a long post, so give the model its full
        # completion budget (4096 tokens for gpt-4-turbo) rather than 2048,
        # which cut the post off well short of the requested length.
        max_tokens=4096
    )
    choice = response.choices[0]
    if choice.finish_reason == "length":
        # Surface truncation instead of silently saving a post that stops mid-sentence.
        print("Warning: the blog post hit the token limit and may be cut off.")

    # message.content can be None (e.g. a refusal); avoid crashing on .strip().
    blog_content = (choice.message.content or "").strip()
    save_file(blog_content, 'sample_data/Blog.txt')
    return blog_content

def update_output(output_widget, message):
    """Replace the contents of an output widget with a single status message.

    Args:
        output_widget: Context manager that captures display output
            (the notebook passes its ``widgets.Output`` instance).
        message: Plain-text message; it is HTML-escaped before rendering.
    """
    import html  # local import: only this function needs it

    with output_widget:
        clear_output(wait=True)
        # Escape the text so messages containing '<', '>' or '&' (common in
        # exception strings) are shown literally instead of being parsed as markup.
        display(HTML(f"<p>{html.escape(str(message))}</p>"))

# Create and display UI



*   Run the cell below to render the input form.
*   The form asks for the OpenAI API key, the link to the video, the source of the video (Drive or YouTube), the blog title, and the keywords to be used in the blog.



In [None]:
# Cell 3
# Use a Password field so the API key is masked on screen instead of being
# shown in clear text; `.value` still returns the entered string.
api_key_input = widgets.Password(
    value='',
    placeholder='Enter your OpenAI API key',
    description='API Key:',
    disabled=False
)

# --- Form fields -----------------------------------------------------------
# `disabled` is left at its default (False) for every widget below.

# Link to the source video.
url_input = widgets.Text(
    description='Video URL:',
    placeholder='Enter the video URL',
    value='',
)

# Where the video is hosted; selects the download routine used on submit.
file_type_input = widgets.Dropdown(
    description='File Type:',
    options=['youtube', 'drive'],
    value='youtube',
)

# Title passed to the blog prompt.
title_input = widgets.Text(
    description='Blog Title:',
    placeholder='Enter the blog title',
    value='',
)

# Keywords passed to the blog prompt.
keywords_input = widgets.Text(
    description='Keywords:',
    placeholder='Enter keywords, separated by commas',
    value='',
)

# --- Actions and output ----------------------------------------------------
submit_button = widgets.Button(
    description='Submit',
    tooltip='Submit the form',
    icon='check',
    button_style='success',
)

# Area that receives status messages and the generated results.
output = widgets.Output()

def _extract_drive_file_id(url):
    """Return the Google Drive file ID found in a share link.

    Supports both ``.../d/<id>/...`` path-style links and ``...?id=<id>``
    query-style links. Raises ValueError when neither form is present.
    """
    import re  # local import: only this helper needs it

    # Path form is checked first so unrelated query parameters are never
    # mistaken for the file ID.
    for pattern in (r'/d/([A-Za-z0-9_-]+)', r'(?:^|[?&])id=([A-Za-z0-9_-]+)'):
        found = re.search(pattern, url)
        if found:
            return found.group(1)
    raise ValueError("Could not find a Google Drive file ID in the URL.")


def on_submit(b):
    """Handle a click on the Submit button: run the whole pipeline.

    Reads the form fields, downloads the audio, generates the transcript and
    captions, requests the YouTube description and the blog concurrently, and
    finally renders the results and download links in the output widget.
    Any failure is reported in the output widget rather than raised.

    Args:
        b: The button instance passed by ``on_click`` (unused).
    """
    import html  # local import: only this handler needs it

    with output:
        output.clear_output()
        # Strip stray whitespace (e.g. from copy/paste) so a padded API key or
        # URL does not fail later, and whitespace-only fields count as empty.
        api_key = api_key_input.value.strip()
        url = url_input.value.strip()
        file_type = file_type_input.value
        keywords = keywords_input.value.strip()
        title = title_input.value.strip()

        # Validate inputs
        if not api_key or not url or not keywords or not title:
            update_output(output, "Please fill in all fields.")
            return

        # Process video
        try:
            os.makedirs('sample_data', exist_ok=True)
            update_output(output, "Downloading audio...")
            if file_type == 'youtube':
                download_youtube_audio(url)
            elif file_type == 'drive':
                # Accept both '/file/d/<id>/view' and '?id=<id>' share links and
                # drop any trailing query parameters from the ID.
                download_drive_video(_extract_drive_file_id(url))
            else:
                raise ValueError(f"Unsupported file type: {file_type}")
            # Both download routines write the audio to the same location.
            file_path = 'sample_data/audio.mp3'

            # Ensure the file exists before proceeding
            if not os.path.exists(file_path):
                update_output(output, f"Error: {file_path} does not exist.")
                return

            update_output(output, "Generating transcript...")
            transcript, segments = generate_transcript(file_path)

            update_output(output, "Generating captions...")
            captions = generate_captions(segments)

            # The two API calls are independent, so run them in parallel.
            with concurrent.futures.ThreadPoolExecutor() as executor:
                future_description = executor.submit(generate_youtube_description, transcript, api_key)
                future_blog = executor.submit(generate_blog, transcript, keywords, title, api_key)

                youtube_description = future_description.result()
                blog = future_blog.result()

            update_output(output, "Processing completed.")

            # Display results and download links. Generated text is escaped so
            # characters such as '<' or '&' are shown literally inside <pre>.
            sections = (
                ("Transcript", transcript),
                ("Captions", captions),
                ("Youtube Description", youtube_description),
                ("Blog", blog),
            )
            for heading, body in sections:
                display(HTML(f"<h3>{heading}:</h3><pre>{html.escape(body)}</pre>"))

            downloads = (
                ("sample_data/Transcripts.txt", "Download Transcript"),
                ("sample_data/Captions.srt", "Download Captions"),
                ("sample_data/Youtube_Description.txt", "Download Youtube Description"),
                ("sample_data/Blog.txt", "Download Blog"),
            )
            for href, label in downloads:
                display(HTML(f'<a href="{href}" download>{label}</a>'))
        except Exception as e:
            update_output(output, f"Error processing video: {e}")

# Run the pipeline whenever the Submit button is clicked.
submit_button.on_click(on_submit)

# Render the form fields top to bottom, with the output area last.
display(
    api_key_input,
    url_input,
    file_type_input,
    title_input,
    keywords_input,
    submit_button,
    output,
)

# Function to clear the generated files.


In [None]:
# Cell 4
def clear_downloads(folder='sample_data'):
    """Delete everything inside the working folder, keeping the folder itself.

    Files and symlinks are unlinked; sub-directories are removed recursively.
    Deletion is best-effort: a failure on one entry is reported and the
    remaining entries are still processed.

    Args:
        folder: Directory to empty. Defaults to the notebook's 'sample_data'
            working directory.
    """
    # Nothing has been generated yet (or the folder was removed): nothing to
    # clear, and os.listdir would raise FileNotFoundError.
    if not os.path.isdir(folder):
        return

    for filename in os.listdir(folder):
        file_path = os.path.join(folder, filename)
        try:
            if os.path.isfile(file_path) or os.path.islink(file_path):
                os.unlink(file_path)
            elif os.path.isdir(file_path):
                shutil.rmtree(file_path)
        except Exception as e:
            print(f'Failed to delete {file_path}. Reason: {e}')

# Clear all downloads and generated files
# NOTE: this removes every file and sub-directory inside 'sample_data', so run
# this cell only after saving anything you still need. The confirmation below
# is printed even if individual deletions failed (those are reported separately).
clear_downloads()
print("All downloaded and generated files have been cleared.")