<a href="https://colab.research.google.com/github/0xM4sk/pub/blob/main/Gpt3_5Summarizer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install google-api-python-client
!pip install youtube-transcript-api
!pip install transformers
!pip install gradio
!pip install torch
!pip install openai

In [None]:
import os
import re

import gradio as gr
import isodate
import openai
import torch
from googleapiclient.discovery import build
from youtube_transcript_api import YouTubeTranscriptApi

# Select the compute device: CUDA when PyTorch reports a usable GPU, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

# System message for the chat request: describes the model's overall role.
# The text is wrapped in one leading and one trailing newline.
SYSTEM_PROMPT = (
    "\n"
    "Your task is to deliver an in-depth analysis of video transcripts, "
    "offering nuanced insights that are easily digestible. "
    "Focus on a detailed exploration of the content, "
    "with particular emphasis on explaining terminology "
    "and providing a thorough section-by-section summary."
    "\n"
)

# User message template. The single `{}` placeholder is filled with the
# transcript via str.format, so any literal brace added to this text must be
# doubled ("{{" / "}}"). The instructions are topic-neutral: the subject comes
# from the transcript itself.
USER_PROMPT_TEMPLATE = """
Your task is to provide an in-depth analysis of a provided video transcript, structured to both inform and engage readers. Your narrative should unfold with clarity and insight, reflecting the style of a Paul Graham essay. Follow these major headings for organization:

# Intro
Begin with a narrative introduction that captivates the reader, setting the stage for an engaging exploration of the video's topic. Start with an anecdote or a surprising fact to draw in the reader, then succinctly summarize the main themes and objectives of the video.

# ELI5
Immediately follow with an ELI5 (Explain Like I'm 5) section. Use simple language and analogies to make complex ideas accessible and engaging, ensuring clarity and simplicity.

# Terminologies
- List and define key terminologies mentioned in the video in bullet points. Provide comprehensive yet understandable definitions for someone not familiar with the subject matter. Ensure this section naturally transitions from the ELI5, enriching the reader's understanding without overwhelming them.

# Summary
Your summary should unfold as a detailed and engaging narrative essay, deeply exploring the content of the video. This section is the core of your analysis and should be both informative and thought-provoking. When crafting your summary, delve deeply into the video’s main themes. Provide a comprehensive analysis of each theme, backed by examples from the video and relevant research in the field. This section should read as a compelling essay, rich in detail and analysis, that not only informs the reader but also stimulates a deeper consideration of the topic's nuances and complexities. Strive for a narrative that is as enriching and engaging as it is enlightening. Please include headings and subheadings to organize your analysis effectively if needed. It should be as detailed and comprehensive as possible.

# Takeaways
- End with actionable takeaways in bullet points, offering practical advice or steps based on the video content. These should relate directly to the insights discussed in your essay and highlight their real-world relevance and impact.
\n\n\nText: {}:"""

# Load OpenAI API key. The OPENAI_API_KEY environment variable takes
# precedence so a real key never has to be pasted into the notebook; the
# fallback below is only a placeholder, not a usable key.
openai.api_key = os.environ.get('OPENAI_API_KEY', 'YOUR_OPENAI_API_KEY')

# Function to get video ID from a YouTube URL
def get_video_id(url):
    """Return the video ID contained in a YouTube URL, or None if none is found.

    Recognised forms (the scheme may be omitted):
      * any URL with a ``v`` query parameter, e.g. ``youtube.com/watch?v=<id>``
      * ``youtu.be/<id>``
      * ``youtube.com/embed/<id>``, ``/shorts/<id>``, ``/live/<id>``, ``/v/<id>``

    Args:
        url: The URL text to inspect.

    Returns:
        The video ID as a string, or None for empty, non-string or
        unrecognised input.
    """
    from urllib.parse import urlparse, parse_qs

    if not url or not isinstance(url, str):
        return None

    url = url.strip()
    # Scheme-less input ("youtu.be/<id>") would otherwise be parsed as a bare
    # path with no host; a leading "//" makes urlparse treat it as a host.
    if '://' not in url:
        url = '//' + url.lstrip('/')
    parsed = urlparse(url)

    # Standard watch URLs carry the ID in the "v" query parameter.
    ids = parse_qs(parsed.query).get('v')
    if ids:
        return ids[0]

    host = (parsed.hostname or '').lower()
    segments = [part for part in parsed.path.split('/') if part]

    def on_domain(domain):
        # True for the domain itself and for any of its subdomains.
        return host == domain or host.endswith('.' + domain)

    # Short links: the ID is the first path segment.
    if on_domain('youtu.be'):
        return segments[0] if segments else None

    # Embed / shorts / live links: the ID follows a fixed path prefix.
    if on_domain('youtube.com') or on_domain('youtube-nocookie.com'):
        if len(segments) >= 2 and segments[0] in ('embed', 'shorts', 'live', 'v'):
            return segments[1]

    return None

# Function to retrieve the transcript
def get_transcript(video_url):
    """Fetch the transcript of a YouTube video and return it as one string.

    Args:
        video_url: URL of the video; its ID is extracted with get_video_id.

    Returns:
        The ``text`` of every transcript entry, joined with single spaces.

    Raises:
        ValueError: if no video ID can be extracted from the URL or the
            transcript cannot be retrieved. The underlying exception is
            attached as the cause.
    """
    try:
        video_id = get_video_id(video_url)
        if not video_id:
            raise ValueError(f"no video ID found in URL {video_url!r}")

        if hasattr(YouTubeTranscriptApi, 'get_transcript'):
            # Static helper offered by older youtube-transcript-api releases.
            entries = YouTubeTranscriptApi.get_transcript(video_id)
        else:
            # Releases without the static helper expose an instance-level
            # fetch(); to_raw_data() gives the same list-of-dicts shape.
            entries = YouTubeTranscriptApi().fetch(video_id).to_raw_data()

        return ' '.join(item['text'] for item in entries)
    except Exception as e:
        raise ValueError(f"Error retrieving transcript: {e}") from e

# Function to preprocess text
def preprocess_text(text):
    """Normalize whitespace and collapse doubled periods in *text*.

    Every run of whitespace (spaces, tabs, newlines) becomes a single space.
    Newlines are treated as plain whitespace, so no punctuation is inserted
    at line breaks. Afterwards, two periods separated only by optional
    whitespace (".." or ". .") are reduced to a single period.

    Args:
        text: The raw transcript string.

    Returns:
        The cleaned string.
    """
    # Collapse all whitespace first; after this step the text contains no
    # newline characters at all.
    text = re.sub(r'\s+', ' ', text)
    # Remove double periods
    text = re.sub(r'\.\s*\.', '.', text)
    return text

# Function to summarize the transcript using OpenAI API
def summarize_transcript(transcript, system_prompt, user_prompt_template, model="gpt-4"):
    """Ask an OpenAI chat model to analyse *transcript* and return its reply.

    Works with both generations of the ``openai`` package: the client-based
    API (``openai.OpenAI``, version 1.0 and later) and the older module-level
    ``openai.ChatCompletion`` API.

    Args:
        transcript: The transcript text to analyse.
        system_prompt: Content of the system message.
        user_prompt_template: ``str.format`` template with one positional
            ``{}`` placeholder that receives the transcript.
        model: Name of the chat model to call (defaults to "gpt-4").

    Returns:
        The text content of the first choice in the model's response.

    NOTE(review): the whole transcript is sent in a single request and is
    never shortened here; very long transcripts may exceed the model's
    context window -- confirm against the model in use.
    """
    summarize_prompt = user_prompt_template.format(transcript)

    request = {
        "model": model,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": summarize_prompt},
        ],
        "temperature": 0,
        "top_p": 1,
        "frequency_penalty": 0,
        "presence_penalty": 0,
    }

    if hasattr(openai, "OpenAI"):
        # openai>=1.0: ChatCompletion no longer exists; use a client object.
        # Reuse the module-level key so existing configuration still applies.
        client = openai.OpenAI(api_key=openai.api_key)
        response = client.chat.completions.create(**request)
        return response.choices[0].message.content

    # openai<1.0: module-level API returning a dict-like response.
    response = openai.ChatCompletion.create(**request)
    return response['choices'][0]['message']['content']

# Gradio function to summarize YouTube video transcript
def summarize_youtube_video(url):
    """Summarize the transcript of the YouTube video at *url*.

    Steps: extract the video ID, look up the video's duration through the
    YouTube Data API, fetch and clean the transcript, then request the
    summary from the language model.

    Args:
        url: The YouTube URL entered by the user.

    Returns:
        A ``(summary, duration_message)`` tuple. If any step fails, the tuple
        is ``(error_text, "Error in processing video")`` instead; no
        exception is raised to the caller.
    """
    try:
        # Extract video ID
        video_id = get_video_id(url)
        if not video_id:
            raise ValueError("Could not find a YouTube video ID in the given URL.")

        # Get video details. The YOUTUBE_API_KEY environment variable takes
        # precedence; the fallback is only a placeholder, not a usable key.
        api_key = os.environ.get('YOUTUBE_API_KEY', 'YOUR_YOUTUBE_API_KEY')
        youtube = build('youtube', 'v3', developerKey=api_key)
        request = youtube.videos().list(part="contentDetails", id=video_id)
        response = request.execute()

        # An unknown ID yields no items; report that instead of an IndexError.
        items = response.get('items') or []
        if not items:
            raise ValueError(f"No YouTube video found for ID {video_id!r}.")
        duration = items[0]['contentDetails']['duration']
        video_duration_seconds = int(isodate.parse_duration(duration).total_seconds())

        # Fetch the transcript
        transcript_text = get_transcript(url)

        # Preprocess the transcript
        transcript_text = preprocess_text(transcript_text)

        # Summarize the transcript
        final_summary = summarize_transcript(transcript_text, SYSTEM_PROMPT, USER_PROMPT_TEMPLATE)

        minutes, seconds = divmod(video_duration_seconds, 60)
        return final_summary, f"Video Duration: {minutes} minutes and {seconds} seconds"

    except Exception as e:
        # Top-level UI boundary: show the failure in the output boxes.
        return str(e), "Error in processing video"

# Gradio UI: one URL textbox as input, two textboxes as output.
input_url = gr.Textbox(label="YouTube URL")
output_summary = gr.Textbox(label="Detailed Summary", lines=10)
output_duration = gr.Textbox(label="Video Duration")

# The two outputs receive, in order, the two values returned by
# summarize_youtube_video.
demo = gr.Interface(
    fn=summarize_youtube_video,
    inputs=[input_url],
    outputs=[output_summary, output_duration],
    title="YouTube Video Summarizer",
    description="Enter a YouTube URL to summarize its transcript with a general overview, main points, and TLDR.",
)

# Start the app.
demo.launch(share=True, debug=True)
