In [None]:
!pip install ipywidgets fpdf youtube-transcript-api transformers torch openai google-generativeai



In [None]:
import ipywidgets as widgets
from IPython.display import display, clear_output
from fpdf import FPDF
from youtube_transcript_api import YouTubeTranscriptApi
from transformers import pipeline
import re
import torch
import openai
import google.generativeai as genai

In [None]:
# Configure API credentials.
# Keys are read from environment variables so real secrets never need to be
# saved inside the notebook. The original placeholder strings are kept as
# fallbacks, so the cell behaves exactly as before when the variables are unset.
import os

# Configure OpenAI API
openai.api_key = os.environ.get("OPENAI_API_KEY", "YOUR_OPENAI_API_KEY")

# Configure Google Gemini API
genai.configure(api_key=os.environ.get("GEMINI_API_KEY", "YOUR_GEMINI_API_KEY"))

In [None]:
# Create widgets

# Text box holding the URL of the video to analyze.
youtube_link = widgets.Text(
    placeholder="Enter YouTube URL",
    description="YouTube URL:",
    disabled=False
)

# Yes/No choice read by the click handler to decide whether to export a PDF.
# "Download Transcript?" is wider than the default description column, so the
# column is allowed to grow to its natural width instead of clipping the label.
save_pdf = widgets.RadioButtons(
    options=["Yes", "No"],
    description="Download Transcript?",
    style={"description_width": "initial"},
    disabled=False
)

# Button that triggers the analysis, and the area its handler prints into.
start_button = widgets.Button(description="Start Analysis")
output = widgets.Output()

In [None]:
def get_youtube_subtitles(video_id):
    """
    Fetch subtitles for a YouTube video using its video ID.

    Works with both generations of youtube-transcript-api: the legacy static
    ``get_transcript`` call is used when the installed version still has it,
    otherwise the instance-based ``fetch`` call is used. The install cell does
    not pin a version, so either may be present.

    Args:
        video_id: The YouTube video ID.

    Returns:
        All subtitle lines joined with single spaces, or None if the
        transcript could not be fetched (the error is printed).
    """
    try:
        if hasattr(YouTubeTranscriptApi, "get_transcript"):
            # Legacy API: a list of dicts, each carrying a 'text' entry.
            entries = YouTubeTranscriptApi.get_transcript(video_id)
            pieces = [entry['text'] for entry in entries]
        else:
            # Newer API: iterating the fetched transcript yields snippet
            # objects that expose the caption text as `.text`.
            fetched = YouTubeTranscriptApi().fetch(video_id)
            pieces = [snippet.text for snippet in fetched]
        return " ".join(pieces)
    except Exception as e:
        print(f" Error fetching subtitles: {e}")
        return None

In [None]:
def clean_text(text):
    """
    Normalize text to ASCII with single spaces.

    Every run of non-ASCII characters becomes one space, every run of
    whitespace is collapsed to one space, and leading/trailing whitespace is
    stripped from the result.
    """
    # Step 1: replace each run of non-ASCII characters with a space.
    ascii_only = re.sub(r'[^\x00-\x7F]+', ' ', text)
    # Step 2: collapse whitespace runs, then trim both ends.
    return re.sub(r'\s+', ' ', ascii_only).strip()

In [None]:
def save_transcript_as_pdf(transcript, filename="transcript.pdf"):
    """
    Save the transcript as a PDF file.

    Args:
        transcript: Text to write into the PDF.
        filename: Destination path of the PDF (default "transcript.pdf").

    Returns:
        None. Success or failure is reported with a printed message; errors
        are caught rather than raised.
    """
    try:
        # The built-in "Arial" core font can only encode Latin-1. Replace any
        # character outside that range with "?" so one stray symbol does not
        # abort the whole export. ASCII-only input passes through unchanged.
        printable = transcript.encode("latin-1", "replace").decode("latin-1")

        pdf = FPDF()
        pdf.add_page()
        pdf.set_font("Arial", size=12)
        pdf.multi_cell(0, 10, printable)
        pdf.output(filename)
        print(f"Transcript saved as {filename}")
    except Exception as e:
        print(f" Error saving PDF: {e}")

In [None]:
def load_sentiment_model():
    """
    Load the sentiment analysis model.

    Tries the three-class Twitter RoBERTa model first (on GPU when CUDA is
    available). If that fails, the reason is printed and the library's default
    sentiment pipeline is returned instead.

    Returns:
        A sentiment-analysis pipeline.
    """
    # Defined outside the try block so the except branch can always refer to it.
    model_name = "cardiffnlp/twitter-roberta-base-sentiment"
    try:
        device = 0 if torch.cuda.is_available() else -1
        sentiment_analyzer = pipeline("sentiment-analysis", model=model_name, device=device)
        print(f" Loaded sentiment model: {model_name}")
        return sentiment_analyzer
    except Exception as e:
        # Report why loading failed instead of silently discarding the error.
        print(f" Error loading {model_name} ({e}), using default sentiment model.")
        return pipeline("sentiment-analysis")  # Fallback model

In [None]:
def analyze_sentiment(text, youtube_link, sentiment_analyzer=None):
    """
    Perform sentiment analysis on the transcript.

    The text is cut into 512-character chunks, each chunk is classified, and
    the overall sentiment is the label assigned to the most chunks. Ties are
    resolved in the order POSITIVE, NEGATIVE, NEUTRAL.

    Args:
        text: Transcript text to classify.
        youtube_link: URL of the video; only used in the progress message.
        sentiment_analyzer: Optional callable that takes a string and returns
            a list of dicts with a 'label' key (for example an already loaded
            pipeline). When omitted, ``load_sentiment_model()`` is called.

    Returns:
        A dict with "Overall Sentiment", "Positive Score", "Negative Score",
        "Neutral Score" (chunk counts) and "Detailed Results", or None when
        the text is empty or an error occurs (a message is printed).
    """
    print(f" Performing sentiment analysis for: {youtube_link}")

    # With no text there are no chunks; the vote would otherwise be reported
    # as POSITIVE with all-zero scores.
    if not text or not text.strip():
        print("Error during sentiment analysis: no text to analyze.")
        return None

    try:
        if sentiment_analyzer is None:
            sentiment_analyzer = load_sentiment_model()

        label_mapping = {
            "LABEL_0": "NEGATIVE",
            "LABEL_1": "NEUTRAL",
            "LABEL_2": "POSITIVE"
        }
        known_labels = set(label_mapping.values())

        max_length = 512
        chunks = [text[i:i + max_length] for i in range(0, len(text), max_length)]

        results = []
        for chunk in chunks:
            for res in sentiment_analyzer(chunk):
                raw_label = str(res['label'])
                # Models that already emit readable labels (such as the
                # fallback pipeline) are accepted as-is; only labels that are
                # neither mapped nor recognized become UNKNOWN.
                normalized = label_mapping.get(raw_label, raw_label.upper())
                res['label'] = normalized if normalized in known_labels else "UNKNOWN"
                results.append(res)

        # Insertion order doubles as the tie-break order used by max() below.
        label_counts = {"POSITIVE": 0, "NEGATIVE": 0, "NEUTRAL": 0}
        for res in results:
            if res['label'] in label_counts:
                label_counts[res['label']] += 1

        positive_score = label_counts["POSITIVE"]
        negative_score = label_counts["NEGATIVE"]
        neutral_score = label_counts["NEUTRAL"]

        if any(label_counts.values()):
            overall_sentiment = max(label_counts.items(), key=lambda item: item[1])[0]
        else:
            # No chunk produced a recognized label, so there is no winner.
            overall_sentiment = "UNKNOWN"

        print("\n=== Sentiment Analysis ===")
        print(f"Overall Sentiment: {overall_sentiment}")
        print(f"Positive Score: {positive_score}")
        print(f"Negative Score: {negative_score}")
        print(f"Neutral Score: {neutral_score}")

        return {
            "Overall Sentiment": overall_sentiment,
            "Positive Score": positive_score,
            "Negative Score": negative_score,
            "Neutral Score": neutral_score,
            "Detailed Results": results
        }
    except Exception as e:
        print(f"Error during sentiment analysis: {e}")
        return None

In [None]:
def _split_into_chunks(text, size):
    """Split text into consecutive slices of at most `size` characters."""
    return [text[i:i + size] for i in range(0, len(text), size)]


def _summarize_chunk(summarizer, chunk, max_summary_length, min_summary_length):
    """Summarize one chunk, shrinking the length bounds for very short input."""
    chunk_length = len(chunk)
    if chunk_length < max_summary_length:
        max_length = max(10, chunk_length // 2)  # Ensure max_length is at least 10
        min_length = max(5, chunk_length // 4)   # Ensure min_length is at least 5
    else:
        max_length = max_summary_length
        min_length = min_summary_length

    result = summarizer(chunk, max_length=max_length, min_length=min_length, do_sample=False)
    return result[0]['summary_text']


def summarize_text(text, youtube_link, summarizer=None):
    """
    Summarize a transcript.

    The text is split into 1024-character chunks and each chunk is summarized.
    When there are several chunks, their summaries are joined and summarized
    again. If the joined text is still longer than one chunk, it is reduced
    chunk by chunk (bounded number of rounds, then cut) so the final pass never
    receives more than one chunk's worth of text.

    Args:
        text: Transcript text to summarize.
        youtube_link: URL of the video; only used in the progress message.
        summarizer: Optional callable invoked as
            ``summarizer(text, max_length=..., min_length=..., do_sample=False)``
            and returning ``[{'summary_text': ...}]``. When omitted, a
            "facebook/bart-large-cnn" summarization pipeline is created.

    Returns:
        The summary string, or None when the text is empty or an error occurs
        (a message is printed).
    """
    print(f" Performing summarization for: {youtube_link}")

    if not text or not text.strip():
        print(" Error during summarization: no text to summarize.")
        return None

    try:
        if summarizer is None:
            device = 0 if torch.cuda.is_available() else -1
            summarizer = pipeline("summarization", model="facebook/bart-large-cnn", device=device)

        max_input_length = 1024    # characters handed to the model per call
        max_summary_length = 150
        min_summary_length = 50
        max_reduce_rounds = 5      # guards against summaries that refuse to shrink

        summaries = [
            _summarize_chunk(summarizer, chunk, max_summary_length, min_summary_length)
            for chunk in _split_into_chunks(text, max_input_length)
        ]

        if len(summaries) == 1:
            # A single chunk is already summarized; a second pass would only
            # re-summarize the summary.
            final_summary_text = summaries[0]
        else:
            combined_summary = " ".join(summaries)

            # Long transcripts yield many chunk summaries. Keep reducing until
            # the joined text fits into a single model call.
            rounds = 0
            while len(combined_summary) > max_input_length and rounds < max_reduce_rounds:
                combined_summary = " ".join(
                    _summarize_chunk(summarizer, chunk, max_summary_length, min_summary_length)
                    for chunk in _split_into_chunks(combined_summary, max_input_length)
                )
                rounds += 1
            combined_summary = combined_summary[:max_input_length]

            final_summary_text = _summarize_chunk(
                summarizer, combined_summary, max_summary_length, min_summary_length
            )

        # NOTE(review): these substitutions rewrite ANY word followed by
        # "heel"/"says"/"is" (e.g. "The sky is blue" -> "The they are blue").
        # Kept unchanged to preserve existing output; confirm they are still wanted.
        final_summary_text = re.sub(r'\b\w+ heel\b', 'the person', final_summary_text, flags=re.IGNORECASE)
        final_summary_text = re.sub(r'\b\w+ says\b', 'they mention', final_summary_text, flags=re.IGNORECASE)
        final_summary_text = re.sub(r'\b\w+ is\b', 'they are', final_summary_text, flags=re.IGNORECASE)

        print("\n Final Summary Generated")
        return final_summary_text
    except Exception as e:
        print(f" Error during summarization: {e}")
        return None

In [None]:
def analyze_with_gemini(text):
    """
    Ask Google Gemini for insights about the transcript.

    Returns the text of the model's response, or None if anything fails
    (the error is printed).
    """
    try:
        # 'gemini-1.5-pro-001' can be used here instead if preferred.
        gemini = genai.GenerativeModel('gemini-1.5-pro-latest')
        prompt = f"Analyze the following text and provide insights: {text}"
        return gemini.generate_content(prompt).text
    except Exception as e:
        print(f" Error analyzing text with Gemini: {e}")
        return None

In [None]:
def on_start_button_click(b):
    """
    Click handler: run the full transcript analysis for the entered URL.

    Reads the URL from the `youtube_link` widget and the PDF choice from
    `save_pdf`, and prints all progress and results into `output`. These
    globals are looked up at click time.

    Steps: extract the video ID, fetch and clean the subtitles, run sentiment
    analysis, summarization and Gemini analysis, then optionally save the
    transcript as a PDF.

    Args:
        b: The clicked button (unused).
    """
    with output:
        clear_output()
        print(" Processing... Please wait.")

        # Accept watch?v=, youtu.be/, /shorts/, /embed/ and /live/ links.
        # The ID stops at the first character that cannot be part of an ID,
        # so trailing "&t=..." or "#..." parts are ignored.
        url = youtube_link.value.strip()
        match = re.search(r'(?:[?&]v=|youtu\.be/|/shorts/|/embed/|/live/)([A-Za-z0-9_-]+)', url)
        if not match:
            print(f" Invalid YouTube URL: {url!r}")
            return
        video_id = match.group(1)

        # Fetch subtitles
        transcript = get_youtube_subtitles(video_id)
        if not transcript:
            print("Failed to fetch subtitles.")
            return

        transcript = clean_text(transcript)
        if not transcript:
            # Cleaning strips non-ASCII characters, which can leave nothing.
            print("Transcript is empty after cleaning; nothing to analyze.")
            return
        print("Subtitles fetched successfully!\n")

        # Perform sentiment analysis (analyze_sentiment prints its own report,
        # so the scores are not printed a second time here).
        sentiment_results = analyze_sentiment(transcript, youtube_link.value)
        if not sentiment_results:
            print("Sentiment analysis failed.")

        # Perform summarization
        summary = summarize_text(transcript, youtube_link.value)
        if summary:
            print("\n=== Summary ===")
            print(summary)
        else:
            print("Summarization failed.")

        # Analyze with Gemini
        gemini_insights = analyze_with_gemini(transcript)
        if gemini_insights:
            print("\n=== Gemini Insights ===")
            print(gemini_insights)

        # Save as PDF if requested
        if save_pdf.value == "Yes":
            save_transcript_as_pdf(transcript)
        else:
            print("\n📄 Transcript not saved as PDF.")

In [None]:
# Attach the function to the button: on_start_button_click runs on each click.
start_button.on_click(on_start_button_click)

# Display widgets in order: URL box, PDF choice, start button, output area.
display(youtube_link, save_pdf, start_button, output)

Text(value='https://www.youtube.com/watch?v=7AltDNiJ1gs', description='YouTube URL:', placeholder='Enter YouTu…

RadioButtons(description='Download Transcript?', options=('Yes', 'No'), value='Yes')

Button(description='Start Analysis', style=ButtonStyle())

Output()

In [None]:
def list_available_models():
    """
    Print each model returned by the Gemini API with its generation methods.

    Errors are caught and printed instead of being raised.
    """
    try:
        for entry in genai.list_models():
            print(f"Model Name: {entry.name}")
            print(f"Supported Methods: {entry.supported_generation_methods}")
            print("-" * 40)
    except Exception as e:
        print(f" Error listing models: {e}")

# Print the model list
list_available_models()

Model Name: models/chat-bison-001
Supported Methods: ['generateMessage', 'countMessageTokens']
----------------------------------------
Model Name: models/text-bison-001
Supported Methods: ['generateText', 'countTextTokens', 'createTunedTextModel']
----------------------------------------
Model Name: models/embedding-gecko-001
Supported Methods: ['embedText', 'countTextTokens']
----------------------------------------
Model Name: models/gemini-1.0-pro-vision-latest
Supported Methods: ['generateContent', 'countTokens']
----------------------------------------
Model Name: models/gemini-pro-vision
Supported Methods: ['generateContent', 'countTokens']
----------------------------------------
Model Name: models/gemini-1.5-pro-latest
Supported Methods: ['generateContent', 'countTokens']
----------------------------------------
Model Name: models/gemini-1.5-pro-001
Supported Methods: ['generateContent', 'countTokens', 'createCachedContent']
----------------------------------------
Model Name:

In [None]:
!pip install google-api-python-client google-auth-httplib2 google-auth-oauthlib ipywidgets



In [None]:
from googleapiclient.discovery import build

In [None]:
# Step 1: Install Required Libraries
!pip install google-api-python-client google-auth-httplib2 google-auth-oauthlib ipywidgets google-generativeai

# Step 2: Import Libraries
import ipywidgets as widgets
from IPython.display import display, clear_output
from googleapiclient.discovery import build
import google.generativeai as genai

# Step 3: Configure APIs
# Keys are read from environment variables so real secrets never need to be
# saved inside the notebook. The placeholders remain as fallbacks; replace
# them only if you cannot set the environment variables.
import os

YOUTUBE_API_KEY = os.environ.get("YOUTUBE_API_KEY", "YOUR_YOUTUBE_API_KEY")  # YouTube Data API key
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "YOUR_GEMINI_API_KEY")  # Google Gemini API key

# Configure Google Gemini API
genai.configure(api_key=GEMINI_API_KEY)

# Step 4: Define Functions

def fetch_video_metadata(video_id, api_key):
    """
    Look up a video's snippet through the YouTube Data API.

    Returns a dict with 'title', 'description' and 'thumbnail_url' (the
    default thumbnail), or None when the response has no items or any error
    occurs (a message is printed in both cases).
    """
    try:
        client = build('youtube', 'v3', developerKey=api_key)
        response = client.videos().list(part='snippet', id=video_id).execute()

        items = response['items']
        if not items:
            print(" No metadata found for the video.")
            return None

        snippet = items[0]['snippet']
        return {
            'title': snippet['title'],
            'description': snippet['description'],
            'thumbnail_url': snippet['thumbnails']['default']['url']
        }
    except Exception as e:
        print(f" Error fetching video metadata: {e}")
        return None

def analyze_metadata_with_gemini(metadata):
    """
    Ask Google Gemini for insights about a video's metadata.

    `metadata` must provide 'title', 'description' and 'thumbnail_url'.
    Returns the text of the model's response, or None on any error (the error
    is printed).
    """
    try:
        # 'gemini-1.5-pro-001' can be used here instead if preferred.
        gemini = genai.GenerativeModel('gemini-1.5-pro-latest')

        # The thumbnail is passed as a URL inside the text prompt; no image
        # data is attached to the request.
        prompt = f"""
        Analyze the following video metadata and provide insights:
        - Title: {metadata['title']}
        - Description: {metadata['description']}
        - Thumbnail URL: {metadata['thumbnail_url']}
        """

        return gemini.generate_content(prompt).text
    except Exception as e:
        print(f" Error analyzing metadata with Gemini: {e}")
        return None

# Step 5: Create Widgets for User Input
# Output area the click handler prints into.
output = widgets.Output()

# Button that starts the metadata analysis.
start_button = widgets.Button(description="Start Multimodal Analysis")

# Text box holding the URL of the video to analyze.
youtube_link = widgets.Text(
    description="YouTube URL:",
    placeholder="Enter YouTube URL",
    disabled=False
)

# Step 6: Handle Button Click
def on_start_button_click(b):
    """
    Click handler: fetch the video's metadata and analyze it with Gemini.

    Reads the URL from the `youtube_link` widget and prints all progress and
    results into `output`.

    Args:
        b: The clicked button (unused).
    """
    import re  # local import: this cell's import list does not include `re`

    with output:
        clear_output()
        print(" Processing... Please wait.")

        # Extract video ID from the URL. Accepts watch?v=, youtu.be/,
        # /shorts/, /embed/ and /live/ links; the ID stops at the first
        # character that cannot be part of an ID.
        url = youtube_link.value.strip()
        match = re.search(r'(?:[?&]v=|youtu\.be/|/shorts/|/embed/|/live/)([A-Za-z0-9_-]+)', url)
        if not match:
            print(f" Invalid YouTube URL: {url!r}")
            return
        video_id = match.group(1)
        print(f" Extracted Video ID: {video_id}")

        # Fetch video metadata
        metadata = fetch_video_metadata(video_id, YOUTUBE_API_KEY)
        if not metadata:
            print(" Failed to fetch video metadata.")
            return

        print("Video metadata fetched successfully!\n")
        print(f"Title: {metadata['title']}")
        print(f"Description: {metadata['description']}")
        print(f"Thumbnail URL: {metadata['thumbnail_url']}")

        # Analyze metadata with Gemini
        gemini_insights = analyze_metadata_with_gemini(metadata)
        if gemini_insights:
            print("\n=== Gemini Insights ===")
            print(gemini_insights)
        else:
            print(" Failed to analyze metadata with Gemini.")

# Step 7: Attach Button Click Handler and Display Widgets
# on_start_button_click runs on each click of start_button.
start_button.on_click(on_start_button_click)

# Display widgets in order: URL box, start button, output area.
display(youtube_link, start_button, output)



Text(value='', description='YouTube URL:', placeholder='Enter YouTube URL')

Button(description='Start Multimodal Analysis', style=ButtonStyle())

Output()

In [None]:
!pip install fastapi uvicorn google-api-python-client google-auth-httplib2 google-auth-oauthlib google-generativeai transformers torch

Collecting fastapi
  Downloading fastapi-0.115.11-py3-none-any.whl.metadata (27 kB)
Collecting uvicorn
  Downloading uvicorn-0.34.0-py3-none-any.whl.metadata (6.5 kB)
Collecting starlette<0.47.0,>=0.40.0 (from fastapi)
  Downloading starlette-0.46.1-py3-none-any.whl.metadata (6.2 kB)
Downloading fastapi-0.115.11-py3-none-any.whl (94 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m94.9/94.9 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading uvicorn-0.34.0-py3-none-any.whl (62 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.3/62.3 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading starlette-0.46.1-py3-none-any.whl (71 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.0/72.0 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: uvicorn, starlette, fastapi
Successfully installed fastapi-0.115.11 starlette-0.46.1 uvicorn-0.34.0


In [None]:
!pip install pyngrok

Collecting pyngrok
  Downloading pyngrok-7.2.3-py3-none-any.whl.metadata (8.7 kB)
Downloading pyngrok-7.2.3-py3-none-any.whl (23 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.2.3


In [None]:
# Step 2: Run the API (Optimized)
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from youtube_transcript_api import YouTubeTranscriptApi
from transformers import pipeline
import asyncio
import google.generativeai as genai
from googleapiclient.discovery import build
import nest_asyncio
from pyngrok import ngrok
import uvicorn

# Allow nested asyncio for Colab
nest_asyncio.apply()

# Initialize FastAPI app
app = FastAPI()

# Configure APIs
# Credentials are read from environment variables so real secrets never need
# to be saved inside the notebook. The placeholders remain as fallbacks;
# replace them only if you cannot set the environment variables.
import os

YOUTUBE_API_KEY = os.environ.get("YOUTUBE_API_KEY", "YOUR_YOUTUBE_API_KEY")  # YouTube Data API key
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "YOUR_GEMINI_API_KEY")  # Google Gemini API key
genai.configure(api_key=GEMINI_API_KEY)

# Configure ngrok
NGROK_AUTHTOKEN = os.environ.get("NGROK_AUTHTOKEN", "YOUR_ngrok authtoken")  # ngrok authtoken
ngrok.set_auth_token(NGROK_AUTHTOKEN)


class YouTubeRequest(BaseModel):
    """Request body for the /analyze endpoint."""
    # URL of the video to analyze; the endpoint extracts the video ID from it.
    youtube_url: str


async def get_youtube_subtitles(video_id: str):
    """
    Fetch YouTube subtitles without blocking the event loop.

    The transcript library call is synchronous, so it runs in a worker thread.
    This lets it overlap with other awaited work. Both the legacy static
    ``get_transcript`` API and the newer instance ``fetch`` API are supported.

    Returns:
        All subtitle lines joined with single spaces, or None on failure
        (the error is printed).
    """
    def _fetch_lines():
        if hasattr(YouTubeTranscriptApi, "get_transcript"):
            # Legacy API: a list of dicts, each carrying a "text" entry.
            return [entry["text"] for entry in YouTubeTranscriptApi.get_transcript(video_id)]
        # Newer API: iterating the fetched transcript yields snippets with `.text`.
        return [snippet.text for snippet in YouTubeTranscriptApi().fetch(video_id)]

    try:
        lines = await asyncio.to_thread(_fetch_lines)
        return " ".join(lines)
    except Exception as e:
        # Still best-effort (callers check for None), but no longer silent.
        print(f"Error fetching subtitles: {e}")
        return None


async def fetch_video_metadata(video_id: str, api_key: str):
    """
    Fetch YouTube video metadata without blocking the event loop.

    The API client is synchronous, so the request runs in a worker thread.

    Returns:
        The first item of the API response (requested parts: snippet and
        statistics), or None when there are no items or the request fails
        (the error is printed).
    """
    def _fetch_first_item():
        youtube = build("youtube", "v3", developerKey=api_key)
        response = youtube.videos().list(part="snippet,statistics", id=video_id).execute()
        # Handle a missing or empty "items" list explicitly rather than via
        # an IndexError caught by the handler below.
        items = response.get("items") or []
        return items[0] if items else None

    try:
        return await asyncio.to_thread(_fetch_first_item)
    except Exception as e:
        print(f"Error fetching video metadata: {e}")
        return None


import threading

# Sentiment pipeline shared by all requests, created on first use. The lock
# keeps worker threads from building it twice or using it at the same time.
_SENTIMENT_PIPELINE = None
_SENTIMENT_LOCK = threading.Lock()


async def analyze_sentiment(text: str):
    """
    Perform sentiment analysis without blocking the event loop.

    Only the first 512 characters of `text` are classified, to bound the
    processing cost. The model is loaded once and reused on later calls
    instead of being rebuilt for every request.

    Returns:
        The pipeline output for the truncated text.
    """
    def _classify():
        global _SENTIMENT_PIPELINE
        with _SENTIMENT_LOCK:
            if _SENTIMENT_PIPELINE is None:
                _SENTIMENT_PIPELINE = pipeline("sentiment-analysis")
            return _SENTIMENT_PIPELINE(text[:512])  # Limiting input to avoid processing overhead

    # Model inference is synchronous and CPU/GPU-bound; run it in a worker thread.
    return await asyncio.to_thread(_classify)


import threading

# Summarization pipeline shared by all requests, created on first use. The
# lock keeps worker threads from building it twice or using it at the same time.
_SUMMARIZATION_PIPELINE = None
_SUMMARIZATION_LOCK = threading.Lock()


async def summarize_text(text: str):
    """
    Summarize text without blocking the event loop.

    Only the first 1024 characters of `text` are summarized, to bound the
    processing cost. The model is loaded once and reused on later calls
    instead of being rebuilt for every request.

    Returns:
        The pipeline output for the truncated text.
    """
    def _summarize():
        global _SUMMARIZATION_PIPELINE
        with _SUMMARIZATION_LOCK:
            if _SUMMARIZATION_PIPELINE is None:
                _SUMMARIZATION_PIPELINE = pipeline("summarization")
            return _SUMMARIZATION_PIPELINE(text[:1024])  # Limiting input size to speed up execution

    # Model inference is synchronous and CPU/GPU-bound; run it in a worker thread.
    return await asyncio.to_thread(_summarize)


async def analyze_metadata_with_gemini(metadata):
    """
    Analyze metadata with Gemini without blocking the event loop.

    Uses the same Gemini model and ``generate_content`` call as the rest of
    the notebook, and returns the response text so the result is a plain
    string that can be placed in the JSON response.

    Returns:
        The generated text, or None on failure (the error is printed).
    """
    def _generate():
        model = genai.GenerativeModel("gemini-1.5-pro-latest")
        response = model.generate_content(f"Analyze the following video metadata: {metadata}")
        return response.text

    try:
        # The client call is synchronous; run it in a worker thread.
        return await asyncio.to_thread(_generate)
    except Exception as e:
        print(f"Error analyzing metadata with Gemini: {e}")
        return None


@app.post("/analyze")
async def analyze_youtube_video(request: YouTubeRequest):
    """
    Handle a YouTube video analysis request.

    Extracts the video ID from the URL, fetches the transcript and metadata,
    then runs sentiment analysis, summarization and the Gemini metadata
    analysis.

    Raises:
        HTTPException 400: the URL contains no recognizable video ID.
        HTTPException 404: the subtitles or the metadata could not be fetched.

    Returns:
        A dict with "transcript", "sentiment_analysis", "summary", "metadata"
        and "gemini_insights".
    """
    import re  # local import: this cell's import list does not include `re`

    # Accepts watch?v=, youtu.be/, /shorts/, /embed/ and /live/ links. The ID
    # stops at the first character that cannot be part of an ID.
    match = re.search(
        r"(?:[?&]v=|youtu\.be/|/shorts/|/embed/|/live/)([A-Za-z0-9_-]+)",
        request.youtube_url,
    )
    if not match:
        raise HTTPException(status_code=400, detail=f"Invalid YouTube URL: {request.youtube_url!r}")
    video_id = match.group(1)

    # Run tasks in parallel using asyncio.gather()
    transcript, metadata = await asyncio.gather(
        get_youtube_subtitles(video_id),
        fetch_video_metadata(video_id, YOUTUBE_API_KEY),
    )

    if not transcript:
        raise HTTPException(status_code=404, detail="Failed to fetch subtitles.")

    if not metadata:
        raise HTTPException(status_code=404, detail="Failed to fetch video metadata.")

    # Run text processing tasks concurrently
    sentiment_results, summary, gemini_insights = await asyncio.gather(
        analyze_sentiment(transcript),
        summarize_text(transcript),
        analyze_metadata_with_gemini(metadata),
    )

    return {
        "transcript": transcript,
        "sentiment_analysis": sentiment_results,
        "summary": summary,
        "metadata": metadata,
        "gemini_insights": gemini_insights
    }


# Run the API with ngrok
# The tunnel targets port 8000, the same port Uvicorn is started on below.
ngrok_tunnel = ngrok.connect(8000)
print("Public URL:", ngrok_tunnel.public_url)

# Run Uvicorn in a background thread
import threading

def run_uvicorn():
    """Serve `app` with Uvicorn on 0.0.0.0:8000 using the asyncio loop."""
    uvicorn.run(app, host="0.0.0.0", port=8000, loop="asyncio")

# Daemon thread: it does not keep the process alive once the main thread exits.
threading.Thread(target=run_uvicorn, daemon=True).start()
