In [None]:
# You will need to install the required libraries first in a Colab cell:
!pip install langchain langchain-openai langchain-community youtube-transcript-api

# --- 1. Import necessary libraries and securely access the API Key ---
import os
from google.colab import userdata
from langchain_community.document_loaders import YoutubeLoader
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnableLambda
from langchain.schema.output_parser import StrOutputParser
from langchain_openai import ChatOpenAI

# Read the OpenAI API key from Colab's secret store (left panel -> key icon)
# instead of hard-coding it or loading it from a .env file.
OPENAI_API_KEY = userdata.get('OPENAI_API_KEY')

# Also export the key as an environment variable so that libraries which
# look it up implicitly (no explicit key argument is passed below) can find it.
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY


# --- 2. Define the YouTube URL to be analyzed ---
# Example video used by this cell.
# NOTE(review): this ID was previously labelled as Tim Urban's "Inside the mind
# of a master procrastinator", but the recorded comment analysis for the same
# ID in a later cell discusses a long-running study on relationships and
# longevity, so that label looks wrong. Presumably this is Robert Waldinger's
# "What makes a good life?" talk -- confirm the title before relying on it.
YOUTUBE_URL = "https://www.youtube.com/watch?v=8KkKuTCFvzI"


# --- 3. Create a ChatOpenAI model ---
# No key is passed explicitly; the model is expected to pick it up from the
# OPENAI_API_KEY environment variable set above.
model = ChatOpenAI(model="gpt-4")


# --- 4. Define a function to load the video transcript ---
def load_transcript(url: str) -> str:
    """Return the transcript text of the YouTube video at ``url``.

    English and Indonesian transcripts are requested (``language=["en", "id"]``)
    and the page contents of all loaded documents are joined with single spaces.

    This is best-effort: any failure is printed and a fixed placeholder
    sentence is returned instead of raising, so callers always get a string.
    """
    try:
        docs = YoutubeLoader.from_youtube_url(
            url,
            add_video_info=False,
            language=["en", "id"],
        ).load()
        return " ".join(doc.page_content for doc in docs)
    except Exception as err:
        print(f"Could not load transcript: {err}")
        return "Transcript not available for this video."


# --- 5. Define the prompt template for the summary ---
# Two-message chat prompt: a fixed system instruction plus a human message
# whose only placeholder is {transcript}.
summary_template = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a YouTube video critic. Your goal is to provide a clear and concise summary of the video based on its transcript."),
        ("human", "Please provide a brief summary of the video with the following transcript:\n\n---\n\n{transcript}"),
    ]
)


# --- 6. Create the chain using LangChain Expression Language (LCEL) ---
# Pipeline: URL string -> transcript string -> chat prompt -> model -> plain text.
# NOTE(review): load_transcript returns a bare string, not a {"transcript": ...}
# dict; this presumably relies on LangChain mapping a non-dict input onto the
# template's single variable -- confirm for the installed version.
chain = (
    RunnableLambda(load_transcript)
    | summary_template
    | model
    | StrOutputParser()
)


# --- 7. Run the chain ---
# The chain input is the raw URL; load_transcript is the first step.
print(f"Generating summary for: {YOUTUBE_URL}\n")
result = chain.invoke(YOUTUBE_URL)

print("--- Summary ---")
print(result)


Collecting langchain-openai
  Downloading langchain_openai-0.3.23-py3-none-any.whl.metadata (2.3 kB)
Collecting langchain-community
  Downloading langchain_community-0.3.25-py3-none-any.whl.metadata (2.9 kB)
Collecting youtube-transcript-api
  Downloading youtube_transcript_api-1.1.0-py3-none-any.whl.metadata (23 kB)
Collecting langchain-core<1.0.0,>=0.3.58 (from langchain)
  Downloading langchain_core-0.3.65-py3-none-any.whl.metadata (5.8 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.9.1-py3-none-any.whl.metadata (3.8 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading marshmallow-3.26.1-py3-none-any.whl

In [None]:
# --- 1. Install the official Google API client library ---
!pip install google-api-python-client -q

# --- 2. Import necessary libraries ---
import os
from googleapiclient.discovery import build
from google.colab import userdata
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain_openai import ChatOpenAI

# --- 3. Set up API Keys and Models ---
# Both OPENAI_API_KEY and YOUTUBE_API_KEY must exist in Colab Secrets;
# they are read with userdata.get below.

# OpenAI key and model. This repeats the setup from the transcript-summary
# cell so that this cell can also be run on its own.
OPENAI_API_KEY = userdata.get('OPENAI_API_KEY')
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
model = ChatOpenAI(model="gpt-4")

# YouTube Data API key, passed explicitly to get_video_comments further down.
YOUTUBE_API_KEY = userdata.get('YOUTUBE_API_KEY')

# Video whose comments will be fetched and analyzed.
YOUTUBE_URL = "https://www.youtube.com/watch?v=8KkKuTCFvzI"


# --- 4. Function to Fetch Comments using the YouTube Data API ---
def get_video_comments(api_key, video_url, max_results=30):
    """
    Fetches top-level comments from a YouTube video using the YouTube Data API v3.

    Args:
        api_key: YouTube Data API key used as the ``developerKey``.
        video_url: URL of the video. ``watch?v=<id>`` (with ``v`` anywhere in
            the query string), ``youtu.be/<id>`` and ``/embed/``, ``/shorts/``,
            ``/live/``, ``/v/`` path forms are recognised.
        max_results: Maximum number of top-level comments to return. Requests
            are paged because a single API call returns at most 100 threads.

    Returns:
        A ``(comments, error)`` tuple. On success ``comments`` is a list of
        comment strings and ``error`` is ``None``; on any failure ``comments``
        is an empty list and ``error`` is the error message. Never raises.
    """
    from urllib.parse import parse_qs, urlparse

    comments = []
    try:
        # Parse the URL properly instead of splitting on "=", which picked the
        # wrong value whenever "v" was not the first query parameter and
        # failed outright for short links.
        parsed = urlparse(video_url)
        host = (parsed.hostname or "").lower()
        path_parts = [part for part in parsed.path.split("/") if part]

        video_id = ""
        if host == "youtu.be":
            video_id = path_parts[0] if path_parts else ""
        else:
            video_id = parse_qs(parsed.query).get("v", [""])[0]
            if not video_id and len(path_parts) >= 2 and path_parts[0] in ("embed", "shorts", "live", "v"):
                video_id = path_parts[1]

        if not video_id:
            return [], f"Could not extract a video ID from URL: {video_url!r}"

        # Build the YouTube service object
        youtube = build('youtube', 'v3', developerKey=api_key)

        page_token = None
        while len(comments) < max_results:
            request_args = {
                'part': 'snippet',
                'videoId': video_id,
                # The API accepts at most 100 results per page.
                'maxResults': min(100, max_results - len(comments)),
                'textFormat': 'plainText',
            }
            if page_token:
                request_args['pageToken'] = page_token

            response = youtube.commentThreads().list(**request_args).execute()

            # Extract the comment text from each returned thread
            comments.extend(
                item['snippet']['topLevelComment']['snippet']['textDisplay']
                for item in response.get('items', [])
            )

            page_token = response.get('nextPageToken')
            if not page_token:
                break

        return comments[:max_results], None  # Return comments and no error

    except Exception as e:
        # Return an empty list and the error message
        return [], str(e)

# --- 5. Fetch and Analyze the comments ---
print("Fetching comments using the YouTube Data API...")
comments_list, error = get_video_comments(YOUTUBE_API_KEY, YOUTUBE_URL)

if not error:
    print(f"Successfully fetched {len(comments_list)} comments.")

    if not comments_list:
        # The request succeeded but returned nothing to send to the model.
        print("\nNo comments were fetched to analyze.")
    else:
        # One text blob for the LLM, with a visible separator between comments.
        all_comments_text = "\n\n---\n\n".join(comments_list)

        # System role + the analysis request; {comment_list} is the only placeholder.
        analysis_prompt = ChatPromptTemplate.from_messages([
            (
                "system",
                "You are an expert at analyzing social media comments. Your task is to read a list of YouTube comments and provide a summary of the overall sentiment and key discussion points.",
            ),
            (
                "human",
                """Please analyze the following YouTube comments:

<comments>
{comment_list}
</comments>

Based on these comments, please provide:
1.  **Overall Sentiment:** What is the general feeling (e.g., overwhelmingly positive, mixed, mostly negative)?
2.  **Key Themes:** What are the 2-3 main topics or ideas that people are repeatedly talking about?
3.  **Constructive Feedback or Questions:** Are there any common suggestions or questions being asked in the comments?
4.  **A Memorable Quote:** Pick one comment that you think best represents the overall reaction.""",
            ),
        ])

        # prompt -> model -> plain string
        analysis_chain = analysis_prompt | model | StrOutputParser()
        print("\nAnalyzing comments with the LLM...")
        analysis_result = analysis_chain.invoke({"comment_list": all_comments_text})

        print("\n--- Comment Analysis ---")
        print(analysis_result)
else:
    # Fetching failed: show the error and the usual things to check.
    print(f"\nAn error occurred while fetching comments: {error}")
    print("Please check the following:\n1. Your YouTube API Key is correct and active.\n2. The 'YouTube Data API v3' is enabled in your Google Cloud project.\n3. The video has comments enabled.")



Fetching comments using the YouTube Data API...
Successfully fetched 30 comments.

Analyzing comments with the LLM...

--- Comment Analysis ---
1. **Overall Sentiment:** The general sentiment appears to be mixed. There are quite a few positive comments about the content, appreciating the insights provided, and expressing personal growth. However, there is also significant criticism concerning the gender exclusivity of the study discussed.
   
2. **Key Themes:** The main discussion points include: (a) the importance of relationships and social bonds in leading a good life and achieving longevity, (b) commentary and reactions to the study discussed in the talk, particularly its focus only on men and not including women, and (c) apparent testimonials related to someone named 'mansaarnault' and the beneficial effects of their rituals on people's lives.

3. **Constructive Feedback or Questions:** The most common feedback and question is about the study's focus only on men and exclusion of w

In [1]:
# @title Web & YouTube Creator Sentiment Analysis Agent

# Step 1: Install necessary packages
!pip install -q langchain langgraph langchain_openai python-dotenv tavily-python google-api-python-client

# Step 2: Import libraries and set up API keys from Colab secrets
import os
import re
from google.colab import userdata
from typing import TypedDict, Annotated, Optional, List
import json
from uuid import uuid4

from langgraph.graph import StateGraph, END, add_messages
from langchain_core.messages import AIMessage, HumanMessage, ToolMessage
from langchain_core.tools import tool
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_openai import ChatOpenAI
from langgraph.checkpoint.memory import MemorySaver

# Set API keys from Colab secrets.
# The OpenAI and Tavily keys are exported as environment variables; the
# YouTube key is kept in a module-level name that get_youtube_channel_info
# reads when it builds the API client.
try:
    os.environ["OPENAI_API_KEY"] = userdata.get('OPENAI_API_KEY')
    os.environ["TAVILY_API_KEY"] = userdata.get('TAVILY_API_KEY')
    YOUTUBE_API_KEY = userdata.get('YOUTUBE_API_KEY')
except userdata.SecretNotFoundError:
    print("ERROR: Secrets not found. Please ensure you have 'OPENAI_API_KEY', 'TAVILY_API_KEY', and 'YOUTUBE_API_KEY' set in your Colab secrets (left panel -> 🔑).")
    # Stop here: nothing below can work without the keys.
    # NOTE(review): exit() inside a notebook presumably shuts down the kernel
    # rather than just ending this cell -- confirm that is intended; raising
    # an exception would be less disruptive.
    exit()

# Step 3: Define the tools and the state for the graph

# Initialize the Tavily Search tool.
# The name is set explicitly because the prompt in run_analysis refers to the
# tool by this exact name.
# NOTE(review): the recorded cell output echoes this line, which looks like a
# deprecation warning for TavilySearchResults -- check the installed
# langchain-community release notes.
search_tool = TavilySearchResults(max_results=10, name="tavily_search_results_json")

def extract_video_id(url: str) -> Optional[str]:
    """Extract the YouTube video ID from a URL.

    Recognised forms are a ``v=<id>`` query parameter and the ``/v/``,
    ``/e/``, ``embed/``, ``youtu.be/``, ``/shorts/`` and ``/live/`` path styles.

    Args:
        url: The URL (or URL fragment) to inspect.

    Returns:
        The video ID, or ``None`` when the URL is empty or contains no
        non-empty ID. An empty string is never returned.
    """
    if not url:
        return None

    patterns = [
        # The ID runs up to the next path/query/fragment delimiter or
        # whitespace. ``+`` (rather than ``*``) means a URL such as
        # ``watch?v=`` yields None instead of an empty ID.
        r"(?:v=|\/v\/|youtu\.be\/|embed\/|\/e\/|\/shorts\/|\/live\/)([^#&?\/\s]+)",
    ]
    for pattern in patterns:
        match = re.search(pattern, url)
        if match:
            return match.group(1)
    return None

@tool
async def get_youtube_channel_info(video_url: str) -> dict:
    """
    Fetches the channel title (creator's name) for a given YouTube video URL.
    This should be the first tool used to identify the creator.
    """
    # The docstring above is left untouched on purpose: @tool uses it as the
    # tool description.
    # Imported inside the function body rather than at the top of the cell.
    from googleapiclient.discovery import build

    vid = extract_video_id(video_url)
    if not vid:
        return {"error": "Invalid YouTube URL provided. Could not extract video ID."}

    # NOTE(review): the .execute() calls below are not awaited, so they run
    # synchronously inside this coroutine.
    try:
        client = build('youtube', 'v3', developerKey=YOUTUBE_API_KEY)

        # Step 1: video -> owning channel ID.
        videos = client.videos().list(part='snippet', id=vid).execute()
        if not videos.get('items'):
            return {"error": "Video not found."}
        owner_id = videos['items'][0]['snippet']['channelId']

        # Step 2: channel ID -> channel title.
        channels = client.channels().list(part='snippet', id=owner_id).execute()
        if not channels.get('items'):
            return {"error": "Channel not found."}

        return {"channel_name": channels['items'][0]['snippet']['title']}

    except Exception as exc:
        # Failures are reported as data so the agent can react to them.
        return {"error": f"An error occurred while fetching video/channel details: {exc}"}


class State(TypedDict):
    """Graph state shared by the nodes: a single ``messages`` list."""
    # ``add_messages`` is attached to this key via ``Annotated``.
    # NOTE(review): presumably it makes node updates append to the list
    # instead of replacing it -- confirm against the langgraph documentation.
    messages: Annotated[list, add_messages]

# Step 4: Define the graph components

# Set up the AI model and bind the tools to it.
# `tools` is also the lookup list tool_node searches by tool name, so every
# tool the model may call has to be listed here.
llm = ChatOpenAI(model="gpt-4o")
tools = [search_tool, get_youtube_channel_info]
# The agent node invokes this tool-bound model, not `llm` directly.
llm_with_tools = llm.bind_tools(tools)

# Define the nodes of the graph
async def agent_node(state: State):
    """Ask the tool-bound LLM for the next message given the conversation so far.

    Returns a state update containing only the model's reply, which is either
    a final answer or a request for one or more tool calls.
    """
    history = state["messages"]
    reply = await llm_with_tools.ainvoke(history)
    return {"messages": [reply]}

async def tool_node(state: State):
    """Execute the tool calls requested by the most recent message.

    For every tool call on the last message, the matching tool from the
    module-level ``tools`` list is awaited with the call's arguments and the
    result is wrapped in a ``ToolMessage`` carrying the same ``tool_call_id``.

    Failures are reported to the agent as message content instead of being
    raised: an unknown tool name or an exception inside a tool produces an
    error string for that call only, and the remaining calls still run.

    Returns:
        A state update ``{"messages": [...]}`` with one ``ToolMessage`` per
        requested call, or an empty list when there are no tool calls.
    """
    last_message = state["messages"][-1]
    requested_calls = getattr(last_message, "tool_calls", None)
    if not requested_calls:
        return {"messages": []}

    tool_messages = []
    for tool_call in requested_calls:
        tool_name = tool_call["name"]
        tool_to_call = next((t for t in tools if t.name == tool_name), None)

        if tool_to_call is None:
            content = f"Error: Tool '{tool_name}' not found."
        else:
            try:
                tool_output = await tool_to_call.ainvoke(tool_call["args"])
            except Exception as e:
                content = f"Error executing tool '{tool_name}': {e}"
            else:
                # Serialization is kept out of the tool's try block so that a
                # result which is not JSON-serializable is still passed on (as
                # its str()) rather than reported as a tool failure.
                # ensure_ascii=False keeps non-ASCII text readable for the
                # model instead of turning it into \uXXXX escapes.
                try:
                    content = json.dumps(tool_output, ensure_ascii=False)
                except (TypeError, ValueError):
                    content = str(tool_output)

        tool_messages.append(ToolMessage(content=content, tool_call_id=tool_call["id"]))

    return {"messages": tool_messages}

# Define the router logic
def tool_router(state: State):
    """Pick the next step after the agent has spoken.

    Returns ``"tool_node"`` when the newest message carries at least one tool
    call, otherwise ``END``.
    """
    newest = state["messages"][-1]
    wants_tools = hasattr(newest, "tool_calls") and len(newest.tool_calls) > 0
    return "tool_node" if wants_tools else END

# Step 5: Construct the graph
# Two nodes in a loop: "agent" runs first; tool_router then either sends the
# run to "tool_node" or ends it, and "tool_node" always hands back to "agent".
graph_builder = StateGraph(State)
graph_builder.add_node("agent", agent_node)
graph_builder.add_node("tool_node", tool_node)
graph_builder.set_entry_point("agent")
# The mapping translates tool_router's return value into the next node.
graph_builder.add_conditional_edges("agent", tool_router, {"tool_node": "tool_node", END: END})
graph_builder.add_edge("tool_node", "agent")

# In-memory checkpointer; run_analysis supplies the thread_id it is used with.
memory = MemorySaver()
graph = graph_builder.compile(checkpointer=memory)

# Step 6: Define an async function to run the agent and stream the output
async def run_analysis(youtube_url: str, thread_id: str):
    """Run the creator sentiment analysis for one video and print the report.

    Sends a three-step instruction prompt to the graph, logs each tool start
    and end while events stream in, and finally prints the content of the
    last message produced by the "agent" node.

    Args:
        youtube_url: URL of the video whose creator should be analyzed.
        thread_id: Identifier passed to the graph as ``configurable.thread_id``.
    """
    print(f"\nAnalyzing sentiment for the creator of YouTube URL: '{youtube_url}'")
    print("-" * 30)

    # Instruction prompt walking the agent through the multi-step task.
    prompt = f"""
    Please perform a sentiment analysis for the creator of the YouTube video at the following URL: {youtube_url}.

    Your task involves three steps:
    1.  First, use the `get_youtube_channel_info` tool to find the name of the content creator or channel.
    2.  Next, take the channel name you just found and use it as the query for the `tavily_search_results_json` tool. This will gather general web sentiment and information about the creator.
    3.  Finally, analyze all the search results to determine the overall public sentiment towards the creator/channel.

    Your final answer should be a comprehensive report that includes:
    - The name of the channel/creator you analyzed.
    - The overall sentiment (e.g., Overwhelmingly Positive, Mixed, Mostly Negative, etc.).
    - Key themes, praise, or criticisms found across the web search results.
    - Reference at least 2-3 of the source URLs that informed your analysis.
    """

    run_config = {"configurable": {"thread_id": thread_id}}
    event_stream = graph.astream_events(
        {"messages": [HumanMessage(content=prompt)]},
        config=run_config,
        version="v2",
    )

    # Overwritten on every completed "agent" step, so it ends up holding the
    # agent's last message.
    final_response = None
    async for event in event_stream:
        kind = event["event"]
        if kind == "on_tool_start":
            print(f"Tool Started: {event['name']} with args {event['data']['input']}")
        elif kind == "on_tool_end":
            print(f"Tool Ended: {event['name']}")
        elif kind == "on_chain_end" and event["name"] == "agent" and event["data"].get('output'):
            final_response = event["data"]["output"]["messages"][-1].content

    print("\n--- SENTIMENT ANALYSIS COMPLETE ---")
    print(final_response)

# Step 7: Main execution block to run the analysis
async def main():
    """Prompt for a YouTube URL and run the creator sentiment analysis on it.

    Leading and trailing whitespace is stripped from the input, so blank or
    whitespace-only input exits early and pasted spaces or newlines do not
    end up in the URL. Each run gets a fresh random thread ID.
    """
    # Prompt user for input
    query = input("Please enter the YouTube video URL to analyze the creator's sentiment: ").strip()
    if not query:
        print("No URL entered. Exiting.")
        return

    thread_id = str(uuid4())
    await run_analysis(query, thread_id)

# Run the main function.
# This is a bare top-level `await`, which plain Python scripts do not allow;
# it presumably relies on the notebook running cells inside an event loop.
await main()


  search_tool = TavilySearchResults(max_results=10, name="tavily_search_results_json")


Please enter the YouTube video URL to analyze the creator's sentiment: https://www.youtube.com/watch?v=egGAWaRwJgo

Analyzing sentiment for the creator of YouTube URL: 'https://www.youtube.com/watch?v=egGAWaRwJgo'
------------------------------
Tool Started: get_youtube_channel_info with args {'video_url': 'https://www.youtube.com/watch?v=egGAWaRwJgo'}
Tool Ended: get_youtube_channel_info
Tool Started: tavily_search_results_json with args {'query': 'MONKEY MAJIK OFFICIAL CHANNEL'}
Tool Ended: tavily_search_results_json

--- SENTIMENT ANALYSIS COMPLETE ---
### Sentiment Analysis Report for "MONKEY MAJIK OFFICIAL CHANNEL"

**Channel/Creator Analyzed:** MONKEY MAJIK OFFICIAL CHANNEL

**Overall Sentiment:** Mixed

**Key Themes, Praise, and Criticisms:**

1. **Popularity and Music Quality:**
   - MONKEY MAJIK is known for producing popular music videos and has a dedicated fanbase. Several official music videos like "O.G. Summer" and "fly" are highlighted in their playlists, showcasing their