In [None]:
import os

from dotenv import load_dotenv, find_dotenv

# Load environment variables (API keys read via os.getenv below) from the
# .env file located by find_dotenv().
load_dotenv(dotenv_path=find_dotenv())

In [None]:
from utils import get_google_api_client

# NOTE(review): google_api_client is not referenced by any other cell visible
# here (get_latest_video_id builds its own client via build()) — confirm this
# is still needed.
google_api_client = get_google_api_client()

In [None]:
from googleapiclient.discovery import build
from langchain.agents import Tool
import json, os


def get_latest_video_id(channel_id):
    """Return the ID and title of the newest video on a YouTube channel.

    Calls the YouTube Data API v3 ``search.list`` endpoint, ordered by date and
    limited to one result, using the API key read from the ``GOOGLE_API_KEY``
    environment variable.

    Args:
        channel_id: YouTube channel ID. When given as a string, surrounding
            whitespace and quote characters are stripped, because the value
            may arrive as free-form tool input produced by an agent.

    Returns:
        A ``(video_id, video_title)`` tuple for the newest video.

    Raises:
        IndexError: If the search returns no videos for the channel.
    """
    if isinstance(channel_id, str):
        channel_id = channel_id.strip(" \t\r\n'\"")

    youtube = build(
        serviceName="youtube",
        version="v3",
        developerKey=os.getenv("GOOGLE_API_KEY"),
    )
    request = youtube.search().list(
        part="snippet",
        channelId=channel_id,
        order="date",
        maxResults=1,
        # Restrict results to videos: without this the newest search hit can be
        # a channel or playlist resource, whose "id" has no "videoId" key.
        type="video",
    )
    response = request.execute()

    items = response.get("items") or []
    if not items:
        # Same exception type as indexing an empty list, but with a message
        # that says which channel produced no results.
        raise IndexError(f"No videos found for channel ID {channel_id!r}")

    latest_video = items[0]
    video_id = latest_video["id"]["videoId"]
    video_title = latest_video["snippet"]["title"]

    return video_id, video_title


# Expose get_latest_video_id to the agent as a LangChain tool.
get_latest_video_tool = Tool(
    func=get_latest_video_id,
    name="Get Latest YouTube Video",
    description=(
        "Fetches the ID and title of the latest video on a YouTube channel "
        "specified by a YouTube channel ID."
    ),
)

In [None]:
from youtube_transcript_api import YouTubeTranscriptApi


# TODO(dnh): Try RAG for this instead so we don't have to stuff large transcripts into context window
def get_video_transcript(video_id, max_length=4000):
    """Return a length-capped excerpt of a video's English transcript.

    The excerpt starts with the first caption segment that begins at or after
    the midpoint of the caption timeline, and stops before the first segment
    that would push the space-joined text past ``max_length`` characters.

    Args:
        video_id: YouTube video ID. When given as a string, surrounding
            whitespace and quote characters are stripped, because the value
            may arrive as free-form tool input produced by an agent.
        max_length: Upper bound on the length of the space-joined excerpt.

    Returns:
        The selected segment texts joined by single spaces, stripped of
        surrounding whitespace; an empty string if no segment qualifies.
    """
    if isinstance(video_id, str):
        video_id = video_id.strip(" \t\r\n'\"")

    transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
    transcript = transcript_list.find_transcript(["en"]).fetch()

    # NOTE(review): newer youtube-transcript-api releases appear to return a
    # transcript object rather than a list of dicts; when it exposes
    # to_raw_data(), use that to get the dict form indexed below — confirm
    # against the installed version.
    to_raw_data = getattr(transcript, "to_raw_data", None)
    if callable(to_raw_data):
        transcript = to_raw_data()

    # The midpoint is compared against segment start times, so it must be a
    # position on the timeline. Summing durations is not: overlapping caption
    # segments inflate the sum and gaps deflate it. Use the latest segment end.
    end_time = max(
        (segment["start"] + segment["duration"] for segment in transcript),
        default=0,
    )
    middle_time = end_time / 2

    parts = []
    used = 0  # characters consumed so far, counting one separator per part
    for segment in transcript:
        if segment["start"] < middle_time:
            continue
        text = segment["text"]
        if used + len(text) > max_length:
            break
        parts.append(text)
        used += len(text) + 1

    return " ".join(parts).strip()


# Expose get_video_transcript to the agent as a LangChain tool.
# NOTE(review): the wrapped function returns only a length-capped excerpt that
# starts around the transcript midpoint, which the description below does not
# tell the agent — confirm whether the wording should mention this.
get_transcript_tool = Tool(
    func=get_video_transcript,
    name="Get YouTube Channel Transcript by Video ID",
    description=(
        "Retrieves the transcript of a given YouTube video "
        "from the video id."
    ),
)

In [None]:
# Tools handed to the agent when it is initialized.
tools = [get_latest_video_tool, get_transcript_tool]

In [None]:
from langchain_openai import OpenAI

# LLM that drives the agent; the API key is read from the environment.
llm = OpenAI(
    temperature=0.7,
    api_key=os.getenv("OPENAI_API_KEY"),
)

In [None]:
from langchain.agents import initialize_agent

# Build the agent from the LLM and the tool list defined in earlier cells.
agent = initialize_agent(
    llm=llm,
    tools=tools,
    verbose=True,
    agent="zero-shot-react-description",
)

In [None]:
# Task description given to the agent; the channel is identified by its ID.
prompt = (
    "Find the latest episode from the channel with channel ID "
    "UCwzCMiicL-hBUzyjWiJaseg and provide a transcript."
)

# Hand the task to the agent and keep what it returns.
result = agent.run(prompt)

In [None]:
# Debugging transcript fetching
# from youtube_transcript_api import YouTubeTranscriptApi

# video_id = "nwXSS0wBsFA"
# transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
# transcript = transcript_list.find_transcript(["en"]).fetch()
# print(transcript)