In [1]:
import html
import os
from typing import TypedDict

from dotenv import load_dotenv
from googleapiclient.discovery import build
from langchain_community.tools import DuckDuckGoSearchRun
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_text_splitters import RecursiveCharacterTextSplitter
from youtube_transcript_api import YouTubeTranscriptApi

In [2]:
# Read the local .env file so the API keys below are available in os.environ
load_dotenv()

# Shared clients reused by every later cell:
#   youtube  - YouTube Data API v3 client, keyed by YOUTUBE_API_KEY
#   embedder - OpenAI embedding model used to build the FAISS index
#   llm      - chat model that writes the final answers
youtube = build("youtube", "v3", developerKey=os.environ.get("YOUTUBE_API_KEY"))
embedder = OpenAIEmbeddings(model="text-embedding-3-small")
llm = ChatOpenAI(temperature=0.7, model="gpt-3.5-turbo")

In [3]:
class SearchResult:
    """One YouTube search hit together with its transcript text.

    Attributes:
        video_id: Value of ``search_result['id']['videoId']``.
        title: Value of ``search_result['snippet']['title']`` with HTML
            entities decoded.
        transcript: Transcript segments joined by single spaces, or ``""``
            when the transcript could not be fetched.
    """

    def __init__(self, search_result):
        """Extract the video fields and fetch the transcript immediately.

        Args:
            search_result: One item of a YouTube ``search.list`` response; it
                must contain ``id.videoId`` and ``snippet.title``.

        Raises:
            KeyError: If either of those keys is missing.
        """
        self.video_id = search_result['id']['videoId']
        # search.list returns titles HTML-escaped ("&amp;", "&#39;"); decode
        # them so the text that gets embedded reads naturally.
        self.title = html.unescape(search_result['snippet']['title'])
        self.transcript = self._get_transcript()

    def _get_transcript(self):
        """Return the video's transcript as one string, or ``""`` on failure.

        Any exception raised while fetching is printed and swallowed so that
        a single video without captions does not abort the whole search.
        """
        try:
            if hasattr(YouTubeTranscriptApi, "get_transcript"):
                # Older youtube-transcript-api releases: static helper that
                # returns a list of {'text': ...} dicts.
                segments = YouTubeTranscriptApi.get_transcript(self.video_id)
            else:
                # Newer releases dropped the static helper; the instance API
                # returns a FetchedTranscript, converted here to the same
                # list-of-dicts shape.
                segments = YouTubeTranscriptApi().fetch(self.video_id).to_raw_data()
            return " ".join(segment['text'] for segment in segments)
        except Exception as e:
            print(f"Transcript error for {self.video_id}: {str(e)}")
            return ""

In [4]:
def search_yt(query, max_results=3):
    """Search YouTube for captioned videos matching ``query``.

    Uses the module-level ``youtube`` client. Any exception raised while
    building or executing the request is printed and swallowed.

    Args:
        query: Free-text search string.
        max_results: Value sent as the API's ``maxResults`` parameter.

    Returns:
        The ``items`` list of the API response, or ``[]`` when the response
        has no ``items`` key or the call failed.
    """
    search_params = {
        "part": "snippet",
        "q": query,
        "type": "video",
        # only videos that have captions, since transcripts are needed later
        "videoCaption": "closedCaption",
        "maxResults": max_results,
    }
    try:
        response = youtube.search().list(**search_params).execute()
        return response.get('items', [])
    except Exception as e:
        print(f"YouTube API error: {str(e)}")
        return []

In [5]:
# Splitter shared by create_faiss_index: chunk_size=1000 with chunk_overlap=200
# so neighbouring chunks keep some common context.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=200, chunk_size=1000)

In [6]:
def create_faiss_index(items):
    """Build a FAISS vector store from the transcripts of YouTube search hits.

    Each item is wrapped in ``SearchResult`` (which fetches its transcript).
    Hits with an empty transcript are skipped; the rest are rendered as
    ``"Title: ...\\nTranscript: ..."``, split with ``text_splitter`` and turned
    into ``Document`` chunks tagged with the video id and title.

    Args:
        items: Iterable of raw search result items.

    Returns:
        The FAISS store built with ``embedder``, or ``None`` when no chunk was
        produced or the index build raised (a message is printed either way).
    """
    docs = []
    for raw_item in items:
        video = SearchResult(raw_item)
        if video.transcript:
            text = f"Title: {video.title}\nTranscript: {video.transcript}"
            docs.extend(
                Document(
                    page_content=piece,
                    # fresh dict per chunk so documents never share metadata
                    metadata={"video_id": video.video_id, "title": video.title},
                )
                for piece in text_splitter.split_text(text)
            )

    if len(docs) == 0:
        print("No valid documents created")
        return None

    try:
        index = FAISS.from_documents(docs, embedder)
    except Exception as e:
        print(f"FAISS error: {str(e)}")
        return None
    return index

In [7]:
class State(TypedDict):
    """Shared state dictionary passed between the pipeline step functions.

    In the visible part of this notebook, ``topic`` and ``author`` are
    produced by ``get_topic_details``, ``question`` by ``get_user_qnuestion``,
    and ``youtube_summary`` is written by ``YouTube_search``. The remaining
    fields are not exercised by that code; the notes on them are inferred
    from their names and should be confirmed.
    """
    topic: str  # subject searched for, e.g. a book title
    author: str  # person whose voice the answers should imitate
    question: str  # user question answered by the pipeline
    objective_check: bool  # purpose not shown here - TODO confirm
    youtube_summary: str  # answer generated from YouTube transcripts
    recent_fact_checks: str  # presumably filled by the fact-check step - confirm
    validate_yt_summary: str  # purpose not shown here - TODO confirm
    validate_fact_checks: str  # purpose not shown here - TODO confirm
    generate_summary: str  # purpose not shown here - TODO confirm

In [8]:
from typing import Optional
from langchain_core.runnables import RunnableLambda

In [9]:
def get_topic_details(state=None) -> dict:
    """Return the hard-coded topic and author for this notebook run.

    Args:
        state: Accepted for call compatibility with the other step
            functions; never read.

    Returns:
        A new dict with the keys ``topic`` and ``author``.
    """
    details = dict(topic="Rich Dad Poor Dad", author="Robert Kiyosaki")
    return details

In [10]:
def get_user_qnuestion(state=None) -> dict:
    """Return the hard-coded user question for this notebook run.

    Args:
        state: Accepted for call compatibility with the other step
            functions; never read. Defaults to ``None`` so the function can
            be called without arguments, like ``get_topic_details``.

    Returns:
        A new dict with the single key ``question``.
    """
    return {
        "question": "current economic condition what is the best investment?"
    }


# Correctly spelled alias. The misspelled name stays the real definition so
# existing references to it (and its __name__) keep working.
get_user_question = get_user_qnuestion

In [11]:
def _join_page_content(docs):
    """Join the ``page_content`` of each retrieved document with blank lines."""
    return "\n\n".join(doc.page_content for doc in docs)


def YouTube_search(state: State):
    """Answer ``state["question"]`` from YouTube transcripts and store the result.

    Steps: search YouTube for ``"<topic> <author>"``, index the transcripts of
    the hits with FAISS, retrieve chunks for the question, and ask the LLM to
    answer in the author's voice using only that context.

    Args:
        state: Pipeline state; ``topic``, ``author`` and ``question`` are read.

    Returns:
        The same ``state`` object. ``youtube_summary`` is set only when every
        step succeeds; after an empty search, a failed index build, or any
        exception, the state is returned unchanged and a message is printed.
    """
    try:
        print("\n=== Starting YouTube Search ===")

        # Step 1: Search YouTube
        items = search_yt(f"{state['topic']} {state['author']}")
        if not items:
            print("❌ No search results found")
            return state
        print("✅ Valid search results found")

        # Step 2: Create FAISS index
        faiss_index = create_faiss_index(items)
        if not faiss_index:
            print("❌ FAISS index creation failed")
            return state
        print("✅ FAISS index created successfully")

        # Step 3: Setup RAG chain
        template = """Analyze and respond as {author} would. Rules:
        1. Use ONLY provided context
        2. Maintain {author}'s style
        3. State "Need information" if context is insufficient
        
        Context: {context}
        Question: {question}
        {author}'s analysis:"""

        prompt = ChatPromptTemplate.from_template(template)

        rag_chain = (
            {
                # The retriever yields Document objects; join their text so the
                # prompt receives plain transcript excerpts instead of the
                # Python repr of a list (which also leaks metadata).
                "context": faiss_index.as_retriever() | _join_page_content,
                "question": RunnablePassthrough(),
                "author": lambda _: state["author"],
            }
            | prompt
            | llm
            | StrOutputParser()
        )

        # Step 4: Generate summary
        state["youtube_summary"] = rag_chain.invoke(state["question"])
        print("✅ YouTube summary generated successfully")
        return state

    except Exception as e:
        # Best-effort step: report the failure and hand the state back as-is
        print(f"\n❌ Critical search error: {str(e)}")
        return state

In [None]:
def fact_checks(state: State):
    """Answer ``state["question"]`` from a web search and store the result.

    Runs a DuckDuckGo search for ``"<topic> <author>"`` and passes the
    returned text as context to the LLM. The answer is written to
    ``state["recent_fact_checks"]``; ``youtube_summary`` is left untouched.

    NOTE(review): the prompt wording is carried over from ``YouTube_search``;
    confirm it is the intended instruction for the fact-check step.

    Args:
        state: Pipeline state; ``topic``, ``author`` and ``question`` are read.

    Returns:
        The same ``state`` object. ``recent_fact_checks`` is set only when
        every step succeeds; after an empty search or any exception, the
        state is returned unchanged and a message is printed.
    """
    try:
        print("\n=== Starting Fact Check ===")

        # Step 1: Search the web
        search = DuckDuckGoSearchRun()
        web_results = search.invoke(f"{state['topic']} {state['author']}")
        if not web_results:
            print("❌ No search results found")
            return state
        print("✅ Valid search results found")

        # Step 2: Setup chain
        template = """Analyze and respond as {author} would. Rules:
        1. Use ONLY provided context
        2. Maintain {author}'s style
        3. State "Need information" if context is insufficient
        
        Context: {context}
        Question: {question}
        {author}'s analysis:"""

        prompt = ChatPromptTemplate.from_template(template)
        chain = prompt | llm | StrOutputParser()

        # Step 3: Generate fact check
        state["recent_fact_checks"] = chain.invoke({
            "context": web_results,
            "question": state["question"],
            "author": state["author"],
        })
        print("✅ Fact check generated successfully")
        return state

    except Exception as e:
        # Best-effort step: report the failure and hand the state back as-is
        print(f"\n❌ Critical fact check error: {str(e)}")
        return state