In [73]:
import html
import os

import googleapiclient.discovery
from dotenv import load_dotenv
from googleapiclient.discovery import build
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import OpenAIEmbeddings,ChatOpenAI
from youtube_transcript_api import YouTubeTranscriptApi

load_dotenv()

In [74]:
# Fail fast, with a readable message, when a credential is missing.
# Re-assigning os.environ[name] = os.getenv(name) does nothing when the
# variable is set and raises an opaque "TypeError: str expected, not NoneType"
# when it is not, so check explicitly instead.
_REQUIRED_ENV_VARS = ("OPENAI_API_KEY", "YOUTUBE_API_KEY")
_missing_env_vars = [name for name in _REQUIRED_ENV_VARS if not os.getenv(name)]
if _missing_env_vars:
    raise EnvironmentError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_env_vars)
        + ". Define them in the shell or in a .env file before running this notebook."
    )

In [75]:
# Embedding model handed to FAISS.from_documents when the index is built.
EMBEDDING_MODEL_NAME = "text-embedding-3-small"
embedder = OpenAIEmbeddings(model=EMBEDDING_MODEL_NAME)

In [76]:
# YouTube Data API v3 client, authenticated with the YOUTUBE_API_KEY
# environment variable; search_yt issues its requests through this object.
youtube = googleapiclient.discovery.build(
    "youtube",
    "v3",
    developerKey=os.getenv("YOUTUBE_API_KEY"),
)

In [86]:
class Search_Result:
    """One video item from a YouTube Data API ``search.list`` response.

    Attributes:
        video_id: The item's ``id.videoId``.
        title: ``snippet.title`` with HTML entities decoded.
        description: ``snippet.description`` with HTML entities decoded.
        thumbnails: URL of the ``default`` thumbnail (a single string,
            despite the plural name).
        transcript: Caption text joined with single spaces, or ``""`` when
            the transcript could not be retrieved.
    """

    def __init__(self, search_result) -> None:
        """Parse one raw search item and fetch its transcript.

        Args:
            search_result: A dict with ``id.videoId`` and a ``snippet``
                holding ``title``, ``description`` and
                ``thumbnails.default.url``.

        Raises:
            KeyError: If any of those keys is absent.
        """
        snippet = search_result['snippet']
        self.video_id = search_result['id']['videoId']
        # search.list is known to return HTML-escaped snippet text
        # (e.g. "&amp;", "&#39;"); decode it so the indexed text is clean.
        self.title = html.unescape(snippet['title'])
        self.description = html.unescape(snippet['description'])
        self.thumbnails = snippet['thumbnails']['default']['url']
        self.transcript = self._get_transcript()
        print(self.video_id)

    def _get_transcript(self):
        """Retrieve the video's transcript using youtube-transcript-api.

        Works with both API generations of the library: the legacy static
        ``YouTubeTranscriptApi.get_transcript`` and the instance-based
        ``YouTubeTranscriptApi().fetch``. Without the fallback, the broad
        ``except`` below would hide the AttributeError raised on newer
        releases and every transcript would silently become empty.

        Returns:
            The transcript segments joined with single spaces, or ``""`` if
            retrieval fails for any reason (best effort; the error is printed).
        """
        try:
            legacy_fetch = getattr(YouTubeTranscriptApi, "get_transcript", None)
            if legacy_fetch is not None:
                segments = legacy_fetch(self.video_id)
            else:
                # Newer releases: fetch() returns an object whose
                # to_raw_data() yields the same list-of-dicts shape.
                segments = YouTubeTranscriptApi().fetch(self.video_id).to_raw_data()
            return " ".join(segment['text'] for segment in segments)
        except Exception as e:
            print(f"Error getting transcript for {self.video_id}: {str(e)}")
            return ""

In [87]:
class Search_Response:
    """Parsed view of one page of a YouTube ``search.list`` response.

    Attributes:
        prev_page_token: The response's ``prevPageToken``, or None if absent.
        next_page_token: The response's ``nextPageToken``, or None if absent.
        search_results: One ``Search_Result`` per entry in ``items``
            (an empty list when the response has no ``items`` key).
    """

    def __init__(self, search_response) -> None:
        self.prev_page_token = search_response.get('prevPageToken')
        self.next_page_token = search_response.get('nextPageToken')

        parsed_results = []
        for raw_item in search_response.get('items', []):
            parsed_results.append(Search_Result(raw_item))
        self.search_results = parsed_results

In [88]:
def search_yt(query, max_results=5, page_token=None):
    """Run a YouTube video search and wrap the response.

    Args:
        query: Search terms, sent as the ``q`` parameter.
        max_results: Value sent as ``maxResults``.
        page_token: Value sent as ``pageToken``; None by default.

    Returns:
        A ``Search_Response`` built from the executed request. The request
        asks for ``type='video'`` with ``videoCaption='closedCaption'``.
    """
    search_params = {
        "part": "snippet",
        "maxResults": max_results,
        "pageToken": page_token,
        "q": query,
        "videoCaption": 'closedCaption',
        "type": 'video',
    }
    raw_response = youtube.search().list(**search_params).execute()
    return Search_Response(raw_response)

In [89]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Chunking parameters for transcript text; create_faiss_index splits each
# video's text with this splitter before embedding.
# NOTE(review): sizes are presumably in characters (the splitter's default
# length function) - confirm.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200

text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=CHUNK_OVERLAP,
    chunk_size=CHUNK_SIZE,
)

def create_faiss_index(search_response):
    """Build a FAISS index from the transcripts of the searched videos.

    Each video with a non-empty transcript is split into chunks by
    ``text_splitter``; every chunk becomes one ``Document`` tagged with the
    video's id and title, embedded with ``embedder``.

    Args:
        search_response: Object exposing ``search_results``, an iterable of
            items with ``video_id``, ``title`` and ``transcript`` attributes.

    Returns:
        The vector store returned by ``FAISS.from_documents``.

    Raises:
        ValueError: If no video yielded any transcript text, so there is
            nothing to index.
    """
    documents = []
    for result in search_response.search_results:
        # Search_Result._get_transcript returns "" on failure; a title-only
        # chunk carries no content to ground answers on, so skip the video.
        if not result.transcript.strip():
            continue

        # Split text into chunks
        chunks = text_splitter.split_text(
            f"Title: {result.title}\nTranscript: {result.transcript}"
        )
        # Create multiple documents per video. Each chunk gets its own
        # metadata dict so editing one document's metadata later cannot
        # leak into its siblings.
        for chunk in chunks:
            documents.append(
                Document(
                    page_content=chunk,
                    metadata={"video_id": result.video_id, "title": result.title},
                )
            )

    # FAISS.from_documents fails with an opaque error on an empty list;
    # surface the real cause instead.
    if not documents:
        raise ValueError(
            "No transcript text available to index: the search returned no "
            "videos or none of their transcripts could be retrieved."
        )

    return FAISS.from_documents(documents, embedder)

In [90]:
# Chat model that generates the final answer in the RAG chain.
# Alternative that was tried: ChatOpenAI(model="gpt-4o")
llm = ChatOpenAI(
    temperature=0.7,
    model="gpt-3.5-turbo",
)

In [None]:
if __name__ == "__main__":
    # User inputs
    topic = input("Enter the Topic to be discussed: ")
    author = input("Enter the Author Name: ")
    
    # Search YouTube for captioned videos matching both the topic and the author
    search_response = search_yt(f"{topic} {author}", max_results=3)
    
    # Create and save FAISS index
    faiss_index = create_faiss_index(search_response)
    faiss_index.save_local("youtube_index")
    
    # Define prompt template
    template = """Act as an expert analyst of {author}'s work. Follow these rules strictly:

    1. Base answers ONLY on provided context from verified sources:
       - YouTube video transcripts
       - Published articles
       - Author's own writings

    2. Match {author}'s distinctive style:
       - Philosophical yet accessible tone
       - Historical framework for modern issues
       - Balanced skepticism about technology
       - Emphasis on collective human narratives

    3. If question cannot be answered using context:
       - Clearly state "This is not my area of expertise" or "I need more sources"
       - Do NOT fabricate information
       - Do NOT speculate or invent answers
       - Do NOT use external knowledge

    Context provided:
    {context}

    Question: {question}

    Provide a thorough analysis in {author}'s voice:"""
    
    prompt = ChatPromptTemplate.from_template(template)

    def _format_docs(docs):
        """Join the retrieved chunks' text, separated by blank lines."""
        return "\n\n".join(doc.page_content for doc in docs)
    
    # Create RAG chain. The retriever returns Document objects; without the
    # formatting step their Python repr (metadata included) would be pasted
    # into {context} instead of the plain chunk text.
    rag_chain = (
        {"context": faiss_index.as_retriever() | _format_docs,
         "question": RunnablePassthrough(),
         # The chain input is the question; the author comes from the
         # variable captured above.
         "author": lambda _: author}
        | prompt 
        | llm
        | StrOutputParser()
    )
    
  

nzj7Wg4DAbs
Mde2q7GFCrw
2w37ty9gGU8

Yuval noah-style Answer:
------------------------------
Yuval Noah Harari would explain the role of storytelling in human evolution by emphasizing the power of fiction and imagination in shaping our collective behavior and cooperation. He would likely argue that storytelling has been crucial in enabling humans to build complex societies, achieve remarkable feats like building the Pyramids or reaching the moon, and create large-scale networks of cooperation.

Harari would highlight that storytelling allows humans to believe in shared myths and narratives that bind strangers together, enabling them to work towards common goals. He might point out that while other animals lack the capacity to be persuaded by fictional stories, humans have the unique ability to create and believe in narratives that transcend individual or tribal boundaries.

Additionally, Harari might suggest that religion is a prime example of how storytelling has played a significant 

In [92]:
# Ask one question, run it through the RAG chain and print the answer
# under a header naming the author.
query = input("\nEnter your question about the topic: ")
response = rag_chain.invoke(query)

divider = "-" * 30
print(f"\n{author}-style Answer:\n{divider}\n{response}")


Yuval noah-style Answer:
------------------------------
On the individual level, when comparing Homo sapiens to Neanderthals, it is important to note that Neanderthals actually had bigger brains than us. This may lead one to question how Homo sapiens were able to out-compete Neanderthals and other human-like species. The key to our success lies not in individual abilities but in our collective ability to cooperate in large numbers.

Around 70,000 years ago, Homo sapiens developed an incredible ability to cooperate in unlimited numbers. This led to the formation of large networks for political, commercial, and religious purposes. We began trading items over vast distances, spreading ideas, and adopting artistic fashions. Our ability to build these extensive networks of cooperation set us apart from other species.

While Neanderthals and chimpanzees could cooperate in smaller groups, Homo sapiens excelled in forming global trade networks with billions of people. This capacity for large-