In [1]:
from langchain_community.chat_models import ChatOpenAI
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Pinecone
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableParallel, RunnablePassthrough, RunnableLambda
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import HuggingFaceDatasetLoader, YoutubeLoader
from langchain_community.chat_models import ChatOpenAI
from langchain_pinecone import PineconeVectorStore
from pinecone import Pinecone
from sklearn.metrics.pairwise import cosine_similarity
from dotenv import load_dotenv
from openai import OpenAI
import tiktoken
import numpy as np
import os

In [6]:
load_dotenv()

OPEN_ROUTER_API_KEY = os.getenv("OPEN_ROUTER_API_KEY")
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")

In [7]:
# Hugging face embeddings
text = "Some text for testing"
hf_embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/bert-large-nli-max-tokens")
query_result = hf_embeddings.embed_query(text)

In [8]:
# Free Llama 3.1 API via OpenRouter
# Use this instead of OpenAI if you don't have an OpenAI account with credits

openrouter_client = OpenAI(
  base_url="https://openrouter.ai/api/v1",
  api_key=OPEN_ROUTER_API_KEY
)

In [9]:
tokenizer = tiktoken.get_encoding('cl100k_base')

def tiktoken_length(text):
  tokens = tokenizer.encode(text, disallowed_special=())
  return len(tokens)

text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000,
                                               chunk_overlap=100,
                                               length_function=tiktoken_length)

In [10]:
# Loading data
loader = YoutubeLoader.from_youtube_url("https://www.youtube.com/watch?v=WA9gVKKPsBo", add_video_info=True)
data = loader.load()

In [11]:
texts = text_splitter.split_documents(data)

In [12]:
vectorstore = PineconeVectorStore(index_name="customer-support", embedding=hf_embeddings)

index_name = 'customer-support'
namespace = "langchain-docs"

In [13]:
for document in texts:
  vectorstore_from_texts = PineconeVectorStore.from_texts([f"Source: {t.metadata['source']}, Title: {t.metadata['title']} \n\n Content: {t.page_content}" for t in texts], hf_embeddings, index_name=index_name, namespace=namespace)

In [19]:
from pinecone import Pinecone

In [20]:
# Initialize
pc = Pinecone(api_key=PINECONE_API_KEY)

pinecone_index = pc.Index(index_name)

In [28]:
template = """Answer the question based only on the following context:
{context}
Question: {question}"""

prompt = ChatPromptTemplate.from_template(template)

In [34]:
retreiver = vectorstore.as_retriever()

chain = (
  RunnableParallel({"context": retreiver, "question": RunnablePassthrough()})
  | prompt
  | openrouter_client.chat.completions.create
  | StrOutputParser()
)

In [35]:
chain.invoke("Tell me about Lex's thoughts")

TypeError: create() takes 1 argument(s) but 2 were given

In [21]:
query = "What was Lex's overall reaction to the whole video?"

In [22]:
query_embeddings = hf_embeddings.embed_query(query)

In [23]:
top_matches = pinecone_index.query(vector=query_embeddings, top_k=10, include_metadata=True, namespace=namespace)

In [24]:
contexts = [item['metadata']['text'] for item in top_matches['matches']]

In [25]:
augmented_query = "<CONTEXT>\n" + "\n\n-------\n\n".join(contexts) + "\n--------\n</CONTEXT>\n\n\n\nMY QUESTION:\n" + query

In [26]:
primer = f"""You are a personal assistant. Answer any questions that I have about the YouTube video provided.
You always answer questions based only on the information you have been provided.
"""

res = openrouter_client.chat.completions.create(
  model="gpt-3.5-turbo",
  messages=[
    {"role": "system", "content": primer},
    {"role": "user", "content": augmented_query}
  ]
)

answer = res.choices[0].message.content

In [27]:
print(answer)

Based on the provided context, there is no information regarding Lex's overall reaction to the entire video. The content shared focuses on the discussion between the guest, Jordan Jonas, and Lex Fridman on various topics such as survival, happiness, personal experiences, and family history.
