In [1]:
from langchain_community.chat_models import ChatOpenAI
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Pinecone
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableParallel, RunnablePassthrough, RunnableLambda
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import HuggingFaceDatasetLoader, YoutubeLoader
from langchain_pinecone import PineconeVectorStore
from pinecone import Pinecone
from sklearn.metrics.pairwise import cosine_similarity
from dotenv import load_dotenv
from openai import OpenAI
import tiktoken
import numpy as np
import os

In [2]:
# Load environment variables via python-dotenv, then read the two API keys
# this notebook uses. Either value is None when the variable is not set.
load_dotenv()

PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
OPEN_ROUTER_API_KEY = os.environ.get("OPEN_ROUTER_API_KEY")

In [19]:
# Hugging Face sentence-embedding model used for both indexing and querying.
hf_embeddings = HuggingFaceEmbeddings(
  model_name="sentence-transformers/bert-large-nli-max-tokens",
)

# Smoke test: embed a short sample string to confirm the model loads and runs.
text = "Some text for testing"
query_result = hf_embeddings.embed_query(text)

modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/4.01k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/626 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.34G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/399 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]



1_Pooling/config.json:   0%|          | 0.00/191 [00:00<?, ?B/s]

In [20]:
# OpenRouter is reached through the OpenAI client by pointing it at the
# OpenRouter base URL and authenticating with the OpenRouter key.
# Use this instead of OpenAI if you don't have an OpenAI account with credits.

openrouter_client = OpenAI(
  api_key=OPEN_ROUTER_API_KEY,
  base_url="https://openrouter.ai/api/v1",
)

In [21]:
# Tokenizer used only for measuring chunk lengths.
tokenizer = tiktoken.get_encoding("cl100k_base")


def tiktoken_length(text):
  """Return the number of cl100k_base tokens that `text` encodes to.

  `disallowed_special=()` turns off tiktoken's special-token check, so text
  that happens to contain special-token strings is encoded as ordinary text
  instead of raising.
  """
  return len(tokenizer.encode(text, disallowed_special=()))


# chunk_size and chunk_overlap are measured with tiktoken_length, i.e. in
# tokens rather than characters.
# NOTE(review): 2000-token chunks may be longer than the embedding model's
# maximum input length, in which case the tail of each chunk would not
# influence its vector — confirm the limit for the model in use.
text_splitter = RecursiveCharacterTextSplitter(
  length_function=tiktoken_length,
  chunk_size=2000,
  chunk_overlap=100,
)

In [23]:
# Load the source video as LangChain documents.
# add_video_info=True requests video metadata alongside the content;
# presumably this is what supplies the 'title' key read when the chunks are
# formatted for indexing — verify against the loader's output.
loader = YoutubeLoader.from_youtube_url(
  "https://www.youtube.com/watch?v=WA9gVKKPsBo",
  add_video_info=True,
)
data = loader.load()

In [24]:
# Split the loaded documents into chunks using the token-length-based
# splitter configured earlier; each chunk keeps its parent document's metadata
# (the indexing step reads 'source' and 'title' from it).
texts = text_splitter.split_documents(data)

In [25]:
# Pinecone index and namespace that hold this notebook's vectors.
# Defined before first use so the store below, the upsert step and the raw
# index queries all share the same values instead of repeating literals.
index_name = 'customer-support'
namespace = "langchain-docs"

# LangChain wrapper around the index, using the same embedding model as the
# upsert step. The namespace is passed explicitly: the chunks are written to
# `namespace`, so a store without it would address the default namespace and
# not see them.
vectorstore = PineconeVectorStore(
  index_name=index_name,
  embedding=hf_embeddings,
  namespace=namespace,
)

In [26]:
# Embed and upsert every chunk exactly once.
# This used to sit inside `for document in texts:`, which never used the loop
# variable and therefore re-embedded and re-upserted the whole corpus once per
# chunk. If the store assigns new ids on each insert, that also leaves
# duplicate vectors in the index.
# Each stored text is prefixed with its source and title so that information
# is part of what gets embedded and returned at query time.
vectorstore_from_texts = PineconeVectorStore.from_texts(
  [
    f"Source: {t.metadata['source']}, Title: {t.metadata['title']} \n\n Content: {t.page_content}"
    for t in texts
  ],
  hf_embeddings,
  index_name=index_name,
  namespace=namespace,
)

In [27]:
from pinecone import Pinecone

In [28]:
# Initialize the Pinecone client with the API key read from the environment,
# then open a handle to the index named by `index_name` for direct queries.
pc = Pinecone(api_key=PINECONE_API_KEY)

pinecone_index = pc.Index(index_name)

In [60]:
# The user question; it is embedded for retrieval and appended to the prompt.
query = "What was Lex's overall reaction to the whole video?"

In [61]:
# Embed the question with the same model that embedded the stored chunks, so
# the query vector and the indexed vectors come from the same model.
query_embeddings = hf_embeddings.embed_query(query)

In [62]:
# Fetch the 10 closest stored vectors, with their metadata, from the
# namespace the chunks were written to.
top_matches = pinecone_index.query(
  namespace=namespace,
  vector=query_embeddings,
  top_k=10,
  include_metadata=True,
)

In [63]:
# Collect the 'text' metadata field of every returned match, in result order.
contexts = [
  match["metadata"]["text"]
  for match in top_matches["matches"]
]

In [64]:
# Assemble the user message: the retrieved chunks wrapped in <CONTEXT> tags
# (separated by dashed rules), followed by the question itself.
augmented_query = "".join([
  "<CONTEXT>\n",
  "\n\n-------\n\n".join(contexts),
  "\n--------\n</CONTEXT>\n\n\n\nMY QUESTION:\n",
  query,
])

In [65]:
# System prompt: restrict the assistant to the supplied context.
# Plain string literals are used because nothing is interpolated.
primer = (
  "You are a personal assistant. Answer any questions that I have about the YouTube video provided.\n"
  "You always answer questions based only on the information you have been provided.\n"
)

# Send the system prompt plus the context-augmented question to the chat
# completions endpoint through the OpenRouter client.
# NOTE(review): OpenRouter model ids are usually provider-prefixed
# (e.g. "openai/gpt-3.5-turbo") — confirm this bare id resolves to the
# intended model.
res = openrouter_client.chat.completions.create(
  messages=[
    {"role": "system", "content": primer},
    {"role": "user", "content": augmented_query},
  ],
  model="gpt-3.5-turbo",
)

# Text of the first returned choice.
answer = res.choices[0].message.content

In [66]:
# Print the reply as plain text.
print(answer)

I'm sorry, but based on the provided content, I do not have information about Lex's overall reaction to the whole video. The provided text mainly consists of the discussion between Jordan Jonas and Lex Fridman on various topics such as happiness, pursuing spiritual fullness, and their personal experiences. If you would like, I can provide a summary of the main points discussed in the content.
