In [87]:
#importing global dependencies
import os
import numpy as np
import pandas as pd
from dotenv import load_dotenv
from uuid import uuid4
from tqdm.autonotebook import tqdm
import tiktoken

In [88]:
#importing langchain dependencies
import langchain
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
from langchain.chains import RetrievalQA
from langchain_core.prompts import PromptTemplate

In [89]:
# FREE LLM ALTERNATIVES
# Option 1: Groq (Free tier with rate limits)
from langchain_groq import ChatGroq

"""
# Option 2: HuggingFace Pipeline (Local inference)
from langchain_huggingface import HuggingFacePipeline
# Option 3: Ollama (Local LLM server)
from langchain_community.llms import Ollama
"""

'\n# Option 2: HuggingFace Pipeline (Local inference)\nfrom langchain_huggingface import HuggingFacePipeline\n# Option 3: Ollama (Local LLM server)\nfrom langchain_community.llms import Ollama\n'

In [90]:
# FREE EMBEDDING ALTERNATIVES
# Option 1: HuggingFace Sentence Transformers (Free)

from langchain_huggingface import HuggingFaceEmbeddings

# Option 2: Ollama Embeddings (Free local)
# from langchain_community.embeddings import OllamaEmbeddings

In [91]:
# FREE VECTOR STORE ALTERNATIVES  
# Option 1: ChromaDB (Completely free)
#from langchain_community.vectorstores import Chroma
# Option 2: FAISS (Free, in-memory)
#from langchain_community.vectorstores import FAISS

# Option 3: Keep Pinecone (has free tier, but limited)

import pinecone
from langchain_pinecone import PineconeVectorStore
from pinecone import Pinecone, ServerlessSpec


In [92]:
# FREE SEARCH ALTERNATIVES
# Option 1: DuckDuckGo Search (Free, no API key needed) - generally worse
# from langchain_community.tools import DuckDuckGoSearchResults 
# Option 2: Keep Tavily if you have free tier

from langchain_tavily import TavilySearch
from langchain.tools import Tool

In [93]:
# AGENTS
from langchain.agents import AgentExecutor, Tool, AgentType
from langchain.agents.react.agent import create_react_agent
from langchain import hub

In [94]:
# Load environmental variables from a .env file
load_dotenv()

True

In [95]:
#load the datasets

loader = CSVLoader(
    file_path="./Datasets/tedx-transcripts.csv",
    encoding="utf-8",
    source_column="transcript",
    metadata_columns= ["main_speaker", "name", "speaker_occupation", "title", "url", "description"]
)

data = loader.load()

len(data)

2467

In [96]:
#tokenization
#In a given String the number of Tokens are counted by

def num_tokens(question, encoding_name):
    
    encoding = tiktoken.get_encoding(encoding_name)
    num_tokens = encoding.encode(question)

    return encoding, num_tokens


question = "How many TEDx talk transcripts are on the transcripts-dataset?"

encoding, num_tokens = num_tokens(question, "cl100k_base")

print(f'Number of Words: {len(question.split())}')
print(f'Number of Characters: {len(question)}')
print(f'List of Tokens: {num_tokens}')
print(f'Nr of Tokens: {len(num_tokens)}')


Number of Words: 9
Number of Characters: 62
List of Tokens: [4438, 1690, 84296, 87, 3137, 61412, 527, 389, 279, 61412, 1773, 8534, 30]
Nr of Tokens: 13


In [97]:
#decoding tokenizer
def decode_tokens(tokens, encoding):
    return encoding.decode(tokens)

decoded_question = decode_tokens(num_tokens, encoding)
print(f'Decoded Question: {decoded_question}')

Decoded Question: How many TEDx talk transcripts are on the transcripts-dataset?


#Embeddings
They are dense numerical representations of data such as words, sentences or images in a continuous high dimensional space where semantic relationships are preserved.

In [98]:
#embeddings
#defining cosine similarity function

def cosine_similarity(query_embedding, doc_embedding):
    
    # Compute the cosine similarity between two embeddings - query and document embeddings
    dot_product = np.dot(query_embedding, doc_embedding)

    # Compute the L2 norms (magnitudes) of the embeddings - query and document embeddings
    norm_a = np.linalg.norm(query_embedding)
    norm_b = np.linalg.norm(doc_embedding)

    #default case - if either embedding is zero, return 0
    if norm_a == 0 or norm_b == 0:
        return 0
    return dot_product / (norm_a * norm_b) # Cosine similarity

In [99]:
#Using Ada v2 default model for embeddings
question = "Who spoke about the 'The surprising science of happiness' TED Talk?"
document = "The surprising science of happiness is a TED Talk by Dan Gilbert, a psychologist at Harvard University."

embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
hf_token = os.getenv("HF_API_KEY")

query_embedding = embedding.embed_query(question)
document_embedding = embedding.embed_query(document)

cosine_sim = cosine_similarity(query_embedding, document_embedding)

print(f'Query Embedding: {query_embedding[:5]} Query Dimensions: {len(query_embedding)}')
print(f'Document Embedding: {document_embedding[:5]} Document Dimension: {len(document_embedding)}')
print(f'Cosine Similarity: {cosine_sim}')


Query Embedding: [-0.0777687206864357, 0.0603812113404274, -0.004912718199193478, 0.11627016961574554, 0.029163332656025887] Query Dimensions: 384
Document Embedding: [-0.0889691635966301, 0.06262019276618958, -0.026936212554574013, 0.1271456927061081, -0.03308025747537613] Document Dimension: 384
Cosine Similarity: 0.8051514197516081


In [100]:
#Using Text-embedding-3-large model for embeddings
question = "Who spoke about the 'The surprising science of happiness' TED Talk?"
document = "The surprising science of happiness is a TED Talk by Dan Gilbert, a psychologist at Harvard University."

embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
hf_token = os.getenv("HF_API_KEY")

query_embedding = embedding.embed_query(question)
document_embedding = embedding.embed_query(document)

cosine_sim = cosine_similarity(query_embedding, document_embedding)

print(f'Query Embedding: {query_embedding[:5]} Query Dimensions: {len(query_embedding)}')  # Print first 5 values for brevity
print(f'Document Embedding: {document_embedding[:5]} Document Dimension: {len(document_embedding)}')  # Print first 5 values for brevity
print(f'Cosine Similarity: {cosine_sim}')


Query Embedding: [-0.0777687206864357, 0.0603812113404274, -0.004912718199193478, 0.11627016961574554, 0.029163332656025887] Query Dimensions: 384
Document Embedding: [-0.0889691635966301, 0.06262019276618958, -0.026936212554574013, 0.1271456927061081, -0.03308025747537613] Document Dimension: 384
Cosine Similarity: 0.8051514197516081


Here's a shorter, clearer version of your documentation:

---

### Text Splitters

LLMs like GPT-3.5-turbo-0125 have a **context window** limit—this is the maximum number of tokens the model can process at once (16,385 tokens for this model).

To handle longer texts, we use a **text splitter** with the following configuration:

* **Model**: `gpt-3.5-turbo-0125` (context window: 16,385 tokens)
* **Chunk Size**: Number of tokens per chunk
* **Chunk Overlap**: Tokens shared between consecutive chunks
* **Separators**: Ordered list of characters used to split text

---


In [101]:
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    model_name="gpt-3.5-turbo-0125",
    chunk_size=512,
    chunk_overlap=20,
    separators=["\n\n", "\n", " ", ""]
)

# Example usage
#text_data = "The quick brown fox jumps over the lazy dog."
#docs = text_splitter.create_documents([text_data])
#print(docs)

### Vector Store

A **Vector Store** is a database optimized for storing and retrieving high-dimensional **vector embeddings**, enabling LLMs to understand context and meaning for more accurate responses.

### Indexing with Pinecone

**Pinecone** is a high-performance, serverless vector store used for fast vector search and retrieval.

To use Pinecone, first create an **Index** with the following parameters:

* **Index Name**
* **Dimension**: Must match the embedding model's output size
* **Metric**: Should align with the one used during embedding training (e.g., cosine, dot product)
* **Serverless Specs**: Define resource allocation and region

In [102]:
load_dotenv()
#Initialising PineconeDb (FINALLY)
index_name = "langchain-pinecone-test"
PINECONE_API_KEY = load_dotenv("PINECONE_API_KEY")

pc = Pinecone(
    api_key=PINECONE_API_KEY,
    environment="us-west1-gcp"
)

In [105]:
#Create Index
pc.create_index(
    name=index_name,
    dimension=384,
    metric="cosine",
    spec = ServerlessSpec(
        cloud = 'aws',
        region = 'us-east-1'
        )
)

index = pc.Index(index_name)

In [104]:
# # Delete Index

pc.delete_index(index_name)

In [106]:
#Index Listed
pc.list_indexes()

[
    {
        "name": "langchain-pinecone-test",
        "metric": "cosine",
        "host": "langchain-pinecone-test-uyspnoi.svc.aped-4627-b74a.pinecone.io",
        "spec": {
            "serverless": {
                "cloud": "aws",
                "region": "us-east-1"
            }
        },
        "status": {
            "ready": true,
            "state": "Ready"
        },
        "vector_type": "dense",
        "dimension": 384,
        "deletion_protection": "disabled",
        "tags": null
    }
]

In [107]:
# Describe Pinecone Index

index = pc.Index(index_name)

index.describe_index_stats()

{'dimension': 384,
 'index_fullness': 0.0,
 'metric': 'cosine',
 'namespaces': {},
 'total_vector_count': 0,
 'vector_type': 'dense'}

# Namespaces

Pinecone lets you split data into namespaces within an index, enabling queries on specific groups. For example, you can organize data by content, language, or any custom category.

In this example, we’ll upload 100 records to one namespace, then split them into two more namespaces of 50 each—making a total of three namespaces.

In [108]:
#Creating Main Namespace

splits = text_splitter.split_documents(data[:100])

embed = embedding=HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

db = PineconeVectorStore.from_documents(documents=splits,
                                        embedding=embed,
                                        index_name=index_name,
                                        namespace="main"
                                        )

In [109]:
# Creating Vectorstore of Main index - PineconeVectorStore

vectorstore = PineconeVectorStore(index_name=index_name,
                                  namespace="main",
                                  embedding=embed)

In [110]:
# Search for similarity

query = "Who is Al Gore"

similarity = vectorstore.similarity_search(query, k=4)

for i in range(len(similarity)):
  print(f"-------Result Nr. {i}-------")
  print(f"Main Speaker: {similarity[i].metadata['main_speaker']}")
  print(f" ")

In [111]:
# Search for similarity with score

query = "Who is Al Gore"

similarity_with_score = vectorstore.similarity_search_with_score(query, k=4)

for i in range(len(similarity_with_score)):
  print(f"-------Result Nr. {i}-------")
  print(f"Title: {similarity_with_score[i][0].metadata['title']}")
  print(f"Main Speaker: {similarity_with_score[i][0].metadata['main_speaker']}")
  print(f"Score: {similarity_with_score[i][1]}")
  print(f" ")


In [112]:
# Create Chunked Metadata

def chunked_metadata_embeddings(documents, embed):

    chunked_metadata = []

    chunked_text = text_splitter.split_documents(documents)

    for index, text in enumerate(tqdm(chunked_text)):


        payload = {
              "metadata": {
                  "source": text.metadata['source'],
                  "row": text.metadata['row'],
                  "chunk_num": index,
                  "main_speaker": text.metadata['main_speaker'],
                  "name": text.metadata['name'],
                  "speaker_occupation": text.metadata['speaker_occupation'],
                  "title": text.metadata['title'],
                  "url": text.metadata['url'],
                  "description": text.metadata['description'],
              },
              "id": str(uuid4()),
              "values": embed.embed_documents([text.page_content])[0]  # Assuming `embed` is defined elsewhere
          }

        chunked_metadata.append(payload)

    return chunked_metadata

In [113]:
# Create first split

split_one = chunked_metadata_embeddings(data[:50], embed)
len(split_one)

  0%|          | 0/402 [00:00<?, ?it/s]

402

In [114]:
# Create second split

split_two = chunked_metadata_embeddings(data[50:100], embed)
len(split_two)

  0%|          | 0/335 [00:00<?, ?it/s]

335

In [115]:
# Upsert the document

def batch_upsert(split,
                 index ,
                 namespace,
                 batch_size):

    print(f"Split Length: {len(split)}")
    for i in range(0, len(split), batch_size):

      batch = split[i:i + batch_size]

      index.upsert(vectors=batch,
                   namespace=namespace)

In [116]:
batch_upsert(split_one, index, "first_split", 10)

Split Length: 402


In [117]:
# Function to find item with main_speaker

def find_item_with_row(metadata_list, main_speaker):
    for item in metadata_list:
        if item['metadata']['main_speaker'] == main_speaker:
            return item

# Call the function to find item with main_speaker = Al Gore
result_item = find_item_with_row(split_one, "Al Gore")

# Print the result
print(f'Chunk Nr: {result_item["metadata"]["chunk_num"]}')
print(f'Chunk ID: {result_item["id"]}')
print(f'Chunk Title: {result_item["metadata"]["title"]}')

Chunk Nr: 9
Chunk ID: 47cde3fb-9a78-47a8-ac45-369d44029558
Chunk Title: Averting the climate crisis


In [None]:
# index.delete(namespace="last_split", delete_all=True)

In [118]:
index.describe_index_stats()

{'dimension': 384,
 'index_fullness': 0.0,
 'metric': 'cosine',
 'namespaces': {'first_split': {'vector_count': 402},
                'main': {'vector_count': 737}},
 'total_vector_count': 1139,
 'vector_type': 'dense'}

In [119]:
batch_upsert(split_two, index, "last_split", 20)

Split Length: 335


In [120]:
index.describe_index_stats()

{'dimension': 384,
 'index_fullness': 0.0,
 'metric': 'cosine',
 'namespaces': {'first_split': {'vector_count': 402},
                'last_split': {'vector_count': 335},
                'main': {'vector_count': 737}},
 'total_vector_count': 1474,
 'vector_type': 'dense'}

In [121]:
# Define Users

query_one = "Who is Al Gore?"
query_two = "Who is Rick Warren?"

# Users dictionary
users = [{
            'name': 'John',
            'namespace': 'first_split',
            'query': query_one

            },
           {
             "name": "Jane",
             "namespace": 'last_split',
             "query": query_two
           }]

def vectorize_query(embed, query):

    return embed.embed_query(query)


In [122]:
# Create our vectors for each of our queries:

query_vector_one = vectorize_query(embed, query_one)
query_vector_two = vectorize_query(embed, query_two)

In [123]:
len(query_vector_one), len(query_vector_two)

(384, 384)

In [124]:
# Define a list of new key-value pairs to add to each user

new_key_value_pairs = [
    {'vector_query': query_vector_one},
    {'vector_query': query_vector_two},

]
# Loop through the list of users and the list of new key-value pairs
for user, new_pair in zip(users, new_key_value_pairs):
    user.update(new_pair)


In [125]:
users[0]["name"], users[1]["name"]

('John', 'Jane')

In [126]:
users[0].keys()


dict_keys(['name', 'namespace', 'query', 'vector_query'])

In [127]:
print(f"Name: {users[0]['name']}")
print(f"Namespace: {users[0]['namespace']}")
print(f"Query: {users[0]['query']}")
print(f"Vector Query: {users[0]['vector_query'][:3]}")

Name: John
Namespace: first_split
Query: Who is Al Gore?
Vector Query: [0.015368515625596046, 0.06578097492456436, 0.018664006143808365]


In [128]:
# Query the namespace

john = [t for t in users if t.get('name') == 'John'][0]

john_query_vector = john['vector_query']
john_namespace = john['namespace']

index.query(vector=john_query_vector, top_k=2, include_metadata=True, namespace=john_namespace)

{'matches': [{'id': '8012d21e-21ff-455a-ab12-a00336bd2972',
              'metadata': {'chunk_num': 306.0,
                           'description': 'In this passionate talk, legendary '
                                          'spacecraft designer Burt Rutan '
                                          'lambasts the US government-funded '
                                          'space program for stagnating and '
                                          'asks entrepreneurs to pick up where '
                                          'NASA has left off.',
                           'main_speaker': 'Burt Rutan',
                           'name': 'Burt Rutan: The real future of space '
                                   'exploration',
                           'row': 37.0,
                           'source': 'I want to start off by saying, Houston, '
                                     "we have a problem. We're entering a "
                                     'second generation o

RAG

Now that we have set up our namespaces, we can prepare our RAG pipeline. We will do so, using Agents

Retrieval

In [129]:
# Create vectorstore
embed = embedding=HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

vectorstore = PineconeVectorStore(index_name=index_name,
                                  namespace="main",
                                  embedding=embed)

In [130]:
# Retrieval


# Chat completion llm wit Groq
GROQ_API_KEY = os.getenv('GROQ_API_KEY')
llm = ChatGroq(
    groq_api_key=GROQ_API_KEY,
    model_name="openai/gpt-oss-20b",   # lightweight Groq model
    temperature=0.0,                   # deterministic
    max_tokens=512,                    # shorter, faster responses
    top_p=0.8,                         # slight diversity, avoids overthinking
    frequency_penalty=0.2,             # discourage long/repetitive text
    presence_penalty=0.0,              # no unnecessary topic drift
    #stop=["\n\n", "###"],              # stops before rambling
    timeout=15                         # fail fast if too slow
)

# Conversational memory
conversational_memory = ConversationBufferWindowMemory(
                        memory_key='chat_history',
                        k=5,
                        return_messages=True)

# Retrieval qa chain
qa_db = RetrievalQA.from_chain_type(
                                    llm=llm,
                                    chain_type="stuff",
                                    retriever=vectorstore.as_retriever())

                    top_p was transferred to model_kwargs.
                    Please confirm that top_p is what you intended.
  validated_self = self.__pydantic_validator__.validate_python(data, self_instance=self)
                    frequency_penalty was transferred to model_kwargs.
                    Please confirm that frequency_penalty is what you intended.
  validated_self = self.__pydantic_validator__.validate_python(data, self_instance=self)
                    presence_penalty was transferred to model_kwargs.
                    Please confirm that presence_penalty is what you intended.
  validated_self = self.__pydantic_validator__.validate_python(data, self_instance=self)



Augmented

We are going to use a slightly modified prompt template. First we download the react template, which is a common template using toools and agents and then we will add the instruction of in which tool to look up first.

In [131]:
prompt = hub.pull("hwchase17/react")

print(prompt.template)

Answer the following questions as best you can. You have access to the following tools:

{tools}

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Begin!

Question: {input}
Thought:{agent_scratchpad}


# Now we will replace this line:

'''
Action: the action to take, should be one of [{tool_names}]

'''

# By this line:

'''
Action: the action to take, should be one of [{tool_names}]. Always look first in Pinecone Document Store

'''

In [132]:
# Set prompt template

template= '''
          Answer the following questions as best you can. You have access to the following tools:

          {tools}

          Use the following format:

          Question: the input question you must answer
          Thought: you should always think about what to do
          Action: the action to take, should be one of [{tool_names}]. Always look first in Pinecone Document Store
          Action Input: the input to the action
          Observation: the result of the action
          ... (this Thought/Action/Action Input/Observation can repeat 2 times)
          Thought: I now know the final answer
          Final Answer: the final answer to the original input question

          Begin!

          Question: {input}
          Thought:{agent_scratchpad}
          '''

prompt = PromptTemplate.from_template(template)


Generation with Agent

We are going to set up 2 tools for our agent:

    ## Tavily Search API: Tavily search over several sources like Bing or Google and returns the most relevant content. It offers 1000 API calls per month for free.

    ## Vectorstore: Our vector store will be used to look for the information first.

In [None]:
load_dotenv()
from langchain.agents import initialize_agent, AgentType
from langchain.agents import create_openai_functions_agent, AgentExecutor
from langchain import hub
#Setup tools and agent

TAVILY_API_KEY = os.getenv('TAVILY_API_KEY')

# Grab a default prompt for function agents
functions_prompt = hub.pull("hwchase17/openai-functions-agent")

# Initialize Tavily
tavily = TavilySearch(
    max_results=10,   # same as before
    topic="general",  # default search scope
    api_key=TAVILY_API_KEY
)


agent = initialize_agent(
    tools=tools,
    llm=llm,  # your ChatGroq model
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    memory=conversational_memory,
    verbose=True,
    handle_parsing_errors=True
)

# Build the executor properly
agent_executor = AgentExecutor.from_agent_and_tools(
    agent=agent,
    tools=tools,
    memory=conversational_memory,
    verbose=True,
    handle_parsing_errors=True
)

# Wrap in executor 
'''

agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    memory=conversational_memory,
    verbose=True,
    handle_parsing_errors=True
)
'''

# Define tools
tools = [
    Tool(
        name="Pinecone Document Store",
        func=qa_db.run,
        description="Use it to lookup information from the Pinecone Document Store",
    ),

    Tool(
        name="Tavily",
        func=tavily.invoke,   
        description="Use this to lookup information from Tavily",
    )
]

# Create agent with initialize_agent 
'''
agent = initialize_agent(
    tools=tools,
    llm=llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,  # Similar to ReAct but wrapped
    memory=conversational_memory,
    verbose=True,
    handle_parsing_errors=True
)
'''

'\nagent = initialize_agent(\n    tools=tools,\n    llm=llm,\n    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,  # Similar to ReAct but wrapped\n    memory=conversational_memory,\n    verbose=True,\n    handle_parsing_errors=True\n)\n'

Once everything is set up, we can start making queries and check how the agents behave in terms priorization of agent, search quality and answers.

In [156]:
result = agent.invoke({
    "input": "Can you give me one title of a TED talk of Al Gore as main speaker?"
})
print(result)


KeyError: 'intermediate_steps'

In [138]:
agent_executor.invoke({"input":"Did you find the previous title 'The Case for Optimism on Climate Change' in the Pinecone Document Store?"})



[1m> Entering new AgentExecutor chain...[0m


APIError: Internal Server Error