# 🔎 Full RAG Chatbot with Pinecone + LangChain (Google Colab)

This notebook implements:
- URL ingestion
- Text chunking
- HuggingFace embeddings
- Pinecone vector database
- Retrieval-Augmented Generation (RAG)
- Local open-source LLM (no OpenAI)


In [None]:
!pip -q install pinecone-client langchain-community sentence-transformers transformers accelerate unstructured nltk

In [None]:
import os
from getpass import getpass

import nltk

# Tokenizer data for NLTK. Newer NLTK releases look up `punkt_tab` instead of
# `punkt`, so both are requested; a resource unknown to the installed version
# is reported by nltk without raising.
nltk.download('punkt')
nltk.download('punkt_tab')

# 🔐 Pinecone API key: reuse a key that is already in the environment, otherwise
# prompt for it so the secret is never saved inside the notebook file.
if not os.environ.get('PINECONE_API_KEY'):
    os.environ['PINECONE_API_KEY'] = getpass('Pinecone API key: ')

# Name of the Pinecone index used by this notebook.
INDEX_NAME = 'lamaproject'

# 🔐 PASTE YOUR INDEX HOST HERE (from Pinecone dashboard)
INDEX_HOST = 'PASTE_INDEX_HOST_HERE'


In [None]:
from pinecone import Pinecone

# Fail fast with a clear message if the configuration placeholders were never
# replaced; otherwise the problem only surfaces later as an obscure
# authentication or connection error on the first request.
if os.environ['PINECONE_API_KEY'] in ('', 'PASTE_YOUR_API_KEY_HERE'):
    raise ValueError('PINECONE_API_KEY is not set — provide your Pinecone API key first.')
if INDEX_HOST in ('', 'PASTE_INDEX_HOST_HERE'):
    raise ValueError('INDEX_HOST is not set — copy the index host from the Pinecone dashboard.')

# Client handle plus a handle to the target index (addressed by name and host).
pc = Pinecone(api_key=os.environ['PINECONE_API_KEY'])
index = pc.Index(name=INDEX_NAME, host=INDEX_HOST)


In [None]:
from langchain_community.document_loaders import UnstructuredURLLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Pinecone as PineconeVectorStore
from transformers import pipeline
import uuid


In [None]:
# Web pages to ingest into the knowledge base.
URLS = [
    'https://blog.openai.com/paper-review-llama-2-open-foundation-and-fine-tuned-chat-models/',
    'https://www.mosaicml.com/blog/mpt-7b',
    'https://stability.ai/blog/stability-ai-launches-the-first-of-its-stablelm-suite-of-language-models'
]

# Fetch each URL and parse it into LangChain documents.
loader = UnstructuredURLLoader(urls=URLS)
documents = loader.load()
print('Loaded documents:', len(documents))

# By default the loader skips URLs it cannot fetch or parse instead of raising,
# so check the result explicitly rather than letting an empty or partial corpus
# flow silently into chunking and indexing.
if not documents:
    raise RuntimeError('No documents could be loaded — check the URLs and network access.')
if len(documents) < len(URLS):
    # NOTE(review): assumes each document records its URL under metadata['source'] — confirm.
    loaded_sources = {doc.metadata.get('source') for doc in documents}
    print('⚠️ Could not load:', [url for url in URLS if url not in loaded_sources])


In [None]:
# Split the loaded documents on newlines, targeting chunks of 1000 characters
# with an overlap of 200 characters between neighbouring chunks.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200, separator="\n")

# Every chunk is itself a document; report how many were produced.
text_chunks = text_splitter.split_documents(documents)
print("Total chunks:", len(text_chunks))


In [None]:
# Sentence-transformers embedding model, configured to run on the CPU.
embeddings = HuggingFaceEmbeddings(
    model_kwargs=dict(device="cpu"),
    model_name="sentence-transformers/all-mpnet-base-v2",
)


In [None]:
# Embed every chunk and upload the resulting vectors to Pinecone.
if not text_chunks:
    raise ValueError('No text chunks to index — check that documents were loaded and split.')

texts = [doc.page_content for doc in text_chunks]
vectors = embeddings.embed_documents(texts)

# IDs are derived from the chunk text (UUIDv5) instead of being random, so
# re-running this cell overwrites the same records rather than duplicating them.
# Keying by ID also collapses chunks with identical text into a single record.
# The namespace constant is arbitrary; it only has to stay fixed between runs.
records_by_id = {}
for vector, text in zip(vectors, texts):
    vector_id = str(uuid.uuid5(uuid.NAMESPACE_URL, text))
    records_by_id[vector_id] = (vector_id, vector, {'text': text})
upserts = list(records_by_id.values())

# Pinecone limits the size of a single upsert request, so upload in batches
# instead of sending the whole corpus at once.
UPSERT_BATCH_SIZE = 100
for start in range(0, len(upserts), UPSERT_BATCH_SIZE):
    index.upsert(vectors=upserts[start:start + UPSERT_BATCH_SIZE])
print('✅ Data successfully stored in Pinecone')


In [None]:
# Wrap the Pinecone index as a LangChain vector store. `text_key` names the
# metadata field holding the chunk text ('text', as written during the upsert).
vectorstore = PineconeVectorStore(
    text_key="text",
    embedding=embeddings,
    index=index,
)


In [None]:
# Local text-generation pipeline used to answer questions.
# `temperature` only takes effect when sampling is enabled; without
# `do_sample=True` generation is greedy and the temperature setting is ignored.
generator = pipeline(
    'text-generation',
    model='TinyLlama/TinyLlama-1.1B-Chat-v1.0',
    max_new_tokens=300,
    do_sample=True,
    temperature=0.3
)


In [None]:
def ask_question(query, k=3):
    """Answer a question using chunks retrieved from the vector store.

    Args:
        query: The user's question.
        k: Number of most similar chunks to retrieve as context.

    Returns:
        The text generated by the model for the assembled prompt, with
        surrounding whitespace removed.
    """
    docs = vectorstore.similarity_search(query, k=k)
    context = '\n\n'.join(d.page_content for d in docs)
    prompt = f'''You are a helpful AI assistant.
Use the context below to answer the question.

Context:
{context}

Question:
{query}

Answer:
'''
    # By default a text-generation pipeline returns the prompt followed by the
    # completion; return_full_text=False keeps only the newly generated answer
    # so the retrieved context and instructions are not echoed to the user.
    outputs = generator(prompt, return_full_text=False)
    return outputs[0]['generated_text'].strip()


In [None]:
# Interactive chat loop: read a question, answer it, repeat until the user quits.
print('🤖 Chatbot ready! Type exit to stop.')
while True:
    try:
        q = input('You: ').strip()
    except (EOFError, KeyboardInterrupt):
        # Input stream closed or the cell was interrupted: stop cleanly
        # instead of surfacing a traceback.
        print()
        break
    if not q:
        # Blank line: nothing to ask, so skip the retrieval and generation.
        continue
    if q.lower() == 'exit':
        break
    print('\nBot:', ask_question(q), '\n')
