<a href="https://colab.research.google.com/github/Elvificent/ticket/blob/bot_data/manyfileschatbot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# 🔧 STEP 1: Install required packages
!pip install -q langchain langchain-community langchain-google-genai chromadb pypdf

# 🔧 STEP 2: Imports
import os
from getpass import getpass

import requests
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# 🔑 STEP 3: Set your Gemini API key
# The key must never be written into the notebook itself: anything saved here
# ends up in version control and in every shared copy. Use a key that is
# already present in the environment, otherwise ask for it interactively
# (getpass does not echo the typed value into the cell output).
# NOTE: a key that was ever committed in this notebook should be treated as
# compromised and revoked.
if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass("Enter your Gemini API key: ")


In [None]:
# 📄 STEP 4: GitHub PDF URLs
# All documents are served from the same raw.githubusercontent.com location,
# so each full URL is the shared prefix followed by the file name.
_RAW_PREFIX = "https://raw.githubusercontent.com/Elvificent/ticket/add-chatbot-data/"

pdf_urls = [
    _RAW_PREFIX + pdf_name
    for pdf_name in (
        "tesla%20testing.pdf",  # the space in this name is percent-encoded
        "model3.pdf",
        "modelY.pdf",
        "modelS.pdf",
        "modelX.pdf",
        "cybertruck.pdf",
    )
]

In [None]:
# 📥 STEP 5: Download PDFs
# Each URL is saved into the working directory under the last path segment of
# the URL, exactly as it appears there (e.g. "tesla%20testing.pdf"). The local
# file names are collected in pdf_files for the loading step.
pdf_files = []
for url in pdf_urls:
    filename = url.split("/")[-1]
    # The timeout keeps the cell from hanging forever on a stalled connection.
    response = requests.get(url, timeout=60)
    # Fail loudly on a 4xx/5xx reply so an HTML error page is never written to
    # disk under a ".pdf" name and later fed to the PDF loader.
    response.raise_for_status()
    with open(filename, "wb") as f:
        f.write(response.content)
    pdf_files.append(filename)

In [None]:
# 📚 STEP 6: Load, split, and chunk all PDFs
# all_docs is rebuilt from scratch on every run of this cell, so re-running it
# does not accumulate duplicate chunks in the list.
all_docs = []

# The splitter is configured identically for every file, so it is created once
# instead of once per loop iteration.
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)

for file in pdf_files:
    try:
        pages = PyPDFLoader(file).load()
        chunks = splitter.split_documents(pages)
        all_docs.extend(chunks)
        print(f"✅ Loaded: {file} ({len(chunks)} chunks)")
    except Exception as e:
        # Best-effort by design: report the file that could not be processed
        # and continue with the remaining ones.
        print(f"❌ Failed to load {file}: {e}")

✅ Loaded: tesla%20testing.pdf (1 chunks)
✅ Loaded: model3.pdf (934 chunks)
✅ Loaded: modelY.pdf (1069 chunks)
✅ Loaded: modelS.pdf (1072 chunks)
✅ Loaded: modelX.pdf (1120 chunks)
✅ Loaded: cybertruck.pdf (1047 chunks)


In [None]:
# STEP 7: Embed and store in Chroma
# Create the Gemini embedding client, then embed every chunk collected in
# all_docs and write the resulting collection to ./chroma_db.
# NOTE(review): re-running this cell against an already populated ./chroma_db
# presumably adds the same chunks a second time — confirm before re-running.
embedding = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

vectorstore = Chroma.from_documents(
    all_docs,
    embedding=embedding,
    persist_directory="./chroma_db",
)

# Reports how many chunks were handed to the vector store.
print(f"\n✅ Total embedded chunks: {len(all_docs)}")


✅ Total embedded chunks: 5243


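Old version: QA chain built on `models/gemini-1.5-pro-latest`. The next QA-chain cell redefines `llm` and `qa_chain`, so this cell is kept for reference only.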

In [None]:

from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.chains import RetrievalQA

# Earlier configuration of the chat model. The names llm and qa_chain are
# assigned again by the following QA-chain cell, so when the notebook is run
# top to bottom the objects created here are replaced before any query is made.
llm = ChatGoogleGenerativeAI(
    temperature=0.2,
    model="models/gemini-1.5-pro-latest",
)

# Retrieval-augmented QA: the retriever pulls chunks from the vector store and
# the chain also returns the source documents alongside the answer.
qa_chain = RetrievalQA.from_chain_type(
    retriever=vectorstore.as_retriever(),
    return_source_documents=True,
    llm=llm,
)




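New version: QA chain built on `gemini-2.0-flash`. This is the chain used by the example questions below.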

In [None]:

from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.chains import RetrievalQA

# Chat model that writes the answers; a low temperature keeps the sampling
# close to deterministic.
llm = ChatGoogleGenerativeAI(
    temperature=0.2,
    model="gemini-2.0-flash",
)

# Retrieval-augmented QA over the Chroma store built earlier. Source documents
# are returned together with the answer so the result can be traced back.
qa_chain = RetrievalQA.from_chain_type(
    retriever=vectorstore.as_retriever(),
    return_source_documents=True,
    llm=llm,
)

In [None]:
# Example question: ask the chain about the chatbot testing document and print
# only the answer text from the response.
query = "do you have any information about tesla chatbot testing model?"
payload = {"query": query}
response = qa_chain.invoke(payload)
print("💬 Answer:", response["result"])

💬 Answer: Yes, the Tesla chatbot is a testing model and it is tested by Techcare.


In [None]:
# Example question: ask the chain for a software version and print only the
# answer text from the response.
query = "what is the software version of modely? "
payload = {"query": query}
response = qa_chain.invoke(payload)
print("💬 Answer:", response["result"])

💬 Answer: The provided text states that Model 3 has the latest software. It does not specify a version number.
