In [None]:
pip install torch transformers langchain langchain-community langchain-huggingface sentence-transformers faiss-cpu


Collecting langchain-community
  Downloading langchain_community-0.3.23-py3-none-any.whl.metadata (2.5 kB)
Collecting langchain-huggingface
  Downloading langchain_huggingface-0.1.2-py3-none-any.whl.metadata (1.3 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.11.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.8 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downlo

# RAG Chatbot: FLAN-T5 Generation with FAISS Retrieval

In [None]:
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from transformers import T5Tokenizer, T5ForConditionalGeneration
import torch
import os

class RAGChatbot:
    """Retrieval-augmented chatbot: FAISS similarity search feeding a T5 generator.

    A text document is split into overlapping chunks and indexed in a FAISS
    vector store. For each question the most similar chunks are retrieved and
    placed in the prompt given to the T5 model.

    Attributes:
        model_name: Hugging Face id of the T5 checkpoint used for generation.
        tokenizer: Tokenizer loaded for ``model_name``.
        model: Conditional-generation model loaded for ``model_name``.
        embeddings: Embedding model used to build and query the vector store.
        vectorstore: FAISS index of the loaded document, or ``None`` until
            ``load_document`` succeeds.
    """

    # Tokenizer truncation limit and generation budget shared by both passes.
    _MAX_INPUT_TOKENS = 1024
    _MAX_NEW_TOKENS = 200

    # Answers shorter than this many characters are considered too generic.
    _MIN_ANSWER_CHARS = 30

    # Lower-cased phrases that mark a refusal. A bare "information" is
    # deliberately NOT listed: it also occurs in perfectly good answers, and
    # matching it caused valid responses to be thrown away and regenerated.
    _REFUSAL_MARKERS = ("don't know", "not sure", "enough information")

    # Instruction preamble for the first generation pass.
    _PRIMARY_INSTRUCTIONS = (
        "You are a helpful AI assistant. Answer the following question based ONLY on the information provided below. "
        "If the information doesn't contain the answer, say 'I don't have enough information to answer that question.' "
        "Do not make up information or use knowledge outside of what is provided."
    )

    # More direct instruction preamble used when the first answer is rejected.
    _RETRY_INSTRUCTIONS = (
        "You are a helpful AI assistant. Your task is to extract and present information from the provided text. "
        "Answer the question using ONLY the information provided. Do not make up information. "
        "If the information doesn't contain the answer, say 'I don't have enough information.'"
    )

    def __init__(
        self,
        model_name="google/flan-t5-large",
        embedding_model_name="sentence-transformers/all-MiniLM-L6-v2",
    ):
        """Load the generator model, its tokenizer and the embedding model.

        Args:
            model_name: Hugging Face id of the T5 checkpoint to load.
            embedding_model_name: Sentence-embedding model used for retrieval.
        """
        self.model_name = model_name
        print(f"Loading model: {self.model_name}...")
        self.tokenizer = T5Tokenizer.from_pretrained(self.model_name)
        self.model = T5ForConditionalGeneration.from_pretrained(self.model_name)
        print("Model loaded successfully!")

        self.embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)
        self.vectorstore = None

    def load_document(self, file_path, chunk_size=800, chunk_overlap=200):
        """Load a text file, split it into chunks and index them in FAISS.

        Args:
            file_path: Path of the text file to load.
            chunk_size: Maximum size of each chunk passed to the splitter.
            chunk_overlap: Overlap between consecutive chunks.

        If the file yields no chunks (for example an empty file) a warning is
        printed and the current vector store is left unchanged.
        """
        documents = TextLoader(file_path).load()

        splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
        )
        chunks = splitter.split_documents(documents)

        # An index cannot be built from zero chunks; report it clearly instead
        # of letting the vector-store constructor fail on the empty list.
        if not chunks:
            print(f"⚠️ No text found in {file_path}; document not indexed")
            return

        self.vectorstore = FAISS.from_documents(chunks, self.embeddings)
        print(f"✅ Document loaded and processed into {len(chunks)} chunks")

    def get_relevant_context(self, query, k=4):
        """Return the ``k`` chunks most similar to ``query`` joined by newlines.

        Returns an empty string when no document has been loaded.
        """
        if self.vectorstore is None:
            return ""

        docs = self.vectorstore.similarity_search(query, k=k)
        return "\n".join(doc.page_content for doc in docs)

    @staticmethod
    def _build_prompt(instructions, prompt, context):
        """Assemble instructions, retrieved context and question into one prompt."""
        return (
            f"{instructions}\n\n"
            f"Information: {context}\n\n"
            f"Question: {prompt}\n\n"
            f"Answer:"
        )

    def _generate(self, prompt_text, temperature, top_k, top_p):
        """Tokenize ``prompt_text``, sample a completion and decode it to text."""
        # NOTE(review): truncation keeps the head of the prompt, and the
        # question sits at the tail, so a very long context could cut the
        # question off. Consider trimming the context first if that occurs.
        inputs = self.tokenizer(
            prompt_text,
            return_tensors="pt",
            truncation=True,
            max_length=self._MAX_INPUT_TOKENS,
        )
        result = self.model.generate(
            **inputs,
            max_new_tokens=self._MAX_NEW_TOKENS,
            temperature=temperature,
            top_k=top_k,
            top_p=top_p,
            do_sample=True,
            pad_token_id=self.tokenizer.eos_token_id,
        )
        return self.tokenizer.decode(result[0], skip_special_tokens=True)

    def _needs_retry(self, response):
        """Return True when ``response`` is too short or reads like a refusal."""
        if len(response) < self._MIN_ANSWER_CHARS:
            return True
        lowered = response.lower()
        return any(marker in lowered for marker in self._REFUSAL_MARKERS)

    def generate_response(self, prompt, context=""):
        """Generate an answer to ``prompt``, grounded in ``context`` if given.

        With context, a first answer is sampled; if it is very short or looks
        like a refusal, a second, more direct prompt is sampled at a lower
        temperature and its answer is returned instead. Without context the
        question is asked directly and never retried.

        Args:
            prompt: The user's question.
            context: Retrieved document text, or "" for no grounding.

        Returns:
            The decoded answer, or a string starting with
            "Error generating response: " if generation raised an exception.
        """
        try:
            if context:
                full_prompt = self._build_prompt(self._PRIMARY_INSTRUCTIONS, prompt, context)
            else:
                full_prompt = f"Answer this question: {prompt}"

            # Low temperature keeps the first answer focused.
            response = self._generate(full_prompt, temperature=0.2, top_k=50, top_p=0.9)

            if context and self._needs_retry(response):
                direct_prompt = self._build_prompt(self._RETRY_INSTRUCTIONS, prompt, context)
                # Tighter sampling for the second attempt.
                response = self._generate(direct_prompt, temperature=0.1, top_k=30, top_p=0.8)

            return response
        except Exception as e:
            # Best-effort: report the failure as the reply so a chat loop survives.
            return f"Error generating response: {str(e)}"

def main():
    """Run an interactive console chat session backed by a RAGChatbot.

    Prompts once for an optional document path, indexes the document when the
    path names an existing file, then answers questions in a loop until the
    user types 'exit' (any letter case) or closes/interrupts the input.
    """
    chatbot = RAGChatbot()

    # Strip stray whitespace so a trailing space or newline does not make a
    # valid path look missing.
    document_path = input("Enter the path to your document (or press Enter to skip): ").strip()

    # isfile (rather than exists) rejects directories, which the text loader
    # cannot read.
    if document_path and os.path.isfile(document_path):
        try:
            chatbot.load_document(document_path)
        except Exception as exc:
            # A bad document should not prevent chatting without context.
            print(f"Could not load document: {exc}. Continuing without document context.")
    else:
        print("No document provided or file not found. Continuing without document context.")

    print("\nChat with the bot (type 'exit' to stop):")
    while True:
        try:
            prompt = input("\nYou: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupt ends the session cleanly.
            break

        # Ignore blank lines instead of sending an empty question to the model.
        if not prompt:
            continue
        if prompt.lower() == "exit":
            break

        # get_relevant_context returns "" when no document has been loaded.
        context = chatbot.get_relevant_context(prompt)

        response = chatbot.generate_response(prompt, context)
        print("Bot:", response)

# Entry-point guard: start the interactive chat only when this code runs as the
# main program, not when it is imported as a module.
if __name__ == "__main__":
    main()

Loading model: google/flan-t5-large...
Model loaded successfully!
Enter the path to your document (or press Enter to skip): /sample_document.txt
✅ Document loaded and processed into 8 chunks

Chat with the bot (type 'exit' to stop):

You: hi
Bot: I don't have enough information.

You: Whats Ai
Bot: a broad field of computer science focused on creating intelligent machines that can perform tasks that typically require human intelligence

You: What is Machine Learning?
Bot: Machine learning is a subset of AI that focuses on developing systems that can learn from and make decisions based on data.

You: What is Deep Learning?
Bot: Deep learning is a subset of machine learning that uses neural networks with many layers.

You: Exit
