In [None]:
# Main RAG System - Speak with Your Book
# This notebook loads the pre-built model and database to create an interactive chat system

import os
import json
import chromadb
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
import warnings
warnings.filterwarnings('ignore')

# 1. CONFIGURATION

In [None]:
class RAGConfig:
    """Settings shared by every stage of the RAG notebook.

    All values are plain class attributes, so they can be read either from the
    class itself or from an instance.
    """

    # --- Artifacts produced by the development notebooks ---
    MODEL_PATH = "development/outputs/fine_tuned_model"
    DATABASE_PATH = "development/outputs/vector_database"

    # --- Embedding model used to encode queries for the vector store ---
    EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"

    # --- Retrieval ---
    RETRIEVAL_K = 5    # number of chunks fetched per question
    # Not referenced elsewhere in this notebook; presumably mirrors the chunk
    # size used when the database was built -- TODO confirm.
    CHUNK_SIZE = 500

    # --- Generation ---
    MAX_LENGTH = 512
    TEMPERATURE = 0.7

    # Decided once, while the class body executes: use the GPU when torch can
    # see one, otherwise stay on the CPU.
    if torch.cuda.is_available():
        DEVICE = "cuda"
    else:
        DEVICE = "cpu"

In [None]:
# Create the shared settings object and echo the values later cells rely on.
config = RAGConfig()
print(
    f"Initializing RAG System...",
    f"Device: {config.DEVICE}",
    f"Model Path: {config.MODEL_PATH}",
    f"Database Path: {config.DATABASE_PATH}",
    sep="\n",  # one report line per argument, same output as separate prints
)

# 2. LOAD FINE-TUNED MODEL

In [None]:
# Load the fine-tuned model and wrap it as a LangChain LLM (`llm`).
# If anything in the primary path fails, fall back to the public base model so
# the rest of the notebook can still run.
try:
    use_cuda = config.DEVICE == "cuda"

    # Load tokenizer and model
    tokenizer = AutoTokenizer.from_pretrained(config.MODEL_PATH)
    model = AutoModelForCausalLM.from_pretrained(
        config.MODEL_PATH,
        torch_dtype=torch.float16 if use_cuda else torch.float32,
        # On GPU the weights are placed by accelerate via device_map.
        device_map="auto" if use_cuda else None,
        trust_remote_code=True
    )

    # Create HuggingFace pipeline
    from transformers import pipeline
    pipeline_kwargs = {
        "model": model,
        "tokenizer": tokenizer,
        # Budget for *generated* tokens only. `max_length` would also count the
        # prompt, and a RAG prompt stuffed with retrieved chunks can consume
        # that whole budget, leaving no room for an answer.
        "max_new_tokens": config.MAX_LENGTH,
        "temperature": config.TEMPERATURE,
        "do_sample": True,
    }
    if not use_cuda:
        # Only pin the device on CPU. A model loaded with device_map="auto" is
        # already placed by accelerate; passing `device` as well makes the
        # pipeline constructor reject it, which previously triggered the
        # fallback below and silently replaced the fine-tuned model.
        pipeline_kwargs["device"] = -1
    pipe = pipeline("text-generation", **pipeline_kwargs)

    # Wrap in LangChain
    llm = HuggingFacePipeline(pipeline=pipe)
    print("Model loaded successfully!")

except Exception as e:
    print(f"Error loading model: {e}")
    print("Falling back to default model...")
    # Fallback to base model. Generation settings are passed exactly as before;
    # `trust_remote_code` is added because this checkpoint ships custom model
    # code and cannot be loaded without it.
    llm = HuggingFacePipeline.from_model_id(
        model_id="Qwen/Qwen-1_8B-Chat",
        task="text-generation",
        model_kwargs={
            "temperature": config.TEMPERATURE,
            "max_length": config.MAX_LENGTH,
            "trust_remote_code": True,
        }
    )

# 3. LOAD VECTOR DATABASE

In [None]:
# Open the persisted Chroma collection built during development and expose it
# as `vectorstore`. On any failure `vectorstore` is set to None so later cells
# can detect that retrieval is unavailable.
try:
    # The Chroma client creates a brand-new, empty store when the path does not
    # exist, which would be reported as a successful load with 0 documents.
    # Fail early instead so the troubleshooting hint below is shown.
    if not os.path.isdir(config.DATABASE_PATH):
        raise FileNotFoundError(
            f"Vector database directory not found: {config.DATABASE_PATH}"
        )

    # Initialize embeddings
    embeddings = HuggingFaceEmbeddings(
        model_name=config.EMBEDDING_MODEL,
        model_kwargs={'device': config.DEVICE}
    )

    # Load ChromaDB
    chroma_client = chromadb.PersistentClient(path=config.DATABASE_PATH)
    vectorstore = Chroma(
        client=chroma_client,
        embedding_function=embeddings,
        collection_name="book_collection"
    )

    # Test database
    collection_count = vectorstore._collection.count()
    print(f"✅ Database loaded successfully!")
    print(f"📚 Total documents in database: {collection_count}")
    if collection_count == 0:
        # The store opened but holds nothing to retrieve; answers would be
        # generated from an empty context.
        print("⚠️ The collection 'book_collection' is empty - retrieval will return no context")

except Exception as e:
    print(f"❌ Error loading database: {e}")
    print("💡 Make sure the database was created properly in development phase")
    vectorstore = None

# 4. CREATE RAG CHAIN

In [None]:
# Arabic prompt: presents the retrieved book passages ({context}) and the
# user's question ({question}), and instructs the model to answer from that
# context or state that the information is not in the book.
prompt_template = """
أنت مساعد شخصي يمكنكك من التحدث عن الكتب التي قرأتها والرد على اسئلة المستخدمين حول محتوى الكتب باللغة العربية الفصحي
المحتوي من الكتاب:
{context}

السؤال : {question}

التعليمات: 
- الإجابة بناءً على السياق المقدم
- إذا كانت الإجابة غير موجودة في السياق، قل "لا أستطيع العثور على هذه المعلومات في الكتاب"
- كن محددًا واستشهد بالتفاصيل ذات الصلة
- اجعل إجابتك مركزة وذات صلة

الإجابة:"""

PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# Assemble the retrieval chain. Without a vector store there is nothing to
# retrieve from, so `rag_chain` is left as None for later cells to check.
if vectorstore is None:
    rag_chain = None
    print("Cannot create RAG chain without database")
else:
    retriever = vectorstore.as_retriever(search_kwargs={"k": config.RETRIEVAL_K})

    # "stuff" places all retrieved chunks into a single prompt.
    rag_chain = RetrievalQA.from_chain_type(
        llm=llm,
        retriever=retriever,
        chain_type="stuff",
        return_source_documents=True,
        chain_type_kwargs={"prompt": PROMPT},
    )

    print("RAG Chain created successfully!")

# 5. CHAT INTERFACE

In [None]:
class BookChatbot:
    """Interactive chatbot for book conversations.

    Wraps a retrieval-QA chain and its vector store, and keeps a simple
    in-memory log of the questions asked during the session.
    """

    def __init__(self, rag_chain, vectorstore, retrieval_k=None):
        """Store the collaborators and work out the retrieval depth.

        Args:
            rag_chain: Callable chain; invoked as ``rag_chain({"query": ...})``
                and expected to return a mapping with ``"result"`` and
                ``"source_documents"``. May be None/falsy if setup failed.
            vectorstore: Vector store offering ``similarity_search`` and a
                ``_collection.count()``. May be None/falsy if setup failed.
            retrieval_k: Number of chunks the chain retrieves per question.
                When omitted it is read from ``rag_chain.retriever.search_kwargs``
                if available, so the statistics do not depend on a notebook
                global.
        """
        self.rag_chain = rag_chain
        self.vectorstore = vectorstore
        self.chat_history = []

        if retrieval_k is None:
            chain_retriever = getattr(rag_chain, "retriever", None)
            search_kwargs = getattr(chain_retriever, "search_kwargs", None) or {}
            retrieval_k = search_kwargs.get("k")
        self.retrieval_k = retrieval_k

    def ask_question(self, question: str):
        """Ask a question and get an answer.

        Returns:
            On success, a dict with ``"answer"``, ``"sources"`` and
            ``"sources_count"``. Otherwise a plain string describing why no
            answer was produced (chain missing, blank question, or an error
            raised while running the chain).
        """
        if not self.rag_chain:
            return "RAG system not properly initialized"

        # A blank question would still trigger retrieval and generation.
        if not question or not question.strip():
            return "Please enter a question"

        try:
            # Get answer from RAG chain
            response = self.rag_chain({"query": question})

            # Extract answer and sources
            answer = response["result"]
            sources = response["source_documents"]

            # Store in history
            self.chat_history.append({
                "question": question,
                "answer": answer,
                "sources_count": len(sources)
            })

            return {
                "answer": answer,
                "sources": sources,
                "sources_count": len(sources)
            }

        except Exception as e:
            # Deliberately best-effort: report the failure to the caller as text.
            return f"Error processing question: {e}"

    def get_relevant_chunks(self, question: str, k: int = 3):
        """Return the ``k`` most similar chunks for ``question`` (for debugging).

        Returns:
            A list of ``{"content", "metadata"}`` dicts, or an empty list when
            no vector store is available.
        """
        if not self.vectorstore:
            return []

        docs = self.vectorstore.similarity_search(question, k=k)
        return [{"content": doc.page_content, "metadata": doc.metadata} for doc in docs]

    def show_stats(self):
        """Return a formatted summary of the database size and session usage."""
        if not self.vectorstore:
            return "Database not loaded"

        collection_count = self.vectorstore._collection.count()
        chat_count = len(self.chat_history)
        # Uses the value resolved in __init__ rather than a module-level global.
        max_chunks = self.retrieval_k if self.retrieval_k is not None else "unknown"

        return f"""
        RAG System Statistics:
        ━━━━━━━━━━━━━━━━━━━━━━━
        Documents in database: {collection_count}
        Questions asked: {chat_count}
        Model: Fine-tuned Qwen
        Retrieval method: Semantic similarity
        Max retrieval chunks: {max_chunks}
        """

# Build the chatbot only when a RAG chain exists; otherwise leave `chatbot`
# as None so the demo cell can show troubleshooting hints instead.
chatbot = BookChatbot(rag_chain, vectorstore) if rag_chain else None
print("Chatbot ready!" if chatbot is not None else "Chatbot initialization failed")

# 6. TESTING & DEMO

In [None]:
def _read_user_input(prompt_text):
    """Read one line from the user, or return None when input is unavailable.

    ``input`` raises EOFError when stdin is closed, and Jupyter raises a
    NotImplementedError subclass when a notebook is executed without a
    frontend that can answer input requests. Either way no further input can
    arrive, so the caller should stop prompting rather than retry.
    """
    try:
        return input(prompt_text)
    except (EOFError, NotImplementedError):
        return None


# How many retrieved chunks to display, and how much of each to preview.
MAX_SOURCES_SHOWN = 3
SOURCE_PREVIEW_CHARS = 200

print("\n" + "="*50)
print("Speak With Your Book")
print("="*50)

if chatbot:
    print(chatbot.show_stats())

    print("\n💡 Example Usage:")
    print("="*30)

    # Example questions for testing
    example_questions = [
        "What is the main theme of this book?",
        "Who are the main characters?",
        "What happens in the first chapter?",
        "Summarize the conclusion",
        "What is the author's main argument?"
    ]

    print("Try asking questions like:")
    for i, q in enumerate(example_questions, 1):
        print(f"   {i}. {q}")

    print("\nStart asking questions:")
    print("-" * 30)

    # Interactive chat loop
    while True:
        try:
            raw_question = _read_user_input("\nYour question (or 'quit' to exit): ")
            if raw_question is None:
                # Without this exit the generic handler below would print the
                # same input error on every iteration and never terminate.
                print("\nNo interactive input available - ending session")
                break
            question = raw_question.strip()

            if question.lower() in ['quit', 'exit', 'q']:
                print("Thanks for using the RAG system!")
                break

            if not question:
                continue

            print("Thinking...")
            response = chatbot.ask_question(question)

            # ask_question returns a dict on success and a message string otherwise.
            if isinstance(response, dict):
                print(f"\nAnswer:")
                print("-" * 20)
                print(response["answer"])
                print(f"\nRetrieved {response['sources_count']} relevant chunks")

                # Show sources if requested; a missing reply counts as "no".
                show_sources = (_read_user_input("\n🔍 Show source chunks? (y/n): ") or "").strip().lower()
                if show_sources == 'y':
                    print("\nSource chunks:")
                    print("-" * 30)
                    for i, source in enumerate(response["sources"][:MAX_SOURCES_SHOWN], 1):
                        print(f"\nChunk {i}:")
                        print(f"Content: {source.page_content[:SOURCE_PREVIEW_CHARS]}...")
                        print(f"Metadata: {source.metadata}")
            else:
                print(f"{response}")

        except KeyboardInterrupt:
            print("\n👋 Session ended by user")
            break
        except Exception as e:
            # Keep the session alive after an unexpected per-question failure.
            print(f"Error: {e}")

else:
    print("System not ready. Please check model and database setup.")
    print("\n Troubleshooting:")
    print("1. Make sure development/outputs/fine_tuned_model exists")
    print("2. Make sure development/outputs/vector_database exists")
    print("3. Run the development notebooks first")