# In [None]:
import sys
import os
import torch
from llama_cpp import Llama
from sentence_transformers import SentenceTransformer
import chromadb
from chromadb.utils import embedding_functions

class RAGSystem:
    """Retrieval-augmented question answering over a Chroma collection.

    Combines a llama.cpp model (for generation) with a Chroma collection
    named ``ai_research`` (for retrieval). Retrieved chunks are formatted
    into a ChatML-style prompt and passed to the model.
    """

    COLLECTION_NAME = "ai_research"
    EMBEDDING_MODEL_NAME = "BAAI/bge-small-en-v1.5"

    def __init__(self, model_path: str, db_path=None):
        """Load the LLM, the embedding model and the vector collection.

        Args:
            model_path: Path to the model file handed to ``Llama``.
            db_path: Directory of an existing persistent Chroma database.
                When it is ``None``/empty or does not exist on disk, an
                in-memory collection is used and a warning is printed.

        Raises:
            Whatever ``Llama``, ``SentenceTransformer`` or chromadb raise,
            e.g. when the persistent database has no ``ai_research``
            collection.
        """
        # Load the quantized model
        self.model = Llama(
            model_path=model_path,
            n_ctx=4096,  # Context window size
            n_gpu_layers=-1  # Use GPU if available
        )

        # Initialize embedding model. Not read by the methods of this class
        # (the collection embeds through its own embedding function below);
        # kept as a public attribute for existing callers.
        self.embedding_model = SentenceTransformer(self.EMBEDDING_MODEL_NAME)

        sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(
            model_name=self.EMBEDDING_MODEL_NAME
        )

        # Load or create collection. Check the path that was actually passed
        # in, not a hard-coded './chroma'.
        if db_path and os.path.exists(db_path):
            self.chroma_client = chromadb.PersistentClient(path=db_path)
            self.collection = self.chroma_client.get_collection(
                name=self.COLLECTION_NAME,
                embedding_function=sentence_transformer_ef
            )
        else:
            # Only build the in-memory client when it is really needed.
            self.chroma_client = chromadb.Client()
            # get_or_create avoids an "already exists" failure when a second
            # instance is constructed in the same process (e.g. a notebook
            # cell being re-run).
            self.collection = self.chroma_client.get_or_create_collection(
                name=self.COLLECTION_NAME,
                embedding_function=sentence_transformer_ef
            )
            print("Warning: No existing vector database found.")

    def retrieve(self, query: str, n_results: int = 3) -> str:
        """Retrieve relevant chunks for a query.

        Args:
            query: Free-text query sent to the collection.
            n_results: Maximum number of chunks to request.

        Returns:
            The retrieved chunks concatenated into one string, each prefixed
            with a ``From <title> (<source>):`` header. Empty string when
            nothing is returned. Chunks whose metadata is missing or lacks
            ``title``/``source`` get placeholder values instead of raising.
        """
        results = self.collection.query(
            query_texts=[query],
            n_results=n_results
        )

        # Results are batched per query text; we sent exactly one query.
        documents = results["documents"][0]
        metadata_batches = results.get("metadatas")
        metadatas = metadata_batches[0] if metadata_batches else [None] * len(documents)

        sections = []
        for doc, meta in zip(documents, metadatas):
            meta = meta or {}  # documents may have been stored without metadata
            title = meta.get("title", "Unknown title")
            source = meta.get("source", "unknown source")
            sections.append(f"\nFrom {title} ({source}):\n{doc}\n")

        return "".join(sections)

    def generate_answer(self, query: str, context: str) -> str:
        """Generate an answer using the LLM with context.

        Args:
            query: The user's question/topic.
            context: Retrieved excerpts to ground the answer on.

        Returns:
            The text of the first completion choice returned by the model.
        """
        prompt = f"""<|im_start|>user
I need information about the following topic:
{query}

Based on the following research paper excerpts:
{context}<|im_end|>
<|im_start|>assistant
"""

        response = self.model(
            prompt,
            max_tokens=1024,
            temperature=0.1,
            top_p=0.9,
            stop=["<|im_end|>"]  # stop at the end-of-turn marker
        )

        return response["choices"][0]["text"]

    def answer_question(self, query: str) -> str:
        """End-to-end question answering with RAG.

        Retrieves context for ``query`` and returns the generated answer.
        """
        # Retrieve relevant context
        context = self.retrieve(query)

        # Generate answer
        return self.generate_answer(query, context)

def main():
    """Command-line entry point: answer one question with the RAG system.

    The model file must exist locally. Its path is read from the
    ``RAG_MODEL_PATH`` environment variable; if the variable is unset the
    original default (a Google Drive folder URL) is used, which is not a
    loadable file, so the program exits with instructions instead of failing
    inside the model loader.

    The question is taken from the command-line arguments (all arguments
    joined with spaces) or, when none are given, read interactively.

    Raises:
        SystemExit: When no local model file can be found.
    """
    # Initialize RAG system
    model_url = "https://drive.google.com/drive/folders/103vmZo3QW9L1RIN3RKvpaABOe9GSX2qR?usp=sharing"
    model_path = os.environ.get("RAG_MODEL_PATH", model_url)
    if not os.path.isfile(model_path):
        # Llama needs a local file; a URL (or missing path) cannot be loaded.
        sys.exit(
            f"Model file not found: {model_path!r}. Download the model from "
            f"{model_url} and set RAG_MODEL_PATH to its local path."
        )

    db_path = "./chroma"  # Has to be initialized manually beforehand
    rag_system = RAGSystem(model_path, db_path)

    # Get user input
    if len(sys.argv) > 1:
        # Join all arguments so an unquoted multi-word question is not truncated.
        question = " ".join(sys.argv[1:])
    else:
        question = input("Enter your question about AI research: ")

    # Generate and print answer
    answer = rag_system.answer_question(question)
    print(answer)

# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()