In [None]:
# Install the pinned dependency set for this notebook (run once per fresh runtime).
# huggingface_hub is removed first so the pinned 0.15.1 release is the one installed.
!pip uninstall -y huggingface_hub
!pip install huggingface_hub==0.15.1
!pip install pydantic-settings==2.0.3
!pip install transformers==4.33.0 accelerate==0.22.0 einops==0.6.1 langchain==0.0.300 xformers==0.0.21 \
bitsandbytes==0.41.1 sentence_transformers==2.2.2 chromadb==0.4.15
!pip install pypdf llama_index
# pymongo is removed and reinstalled at a pinned version with the [srv] extra.
!pip uninstall pymongo -y
!pip install "pymongo[srv]==3.13.0"
!pip install pyngrok

# NOTE(review): presumably a check that the DNS package needed by pymongo[srv] is importable — confirm.
import dns
!pip install scipy==1.10.1



In [None]:
# NOTE(review): -U upgrades bitsandbytes beyond the 0.41.1 pin used in the first install cell — confirm this is intended.
!pip install -U bitsandbytes


In [None]:
    from torch import cuda, bfloat16
    import torch
    import transformers
    from transformers import AutoTokenizer
    from time import time
    import chromadb
    from chromadb.config import Settings
    from langchain.llms import HuggingFacePipeline
    from langchain.document_loaders import TextLoader
    from langchain.text_splitter import RecursiveCharacterTextSplitter
    from langchain.embeddings import HuggingFaceEmbeddings
    from langchain.chains import RetrievalQA
    from langchain.vectorstores import Chroma
    
    import json
    from pymongo import MongoClient
    import os


In [None]:
import os

from huggingface_hub import login

# Never hardcode the access token in the notebook: read it from the HF_TOKEN
# environment variable. When the variable is unset or empty, None is passed so
# that login() falls back to its interactive prompt instead of failing on an
# empty token.
login(os.environ.get("HF_TOKEN") or None)


from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch

# Model details
model_id = "meta-llama/Llama-2-7b-chat-hf"  # Example for LLaMA 2 7B Chat Model
device = "cuda" if torch.cuda.is_available() else "cpu"

# Quantization Configuration
# for gpu
# 4-bit NF4 weights with double quantization; computation runs in bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16
)



In [None]:
# Pin triton to 2.1.0.
# NOTE(review): this runs after the import cells above; a kernel restart may be needed for it to take effect — confirm.
!pip install triton==2.1.0


In [None]:
# Load the model config, the quantized causal-LM weights and the tokenizer for
# `model_id`, and report how long the whole step took.
time_1 = time()
model_config = transformers.AutoConfig.from_pretrained(
    model_id,
)
# NOTE(review): trust_remote_code=True allows code shipped with the model repo
# to run locally — confirm it is actually required for this model.
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    config=model_config,
    quantization_config=bnb_config,  # 4-bit settings defined in the earlier config cell
    device_map='auto',
)
tokenizer = AutoTokenizer.from_pretrained(model_id)
time_2 = time()
print(f"Prepare model, tokenizer: {round(time_2-time_1, 3)} sec.")

In [None]:
# Wrap the loaded model and tokenizer in a text-generation pipeline and report
# how long construction took.
# NOTE(review): `model` is passed as an already-loaded object, so torch_dtype
# and device_map may have no effect here — confirm against the transformers docs.
time_1 = time()
query_pipeline = transformers.pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        torch_dtype=torch.float16,
        device_map="auto",)
time_2 = time()
print(f"Prepare pipeline: {round(time_2-time_1, 3)} sec.")

In [None]:
def test_model(tokenizer, pipeline, prompt_to_test):
    """
    Perform a query
    print the result
    Args:
        tokenizer: the tokenizer
        pipeline: the pipeline
        prompt_to_test: the prompt
    Returns
        None
    """
    # adapted from https://huggingface.co/blog/llama2#using-transformers
    time_1 = time()
    sequences = pipeline(
        prompt_to_test,
        do_sample=True,
        top_k=10,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        max_length=200,)
    time_2 = time()
    print(f"Test inference: {round(time_2-time_1, 3)} sec.")
    for seq in sequences:
        print(f"Result: {seq['generated_text']}")

In [None]:
# Smoke-test the raw pipeline with a short prompt; timing and generated text are printed.
test_model(tokenizer,
           query_pipeline,
           "Please explain what are Newton's Law of Motion. Keep it in 100 words.")

In [None]:
# Wrap the transformers pipeline in LangChain's HuggingFacePipeline; `llm` is
# the object later passed to RetrievalQA in the query handler.
llm = HuggingFacePipeline(pipeline=query_pipeline)
# checking again that everything is working fine
llm(prompt="Please explain what are Newton's Law of Motion. Keep it in 100 words.")

In [None]:
model_name = "BAAI/bge-large-en"  # ✅ Better retrieval model
# Use the GPU when one is available and fall back to CPU otherwise, instead of
# hardcoding "cuda" (which fails outright on a CPU-only runtime).
model_kwargs = {"device": "cuda" if torch.cuda.is_available() else "cpu"}

# Sentence-embedding model shared by every Chroma store created below.
embeddings = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs
)

print("Embedding model loaded successfully!")


In [None]:
# Install the Cohere SDK used for reranking in the query handler (version is not pinned).
pip install cohere


In [None]:
import cohere  
from flask import Flask, request, jsonify
from pyngrok import ngrok, conf
from pymongo import MongoClient
import os

# 🔹 Secrets and connection settings are read from the environment rather than
# being hardcoded in the notebook. Every lookup defaults to the empty string,
# so behaviour is unchanged when a variable is not set.
#   NGROK_AUTH_TOKEN    - ngrok auth token
#   MONGODB_URI         - MongoDB connection string
#   MONGODB_DB          - database name
#   MONGODB_COLLECTION  - collection holding the source text documents
#   COHERE_API_KEY      - Cohere API key used for reranking
NGROK_AUTH_TOKEN = os.environ.get("NGROK_AUTH_TOKEN", "")
conf.get_default().auth_token = NGROK_AUTH_TOKEN

# Initialize Flask
app = Flask(__name__)

# MongoDB Connection
client = MongoClient(os.environ.get("MONGODB_URI", ""))
db = client[os.environ.get("MONGODB_DB", "")]
collection = db[os.environ.get("MONGODB_COLLECTION", "")]

# ChromaDB Setup
chroma_path = "./chroma_db"
os.makedirs(chroma_path, exist_ok=True)

# Initialize persistent ChromaDB client
chroma_client = chromadb.PersistentClient(path=chroma_path)
co = cohere.Client(os.environ.get("COHERE_API_KEY", ""))



def strip_extra_conversation(response_text):
    """
    Trim a generated reply at the first follow-on "Bot:" turn.

    The model sometimes keeps writing additional conversation turns after its
    answer. Everything from the first line that starts a new "Bot:" or
    "🔹 Bot:" turn onwards is dropped, and the remaining text is stripped of
    surrounding whitespace.

    Args:
        response_text: raw text produced by the model.
    Returns:
        The text preceding the first marker, stripped. If no marker is
        present the input is returned unchanged.
    """
    # Markers that introduce an extra Bot turn (always at the start of a line).
    strip_markers = ("\nBot:", "\n🔹 Bot:")

    # Position of each marker that actually occurs in the text.
    found_positions = [
        position
        for position in (response_text.find(marker) for marker in strip_markers)
        if position != -1
    ]
    if not found_positions:
        return response_text

    # Cut exactly at the earliest marker. The previous slice extended a fixed
    # number of characters past it, leaving a dangling "Bot:" fragment at the
    # end of the reply.
    return response_text[:min(found_positions)].strip()


# 🛠 Create separate Chroma collections per key
# NOTE: this name is defined again further down in the same cell; the later
# definition rebinds it, so that one is what the query handler ends up calling.
def get_chroma_collection(key):
    """Open the Chroma store persisted under ``./chroma_db/<key>``."""
    persist_dir = f"./chroma_db/{key}"
    store = Chroma(persist_directory=persist_dir, embedding_function=embeddings)
    return store

# Function to fetch data from MongoDB
def fetch_from_mongodb(key):
    """Fetch the stored text for ``key`` from MongoDB.

    Looks up a single document whose ``key`` field equals ``key`` in the
    module-level ``collection``.

    Args:
        key: identifier of the document to look up.
    Returns:
        The document's ``text_data`` value, or None when no document matches
        or the matching document has no ``text_data`` field.
    """
    data = collection.find_one({"key": key})
    if not data:
        return None
    # .get() so that a document without the field reads as "no data" instead
    # of raising KeyError inside the request handler.
    return data.get("text_data")


def setup_chromadb_for_website(website_name, documents, embeddings):
    """
    Initialize and persist ChromaDB for a specific website.

    Any existing store for the website is deleted first, so the returned
    store contains only ``documents``.

    Args:
        website_name: name identifying the website; spaces are replaced by
            underscores to form the directory name under ``chroma_db/``.
        documents: documents to index.
        embeddings: embedding function used to vectorise the documents.
    Returns:
        The Chroma vector store built from ``documents``.
    """
    # Imported locally: shutil is not imported anywhere else in the notebook,
    # so the rmtree call below used to fail with NameError whenever the
    # directory already existed.
    import shutil

    directory = f"chroma_db/{website_name.replace(' ', '_')}"
    if os.path.exists(directory):
        # Drop the stale index so old and new documents are never mixed.
        shutil.rmtree(directory)
    vectordb = Chroma.from_documents(
        documents=documents,
        embedding=embeddings,
        persist_directory=directory
    )
    return vectordb

def upload_to_chromadb(key, text_content, embeddings):
    """
    Uploads data to ChromaDB after retrieving from MongoDB.

    The text is written to a temporary file, loaded and split into chunks of
    1000 characters with 50 characters of overlap, then indexed through
    ``setup_chromadb_for_website``.

    Args:
        key: identifier of the data set; used for the temp file name and
            passed on as the website name.
        text_content: full text to index.
        embeddings: embedding function used to vectorise the chunks.
    Returns:
        The vector store returned by ``setup_chromadb_for_website``.
    """
    print(f"📤 Uploading data to ChromaDB for key: {key}...")

    # NOTE(review): `key` is interpolated into a file path as-is; confirm that
    # callers never pass path separators or other untrusted path fragments.
    temp_file_path = f"./{key}.txt"
    try:
        with open(temp_file_path, "w", encoding="utf-8") as f:
            f.write(text_content)

        # Load and split documents
        loader = TextLoader(temp_file_path, encoding="utf8")
        documents = loader.load()
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50)
        all_splits = text_splitter.split_documents(documents)

        # Setup ChromaDB for the website
        vectordb = setup_chromadb_for_website(key, all_splits, embeddings)
    finally:
        # Cleanup temp file even when loading, splitting or indexing fails,
        # so failed uploads do not leave stray files behind.
        if os.path.exists(temp_file_path):
            os.remove(temp_file_path)

    return vectordb


def get_chroma_collection(key):
    """Returns an existing ChromaDB collection or None if missing.

    Args:
        key: identifier of the data set whose store should be opened.
    Returns:
        A Chroma store backed by the key's persisted directory, or None when
        the directory does not exist or opening it fails (the error is
        printed, not raised).
    """
    try:
        # setup_chromadb_for_website persists stores under the name with
        # spaces replaced by underscores; apply the same normalisation here so
        # that keys containing spaces resolve to the directory that was
        # actually written.
        directory_name = str(key).replace(" ", "_")
        collection_path = f"./chroma_db/{directory_name}"
        if not os.path.exists(collection_path):
            print(f"🚨 ChromaDB collection not found for key: {key}")
            return None  # No collection exists yet

        return Chroma(
            persist_directory=collection_path,
            embedding_function=embeddings  # Ensure embeddings are initialized
        )
    except Exception as e:
        # Deliberately best-effort: callers treat None as "no store available".
        print(f"❌ Error accessing ChromaDB for key {key}: {e}")
        return None


def _answer_from_vectordb(vectordb, query, context_query, history_collection, user_id, key):
    """Retrieve, rerank and answer from one Chroma store, then record the turn.

    Args:
        vectordb: Chroma store to search.
        query: the raw user question (used for reranking and stored in history).
        context_query: full prompt (instructions + history + question) used
            for retrieval and as the question given to the LLM chain.
        history_collection: MongoDB collection holding conversation history.
        user_id: identifier of the user; part of the history document filter.
        key: data-set key; stored as ``session_id`` in the history document.
    Returns:
        The cleaned answer text, or None when no documents were retrieved.
    """
    retriever = vectordb.as_retriever(search_kwargs={"k": 7})
    qa = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        verbose=True
    )

    docs = retriever.get_relevant_documents(context_query)
    print(docs)
    if not docs:
        # Nothing to rerank or answer from; skip the rerank request entirely.
        return None

    # Rerank the retrieved chunks against the raw user question.
    rerank_input = [doc.page_content for doc in docs]
    reranked_docs = co.rerank(model="rerank-english-v2.0", query=query, documents=rerank_input, top_n=10)
    docs = [docs[item.index] for item in reranked_docs.results]
    if not docs:
        return None

    # Answer from the reranked documents. Calling qa.run() here would retrieve
    # again from scratch and silently discard the reranking done above.
    response = qa.combine_documents_chain.run(input_documents=docs, question=context_query)
    print(response)
    response = strip_extra_conversation(response)

    # Store both sides of the turn in a single update so the user message and
    # the bot reply are always appended together.
    history_collection.update_one(
        {"user_id": user_id, "session_id": key},
        {"$push": {"messages": {"$each": [
            {"role": "user", "text": query},
            {"role": "bot", "text": response},
        ]}}},
        upsert=True
    )
    return response


@app.route("/query", methods=["POST"])
def process_query():
    """Handles a user query with conversational context.

    Expects a JSON body with ``query``, ``key`` and ``user_id``. Responds with:
      * 400 and an ``error`` field when the body is not a JSON object or any
        of the three fields is missing/empty;
      * a confirmation plus ``deleted_count`` when ``query`` is
        "resethistory" (case-insensitive), after deleting the user's history;
      * ``{"response": <answer>}`` when an answer could be produced from the
        key's Chroma store, building that store from MongoDB first if needed;
      * ``{"response": "No relevant data found."}`` otherwise.
    """
    # silent=True: a missing or malformed JSON body yields None instead of an
    # exception; anything that is not a JSON object is treated as empty.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}
    query = data.get("query")
    key = data.get("key")
    user_id = data.get("user_id")  # Ensure client sends a unique user ID

    if not query or not key or not user_id:
        return jsonify({"error": "Missing query, key, or user ID"}), 400

    collection2 = db["history"]

    if query.lower() == "resethistory":
        # Delete all conversation history for the given user_id
        result = collection2.delete_many({"user_id": user_id})

        return jsonify({
            "response": f"Conversation history has been reset for user_id: {user_id}.",
            "deleted_count": result.deleted_count
        })

    # 🔹 Retrieve previous conversation context
    conversation = collection2.find_one({"user_id": user_id, "session_id": key})
    history = conversation.get("messages", []) if conversation else []

    # Format conversation history for better context
    formatted_history = "\n".join([f"{msg['role']}: {msg['text']}" for msg in history])
    context_query = f"You are a friendly, professional, and knowledgeable customer support agent at the company retrieved from document. Your goal is to provide accurate, concise, and helpful responses. Answer the user's query based on the available data. Avoid repeating fallback statements unless you are certain the data is unavailable. Remain polite and approachable.\n{formatted_history}\nUser: {query}\nBot:"

    # 🔍 Step 1: Try retrieving from ChromaDB
    vectordb = get_chroma_collection(key)
    if vectordb:
        response = _answer_from_vectordb(vectordb, query, context_query, collection2, user_id, key)
        if response is not None:
            return jsonify({"response": response})

    # 🔄 Step 2: Fallback to MongoDB if ChromaDB has no data
    mongo_data = fetch_from_mongodb(key)
    if mongo_data:
        upload_to_chromadb(key, mongo_data, embeddings)  # Store in ChromaDB
        vectordb = get_chroma_collection(key)

        if vectordb:
            response = _answer_from_vectordb(vectordb, query, context_query, collection2, user_id, key)
            if response is not None:
                return jsonify({"response": response})

    return jsonify({"response": "No relevant data found."})


# Entry point: open an ngrok tunnel to local port 5000, then serve the Flask
# app on that same port. app.run() is called last, after the tunnel URL has
# been printed.
if __name__ == "__main__":
    # 🔹 Start an ngrok tunnel with authentication
    public_url = ngrok.connect(5000)
    print(f"🔗 Ngrok tunnel established: {public_url}")

    # Run the Flask app
    app.run(host="0.0.0.0", port=5000)
