In [None]:
#############################################################

# Copyright 2025 North Carolina State University

# Authored by
# Pranshav Gajjar, Abiodun Ganiyu, and Vijay K. Shah
# NextG Wireless Lab, North Carolina State University

############################################################# 

In [None]:
from langchain.embeddings import HuggingFaceEmbeddings

import torch  # imported locally: the file's own `import torch` lives in a later cell

# Run the embedding model on the GPU when PyTorch can see one; otherwise fall
# back to CPU so the notebook still executes (more slowly) on GPU-less machines.
model_kwargs = {'device': 'cuda' if torch.cuda.is_available() else 'cpu'}

# Embedding model used to embed queries against the FAISS index loaded below.
embeddings = HuggingFaceEmbeddings(
    model_name="BAAI/bge-large-en-v1.5",
    model_kwargs=model_kwargs,
)

In [None]:
from langchain.vectorstores import FAISS

# Load the saved vector index from the "FAISS" path, using `embeddings` to
# embed incoming queries.
# NOTE(review): allow_dangerous_deserialization=True explicitly opts in to
# loading serialized index data; only point this at index files you trust.
combined_faiss = FAISS.load_local(
    "FAISS",
    embeddings,
    allow_dangerous_deserialization=True,
)

In [None]:
# Similarity-search retriever over the loaded index, returning 35 hits per query.
# NOTE(review): `retrieverb` is assigned again in a later cell with k=100, so on
# a top-to-bottom run this instance is replaced before the query helper uses it.
retrieverb = combined_faiss.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 35},
)

<h1>Reranker</h1>

In [None]:
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_huggingface.llms import HuggingFaceEndpoint
from langchain.prompts import PromptTemplate
from langchain.chains import create_retrieval_chain

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import os

# Prefer exporting HUGGINGFACEHUB_API_TOKEN in the environment before starting
# the kernel. A value may be pasted here for local experiments, but it must
# never be committed.
HUGGINGFACEHUB_API_TOKEN = ""

if HUGGINGFACEHUB_API_TOKEN:
    # An explicitly provided token takes precedence and is exported for the
    # Hugging Face client libraries to pick up.
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = HUGGINGFACEHUB_API_TOKEN
else:
    # Do not overwrite an already-exported token with the empty placeholder;
    # read whatever the environment provides instead ("" when unset).
    HUGGINGFACEHUB_API_TOKEN = os.environ.get("HUGGINGFACEHUB_API_TOKEN", "")



# Raw text of the answer-generation prompt.
# Placeholders substituted when the template is formatted:
#   {context} - the retrieved document text
#   {input}   - the user's question
# A backslash at the end of a line inside this (non-raw) string suppresses the
# following newline, so those source lines are joined in the rendered prompt.
# NOTE(review): the prompt text contains a few typos ("documets",
# "explaination", "ther informtation"); they are left untouched here because
# editing the string changes what is sent to the model.
_qa_prompt = """
You are an O-RAN assistant for question-answering tasks. \
Use the following pieces of retrieved context to answer the question in as much detail as possible. \
These are the relevant documents from the official O-RAN Specifications:

{context}


The current input from the user is mentioned below:
{input}

It can happen that some documents in the provided context have incomplete information and other documets complete that.
If you see some overlap in some steps combine that to obtain the final response.
Try to generate a detailed response and clearly mention the response in a step by step manner.
The steps should be numbered.
After you have obtained all the steps and generated the response, if you find any other information that is relevant, add a section in the end called other relevant information and have an explaination of generated response and what is happening in the steps. Keep this concise.
In the end also mention the confidence scores regarding the description that you have generated. Only give one final score at the end between 0-100, after the label Confidence Score and no ther informtation
"""


# Template object built from the raw string; formatted later with `input` and
# `context` keyword arguments.
qa_prompt = PromptTemplate.from_template(_qa_prompt)
# Model repository id handed to the Hugging Face endpoint client.
llm_model = 'mistralai/Mistral-7B-Instruct-v0.3'

#llm1=HuggingFaceEndpoint(repo_id=llm_model, max_new_tokens=30000, streaming=False)
# Endpoint client used for answer generation: streaming enabled, at most
# 4000 newly generated tokens per call.
llm2 = HuggingFaceEndpoint(
    repo_id=llm_model,
    max_new_tokens=4000,
    streaming=True,
)



In [None]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Load the BAAI/bge-reranker-v2-m3 checkpoint: a sequence-classification model
# and its matching tokenizer, used later to score (query, passage) pairs.
reranker_model_name = 'BAAI/bge-reranker-v2-m3'
reranker_model = AutoModelForSequenceClassification.from_pretrained(
    reranker_model_name
)
reranker_tokenizer = AutoTokenizer.from_pretrained(
    reranker_model_name
)


In [None]:
# Similarity-search retriever returning 100 candidates per query; these are the
# documents that query_oran_assistant_docs reranks and trims.
retrieverb = combined_faiss.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 100},
)

def query_oran_assistant_docs(prompt: str, top_k: int = 15):
    """
    Retrieve candidate documents for a query, rerank them, and build the LLM prompt.

    The candidates come from the module-level `retrieverb`. Each
    (query, document text) pair is scored with `reranker_model`; documents are
    ordered by descending score, exact-duplicate texts are dropped, and the
    first `top_k` unique documents form the context placed into `qa_prompt`.

    Args:
        prompt (str): The user query.
        top_k (int): Maximum number of unique documents kept as context.
            Defaults to 15, the value that was previously hard-coded.

    Returns:
        tuple: `(final_prompt, context, top_documents, unique_sources, ranked_results)`
            - final_prompt (str): `qa_prompt` formatted with the query and context.
            - context (str): texts of the kept documents joined by a
              "Document:" separator.
            - top_documents (list): the kept documents, best score first.
            - unique_sources (list): de-duplicated `metadata['source']` values of
              the kept documents, in ranking order; documents without a
              `source` entry are skipped.
            - ranked_results (list): `(score, document)` pairs for every retrieved
              candidate (duplicates included), sorted by descending score. Each
              score is the element taken from the reranker logits.

        When the retriever returns nothing, the context is the empty string and
        all three lists are empty.
    """
    # Retrieve candidate documents (`invoke` replaces the deprecated
    # `get_relevant_documents`).
    results = retrieverb.invoke(prompt)

    # Nothing retrieved: skip the reranker entirely, since there is no batch
    # to tokenize, and return a prompt with an empty context.
    if not results:
        context = ""
        final_prompt = qa_prompt.format(input=prompt, context=context)
        return final_prompt, context, [], [], []

    # Pair the query with each candidate text for the reranker
    inputs = [(prompt, result.page_content) for result in results]

    # Tokenize and encode the pairs as one padded batch
    encoded_inputs = reranker_tokenizer.batch_encode_plus(
        inputs, padding=True, truncation=True, return_tensors='pt'
    )

    # Score every pair; gradients are not needed for inference
    with torch.no_grad():
        outputs = reranker_model(**encoded_inputs)
        scores = outputs.logits.squeeze(-1)  # drop the trailing logit dimension

    # Highest score first; the sort is stable, so ties keep retrieval order
    ranked_results = sorted(zip(scores, results), key=lambda pair: pair[0], reverse=True)

    # Keep the best-scoring documents, skipping texts already seen
    top_documents = []
    seen_contents = set()
    for _, doc in ranked_results:
        if len(top_documents) >= top_k:
            break
        if doc.page_content in seen_contents:
            continue
        seen_contents.add(doc.page_content)
        top_documents.append(doc)

    # Prepare context from the kept documents
    context = "\n\nDocument: \n\n".join([doc.page_content for doc in top_documents])

    # Format the final prompt
    final_prompt = qa_prompt.format(input=prompt, context=context)

    # Collect sources of the kept documents only; tolerate missing metadata
    # instead of raising KeyError, and de-duplicate while preserving order.
    top_document_sources = [(doc.metadata or {}).get('source') for doc in top_documents]
    unique_sources = list(
        dict.fromkeys(source for source in top_document_sources if source is not None)
    )

    return final_prompt, context, top_documents, unique_sources, ranked_results

In [None]:
# Example question sent through the retrieve -> rerank -> generate pipeline.
query = "Give the UE Initial Access procedure between gNB-DU, gNB-CU, and AMF."
final_prompt, context, top_documents, unique_sources, ranked_results = query_oran_assistant_docs(query)
# `invoke` is the supported entry point; calling the LLM object directly is deprecated.
print(llm2.invoke(final_prompt))