In [1]:
import os
import gc
import torch
import chromadb
from chromadb.utils import embedding_functions
from sentence_transformers import CrossEncoder

from utils.llmclass import RAGGenerator
from utils.search import Search

# Cleanup
# gc.collect()
# torch.cuda.empty_cache()

# Configuration
# Location of the Chroma store: a folder named after DB_DIR_NAME directly under
# the directory the kernel is currently running in.
DB_DIR_NAME = "arxiv"
DB_PATH = os.path.join(os.getcwd(), DB_DIR_NAME)
# MODEL_ID = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"

In [2]:
# Vector store setup: open the Chroma client on DB_PATH, attach a
# sentence-transformer embedding function to the collection, and build the
# `search` object that later cells call as `search(query)`.
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
COLLECTION_NAME = "Arxiv-Database"

client = chromadb.PersistentClient(path=DB_PATH)
embedding_func = embedding_functions.SentenceTransformerEmbeddingFunction(
    model_name=EMBEDDING_MODEL_NAME
)
collection = client.get_or_create_collection(
    name=COLLECTION_NAME,
    embedding_function=embedding_func,
)
search = Search.get(collection)

In [None]:
# NOTE(review): this entire cell is commented out and has no recorded execution
# count. It sketches loading MODEL_ID (itself commented out in the configuration
# cell) with a 4-bit BitsAndBytesConfig. BitsAndBytesConfig, AutoTokenizer and
# AutoModelForCausalLM are not imported in the visible import cell, so those
# imports would be needed before re-enabling it — confirm whether this cell is
# still wanted or can be removed.
# bnb_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_use_double_quant=True,
#     bnb_4bit_quant_type="nf4",
#     bnb_4bit_compute_dtype=torch.bfloat16
# )

# tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
# model = AutoModelForCausalLM.from_pretrained(
#     MODEL_ID,
#     quantization_config=bnb_config,
#     # device=torch.device("cuda" if torch.cuda.is_available() else "cpu")
# )

In [3]:
# Retrieval smoke test: run one sample question through `search` and print the
# text of every returned chunk.
query = "What is the Assessment Misalignment problem in Large Language Models?"
docs = search(query)

print("--- Retrieved Context ---")
for doc in docs:
    chunk_text = doc['content']
    # NOTE(review): the newline sits before the colon, so the label prints as
    # "Content " with ": <text>" on the following line — confirm this is intended.
    print(f"Content \n: {chunk_text}")

--- Retrieved Context ---
Content 
: the reasoning ability of LLMs by alleviating the assessment misalignment problem caused by VFT.
To address the assessment misalignment problem, in this paper, we propose an alignment fine-tuning
(AFT) paradigm to improve LLM reasoning with three steps: 1) fine-tuning LLMs using COT
training data; 2) generating multiple COT responses for each question using the fine-tuned LLMs,
and categorizing them as positive and negative based on whether they deduce the correct answer;
3) calibrating the scores of positive and negative responses given by LLMs with a novel constraint
alignment (CA) loss. Specifically, the CA loss ensures that all positive scores (the scores of positive
COTs) are larger than negative scores. In addition, the negative scores are protected by a constraint
term, which is proven to be very important in preventing model degradation. Beyond just binary
positive and negative feedback, the CA loss can be seamlessly adapted to ranking situat

In [6]:
# Build the answer generator (RAGGenerator is imported from utils.llmclass) using
# its default constructor arguments; later cells call rag.generate_answer(...).
# NOTE(review): the recorded output shows an openai.OpenAI object repr, presumably
# printed during construction — confirm in utils/llmclass.
rag = RAGGenerator()

<openai.OpenAI object at 0x78122c17aa50>


In [7]:
# Step 1 - retrieval: fetch chunks for the question and prefix each chunk's
# text with its source, joined by "-> ".
query = "How does the 'Constraint' objective in AFT prevent model degradation?"
docs = search(query)
context = [
    "-> ".join((doc['source'], doc['content']))
    for doc in docs
]

# Step 2 - generation: pass the question and the prefixed chunks to the
# generator and show the reply.
print(f"Query: {query}\n")
answer = rag.generate_answer(query, context)
print(f"Response:\n{answer}")

Query: How does the 'Constraint' objective in AFT prevent model degradation?

Response:
The Constraint objective keeps the negative scores confined to a reasonable range via a constraint term in the CA loss, which is proven to be very important in preventing model degradation.
