### full data

In [None]:
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from llama_index.core import VectorStoreIndex, Settings, Document
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.postprocessor import SimilarityPostprocessor
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.core.postprocessor import SentenceTransformerRerank
import json
import os
from typing import List

class RAGLegalEvaluator:
    """Evaluate a causal LM on legal multiple-choice questions with retrieval.

    JSON files under ``document_path`` are embedded into a LlamaIndex vector
    index. For every question the top hits are filtered by similarity,
    optionally reranked, concatenated into a context string and placed in the
    prompt that is sent to the Hugging Face model.
    """

    def __init__(self,
                 base_model_path="../model/law_7b_full_data_f16",
                 embedding_model_path="../model/Qwen3-Embedding-0.6B",
                 rerank_model_path="../model/Qwen3-Reranker-0.6B",
                 document_path="../data/data_json_rag"):
        """Store the model/data locations; nothing is loaded here.

        Args:
            base_model_path: Directory of the causal LM that answers questions.
            embedding_model_path: Directory of the embedding model.
            rerank_model_path: Directory of the rerank model.
            document_path: Directory containing the ``*.json`` knowledge files.
        """
        self.base_model_path = base_model_path
        self.embedding_model_path = embedding_model_path
        self.rerank_model_path = rerank_model_path
        self.document_path = document_path

        # Populated by setup_base_model() / setup_rag_system().
        self.tokenizer = None
        self.model = None
        self.query_engine = None

    def setup_base_model(self):
        """Load the tokenizer and causal LM and register them with LlamaIndex.

        Returns:
            The ``HuggingFaceLLM`` wrapper that was assigned to ``Settings.llm``.
        """
        print("Loading base model...")
        self.tokenizer = AutoTokenizer.from_pretrained(self.base_model_path)
        self.model = AutoModelForCausalLM.from_pretrained(
            self.base_model_path,
            torch_dtype="auto",
            device_map="auto",
        )

        # Hand the already-loaded model and tokenizer to the wrapper. Passing
        # only the path names makes HuggingFaceLLM load a second full copy of
        # the checkpoint next to self.model.
        llm = HuggingFaceLLM(
            model=self.model,
            tokenizer=self.tokenizer,
            model_name=self.base_model_path,
            tokenizer_name=self.base_model_path,
            context_window=8192,
            max_new_tokens=512,
            generate_kwargs={
                "temperature": 0.1,
                "do_sample": True,
            },
        )

        Settings.llm = llm
        return llm

    def setup_embedding_model(self):
        """Load the embedding model and register it as ``Settings.embed_model``.

        Returns:
            The ``HuggingFaceEmbedding`` instance.
        """
        print("Loading embedding model...")
        embed_model = HuggingFaceEmbedding(
            model_name=self.embedding_model_path,
            trust_remote_code=True,
            device="cuda" if torch.cuda.is_available() else "cpu",
        )

        Settings.embed_model = embed_model
        return embed_model

    def load_legal_documents(self) -> "List[Document]":
        """Read every ``*.json`` file in ``document_path`` into Documents.

        A file holding a JSON list yields one Document per non-empty item
        (with an ``index`` metadata entry); any other JSON value yields at
        most one Document. Files that fail to load are reported and skipped.

        Returns:
            The list of Documents that were created.
        """
        print(f"Loading documents from {self.document_path}...")
        documents = []

        # os.listdir() order is arbitrary; sort so the corpus order is
        # reproducible from run to run.
        for filename in sorted(os.listdir(self.document_path)):
            if not filename.endswith('.json'):
                continue

            file_path = os.path.join(self.document_path, filename)
            try:
                with open(file_path, 'r', encoding='utf-8') as f:
                    data = json.load(f)

                # Normalise both JSON shapes to (item, extra-metadata) pairs.
                if isinstance(data, list):
                    entries = [(item, {"index": i}) for i, item in enumerate(data)]
                else:
                    entries = [(data, {})]

                for item, extra in entries:
                    text_content = self._extract_text_from_json(item)
                    if not text_content:
                        continue
                    documents.append(
                        Document(
                            text=text_content,
                            metadata={
                                "source": filename,
                                **extra,
                                "file_path": file_path,
                            },
                        )
                    )
            except Exception as e:
                # Best effort: one unreadable file must not abort the load.
                print(f"Error loading {filename}: {e}")
                continue

        print(f"Loaded {len(documents)} documents")
        return documents

    def _extract_text_from_json(self, json_obj) -> str:
        """Turn one parsed JSON value into the text that gets indexed.

        * ``str`` is returned unchanged.
        * ``dict`` returns the first non-empty value among ``instruction``,
          ``question`` and ``answer``; if none is present, all non-blank
          scalar values are rendered as ``"key: value"`` lines.
        * anything else is passed through ``str()``.
        """
        if isinstance(json_obj, str):
            return json_obj

        if isinstance(json_obj, dict):
            # NOTE(review): only the FIRST matching field is returned, so a
            # record with both a question and an answer is indexed by its
            # question alone. Confirm this is intended for the knowledge base.
            for field in ('instruction', 'question', 'answer'):
                if field in json_obj and json_obj[field]:
                    return str(json_obj[field])

            # Fallback: join every non-blank scalar value.
            text_parts = [
                f"{key}: {value}"
                for key, value in json_obj.items()
                if isinstance(value, (str, int, float)) and str(value).strip()
            ]
            return "\n".join(text_parts)

        return str(json_obj)

    def setup_rag_system(self, similarity_top_k=10, rerank_top_n=5):
        """Load the models, build the vector index and create the query engine.

        Args:
            similarity_top_k: Number of nodes fetched by the vector retriever.
            rerank_top_n: Number of nodes kept by the rerank stage.

        Raises:
            ValueError: If no document could be loaded.
        """
        print("Setting up RAG system...")

        # 1. Models. Settings.llm / Settings.embed_model are set as a side effect.
        self.setup_base_model()
        self.setup_embedding_model()

        # 2. Documents.
        documents = self.load_legal_documents()
        if not documents:
            raise ValueError("No documents loaded!")

        # 3. Vector index.
        print("Building vector index...")
        index = VectorStoreIndex.from_documents(documents)

        # 4. Retriever.
        retriever = VectorIndexRetriever(
            index=index,
            similarity_top_k=similarity_top_k,
        )

        # 5. Post-processing: similarity cutoff first, then (if it loads) rerank.
        # NOTE(review): the run log recorded in this notebook reports that
        # `score.weight` of Qwen3ForSequenceClassification was newly
        # initialised when this reranker was loaded, which suggests the rerank
        # scores may come from an untrained head. Verify before relying on it.
        post_processors = [SimilarityPostprocessor(similarity_cutoff=0.7)]
        try:
            post_processors.append(
                SentenceTransformerRerank(
                    model=self.rerank_model_path,
                    top_n=rerank_top_n,
                    device="cuda" if torch.cuda.is_available() else "cpu",
                )
            )
        except Exception as e:
            print(f"Rerank model loading failed, using similarity only: {e}")

        # 6. Query engine.
        self.query_engine = RetrieverQueryEngine(
            retriever=retriever,
            node_postprocessors=post_processors,
        )

        print("RAG system setup complete!")

    def _retrieve_nodes(self, question: str):
        """Return the retrieved, post-processed nodes for ``question``."""
        # Local import keeps this block self-contained.
        from llama_index.core.schema import QueryBundle

        # retrieve() runs the retriever plus node post-processors only.
        # query() would additionally make the LLM synthesise an answer that
        # is never used here.
        return self.query_engine.retrieve(QueryBundle(query_str=question))

    @staticmethod
    def _join_context(nodes, max_context_length):
        """Concatenate node texts up to ``max_context_length`` characters.

        Texts are taken in order. The first text that does not fit is
        truncated to the remaining budget (plus ``"..."``) when more than 100
        characters remain, and nothing after it is considered. The budget
        counts text characters only, not the blank-line separators.
        """
        context_parts = []
        total_length = 0

        for node in nodes:
            text = node.node.text.strip()
            if total_length + len(text) <= max_context_length:
                context_parts.append(text)
                total_length += len(text)
                continue

            remaining = max_context_length - total_length
            if remaining > 100:  # too small a tail is not worth adding
                context_parts.append(text[:remaining] + "...")
            break

        return "\n\n".join(context_parts)

    def get_relevant_context(self, question: str, max_context_length=2000) -> str:
        """Retrieve knowledge-base text relevant to ``question``.

        Args:
            question: The question text used as the retrieval query.
            max_context_length: Character budget for the returned context.

        Returns:
            The retrieved texts separated by blank lines, or ``""`` when
            nothing was retrieved or retrieval failed (the error is printed).
        """
        try:
            nodes = self._retrieve_nodes(question)
            return self._join_context(nodes, max_context_length)
        except Exception as e:
            # Best effort: fall back to answering without context.
            print(f"Error retrieving context: {e}")
            return ""

    def generate_answer_with_rag(self, question: str, options: dict) -> str:
        """Answer one multiple-choice question, using retrieved context if any.

        Args:
            question: The question stem.
            options: Mapping with the keys ``'A'``, ``'B'``, ``'C'`` and ``'D'``.

        Returns:
            The decoded model reply with surrounding whitespace stripped.
        """
        # 1. Retrieve supporting context.
        context = self.get_relevant_context(question)

        # 2. Build the prompt; fall back to the plain prompt without context.
        if context:
            prompt = f"""
根据以下法律知识和背景信息，回答这个法律单项选择题。请仔细阅读相关法律条文，然后选择正确的选项。

相关法律知识：
{context}

问题：{question}
A：{options['A']}
B：{options['B']}
C：{options['C']}
D：{options['D']}

请根据上述法律知识选择正确答案，只需要回答正确选项即可，不需要进行分析,回答格式：B
"""
        else:
            prompt = f"""
这是一个关于法律问题的单项选择题,请根据题目选择正确的选项，只需要回答正确选项即可，不需要进行分析,回答格式：D

问题：{question}
A：{options['A']}
B：{options['B']}
C：{options['C']}
D：{options['D']}
"""

        # 3. Generate with the chat template of the base model.
        messages = [{"role": "user", "content": prompt}]
        text = self.tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )
        model_inputs = self.tokenizer([text], return_tensors="pt").to(self.model.device)

        generated_ids = self.model.generate(
            **model_inputs,
            max_new_tokens=50,
            temperature=0.1,
            do_sample=True,
            pad_token_id=self.tokenizer.eos_token_id,
        )

        # Decode only the newly generated tokens, not the echoed prompt.
        prompt_length = model_inputs.input_ids.shape[1]
        content = self.tokenizer.decode(
            generated_ids[0][prompt_length:],
            skip_special_tokens=True,
        ).strip()

        return content

    def evaluate_with_rag(self, csv_list_mul, path="../data/val_csv_single/"):
        """Run the RAG evaluation over a list of CSV files.

        Each CSV is read with the columns ``input``, ``A``, ``B``, ``C``,
        ``D`` and ``output``.

        Args:
            csv_list_mul: File names of the CSV files to evaluate.
            path: Directory that contains those files.

        Returns:
            ``(accuracy, answers)``: the fraction of exact matches and the
            list of normalised model replies. Accuracy is ``0.0`` when no
            question was evaluated.

        Raises:
            ValueError: If ``setup_rag_system()`` has not been called.
        """
        if not self.query_engine:
            raise ValueError("RAG system not initialized. Call setup_rag_system() first.")

        total_len = 0
        answer = []
        acc = 0

        for name in csv_list_mul:
            csv_name = os.path.join(path, name)
            print(f"Processing {csv_name}...")

            df = pd.read_csv(csv_name)
            total_len += len(df)

            for i in range(len(df)):
                # Positional access: does not depend on the frame's index labels.
                question = df["input"].iloc[i]
                options = {letter: df[letter].iloc[i] for letter in ("A", "B", "C", "D")}

                content = self.generate_answer_with_rag(question, options)

                # Normalise both sides the same way. The reply is compared
                # as a whole; no option letter is extracted from longer text.
                content = content.strip().upper()
                expected = str(df["output"].iloc[i]).strip().upper()

                if content == expected:
                    acc += 1

                answer.append(content)
                print(content)
                # Report progress every 10 questions.
                if (i + 1) % 10 == 0:
                    # Questions seen so far across all files.
                    current_acc = acc / (total_len - len(df) + i + 1)
                    print(f"Progress: {i+1}/{len(df)}, Current Accuracy: {current_acc:.4f}")

            print(f"Completed {name}, Current total accuracy: {acc}/{total_len}")

        # Avoid ZeroDivisionError for an empty file list or empty files.
        percentage = acc / total_len if total_len else 0.0
        print(f"\nFinal Results:")
        print(f"Total Questions: {total_len}")
        print(f"Correct Answers: {acc}")
        print(f"Accuracy: {percentage:.4f} ({percentage*100:.2f}%)")

        return percentage, answer

def main():
    """Build the RAG evaluator for the full-data model and score it.

    Any exception raised during setup or evaluation is printed together with
    its traceback instead of propagating.
    """
    import traceback

    # Single-answer validation sets. The multi-answer sets are named
    # mcq_mult_{cpa,nje,pae,ungee}.csv.
    subsets = ("cpa", "lbk", "nje", "pae", "pfe", "ungee")
    csv_files = [f"mcq_sing_{subset}.csv" for subset in subsets]

    evaluator = RAGLegalEvaluator(
        base_model_path="../model/law_7b_full_data_f16",
        embedding_model_path="../model/Qwen3-Embedding-0.6B",
        rerank_model_path="../model/Qwen3-Reranker-0.6B",
        document_path="../data/data_json_rag",
    )

    try:
        # similarity_top_k: nodes fetched by the first-stage retriever.
        # rerank_top_n: nodes kept after reranking.
        evaluator.setup_rag_system(similarity_top_k=10, rerank_top_n=5)

        final_accuracy, _ = evaluator.evaluate_with_rag(csv_files)

        print(f"\nRAG Enhanced Evaluation Complete!")
        print(f"Final Accuracy: {final_accuracy:.4f}")
    except Exception as err:
        print(f"Error during evaluation: {err}")
        traceback.print_exc()


if __name__ == "__main__":
    main()

In [1]:
## base model mult rag
Final Results:
Total Questions: 946
Correct Answers: 382
Accuracy: 0.4038 (40.38%)

RAG Enhanced Evaluation Complete!
Final Accuracy: 0.4038

## lora model mult rag
Final Results:
Total Questions: 946
Correct Answers: 408
Accuracy: 0.4313 (43.13%)

RAG Enhanced Evaluation Complete!
Final Accuracy: 0.4313


## lora model single rag
Final Results:
Total Questions: 1617
Correct Answers: 1230
Accuracy: 0.7594 (75.94%)

RAG Enhanced Evaluation Complete!
Final Accuracy: 0.7594

## base model single rag
Final Results:
Total Questions: 1617
Correct Answers: 1141
Accuracy: 0.7056 (70.56%)

RAG Enhanced Evaluation Complete!
Final Accuracy: 0.7056

## lora model single
1232

## base model single
1107

## lora model mult
362

## base model mult 
347

SyntaxError: invalid syntax (359672138.py, line 1)

In [9]:
answer

NameError: name 'answer' is not defined

### total model

In [1]:
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from llama_index.core import VectorStoreIndex, Settings, Document
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.postprocessor import SimilarityPostprocessor
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.core.postprocessor import SentenceTransformerRerank
import json
import os
from typing import List

class RAGLegalEvaluator:
    """Evaluate a causal LM on legal multiple-choice questions with retrieval.

    JSON files under ``document_path`` are embedded into a LlamaIndex vector
    index. For every question the top hits are filtered by similarity,
    optionally reranked, concatenated into a context string and placed in the
    prompt that is sent to the Hugging Face model.
    """

    def __init__(self,
                 base_model_path="../model/lora_7b_total_no_comm_data_f16",
                 embedding_model_path="../model/Qwen3-Embedding-0.6B",
                 rerank_model_path="../model/Qwen3-Reranker-0.6B",
                 document_path="../data/data_json_rag"):
        """Store the model/data locations; nothing is loaded here.

        Args:
            base_model_path: Directory of the causal LM that answers questions.
            embedding_model_path: Directory of the embedding model.
            rerank_model_path: Directory of the rerank model.
            document_path: Directory containing the ``*.json`` knowledge files.
        """
        self.base_model_path = base_model_path
        self.embedding_model_path = embedding_model_path
        self.rerank_model_path = rerank_model_path
        self.document_path = document_path

        # Populated by setup_base_model() / setup_rag_system().
        self.tokenizer = None
        self.model = None
        self.query_engine = None

    def setup_base_model(self):
        """Load the tokenizer and causal LM and register them with LlamaIndex.

        Returns:
            The ``HuggingFaceLLM`` wrapper that was assigned to ``Settings.llm``.
        """
        print("Loading base model...")
        self.tokenizer = AutoTokenizer.from_pretrained(self.base_model_path)
        self.model = AutoModelForCausalLM.from_pretrained(
            self.base_model_path,
            torch_dtype="auto",
            device_map="auto",
        )

        # Hand the already-loaded model and tokenizer to the wrapper. Passing
        # only the path names makes HuggingFaceLLM load a second full copy of
        # the checkpoint next to self.model.
        llm = HuggingFaceLLM(
            model=self.model,
            tokenizer=self.tokenizer,
            model_name=self.base_model_path,
            tokenizer_name=self.base_model_path,
            context_window=8192,
            max_new_tokens=512,
            generate_kwargs={
                "temperature": 0.1,
                "do_sample": True,
            },
        )

        Settings.llm = llm
        return llm

    def setup_embedding_model(self):
        """Load the embedding model and register it as ``Settings.embed_model``.

        Returns:
            The ``HuggingFaceEmbedding`` instance.
        """
        print("Loading embedding model...")
        embed_model = HuggingFaceEmbedding(
            model_name=self.embedding_model_path,
            trust_remote_code=True,
            device="cuda" if torch.cuda.is_available() else "cpu",
        )

        Settings.embed_model = embed_model
        return embed_model

    def load_legal_documents(self) -> "List[Document]":
        """Read every ``*.json`` file in ``document_path`` into Documents.

        A file holding a JSON list yields one Document per non-empty item
        (with an ``index`` metadata entry); any other JSON value yields at
        most one Document. Files that fail to load are reported and skipped.

        Returns:
            The list of Documents that were created.
        """
        print(f"Loading documents from {self.document_path}...")
        documents = []

        # os.listdir() order is arbitrary; sort so the corpus order is
        # reproducible from run to run.
        for filename in sorted(os.listdir(self.document_path)):
            if not filename.endswith('.json'):
                continue

            file_path = os.path.join(self.document_path, filename)
            try:
                with open(file_path, 'r', encoding='utf-8') as f:
                    data = json.load(f)

                # Normalise both JSON shapes to (item, extra-metadata) pairs.
                if isinstance(data, list):
                    entries = [(item, {"index": i}) for i, item in enumerate(data)]
                else:
                    entries = [(data, {})]

                for item, extra in entries:
                    text_content = self._extract_text_from_json(item)
                    if not text_content:
                        continue
                    documents.append(
                        Document(
                            text=text_content,
                            metadata={
                                "source": filename,
                                **extra,
                                "file_path": file_path,
                            },
                        )
                    )
            except Exception as e:
                # Best effort: one unreadable file must not abort the load.
                print(f"Error loading {filename}: {e}")
                continue

        print(f"Loaded {len(documents)} documents")
        return documents

    def _extract_text_from_json(self, json_obj) -> str:
        """Turn one parsed JSON value into the text that gets indexed.

        * ``str`` is returned unchanged.
        * ``dict`` returns the first non-empty value among ``instruction``,
          ``question`` and ``answer``; if none is present, all non-blank
          scalar values are rendered as ``"key: value"`` lines.
        * anything else is passed through ``str()``.
        """
        if isinstance(json_obj, str):
            return json_obj

        if isinstance(json_obj, dict):
            # NOTE(review): only the FIRST matching field is returned, so a
            # record with both a question and an answer is indexed by its
            # question alone. Confirm this is intended for the knowledge base.
            for field in ('instruction', 'question', 'answer'):
                if field in json_obj and json_obj[field]:
                    return str(json_obj[field])

            # Fallback: join every non-blank scalar value.
            text_parts = [
                f"{key}: {value}"
                for key, value in json_obj.items()
                if isinstance(value, (str, int, float)) and str(value).strip()
            ]
            return "\n".join(text_parts)

        return str(json_obj)

    def setup_rag_system(self, similarity_top_k=10, rerank_top_n=5):
        """Load the models, build the vector index and create the query engine.

        Args:
            similarity_top_k: Number of nodes fetched by the vector retriever.
            rerank_top_n: Number of nodes kept by the rerank stage.

        Raises:
            ValueError: If no document could be loaded.
        """
        print("Setting up RAG system...")

        # 1. Models. Settings.llm / Settings.embed_model are set as a side effect.
        self.setup_base_model()
        self.setup_embedding_model()

        # 2. Documents.
        documents = self.load_legal_documents()
        if not documents:
            raise ValueError("No documents loaded!")

        # 3. Vector index.
        print("Building vector index...")
        index = VectorStoreIndex.from_documents(documents)

        # 4. Retriever.
        retriever = VectorIndexRetriever(
            index=index,
            similarity_top_k=similarity_top_k,
        )

        # 5. Post-processing: similarity cutoff first, then (if it loads) rerank.
        # NOTE(review): the run log recorded in this notebook reports that
        # `score.weight` of Qwen3ForSequenceClassification was newly
        # initialised when this reranker was loaded, which suggests the rerank
        # scores may come from an untrained head. Verify before relying on it.
        post_processors = [SimilarityPostprocessor(similarity_cutoff=0.7)]
        try:
            post_processors.append(
                SentenceTransformerRerank(
                    model=self.rerank_model_path,
                    top_n=rerank_top_n,
                    device="cuda" if torch.cuda.is_available() else "cpu",
                )
            )
        except Exception as e:
            print(f"Rerank model loading failed, using similarity only: {e}")

        # 6. Query engine.
        self.query_engine = RetrieverQueryEngine(
            retriever=retriever,
            node_postprocessors=post_processors,
        )

        print("RAG system setup complete!")

    def _retrieve_nodes(self, question: str):
        """Return the retrieved, post-processed nodes for ``question``."""
        # Local import keeps this block self-contained.
        from llama_index.core.schema import QueryBundle

        # retrieve() runs the retriever plus node post-processors only.
        # query() would additionally make the LLM synthesise an answer that
        # is never used here.
        return self.query_engine.retrieve(QueryBundle(query_str=question))

    @staticmethod
    def _join_context(nodes, max_context_length):
        """Concatenate node texts up to ``max_context_length`` characters.

        Texts are taken in order. The first text that does not fit is
        truncated to the remaining budget (plus ``"..."``) when more than 100
        characters remain, and nothing after it is considered. The budget
        counts text characters only, not the blank-line separators.
        """
        context_parts = []
        total_length = 0

        for node in nodes:
            text = node.node.text.strip()
            if total_length + len(text) <= max_context_length:
                context_parts.append(text)
                total_length += len(text)
                continue

            remaining = max_context_length - total_length
            if remaining > 100:  # too small a tail is not worth adding
                context_parts.append(text[:remaining] + "...")
            break

        return "\n\n".join(context_parts)

    def get_relevant_context(self, question: str, max_context_length=2000) -> str:
        """Retrieve knowledge-base text relevant to ``question``.

        Args:
            question: The question text used as the retrieval query.
            max_context_length: Character budget for the returned context.

        Returns:
            The retrieved texts separated by blank lines, or ``""`` when
            nothing was retrieved or retrieval failed (the error is printed).
        """
        try:
            nodes = self._retrieve_nodes(question)
            return self._join_context(nodes, max_context_length)
        except Exception as e:
            # Best effort: fall back to answering without context.
            print(f"Error retrieving context: {e}")
            return ""

    def generate_answer_with_rag(self, question: str, options: dict) -> str:
        """Answer one multiple-choice question, using retrieved context if any.

        Args:
            question: The question stem.
            options: Mapping with the keys ``'A'``, ``'B'``, ``'C'`` and ``'D'``.

        Returns:
            The decoded model reply with surrounding whitespace stripped.
        """
        # 1. Retrieve supporting context.
        context = self.get_relevant_context(question)

        # 2. Build the prompt; fall back to the plain prompt without context.
        if context:
            prompt = f"""
根据以下法律知识和背景信息，回答这个法律单项选择题。请仔细阅读相关法律条文，然后选择正确的选项。

相关法律知识：
{context}

问题：{question}
A：{options['A']}
B：{options['B']}
C：{options['C']}
D：{options['D']}

请根据上述法律知识选择正确答案，只需要回答正确选项即可，不需要进行分析,回答格式：C
"""
        else:
            prompt = f"""
这是一个关于法律问题的单项选择题,请根据题目选择正确的选项，只需要回答正确选项即可，不需要进行分析,回答格式：B

问题：{question}
A：{options['A']}
B：{options['B']}
C：{options['C']}
D：{options['D']}
"""

        # 3. Generate with the chat template of the base model.
        messages = [{"role": "user", "content": prompt}]
        text = self.tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )
        model_inputs = self.tokenizer([text], return_tensors="pt").to(self.model.device)

        generated_ids = self.model.generate(
            **model_inputs,
            max_new_tokens=50,
            temperature=0.1,
            do_sample=True,
            pad_token_id=self.tokenizer.eos_token_id,
        )

        # Decode only the newly generated tokens, not the echoed prompt.
        prompt_length = model_inputs.input_ids.shape[1]
        content = self.tokenizer.decode(
            generated_ids[0][prompt_length:],
            skip_special_tokens=True,
        ).strip()

        return content

    def evaluate_with_rag(self, csv_list_mul, path="../data/val_csv_single/"):
        """Run the RAG evaluation over a list of CSV files.

        Each CSV is read with the columns ``input``, ``A``, ``B``, ``C``,
        ``D`` and ``output``.

        Args:
            csv_list_mul: File names of the CSV files to evaluate.
            path: Directory that contains those files.

        Returns:
            ``(accuracy, answers)``: the fraction of exact matches and the
            list of normalised model replies. Accuracy is ``0.0`` when no
            question was evaluated.

        Raises:
            ValueError: If ``setup_rag_system()`` has not been called.
        """
        if not self.query_engine:
            raise ValueError("RAG system not initialized. Call setup_rag_system() first.")

        total_len = 0
        answer = []
        acc = 0

        for name in csv_list_mul:
            csv_name = os.path.join(path, name)
            print(f"Processing {csv_name}...")

            df = pd.read_csv(csv_name)
            total_len += len(df)

            for i in range(len(df)):
                # Positional access: does not depend on the frame's index labels.
                question = df["input"].iloc[i]
                options = {letter: df[letter].iloc[i] for letter in ("A", "B", "C", "D")}

                content = self.generate_answer_with_rag(question, options)

                # Normalise both sides the same way. The reply is compared
                # as a whole; no option letter is extracted from longer text.
                content = content.strip().upper()
                expected = str(df["output"].iloc[i]).strip().upper()

                if content == expected:
                    acc += 1

                answer.append(content)
                print(content)
                # Report progress every 10 questions.
                if (i + 1) % 10 == 0:
                    # Questions seen so far across all files.
                    current_acc = acc / (total_len - len(df) + i + 1)
                    print(f"Progress: {i+1}/{len(df)}, Current Accuracy: {current_acc:.4f}")

            print(f"Completed {name}, Current total accuracy: {acc}/{total_len}")

        # Avoid ZeroDivisionError for an empty file list or empty files.
        percentage = acc / total_len if total_len else 0.0
        print(f"\nFinal Results:")
        print(f"Total Questions: {total_len}")
        print(f"Correct Answers: {acc}")
        print(f"Accuracy: {percentage:.4f} ({percentage*100:.2f}%)")

        return percentage, answer

def main():
    """Build the RAG evaluator for the total (no-comm) model and score it.

    Any exception raised during setup or evaluation is printed together with
    its traceback instead of propagating.
    """
    import traceback

    # Single-answer validation sets. The multi-answer sets are named
    # mcq_mult_{cpa,nje,pae,ungee}.csv.
    subsets = ("cpa", "lbk", "nje", "pae", "pfe", "ungee")
    csv_files = [f"mcq_sing_{subset}.csv" for subset in subsets]

    evaluator = RAGLegalEvaluator(
        base_model_path="../model/lora_7b_total_no_comm_data_f16",
        embedding_model_path="../model/Qwen3-Embedding-0.6B",
        rerank_model_path="../model/Qwen3-Reranker-0.6B",
        document_path="../data/data_json_rag",
    )

    try:
        # similarity_top_k: nodes fetched by the first-stage retriever.
        # rerank_top_n: nodes kept after reranking.
        evaluator.setup_rag_system(similarity_top_k=10, rerank_top_n=5)

        final_accuracy, _ = evaluator.evaluate_with_rag(csv_files)

        print(f"\nRAG Enhanced Evaluation Complete!")
        print(f"Final Accuracy: {final_accuracy:.4f}")
    except Exception as err:
        print(f"Error during evaluation: {err}")
        traceback.print_exc()


if __name__ == "__main__":
    main()

[4pdvGPU Msg(59313:140610778736448:libvgpu.c:869)]: Initializing.....
[4pdvGPU Warn(59313:140610778736448:hook.c:475)]: remap handles for device 0
[4pdvGPU Warn(59313:140610778736448:hook.c:475)]: remap handles for device 1
  from .autonotebook import tqdm as notebook_tqdm


Setting up RAG system...
Loading base model...


[4pdvGPU Warn(59313:140610778736448:utils.c:228)]: get default cuda 2 from (null)
[4pdvGPU Msg(59313:140610778736448:libvgpu.c:902)]: Initialized
[4pdvGPU Msg(59313:140610778736448:memory.c:566)]: orig free=24971051008 total=25217466368 limit=25769803776 usage=236978176
[4pdvGPU Msg(59313:140610778736448:memory.c:566)]: orig free=24971051008 total=25217466368 limit=25769803776 usage=236978176
Loading checkpoint shards:   0%|                                        | 0/4 [00:00<?, ?it/s][4pdvGPU Msg(59313:140610778736448:memory.c:566)]: orig free=24971051008 total=25217466368 limit=25769803776 usage=236978176
[4pdvGPU Msg(59313:140610778736448:memory.c:566)]: orig free=24971051008 total=25217466368 limit=25769803776 usage=236978176
Loading checkpoint shards: 100%|████████████████████████████████| 4/4 [00:07<00:00,  1.91s/it]
[4pdvGPU Warn(59472:139973416130368:hook.c:475)]: remap handles for device 0
[4pdvGPU Warn(59472:139973416130368:hook.c:475)]: remap handles for device 1
[4pdvGPU Ms

Loading embedding model...
Loading documents from ../data/data_json_rag...
Loaded 10000 documents
Building vector index...


Some weights of Qwen3ForSequenceClassification were not initialized from the model checkpoint at ../model/Qwen3-Reranker-0.6B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


RAG system setup complete!
Processing ../data/val_csv_single/mcq_sing_cpa.csv...
B
A
B
D
C
D
C
C
C
A
Progress: 10/197, Current Accuracy: 1.0000
B
C
B
C
C
D
B
C
A
B
Progress: 20/197, Current Accuracy: 1.0000
C
C
D
C
A
B
C
A
D
B
Progress: 30/197, Current Accuracy: 1.0000
A
C
B
B
B
C
C
D
A
D
Progress: 40/197, Current Accuracy: 1.0000
A
A
B
D
D
B
B
C
C
B
Progress: 50/197, Current Accuracy: 1.0000
C
B
B
B
B
C
D
A
A
C
Progress: 60/197, Current Accuracy: 0.9833
D
D
B
B
D
D
C
A
D
A
Progress: 70/197, Current Accuracy: 0.9714
B
A
B
A
C
B
A
B
D
C
Progress: 80/197, Current Accuracy: 0.9375
D
B
C
A
D
B
D
C
B
B
Progress: 90/197, Current Accuracy: 0.9222
B
A
D
B
C
A
C
D
A
B
Progress: 100/197, Current Accuracy: 0.9100
B
A
B
C
B
C
D
D
B
C
Progress: 110/197, Current Accuracy: 0.9091
A
B
A
C
B
B
C
D
D
A
Progress: 120/197, Current Accuracy: 0.9167
D
D
C
D
B
B
A
B
B
A
Progress: 130/197, Current Accuracy: 0.9077
B
A
D
C
D
C
B
C
C
D
Progress: 140/197, Current Accuracy: 0.9143
A
C
B
A
C
A
D
C
A
B
Progress: 15

In [None]:
## total model rag single
Progress: 320/320, Current Accuracy: 0.7551
Completed mcq_sing_ungee.csv, Current total accuracy: 1221/1617

Final Results:
Total Questions: 1617
Correct Answers: 1221
Accuracy: 0.7551 (75.51%)

RAG Enhanced Evaluation Complete!
Final Accuracy: 0.7551


## total model single
1196

## total model mult
238

In [None]:
## no_commen rag mult
Final Results:
Total Questions: 946
Correct Answers: 394
Accuracy: 0.4165 (41.65%)

RAG Enhanced Evaluation Complete!
Final Accuracy: 0.4165

## no_commen rag sing
Progress: 320/320, Current Accuracy: 0.7495
Completed mcq_sing_ungee.csv, Current total accuracy: 1212/1617

Final Results:
Total Questions: 1617
Correct Answers: 1212
Accuracy: 0.7495 (74.95%)

RAG Enhanced Evaluation Complete!
Final Accuracy: 0.7495

## no_commen single
1187

## no_commen mult
336