In [2]:
!pip install datasets
import torch
from datasets import load_dataset
from transformers import LlavaForConditionalGeneration, AutoProcessor
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from PIL import Image
import pandas as pd
import networkx as nx
from tqdm import tqdm
import re
from google.colab import drive
# Mount Google Drive so that files under /content/drive are reachable from
# this Colab runtime.
drive.mount('/content/drive')

# Base directory on the mounted Drive (a folder named MATH-V-main).
IMG_PATH = "/content/drive/MyDrive/MATH-V-main"

class EnhancedMathVisionEvaluator:
    """Evaluate LLaVA on MathVision with zero-shot, RAG and graph-RAG prompts.

    A "knowledge" split supplies worked examples. They are indexed twice:
    in a FAISS vector store (``rag``) and in a networkx graph whose edges
    link problems of the same subject and a difficulty within one level
    (``graph_rag``). The evaluation split is then answered once per method.

    Attributes:
        dataset: The ``DatasetDict`` returned by ``load_dataset``.
        eval_split: Name of the split that ``evaluate`` iterates over.
        knowledge_examples: Plain dict records used to build both indexes.
        vector_store: FAISS index over the knowledge documents.
        problem_graph: ``networkx.Graph`` with one node per knowledge record.
        results: Per-method list of prediction records.
    """

    # Marker the prompt asks the model to emit right before its final answer.
    ANSWER_MARKER = "<Answer:>"

    # Columns kept when materialising the knowledge split; anything else
    # (e.g. decoded images) is dropped so iteration stays cheap.
    _KNOWLEDGE_COLUMNS = (
        'id', 'question', 'answer', 'subject', 'difficulty', 'level',
    )

    def __init__(self, dataset_name="MathLLMs/MathVision",
                 knowledge_split=None, eval_split="testmini"):
        """
        Initialize the evaluator with RAG and graphRAG capabilities

        Args:
            dataset_name: Hugging Face Hub identifier of the dataset.
            knowledge_split: Split used to build the retrieval indexes.
                ``None`` picks ``'train'`` when it exists, otherwise the
                first split that is not ``eval_split``.
            eval_split: Split that ``evaluate`` runs the model on.

        Raises:
            ValueError: If no usable knowledge split or examples are found.
        """
        self.dataset = load_dataset(dataset_name)
        self.eval_split = eval_split
        # Resolve the knowledge records before the expensive model load so a
        # misconfigured split fails fast.
        self.knowledge_examples = self._load_knowledge_examples(knowledge_split)

        self.model_id = "llava-hf/llava-1.5-7b-hf"
        self.model = LlavaForConditionalGeneration.from_pretrained(
            self.model_id,
            torch_dtype=torch.float16,
            device_map='auto'
        )
        self.processor = AutoProcessor.from_pretrained(self.model_id)

        # Initialize RAG components
        self.embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )

        # Create knowledge base from the knowledge records
        self._initialize_knowledge_base()

        # Create problem graph
        self._initialize_problem_graph()

        self.results = {'zero_shot': [], 'rag': [], 'graph_rag': []}

    @staticmethod
    def _difficulty_of(example):
        """Return the difficulty of *example*.

        The MathVision release loaded by this notebook exposes the value as
        ``level``; a ``difficulty`` key is honoured first when present.

        Raises:
            KeyError: If the record has neither key.
        """
        if 'difficulty' in example:
            return example['difficulty']
        return example['level']

    def _load_knowledge_examples(self, knowledge_split=None):
        """Return the records used as retrieval context, as plain dicts.

        Records that also occur in the evaluation split are left out so the
        retrieved context can never contain the answer to the question being
        evaluated. Matching uses ``id`` when both splits have it and falls
        back to the question text otherwise.
        NOTE(review): assumes ids are shared between splits -- confirm for
        the dataset in use.
        """
        if knowledge_split is None:
            if 'train' in self.dataset:
                knowledge_split = 'train'
            else:
                candidates = [
                    name for name in self.dataset if name != self.eval_split
                ]
                if not candidates:
                    raise ValueError(
                        "Dataset has no split other than "
                        f"{self.eval_split!r} to build the knowledge base from"
                    )
                knowledge_split = candidates[0]

        split = self.dataset[knowledge_split]
        eval_data = self.dataset[self.eval_split]

        unused = [
            column for column in split.column_names
            if column not in self._KNOWLEDGE_COLUMNS
        ]
        if unused:
            split = split.remove_columns(unused)

        shares_id = (
            'id' in split.column_names and 'id' in eval_data.column_names
        )
        key = 'id' if shares_id else 'question'
        # When the caller explicitly indexes the evaluation split itself,
        # holding out its rows would leave nothing to index.
        if knowledge_split != self.eval_split:
            held_out = set(eval_data[key])
        else:
            held_out = set()

        examples = [dict(row) for row in split if row[key] not in held_out]
        if not examples:
            raise ValueError(
                f"Split {knowledge_split!r} yields no knowledge examples"
            )
        return examples

    def _initialize_knowledge_base(self):
        """
        Initialize RAG knowledge base from the knowledge records

        Each record becomes one text document (question, answer, subject,
        difficulty); documents are chunked and embedded into ``vector_store``.
        """
        documents = [
            f"Question: {example['question']}\nAnswer: {example['answer']}\n"
            f"Subject: {example['subject']}\n"
            f"Difficulty: {self._difficulty_of(example)}"
            for example in self.knowledge_examples
        ]

        # Split documents into chunks
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=500,
            chunk_overlap=50
        )
        texts = text_splitter.create_documents(documents)

        # Create vector store
        self.vector_store = FAISS.from_documents(texts, self.embeddings)

    def _initialize_problem_graph(self):
        """
        Create a graph representation of problems and their relationships

        Nodes are indexed by position in ``knowledge_examples``. Two nodes
        are connected when they share a subject and their difficulties
        differ by at most one.
        """
        self.problem_graph = nx.Graph()

        # Edges only ever join problems of the same subject, so bucket the
        # nodes by subject and compare pairs within a bucket instead of
        # across the whole split.
        by_subject = {}
        for index, example in enumerate(self.knowledge_examples):
            difficulty = self._difficulty_of(example)
            self.problem_graph.add_node(index,
                                        subject=example['subject'],
                                        difficulty=difficulty,
                                        question=example['question'],
                                        answer=example['answer'])
            by_subject.setdefault(example['subject'], []).append(
                (index, difficulty)
            )

        # Pairs are visited in ascending index order, which keeps each node's
        # neighbour order deterministic for the ``neighbors[:2]`` lookup.
        for members in by_subject.values():
            for position, (i, difficulty_i) in enumerate(members):
                for j, difficulty_j in members[position + 1:]:
                    if abs(difficulty_i - difficulty_j) <= 1:
                        self.problem_graph.add_edge(i, j, weight=1)

    def _get_relevant_context(self, question, subject, difficulty, method='rag'):
        """
        Get relevant context using either RAG or graphRAG

        Args:
            question: Question text of the problem being solved.
            subject: Subject label of that problem.
            difficulty: Difficulty level of that problem.
            method: ``'rag'`` for vector search; any other value selects the
                graph lookup.

        Returns:
            Retrieved examples joined by blank lines (possibly empty).
        """
        if method == 'rag':
            # Use traditional RAG to find similar problems
            similar_docs = self.vector_store.similarity_search(
                f"Question: {question} Subject: {subject}", k=3
            )
            return "\n\n".join(doc.page_content for doc in similar_docs)

        # graph_rag: take the first two nodes with a matching subject and a
        # difficulty within one level as seeds ...
        seeds = []
        for node, node_data in self.problem_graph.nodes(data=True):
            if (node_data['subject'] == subject and
                    abs(node_data['difficulty'] - difficulty) <= 1):
                seeds.append(node)
                if len(seeds) == 2:
                    break

        # ... and use up to two graph neighbours of each seed as context.
        context_problems = []
        for seed in seeds:
            neighbors = list(self.problem_graph.neighbors(seed))
            context_problems.extend(
                self.problem_graph.nodes[n] for n in neighbors[:2]
            )

        return "\n\n".join(
            f"Question: {p['question']}\nAnswer: {p['answer']}"
            for p in context_problems
        )

    def _prepare_input(self, example, method='zero_shot'):
        """
        Prepare input with optional RAG/graphRAG context

        Args:
            example: Dataset record of the problem to solve.
            method: ``'zero_shot'``, ``'rag'`` or ``'graph_rag'``.

        Returns:
            Processor output moved to the model device, ready for
            ``model.generate(**inputs)``.
        """
        base_prompt = f"Solve the following math problem step by step, given the image attached. Write the final answer after <Answer:> \n{example['question']}"

        if method in ['rag', 'graph_rag']:
            context = self._get_relevant_context(
                example['question'],
                example['subject'],
                self._difficulty_of(example),
                method=method
            )
            base_prompt = f"Here are some similar problems and their solutions:\n{context}\n\nNow solve this problem:\n{base_prompt}"

        has_image = bool(example.get('image'))

        # Only advertise an image in the chat template when one is actually
        # passed to the processor, so placeholders and images stay in sync.
        content = [{"type": "text", "text": base_prompt}]
        if has_image:
            content.insert(0, {"type": "image"})
        conversation = [{"role": "user", "content": content}]
        prompt = self.processor.apply_chat_template(conversation, add_generation_prompt=True)

        # Process image if available
        if has_image:
            # convert() yields an in-memory copy, so the file handle can be
            # released immediately.
            with Image.open(f"{IMG_PATH}/{example['image']}") as handle:
                images = [handle.convert("RGB")]
        else:
            images = None

        inputs = self.processor(
            images=images,
            text=[prompt],
            padding=True,
            return_tensors="pt"
        ).to(self.model.device, torch.float16)

        return inputs

    def evaluate(self, methods=('zero_shot', 'rag', 'graph_rag')):
        """
        Evaluate using specified methods

        Args:
            methods: Iterable of method names to run, in order.

        Returns:
            ``self.results``: a dict mapping each method to a list of records
            with question, ground truth, extracted prediction and method.
        """
        for method in methods:
            model_results = []
            for i, example in enumerate(tqdm(self.dataset[self.eval_split],
                                             desc=f"Evaluating Llava - {method}")):
                inputs = self._prepare_input(example, method=method)

                generate_ids = self.model.generate(
                    **inputs,
                    max_new_tokens=200,
                )

                # generate() returns prompt + completion. Drop the prompt
                # tokens, otherwise the prompt's own "<Answer:>" instruction
                # is mistaken for the model's answer.
                prompt_length = inputs['input_ids'].shape[1]
                generated_text = self.processor.batch_decode(
                    generate_ids[:, prompt_length:],
                    skip_special_tokens=True
                )
                extracted_solution = self._extract_solution(generated_text)

                model_results.append({
                    'question': example['question'],
                    'ground_truth': example['answer'],
                    'model_prediction': extracted_solution,
                    'method': method
                })

                # Periodic checkpoint so a crash does not lose all progress.
                if i % 50 == 0:
                    pd.DataFrame(model_results).to_csv(f'{IMG_PATH}/{method}_results.csv')

            self.results[method] = model_results

        return self.results

    def _extract_solution(self, generated_text):
        """Extract solution from generated text

        Args:
            generated_text: List of decoded strings; only the first is used.

        Returns:
            The stripped text after the last ``<Answer:>`` marker, the whole
            string when no marker is present, or ``""`` for an empty list.
        """
        if not generated_text:
            return ""
        text = generated_text[0]
        # Use the last marker: earlier ones may be echoes of the instruction.
        _, marker, tail = text.rpartition(self.ANSWER_MARKER)
        if marker:
            return tail.strip()
        return text

# Usage: build the evaluator (loads dataset, model and retrieval indexes) and
# run all three prompting strategies.
evaluator = EnhancedMathVisionEvaluator()
results = evaluator.evaluate(['zero_shot', 'rag', 'graph_rag'])

# Save results: one CSV per method, next to the data on the mounted Drive.
# The directory constant is IMG_PATH; the previous IMAGE_PATH was never
# defined and raised NameError after the whole evaluation had run.
for method, records in results.items():
    pd.DataFrame(records).to_csv(f'{IMG_PATH}/{method}_results.csv')

Collecting datasets
  Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.2.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m26.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m10.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.9.0-py3-none-any.whl 

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/5.93k [00:00<?, ?B/s]

(…)-00000-of-00001-3532b8d3f1b4047a.parquet:   0%|          | 0.00/57.0M [00:00<?, ?B/s]

(…)-00000-of-00001-f8ff70fcb2f29b1d.parquet:   0%|          | 0.00/6.99M [00:00<?, ?B/s]

Generating test split:   0%|          | 0/3040 [00:00<?, ? examples/s]

Generating testmini split:   0%|          | 0/304 [00:00<?, ? examples/s]

Dataset({
    features: ['id', 'question', 'options', 'image', 'decoded_image', 'answer', 'solution', 'level', 'subject'],
    num_rows: 3040
})


OSError: llava-v1.5-7b is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models'
If this is a private repository, make sure to pass a token having permission to this repo either by logging in with `huggingface-cli login` or by passing `token=<your_token>`