## Necessary Imports

In [None]:
import argparse
import io
import os
import random
from io import BytesIO
from typing import Any, Dict, List, Optional, Union

from datasets import load_dataset
from google import genai
from google.genai import types
from PIL import Image

# NOTE: keep this after `from google import genai` -- both statements bind the
# name `genai`, and the code below uses the google.generativeai API
# (`genai.configure`, `genai.GenerativeModel`).
import google.generativeai as genai


## Load the dataset

In [None]:
class DatasetLoader:
    """Load a Hugging Face dataset (MathVista by default) and sample examples from it."""

    def __init__(self, dataset_name: str = "AI4Math/MathVista"):
        # Hub identifier handed to `load_dataset`.
        self.dataset_name = dataset_name

    def load_mathvista_dataset(self) -> List[Dict[str, Any]]:
        """Load the ``testmini`` split of ``self.dataset_name`` as a list of dicts.

        Returns:
            One plain ``dict`` per record of the ``testmini`` split.

        Raises:
            Whatever ``load_dataset`` raises when the dataset cannot be loaded,
            and ``KeyError`` if it has no ``testmini`` split. There is no
            fallback data.
        """
        # The configured name lives on the instance; the bare name
        # `dataset_name` is not defined in this scope and raised NameError.
        dataset = load_dataset(self.dataset_name)["testmini"]
        return [dict(item) for item in dataset]

    def select_examples(self, dataset: List[Dict[str, Any]], num_examples: int = 3) -> List[Dict[str, Any]]:
        """Randomly pick ``num_examples`` distinct records from ``dataset``.

        Args:
            dataset: Records to sample from.
            num_examples: Number of records wanted.

        Returns:
            ``dataset`` itself (the same list object, original order) when it
            holds ``num_examples`` records or fewer; otherwise a new list of
            ``num_examples`` records drawn without replacement.

        Raises:
            ValueError: From ``random.sample`` when ``num_examples`` is negative
                and ``dataset`` is non-empty.
        """
        return dataset if len(dataset) <= num_examples else random.sample(dataset, num_examples)

In [None]:
################### Example Usage of DatasetLoader ####################

# dataset_loader = DatasetLoader()
# mathvista_data = dataset_loader.load_mathvista_dataset()
# few_shot_examples = dataset_loader.select_examples(mathvista_data, num_examples=3)

########################################################################

## Load the LLM

In [None]:
class LLM:
    """Wrapper around a Gemini ``GenerativeModel`` for text-only and image prompts.

    The ``generate_*`` methods never raise: any exception raised while building
    the request or reading the reply is caught and returned as the string
    ``"Error generating response: <message>"``, so callers must inspect the
    returned text.
    """

    def __init__(self, model_name: str = "gemini-2.0-flash-exp", temperature: float = 0.0, system_prompt: str = ""):
        """Configure the API client and build the model instance.

        Args:
            model_name: Model identifier passed to ``genai.GenerativeModel``.
            temperature: Sampling temperature placed in the generation config.
            system_prompt: System instruction; an empty string means none.

        Raises:
            ValueError: If the ``GOOGLE_GENAI_API_KEY`` environment variable is
                unset or empty.
        """
        self.model_name = model_name
        self.temperature = temperature
        self.system_prompt = system_prompt

        # Configure the API using the key from the environment variable.
        api_key = os.getenv("GOOGLE_GENAI_API_KEY")
        if not api_key:
            raise ValueError("GOOGLE_GENAI_API_KEY environment variable not set.")
        genai.configure(api_key=api_key)

        self.model = genai.GenerativeModel(
            model_name=self.model_name,
            generation_config={"temperature": self.temperature},
            # An empty system prompt is passed as None rather than "".
            system_instruction=self.system_prompt or None,
        )

    @staticmethod
    def _example_text(example: Dict[str, Union[str, bytes]]) -> str:
        """Return the ``Input:`` / ``Output:`` lines for the text keys present in ``example``.

        Keys that are absent are skipped; the result is ``""`` when the example
        has neither an ``input`` nor an ``output`` key.
        """
        lines = []
        if "input" in example:
            lines.append(f"Input: {example['input']}")
        if "output" in example:
            lines.append(f"Output: {example['output']}")
        return "\n".join(lines)

    def _format_few_shot_examples(self, few_shot_examples: List[Dict[str, Union[str, bytes]]], include_images: bool = True) -> str:
        """Format few-shot examples into a single text block for the prompt.

        Args:
            few_shot_examples: Example dicts with optional ``input``, ``output``
                and ``image`` keys.
            include_images: When true, an example carrying an ``image`` key gets
                an ``Input: [Image provided]`` placeholder line.

        Returns:
            The formatted block, or ``""`` when there are no examples.
        """
        if not few_shot_examples:
            return ""

        formatted_examples = ["Here are some examples:"]

        for i, example in enumerate(few_shot_examples, 1):
            formatted_examples.append(f"\nExample {i}:")
            if include_images and "image" in example:
                formatted_examples.append("Input: [Image provided]")
            if "input" in example:
                formatted_examples.append(f"Input: {example['input']}")
            if "output" in example:
                formatted_examples.append(f"Output: {example['output']}")

        formatted_examples.append("\nNow, please respond to the following:")
        return "\n".join(formatted_examples)

    def generate_response_with_image(self, prompt: str, image_bytes: bytes,
                                     few_shot_examples: Optional[List[Dict[str, Union[str, bytes]]]] = None) -> str:
        """Generate a response for a prompt accompanied by an image.

        Args:
            prompt: The text prompt.
            image_bytes: Encoded image data readable by ``PIL.Image.open``.
            few_shot_examples: Optional list of examples. Each example is a dict with:
                - 'input': input text (optional)
                - 'output': expected output text
                - 'image': image bytes (optional)

        Returns:
            The model's reply text, or ``"Error generating response: ..."`` if
            anything fails (including undecodable image bytes).
        """
        try:
            content = []

            # Keep each example's image next to its own text. Previously only
            # the images were forwarded, so the model never saw the example
            # inputs/outputs the images belonged to.
            for index, example in enumerate(few_shot_examples or [], 1):
                content.append(f"Example {index}:")
                if "image" in example:
                    content.append(Image.open(io.BytesIO(example["image"])))
                example_text = self._example_text(example)
                if example_text:
                    content.append(example_text)

            # The actual question and its image go last.
            image = Image.open(io.BytesIO(image_bytes))
            content.extend([prompt, image])

            response = self.model.generate_content(content)
            return response.text

        except Exception as e:
            # Deliberate best-effort: failures are reported in-band as text.
            return f"Error generating response: {str(e)}"

    def generate_response_without_image(self, prompt: str, few_shot_examples: Optional[List[Dict[str, str]]] = None) -> str:
        """Generate a response for a text-only prompt.

        Args:
            prompt: The text prompt.
            few_shot_examples: Optional list of examples. Each example must be a dict with:
                - 'input': input text
                - 'output': expected output text

        Returns:
            The model's reply text, or ``"Error generating response: ..."`` if
            anything fails (including an example missing one of its keys).
        """
        try:
            # Each few-shot example becomes its own content part, followed by the prompt.
            content = []
            if few_shot_examples:
                for example in few_shot_examples:
                    content.append(f"Input: {example['input']}\nOutput: {example['output']}")
            content.append(prompt)

            response = self.model.generate_content(content)
            return response.text

        except Exception as e:
            # Deliberate best-effort: failures are reported in-band as text.
            return f"Error generating response: {str(e)}"

    def generate_response(self, prompt: str, few_shot_examples: Optional[List[Dict[str, Union[str, bytes]]]] = None, image_bytes: Optional[bytes] = None) -> str:
        """Generate a response, using the image path when ``image_bytes`` is given.

        Args:
            prompt: The text prompt.
            few_shot_examples: Optional list of examples. Each example is a dict with:
                - 'input': input text
                - 'output': expected output text
                - 'image': image bytes (optional, only used on the image path)
            image_bytes: Optional image data as bytes. ``None`` or empty bytes
                selects the text-only path.

        Returns:
            The generated response as a string (see the class docstring for the
            error convention).
        """
        if image_bytes:
            return self.generate_response_with_image(prompt, image_bytes, few_shot_examples)
        else:
            return self.generate_response_without_image(prompt, few_shot_examples)


In [None]:
################## Example usage ##################

# # Initialize the LLM
# llm = LLM(system_prompt="You are a helpful assistant")

# # Text-only with few-shot examples
# text_examples = [
#     {"input": "What is 2+2?", "output": "2+2 equals 4"},
#     {"input": "What is 3+3?", "output": "3+3 equals 6"}
# ]
# response = llm.generate_response("What is 5+5?", few_shot_examples=text_examples)

# # Image with few-shot examples
# image_examples = [
#     {"input": "Describe this image", "output": "This is a cat", "image": example_image_bytes},
#     {"output": "This is a dog", "image": another_example_image_bytes}
# ]
# response = llm.generate_response_with_image("What do you see?", image_bytes, few_shot_examples=image_examples)

# # Without examples
# response = llm.generate_response("Hello, how are you?")
# response = llm.generate_response_with_image("What's in this image?", image_bytes)

####################################################

## Idea Generator

In [None]:
class IdeaGenerator:
    """Asks the LLM for new math-problem ideas and question templates derived from an example."""

    def __init__(self, system_prompt: str = ""):
        self.llm = LLM(system_prompt=system_prompt)

    def _ask_with_example(self, request: str, given_example: Dict[str, Any]) -> str:
        """Send ``request`` to the LLM with ``given_example`` as the only few-shot example."""
        return self.llm.generate_response(request, few_shot_examples=[given_example])

    def generate_idea(self, difficulty_level: int, given_example: Dict[str, Any]) -> str:
        """
        Ask for a new math problem idea modelled on ``given_example``.

        ``difficulty_level`` is a relative adjustment that is spelled out to the
        model inside the prompt: positive means harder, negative means easier,
        zero means the same difficulty as the example.

        Returns the LLM's reply text.
        """
        idea_request = f"""
        Given the example.
        Generate a new math problem idea for difficulty level {difficulty_level}.
        positive integer difficulty_level: Increases difficulty level for the new problem.
        negative integer difficulty_level: Decreases difficulty level for the new problem.
        zero difficulty_level: Keeps the same difficulty level for the new problem.
        """
        return self._ask_with_example(idea_request, given_example)

    def generate_question_template(self, idea: str, difficulty_level: int, given_example: Dict[str, Any]) -> str:
        """
        Ask for a question template that realises ``idea``, modelled on ``given_example``.

        ``difficulty_level`` has the same relative meaning as in
        ``generate_idea`` (positive: harder, negative: easier, zero: unchanged).

        Returns the LLM's reply text.
        """
        template_request = f"""
        Given the idea and the example.
        Create a question template for the following idea at difficulty level {difficulty_level}:
        Idea: {idea}
        positive integer difficulty_level: Increases difficulty level for the new problem.
        negative integer difficulty_level: Decreases difficulty level for the new problem.
        zero difficulty_level: Keeps the same difficulty level for the new problem.
        """
        return self._ask_with_example(template_request, given_example)

## Programmatically generating the image using a VLM
### A coding model writes the code that generates the image

In [None]:
class ImageGenerator:
    """Asks the LLM for image-drawing code, then saves and runs that code.

    The generated module must define ``generate_image()``; its return value is
    passed through as the image bytes.
    """

    def __init__(self, system_prompt: str = ""):
        self.llm = LLM(system_prompt=system_prompt)

    @staticmethod
    def _strip_code_fences(text: str) -> str:
        """Return the body of the first Markdown code fence in ``text``.

        Chat models often wrap code in a fenced block, which is not valid
        Python when written to a ``.py`` file. Text without a complete fence
        pair is returned unchanged.
        """
        lines = text.splitlines()
        fence_rows = [row for row, line in enumerate(lines) if line.strip().startswith("```")]
        if len(fence_rows) < 2:
            return text
        opening, closing = fence_rows[0], fence_rows[1]
        return "\n".join(lines[opening + 1:closing]) + "\n"

    def generate_code(self, prompt: str) -> str:
        """Return the LLM's raw reply to ``prompt`` (expected to contain Python code)."""
        llm_response = self.llm.generate_response(prompt)
        return llm_response

    def save_code_to_file(self, code: str, filename: str = "generated_code.py"):
        """Write ``code`` to ``filename``, replacing any existing file."""
        # Python source files are decoded as UTF-8 by default when imported, so
        # write UTF-8 explicitly instead of the platform default encoding.
        with open(filename, "w", encoding="utf-8") as f:
            f.write(code)

    def execute_code(self, filename: str = "generated_code.py") -> bytes:
        """Import ``filename`` as a module and return the result of its ``generate_image()``.

        Raises:
            AttributeError: If the module defines no ``generate_image`` attribute.
            Exception: Anything raised while importing or running the module
                (for example ``SyntaxError`` for invalid code).
        """
        # SECURITY: this runs model-generated code with the full privileges of
        # the current process. Only use it in a sandboxed/trusted environment.
        import importlib.util
        spec = importlib.util.spec_from_file_location("generated_code", filename)
        generated_code = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(generated_code)
        if hasattr(generated_code, "generate_image"):
            image_bytes = generated_code.generate_image()
            return image_bytes
        else:
            raise AttributeError("The generated code does not have a 'generate_image' function.")

    def generate_image_and_save(self, prompt: str) -> bytes:
        """Generate code for ``prompt``, run it, and save the image to ``generated_image.png``.

        Side effects: writes ``generated_code.py`` and ``generated_image.png``
        in the current working directory.

        Returns:
            The image bytes produced by the generated ``generate_image()``.
        """
        # Drop a surrounding Markdown fence so the saved file is importable.
        code = self._strip_code_fences(self.generate_code(prompt))
        self.save_code_to_file(code)
        image_bytes = self.execute_code()
        # Save the image bytes to a file for manual verification.
        with open("generated_image.png", "wb") as img_file:
            img_file.write(image_bytes)
        return image_bytes

In [None]:
########################## Example usage ##########################
# image_gen = ImageGenerator(
#     system_prompt="You are a helpful assistant that generates Python code to create images using matplotlib."
# )
# code_prompt = "Generate Python code to create a plot of y=sin(x) from x=0 to 2π using matplotlib."
# image_bytes = image_gen.generate_image_and_save(code_prompt)
####################################################################

## Verifying the generated Image and Question

In [None]:
class Verifier:
    """Uses the LLM to check a question/image pair against an expected answer."""

    def __init__(self, system_prompt: str = ""):
        self.llm = LLM(system_prompt=system_prompt)

    def verify_question_and_image(self, question: str, image_bytes: bytes, expected_answer: str) -> str:
        """Ask the LLM whether ``question`` plus the image match ``expected_answer``.

        Args:
            question: The question text to verify.
            image_bytes: Image data sent along with the verification prompt.
            expected_answer: The answer the question and image should lead to.

        Returns:
            The LLM's reply text, unparsed.
        """
        verification_request = f"""
        Verify if the question and the image correspond to the expected answer.
        Question: {question}
        Expected Answer: {expected_answer}
        """
        return self.llm.generate_response_with_image(verification_request, image_bytes)

## Save it to the new dataset

## Pipeline execution