# Setup

## Make both T4 GPUs visible to CUDA

In [1]:
import os, math, numpy as np
# Expose GPU devices 0 and 1 to CUDA. This runs in the first cell, ahead of
# the later cells that install and import torch / vllm.
os.environ["CUDA_VISIBLE_DEVICES"]="0,1"

## Install vLLM

In [2]:
%%time
# Remove the preinstalled torch so the torch wheel shipped with the offline
# vLLM wheel set can be installed in its place.
!pip uninstall -y torch
# Install vLLM and its dependencies from the local wheel directory only
# (--no-index: PyPI is not consulted).
!pip install -U --no-index --find-links=/kaggle/input/vllm-whl -U vllm
# Install the specific grpcio and ray wheels provided by the vllm-t4-fix dataset.
!pip install -U --upgrade /kaggle/input/vllm-t4-fix/grpcio-1.62.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
!pip install -U --upgrade /kaggle/input/vllm-t4-fix/ray-2.11.0-cp310-cp310-manylinux2014_x86_64.whl

Found existing installation: torch 2.4.0
Uninstalling torch-2.4.0:
  Successfully uninstalled torch-2.4.0
Looking in links: /kaggle/input/vllm-whl
Processing /kaggle/input/vllm-whl/vllm-0.4.0.post1-cp310-cp310-manylinux1_x86_64.whl
Processing /kaggle/input/vllm-whl/cmake-3.29.0.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (from vllm)
Processing /kaggle/input/vllm-whl/torch-2.1.2-cp310-cp310-manylinux1_x86_64.whl (from vllm)
Processing /kaggle/input/vllm-whl/xformers-0.0.23.post1-cp310-cp310-manylinux2014_x86_64.whl (from vllm)
Processing /kaggle/input/vllm-whl/pynvml-11.5.0-py3-none-any.whl (from vllm)
Processing /kaggle/input/vllm-whl/triton-2.1.0-0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (from vllm)
Processing /kaggle/input/vllm-whl/outlines-0.0.34-py3-none-any.whl (from vllm)
Processing /kaggle/input/vllm-whl/tiktoken-0.6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (from vllm)
Processing /kaggle/input/vllm-whl/interegular-

## Some Configuration

In [3]:
# In DEBUG mode, infer only on 5 problems
# NOTE(review): the only cell that reads DEBUG is currently commented out,
# so this flag has no effect in the visible notebook.
DEBUG = False
# Number of candidate solutions to generate
# (passed as `samples` to SCTIRAgent: one TIRAgent per candidate).
K = 20
# Maximum number of LLM turns per TIRAgent (passed as `max_depth`).
DEPTH = 4
# Sampling temperature and top-p passed to vllm.SamplingParams.
TEMPERATURE = 0.5
TOP_P = 0.9
# Upper bound on the number of prompts sent to llm.generate per batch.
BATCH_SIZE = 64

# Imports

In [4]:
import vllm
import re
import csv
import torch
import gc
from tqdm import tqdm
import pandas as pd
from queue import Queue, Empty
import os
import re
import signal
import subprocess
import tempfile
from collections import Counter
from contextlib import contextmanager

import threading
from concurrent.futures import ThreadPoolExecutor, as_completed

2024-10-29 11:25:52,375	INFO util.py:124 -- Outdated packages:
  ipywidgets==7.7.1 found, needs ipywidgets>=8
Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


# Python Code Execution Environment

In [5]:
1

1

## Find Python code blocks within text

In [6]:
def find_python_blocks(text):
    """Extract the code of every ```python fenced block found in ``text``.

    Blocks whose whole content is a bare numeric literal (anything that
    ``int()`` or ``float()`` can parse, e.g. ``3`` or ``2.5``) are dropped:
    they are just a restated number, not code worth executing.

    Args:
        text: Text (typically an LLM response) that may contain fenced blocks.

    Returns:
        list[str]: The stripped code of each remaining block, in order of
        appearance. Empty list when no block is found.
    """
    def _is_bare_number(code):
        # True when the snippet parses as an int or a float literal.
        for cast in (int, float):
            try:
                cast(code)
            except ValueError:
                continue
            return True
        return False

    fenced = re.findall(r"(```python.*?```)", text, re.DOTALL)
    # Strip the opening/closing fence and surrounding whitespace.
    snippets = (block[len("```python"):-len("```")].strip() for block in fenced)
    return [code for code in snippets if not _is_bare_number(code)]

## Class to Execute Python code (adapted from Numina)

In [7]:
class PythonREPL:
    """Run a snippet of Python in a fresh ``python3`` subprocess.

    Calling an instance with source code returns ``(success, text)`` where
    ``text`` is the stripped stdout on success, or a shortened traceback on
    failure. A ``TimeoutError`` is raised when the run exceeds ``timeout``.
    """

    def __init__(self, timeout=5):
        # Wall-clock limit, in seconds, for a single execution.
        self.timeout = timeout

    # handles timeout
    @contextmanager
    def time_limit(self, seconds):
        """Raise ``TimeoutError`` if the wrapped block runs longer than ``seconds``.

        Implemented with ``SIGALRM``; the pending alarm is always cancelled
        on exit.
        NOTE(review): signal handlers can only be installed from the main
        thread on Unix - confirm this is never called from a worker thread.
        """
        def signal_handler(*_):
            raise TimeoutError(f"Timed out after {seconds} seconds.")

        signal.signal(signal.SIGALRM, signal_handler)
        signal.alarm(seconds)
        try:
            yield
        finally:
            signal.alarm(0)

    @staticmethod
    def _filter_traceback(error_msg, temp_file_path):
        """Keep only the informative lines of a traceback from the temp script."""
        msgs = error_msg.split("\n")
        new_msgs = []
        want_next = False
        for m in msgs:
            if "Traceback" in m:
                new_msgs.append(m)
            elif m == msgs[-1]:
                # Final line: the exception type and message.
                new_msgs.append(m)
            elif temp_file_path in m:
                # Frame line pointing at the temp script.
                # NOTE(review): `ed` is always truthy in this branch, so `clr`
                # is everything from `st` onward and the line is cut right
                # after the opening quote. Kept as-is to preserve the
                # messages shown to the model.
                st = m.index('"/') + 1 if '"/' in m else 0
                ed = m.index(temp_file_path) + 1 if temp_file_path in m else None
                clr = m[st:ed] if not ed else m[st:]
                m = m.replace(clr, "")
                new_msgs.append(m)
                want_next = True
            elif want_next:
                # Source line that follows a kept frame line.
                new_msgs.append(m)
                want_next = False
        return "\n".join(new_msgs)

    def __call__(self, query):
        """Execute ``query`` and return ``(success, output_or_error)``.

        The code is prefixed with ``math``/``numpy``/``sympy`` imports. If the
        last line contains no ``print(``, any trailing ``#`` comment is cut
        off and the line is wrapped in ``print(...)``.

        Raises:
            TimeoutError: if execution exceeds ``self.timeout`` seconds,
                whether the alarm or the subprocess timeout fires first.
        """
        query = "import math\nimport numpy as np\nimport sympy as sp\n" + query
        query = query.strip().split("\n")
        if "print(" not in query[-1]:
            if "#" in query[-1]:
                query[-1] = query[-1].split("#")[0]
            query[-1] = "print(" + query[-1] + ")"
        query = "\n".join(query)
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_file_path = os.path.join(temp_dir, "tmp.py")
            with open(temp_file_path, "w", encoding="utf-8") as f:
                f.write(query)
            with self.time_limit(self.timeout):
                try:
                    result = subprocess.run(
                        ["python3", temp_file_path],
                        capture_output=True,
                        check=False,
                        text=True,
                        timeout=self.timeout,
                    )
                except subprocess.TimeoutExpired:
                    # subprocess.TimeoutExpired is not a TimeoutError; convert
                    # it so callers that catch TimeoutError see one exception
                    # type regardless of which timer fired first.
                    raise TimeoutError(
                        f"Timed out after {self.timeout} seconds."
                    ) from None
            if result.returncode == 0:
                return True, result.stdout.strip()
            error_msg = self._filter_traceback(result.stderr.strip(), temp_file_path)
            return False, error_msg.strip()

## Execute a Python code block

In [8]:
def execute(executor, code):
    """Run ``code`` through ``executor`` and return ``(output, success)``.

    Args:
        executor: Callable taking the source string and returning
            ``(success, output)``; it may raise ``TimeoutError``.
        code: Python source to execute.

    Returns:
        tuple[str, bool]: The stripped output (or error/timeout message) and
        whether execution succeeded. Code that mentions a blocked library is
        rejected without being run.
    """
    # Refuse to run code that mentions a blocked library.
    for lib in ("subprocess", "venv"):
        if lib in code:
            return f"{lib} is not allowed", False

    try:
        success, output = executor(code)
    except TimeoutError as e:
        # A timeout is reported to the caller as a failed run.
        success, output = False, str(e)

    return output.strip(), success

## Test by running some python code

In [9]:
# Sample response with three fenced python blocks; the third holds only the
# literal `3`.
text = """Block 1
```python
s = 0
for i in range(100):
    s += i
print(s)
```
Block 2
```python
2**12
```
Block 3
```python
3
```
"""

In [10]:
# Extract the executable blocks from the sample text and display them.
blocks = find_python_blocks(text)
blocks

['s = 0\nfor i in range(100):\n    s += i\nprint(s)', '2**12']

In [11]:
# Run every extracted block with a default PythonREPL (timeout=5) and show
# the (output, success) pair returned for each.
executor = PythonREPL()
outputs = [execute(executor, block) for block in blocks]
outputs

[('4950', True), ('4096', True)]

In [12]:
# for block, output in zip(blocks, outputs):
#     print(f"""
# ```python
# {block}
# ```
# ```output
# {output[0]}
# ```"""
#     )

# Load Model on vLLM

## We use NVIDIA's OpenMath2-Llama3.1-8B model here. You should explore other models.

In [13]:
# Load the model with vLLM and fetch its tokenizer.
# NOTE(review): the recorded output of this cell is an OSError - the model id
# could not be downloaded from huggingface.co and was not in the local cache.
# Pass a local directory containing config.json (or enable internet access)
# before re-running.
llm = vllm.LLM(
    "nvidia/OpenMath2-Llama3.1-8B",
    tensor_parallel_size=2, 
    gpu_memory_utilization=0.95, 
    # NOTE(review): trust_remote_code=True presumably lets code shipped with
    # the model repository run locally - confirm this is needed for this model.
    trust_remote_code=True,
    dtype="half", 
    enforce_eager=True,
    max_model_len=4096,
)
tokenizer = llm.get_tokenizer()

OSError: We couldn't connect to 'https://huggingface.co' to load this file, couldn't find it in the cached files and it looks like nvidia/OpenMath2-Llama3.1-8B is not the path to a directory containing a file named config.json.
Checkout your internet connection or see how to run the library in offline mode at 'https://huggingface.co/docs/transformers/installation#offline-mode'.

# Utilities

## Extract boxed answer

In [None]:
def extract_answer(text):
    r"""Return the content of the right-most boxed answer in ``text``.

    The last ``\boxed{...}`` is used; ``\fbox{...}`` is consulted only when
    the text contains no ``\boxed`` at all. Nested braces inside the box are
    kept intact.

    Args:
        text: Text (typically an LLM response) to search.

    Returns:
        str | None: The text between the outer braces, or ``None`` when no
        box is found, the braces never balance, or the command is not
        immediately followed by ``{``.
    """
    # find right most boxed answer
    def last_boxed_only_string(text):
        idx = text.rfind("\\boxed")
        if idx < 0:
            idx = text.rfind("\\fbox")
            if idx < 0:
                return None
        # Scan forward from the command, tracking brace depth, until the
        # brace that closes the box is reached.
        i = idx
        right_brace_idx = None
        num_left_braces_open = 0
        while i < len(text):
            if text[i] == "{":
                num_left_braces_open += 1
            if text[i] == "}":
                num_left_braces_open -= 1
                if num_left_braces_open == 0:
                    right_brace_idx = i
                    break
            i += 1
        if right_brace_idx is None:
            return None
        return text[idx : right_brace_idx + 1]

    # get content of boxed
    def remove_boxed(boxed):
        # Accept both commands that last_boxed_only_string can return.
        for left in ("\\boxed{", "\\fbox{"):
            if boxed.startswith(left) and boxed.endswith("}"):
                return boxed[len(left):-1]
        return None

    boxed = last_boxed_only_string(text)
    if boxed is None:
        return None
    return remove_boxed(boxed)

## Majority vote (select the most frequently occurring answer)

In [None]:
# Define the majority voting function to get the most common answer
def majority_vote(answers):
    """Return the most frequent non-None entry of ``answers``.

    ``None`` entries are ignored. On a tie, the answer that appeared first
    wins. Returns ``None`` when no usable answer remains.
    """
    votes = [candidate for candidate in answers if candidate is not None]
    if not votes:
        return None

    # Tally occurrences; dict insertion order records first appearance.
    tally = {}
    for candidate in votes:
        tally[candidate] = tally.get(candidate, 0) + 1

    # max() returns the first key with the highest count.
    return max(tally, key=tally.get)

# TIR Agent

In [None]:
# class TIRAgent:
#     def __init__(self, problem_id, id, problem, tokenizer, max_depth, log):
#         # problem id
#         self.problem_id = problem_id
#         # id of the agent
#         self.id = id
#         # number of LLM turns
#         self.depth = 1
#         # maximum number of turns allowed
#         self.max_depth = max_depth
#         # LLM's tokenizer
#         self.tokenizer = tokenizer
#         # Problem statement
#         self.problem = problem
#         # Chat Messages
#         self.messages = [
#             {
#                "role": "user", 
#                 "content": f"""Here is a math problem:
# {self.problem}
# Please solve the problem step by step, providing Python code at each step to verify your reasoning.
# Break your reasoning into:
# 1. Problem understanding.
# 2. Step-by-step logical breakdown.
# 3. Python code to verify.
# Finally, provide the non-negative integer answer within \\boxed{{}}."""
#             }
#         ]
#         # Last response from the LLM
#         self.last_response = None
#         # Code blocks from the last response
#         self.blocks = []
#         # Answers that the LLM generated in \boxed{}
#         self.answers = []
#         # No python code generated in last response or max_depth reached
#         self.is_complete = False
#         # File to log answers
#         self.log = log
#         # Next prompt to the LLM
#         self.next_prompt = None
        
#     def complete(self):
#         # Is the Agent done
#         return self.is_complete
    
#     def add_response(self, response, executor):
#         self.depth += 1
#         # Remember this response
#         self.last_response = response
#         # Add this to the messages history
#         self.messages.append({"role": "assistant", "content": response})
#         # Extract python blocks
#         self.blocks = find_python_blocks(response)
#         # Extract answer from the generated text, if present
#         answer = extract_answer(response)
#         if answer is not None:
#             self.answers.append(answer)
#         # Is it done?
#         self.is_complete = not self._should_continue()
#         # If not, use the python executor to create next prompt
#         if not self.is_complete:
#             self.next_prompt = self._next_prompt(executor)   
#             self.messages.append({"role": "user", "content": self.next_prompt})
    
#     def _should_continue(self):        
#         # Quit if max_depth number of turns reached
#         if self.depth >= self.max_depth:
#             return False
#         # If python code is generated, we can continue
#         elif len(self.blocks) > 0:
#             return True
#         return False
    
#     def _next_prompt(self, executor):
#         assert not self.is_complete
#         assert len(self.blocks) > 0
#         # Get code result from python execution
#         output, status = execute(executor, self.blocks[-1])
        
#         prompt = ''
#         # If code succeeds, give the output
#         if status:
#             prompt = f"""The Python code you provided executed successfully with the following output:
# ```python
# {self.blocks[-1]}

# ```
# ```output
# {output}
# ```"""
#         # if code fails, give the error
#         else:
#             prompt = f"""The python code you provided gives the following error:
# ```python
# {self.blocks[-1]}
# ```
# ```output
# {output}
# Please review the code, correct any mistakes, and provide a new solution with updated logic and code.
# ```"""
#         if self.depth == self.max_depth - 1:
#             prompt += f"\nSince we are nearing the limit of {self.max_depth} attempts, please finalize your response and double-check all steps."
#         return prompt
    
    
#     def next_message(self):
#         assert not self.is_complete 
#         # apply chat template to get the text
#         text = self.tokenizer.apply_chat_template(
#             self.messages,
#             tokenize=False,
#             add_generation_prompt=True
#         )
        
#         return text
        
    
#     def final_answer(self):
#         # if there no answers yet, we have to return None
#         ans = None
#         # otherwise return the latest answer
#         if len(self.answers) > 0:
#             ans = self.answers[-1]
#         # log to file
#         if self.log:
#             self.log.writerow([self.problem_id, self.id, ans])
#         # try to convert to integer
#         try:
#             ans = int(ans)
#         except:
#             ans = None
        
#         return ans

In [None]:
class TIRAgent:
    """One tool-integrated-reasoning conversation about a single problem.

    The agent keeps the chat history, extracts python blocks and boxed
    answers from each LLM response, runs the last python block and feeds
    the result back as the next user turn. It stops once a response has no
    python block or ``max_depth`` is reached.
    """

    def __init__(self, problem_id, id, problem, tokenizer,max_depth, log):
        """Set up the conversation with the initial user prompt.

        Args:
            problem_id: Identifier of the problem (written to the log).
            id: Identifier of this agent (written to the log).
            problem: Problem statement inserted into the first prompt.
            tokenizer: Object providing ``apply_chat_template``.
            max_depth: Maximum number of turns allowed.
            log: Object with a ``writerow`` method (e.g. a csv writer) or
                a falsy value to disable logging.
        """
        # problem id
        self.problem_id = problem_id
        # id of the agent
        self.id = id
        # number of LLM turns
        self.depth = 1
        # maximum number of turns allowed
        self.max_depth = max_depth
        # LLM's tokenizer
        self.tokenizer = tokenizer
        # Problem statement
        self.problem = problem
        # Chat Messages
        self.messages = [
            {
                "role": "user",
                "content": f"""Here is a math problem in Bengali:
{self.problem}
The answer is a non-negative integer. Please reason step by step to solve the problem above. Provide python code to verify your reasoning.
Put your final integer answer within \\boxed{{}}."""
            }
        ]
        # Last response from the LLM
        self.last_response = None
        # Code blocks from the last response
        self.blocks = []
        # Answers that the LLM generated in \boxed{}
        self.answers = []
        # No python code generated in last response or max_depth reached
        self.is_complete = False
        # File to log answers
        self.log = log
        # Next prompt to the LLM
        self.next_prompt = None

    def complete(self):
        """Return True once the agent needs no further LLM turn."""
        return self.is_complete

    def add_response(self, response, executor):
        """Record an LLM response and, if not done, prepare the next user turn.

        Args:
            response: Text generated by the LLM for the current turn.
            executor: Code executor handed to ``execute`` for the last
                python block of the response.
        """
        self.depth += 1
        # remember this response
        self.last_response = response
        # add this to the messages history
        self.messages.append({"role": "assistant", "content": response})
        # extract python blocks
        self.blocks = find_python_blocks(response)
        # extract answer from the generated text, if present
        answer = extract_answer(response)
        if answer is not None:
            self.answers.append(answer)
        # is it done?
        self.is_complete = not self._should_continue()
        # if not, use the python executor to create next prompt
        if not self.is_complete:
            self.next_prompt = self._next_prompt(executor)
            self.messages.append({"role": "user", "content": self.next_prompt})

    def _should_continue(self):
        """Return True when another turn should be requested."""
        # quit if max_depth number of turns reached
        if self.depth >= self.max_depth:
            return False
        # continue only if the last response contained python code to run
        return len(self.blocks) > 0

    def _next_prompt(self, executor):
        """Execute the last python block and build the follow-up user prompt."""
        assert not self.is_complete
        assert len(self.blocks) > 0
        # get code result from python execution
        output, status = execute(executor, self.blocks[-1])

        # The success and failure prompts differ only in this one word.
        kind = "output" if status else "error"
        return f"""The python code you provided gives the following {kind}:
```python
{self.blocks[-1]}
```
```output
{output}
```"""

    def next_message(self):
        """Render the chat history into the prompt text for the next LLM call."""
        assert not self.is_complete
        # apply chat template to get the text
        text = self.tokenizer.apply_chat_template(
            self.messages,
            tokenize=False,
            add_generation_prompt=True
        )

        return text

    def final_answer(self):
        """Return the latest boxed answer as an int, or None.

        The raw (unconverted) answer is written to the log on every call
        when a log is configured. ``None`` is returned when no answer was
        produced or it cannot be parsed by ``int()``.
        """
        # if there no answers yet, we have to return None
        ans = None
        # otherwise return the latest answer
        if len(self.answers) > 0:
            ans = self.answers[-1]
        # log to file
        if self.log:
            self.log.writerow([self.problem_id, self.id, ans])
        # try to convert to integer; only conversion failures are swallowed
        try:
            ans = int(ans)
        except (TypeError, ValueError):
            ans = None

        return ans

# SC-TIR Agent

In [None]:
class SCTIRAgent:
    """Self-consistency wrapper: several TIRAgents on one problem, majority-voted."""

    def __init__(self, problem_id, problem, tokenizer,samples, max_depth, log):
        """Create ``samples`` independent TIRAgents for the same problem."""
        self.problem_id = problem_id    # problem id
        self.problem = problem          # problem statement
        self.tokenizer = tokenizer      # LLM's tokenizer
        self.samples = samples          # number of TIRAgents to create
        self.max_depth = max_depth      # maximum number of turns
        # TIR Agents, one per sample, numbered 0..samples-1
        self.agents = [
            TIRAgent(problem_id, index, problem, tokenizer, max_depth, log)
            for index in range(samples)
        ]
        self.log = log                  # log file

    def complete(self):
        """Return True only when every underlying agent is done."""
        return all(member.complete() for member in self.agents)

    def get_ready_agents(self):
        """Return the agents that still need another LLM turn."""
        return [member for member in self.agents if not member.complete()]

    def final_answer(self):
        """Majority-vote the agents' answers; 0 when no agent produced one."""
        assert self.complete()
        votes = [member.final_answer() for member in self.agents]
        winner = majority_vote(votes)
        return 0 if winner is None else winner

# Load Test Set

In [None]:
#test_df = pd.read_csv('/kaggle/input/translated-test-df/translated_test_df.csv')

# Load the test set. The inference loop reads column 0 of each row as the
# problem id and column 1 as the problem text.
test_df = pd.read_csv('/kaggle/input/dlsprint3/test.csv')
# Preview 5 random rows (no random_state is set, so the preview varies per run).
test_df.sample(5)

## (Optional) Load only 5 problems in DEBUG mode when short on time (the cell below is currently commented out)

In [None]:
# if DEBUG:
#     test_df = test_df[:5]
#     torch.cuda.empty_cache()
#     gc.collect()

# Configure LLM and Python REPL

In [None]:
# Generation settings: up to 2048 new tokens per response, with the
# temperature / top-p values from the configuration cell.
sampling_params = vllm.SamplingParams(max_tokens=2048, temperature=TEMPERATURE, top_p=TOP_P)
# Code executor used by the agents (default timeout of 5 seconds).
executor = PythonREPL()

# Run the Agents

## TIR Agent

In [None]:
# for row in test_df.values[0:2]:
#     problem_id = row[0]
#     problem = row[1]
    
#     agent = TIRAgent(problem_id, 0, problem, tokenizer, max_depth=4, log=None)
    
#     while not agent.complete():
#         text = agent.next_message()
#         # get response from LLM
#         response = llm.generate([text], sampling_params)
#         # pass in python executor, since response might contain python code
#         agent.add_response(response[0].outputs[0].text, executor)
    
#     for message in agent.messages:
#         print(f"Role: {message['role']}\n")
#         print(f"Content:\n {message['content']}\n")
    
#     answer = agent.final_answer()
#     print(f"Final answer: {answer}")
    

## SC-TIR Agent

In [None]:
# for row in test_df.values[1:2]:
#     problem_id = row[0]
#     problem = row[1]
    
#     agent = SCTIRAgent(problem_id, problem, tokenizer,samples=6, max_depth=6, log=None)
    
#     while not agent.complete():
#         ready_agents = agent.get_ready_agents()
#         texts = [a.next_message() for a in ready_agents]
#         # get response from LLM
#         responses = llm.generate(texts, sampling_params)
#         # pass response to the agents
#         for i, ready_agent in enumerate(ready_agents):
#             ready_agent.add_response(responses[i].outputs[0].text, executor)
    
#     answer = agent.final_answer()
#     print(f"Problem: {problem}")
#     print(f"Final answer: {answer}")

# Run Inference

# Create submission

## Also log agent answers

In [None]:
# Output files: the submission and a per-agent answer log. Both stay open
# across the following cells and are closed in the "Close files" cell.
# newline='' follows the csv module's documentation for files handed to
# csv.writer, so the writer's own line terminator is not translated again.
file = open('Llama.csv', 'w', encoding='utf-8', newline='')
log_file = open('log.csv', 'w', encoding='utf-8', newline='')

submission = csv.writer(file)
log = csv.writer(log_file)

# Header rows.
submission.writerow(['ID', 'Answer'])
log.writerow(['ID', "Agent ID", 'Answer'])

## Configure LLM sampling parameters and Python REPL

## Use a queue to batch inference

In [None]:
%%time

boxed_answers = {}
agents = []

q = Queue()

iterator = iter(tqdm(test_df.values))

while True:
    for agent in agents:
        if agent.complete():
            boxed_answers[agent.problem_id] = agent.final_answer()

    agents[:] = list(filter(lambda a: not a.complete(), agents))

    while q.qsize() < BATCH_SIZE:
        try:
            row = next(iterator)
        except StopIteration:
            break

        id = row[0]
        problem = row[1]

        agent = SCTIRAgent(id, problem, tokenizer, K, DEPTH, log)
        
        agents.append(agent)

        for tir_agent in agent.get_ready_agents():
            q.put_nowait(tir_agent)
            
    if q.empty():
        break
        
    
    ready_agents = []
    texts = []
    for _ in range(BATCH_SIZE):
        try:
            agent = q.get_nowait()
            ready_agents.append(agent)
            texts.append(agent.next_message())
        except:
            break

    
    responses = llm.generate(texts, sampling_params)
    responses = [response.outputs[0].text for response in responses]
    
    for i in range(len(ready_agents)):
        agent = ready_agents[i]
        response = responses[i]
        agent.add_response(response, executor)
        if not agent.complete():
            q.put_nowait(agent)
   

## Write to submission file

In [None]:
# One CSV row per problem: [problem id, majority-voted answer].
submission.writerows([pid, voted] for pid, voted in boxed_answers.items())

## Close files

In [None]:
   
# Close the submission file first, then the agent log.
for handle in (file, log_file):
    handle.close()

Combined Prompts inference

In [None]:
# import pandas as pd
# import torch

# # Load the CSV file with problems and prompts
# df = pd.read_csv("pruned_prompts.csv")

# # Specify the ID of the test problem (change as needed)
# test_id = 0  # Example test case ID
# test_problem = df.loc[test_id, 'Problem']
# similar_problems_solutions = df.loc[test_id, 'prompts']  # Get the related problems and solutions
# print(test_problem)

# # Create the prompt for the model
# prompt = f"""Here is a math problem in Bengali:
# {test_problem}. Here is a problem and solution that might or might not help you to solve the problem:{similar_problems_solutions})
# The answer is a non-negative integer. Please reason step by step to solve the problem above. 
# Put your final integer answer within \\boxed{{}}."""

# prompts = [{"role": "system", "content": "You are a helpful math assistant."},{"role": "user", "content": prompt}]

# # Tokenize the input prompt
# tokens = tokenizer.apply_chat_template(
#             prompts,
#             tokenize=False,
#             add_generation_prompt=True
#         )

# # Generate the solution using the model
# generated_solution = llm.generate(tokens,sampling_params)

# # Output the generated solution
# print(f"Generated solution for test problem {test_id}:")
# print(generated_solution[0].outputs[0].text)


In [None]:
# import pandas as pd

# # Load the CSV file
# df = pd.read_csv("pruned_prompts.csv")

# # Define a function to prune the 5th problem and its solution from each 'prompts' entry
# def prune_fifth_problem(prompts_text):
#     # Split the prompts into individual problems/solutions
#     problems = prompts_text.split("Problem:")
    
#     # Check if there are at least 5 problems
#     if len(problems) > 2:
#         # Remove the 5th problem (index 5 is actually the 6th element, as index starts from 0)
#         del problems[2]
        
#     # Rejoin the remaining problems and solutions
#     return "Problem:".join(problems)

# # Apply the function to the 'prompts' column
# df['prompts'] = df['prompts'].apply(prune_fifth_problem)

# # Now the 'pruned_prompts' column contains the updated text with the 5th problem removed
# print(df[['ID', 'prompts']].head())  # Display the updated prompts for the first few rows

# # You can save the updated DataFrame to a new CSV if needed
# # df.to_csv("pruned_prompts.csv", index=False)


In [None]:
# df.to_csv("pruned_prompts.csv", index=False)

In [None]:
# df['prompts'] = df['prompts'].str.replace("Here are a list of math problems and solutions:", "", regex=False)

# # Display the updated DataFrame to confirm the string has been removed
# print(df.head())

In [None]:
# df.to_csv("pruned_prompts.csv", index=False)