## Install/Upgrade Packages

In [1]:
!pip install --upgrade transformers

Collecting transformers
  Downloading transformers-4.45.2-py3-none-any.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.4/44.4 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
Downloading transformers-4.45.2-py3-none-any.whl (9.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.9/9.9 MB[0m [31m57.1 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: transformers
  Attempting uninstall: transformers
    Found existing installation: transformers 4.45.1
    Uninstalling transformers-4.45.1:
      Successfully uninstalled transformers-4.45.1
Successfully installed transformers-4.45.2


## Imports

In [2]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
import pandas as pd
import re
import csv
from tqdm import tqdm
import gc

## Variables

In [3]:
# In DEBUG mode, infer only on 5 problems
DEBUG = False 
# Number of candidate solutions to generate
K = 4

## Load Dataset

### Use pandas to read CSV

In [4]:
test_df = pd.read_csv('/kaggle/input/dlsprint3/test.csv')
test_df.sample(5)

Unnamed: 0,ID,Problem
52,52,একটি পরিবারে 8 (আট) জন সদস্যের জন্য 9 দিনে 6 ক...
41,41,একটি ড্রাগন প্রতি 10 তম মিনিটে 5 টা গাছ পুড়িয়ে...
66,66,মাজেদ গণিত নিয়ে নতুন নতুন আবিষ্কার করতে খুব প...
11,11,$k$-এর সর্বনিম্ন কোন মানের জন্য $\sqrt{70 \tim...
40,40,"এমন দুই অংকের সংখ্যা নির্ণয় কর, যে সংখ্যাগুলো ..."


## Initalize Model

#### We are using DeepSeek Math 7b Instruct with 16 bit precision. You can explore other models.

In [5]:
model_name = "deepseek-ai/deepseek-math-7b-instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto")


# Now, apply DataParallel if needed (use it on the quantized model)
model = torch.nn.DataParallel(model)


tokenizer_config.json:   0%|          | 0.00/1.14k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/4.61M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/594 [00:00<?, ?B/s]

pytorch_model.bin.index.json:   0%|          | 0.00/22.5k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

pytorch_model-00001-of-00002.bin:   0%|          | 0.00/9.97G [00:00<?, ?B/s]

pytorch_model-00002-of-00002.bin:   0%|          | 0.00/3.85G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/121 [00:00<?, ?B/s]

## Configure Model Generation Parameters

In [6]:
model.generation_config = GenerationConfig.from_pretrained(model_name)
model.generation_config.max_new_tokens = 1024
model.generation_config.temperature = 0.7
model.generation_config.top_p = 0.7
model.generation_config.do_sample = True
model.generation_config.num_return_sequences = 4
model.generation_config.pad_token_id = model.generation_config.eos_token_id

#model.generation_config = GenerationConfig.from_pretrained(model_name)
#model.generation_config.max_new_tokens = 512  # Reduced max token count
#model.generation_config.temperature = 0.5  # Lower temperature for deterministic results
#model.generation_config.top_p = 0.9  # Slightly higher p to include more token options
#model.generation_config.do_sample = False  # Make the generation deterministic
#model.generation_config.num_return_sequences = 1  # Only return 1 sequence
#model.generation_config.pad_token_id = model.generation_config.eos_token_id


## Functions

#### Extract the problem answer from \boxed{}

In [7]:
def extract_answer(result):
    match = re.search(r'\\boxed{(.*?)}', result)
    if match:
        boxed_content = match.group(1)
        digits = ''.join(filter(str.isdigit, boxed_content))
        if digits:
            return int(digits)
    return None

#### Majority vote between candidate answers

In [8]:
def majority_answer(answers):
    answers = [answer for answer in answers if answer is not None]

    if not answers:
        return None
    
    counts = {}
    for answer in answers:
        if answer in counts:
            counts[answer] += 1
        else:
            counts[answer] = 1

    max_answer = None
    max_count = 0
    
    for answer, count in counts.items():
        if count > max_count:
            max_answer = answer
            max_count = count
    
    return max_answer

#### Generate solution candidates using LLM

In [9]:
def predict_answer(problem):
    messages = [
        {
            "role": "user", 
            "content": f"Here is a math problem in Bengali.\n{problem}\nPlease solve the problem. Please reason step by step, and put your final answer within \\boxed{{}}."
        }
    ]

    input_tensor = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt")
    
    outputs = model.module.generate(input_tensor.to(model.module.device), max_new_tokens=1024)  # or a higher number based on your needs


    #outputs = model.generate(input_tensor.to(model.device))

    results = [tokenizer.decode(outputs[i][input_tensor.shape[1]:], skip_special_tokens=True) for i in range(len(outputs))]
    answers = [extract_answer(result) for result in results]
    
    return majority_answer(answers)

## Create Submission

In [10]:
if DEBUG:
    test_df = test_df[:5]
    torch.cuda.empty_cache()
    gc.collect()

In [11]:
file = open('token1024Parallel-NoTranslation.csv', 'w', encoding='utf-8')
writer = csv.writer(file)
writer.writerow(['ID', 'Answer'])

for row in tqdm(test_df.values):
    id = row[0]
    problem = row[1]    
    answer = predict_answer(problem)
    
    if DEBUG:
        print('id: ', id)
        print('problem: ', problem)
        print('answer: ', answer)
    
    if answer is None:
        answer = 0
        
    writer.writerow([id, answer])
    
file.close()

  0%|          | 0/100 [00:00<?, ?it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32)
  1%|          | 1/100 [00:03<06:14,  3.78s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
  2%|▏         | 2/100 [00:26<24:42, 15.13s/it]T