# Installing the necessary Modules

In [None]:
!pip install datasets

# Importing Necessary Libraries

In [11]:
import re
import requests
from datasets import load_dataset
from typing import List, Tuple, Dict
from dotenv import load_dotenv
import os
# python-dotenv: load variables defined in a local .env file into the process environment.
load_dotenv()

# API key that api_calling sends as the Bearer token of its request.
# Indexing os.environ raises KeyError right away if the variable is not set.
DEEPSEEK_API = os.environ['DEEPSEEK_API']

# Step 1. Loading the dataset

In [2]:
# Fetch the Bangla MMLU benchmark through the `datasets` library.
dataset = load_dataset("hishab/bangla-mmlu")

# Bind each named split to its own variable (validation, test and dev).
validation, test, dev = dataset["validation"], dataset["test"], dataset["dev"]

README.md:   0%|          | 0.00/618 [00:00<?, ?B/s]

dev-00000-of-00001.parquet:   0%|          | 0.00/33.6k [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/2.52M [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/12.0M [00:00<?, ?B/s]

Generating dev split:   0%|          | 0/175 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/14750 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/72944 [00:00<?, ? examples/s]

# Step 2. Working with a single split (i.e., validation)

## Formatting The Question

In [9]:
def format_question(sample: Dict) -> Dict:
    """
    Turn one dataset row into a multiple-choice prompt that is easy for an LLM to read.

    The first four entries of `choices` are labelled "A" to "D" and placed, as the
    string form of a dict, underneath the question text, i.e.
        Question in Bangla: নিচের কোনটিতে মেটামারিজম অনুপস্থিত?
        Options in Bangla: {'A': 'সেকেন্ডারি অ্যামিন', 'B': 'কিটোন', 'C': 'অ্যালকোহল', 'D': 'ইথার'}

    Inputs:
    sample = A dict object with the keys 'id', 'question', 'choices' and 'answer'

    Output:
    A new dict holding those four fields plus 'formated_question' (str), the prompt text

    Raises:
    KeyError if one of the four keys is missing, IndexError if `choices` has fewer than four entries
    """
    # Read every required field first so a missing key fails before any formatting happens.
    row_id, question, choices, answer = [
        sample[key] for key in ('id', 'question', 'choices', 'answer')
    ]

    # Pair the option letters with the choices by position.
    labelled_options = {letter: choices[position] for position, letter in enumerate("ABCD")}

    # The four leading spaces on the 2nd and 3rd prompt lines are part of the prompt text.
    prompt = (
        "Below there is a Bangla multiple choice question with 4 option, you need to find the correct option for the question.\n"
        f"    Question in Bangla: {question}\n"
        f"    Options in Bangla: {labelled_options}"
    )

    return {
        'id': row_id,
        'question': question,
        'choices': choices,
        'answer': answer,
        'formated_question': prompt,
    }

In [10]:
# Sanity-check format_question on the first validation sample.

format_question(validation[0])

{'id': '4c6b8f9d-93a9-4f46-a90b-6717a42597ae-70221',
 'question': '∛-8 এর মান কত?',
 'choices': ['-2', '2i', '-2, 1 ± i√3', '-2, 1 ± i√2'],
 'answer': 'C',
 'formated_question': "Below there is a Bangla multiple choice question with 4 option, you need to find the correct option for the question.\n    Question in Bangla: ∛-8 এর মান কত?\n    Options in Bangla: {'A': '-2', 'B': '2i', 'C': '-2, 1 ± i√3', 'D': '-2, 1 ± i√2'}"}

## Building the API pipeline

In [21]:
def api_calling(sample: Dict, timeout: Tuple[float, float] = (10, 600)):
    """
    Send one formatted question to the DeepSeek-R1 model through the chat-completions endpoint.

    Inputs:
    sample = A dict with a 'formated_question' key (as produced by format_question); its value is sent as the user message
    timeout = (connect, read) timeout in seconds handed to requests.post. Without a timeout a stalled
              connection would block the notebook forever; the read part is generous because the whole
              answer arrives in one non-streamed response.

    Output:
    The response body parsed from JSON. The HTTP status code is not checked here, so an error payload
    sent by the server is returned to the caller unchanged.

    Raises:
    KeyError if 'formated_question' is missing from sample
    requests.exceptions.Timeout if the server does not connect or answer within `timeout`
    """

    formated_question = sample['formated_question']
    url = "https://api.hyperbolic.xyz/v1/chat/completions"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {DEEPSEEK_API}"
    }
    system_instruction = """You are an inteligent assistant which will help the user to answer multiple choice question. The multiple choice question is in bangla and your final answer will always be in bangla."""

    data = {
        "messages": [
            {
                "role": "system",
                "content": system_instruction
            },
            {
                "role": "user",
                "content": formated_question
            }
        ],
        "model": "deepseek-ai/DeepSeek-R1",
        # Low temperature / top_p keep the sampling narrow.
        "temperature": 0.1,
        "top_p": 0.2
    }

    response = requests.post(url, headers=headers, json=data, timeout=timeout)
    return response.json()

In [17]:
# Smoke-test the pipeline end to end on the second validation sample.

formatted_sample = format_question(validation[1])

# This performs a real network request through api_calling.
response_json  = api_calling(formatted_sample)

<Response [200]>


In [20]:
# Show the parsed API payload; the model's reply text sits under choices[0]['message']['content'].
response_json

{'id': 'dcf81e9f53784cfda4ed41936ee4ef13',
 'object': 'chat.completion',
 'created': 1739942514,
 'model': 'deepseek-ai/DeepSeek-R1',
 'choices': [{'index': 0,
   'message': {'role': 'assistant',
    'content': "<think>\nOkay, let's see. The question is asking which of the following does not exhibit metamerism. The options are A: Secondary amine, B: Ketone, C: Alcohol, D: Ether.\n\nFirst, I need to remember what metamerism is. Metamerism is a type of structural isomerism where compounds have the same molecular formula but different alkyl groups attached to the same functional group. This usually happens in compounds where there's a functional group that can have different alkyl chains on either side. \n\nSo, let's go through each option. \n\nOption A: Secondary amine. Secondary amines have the structure R2NH. The two R groups can be different, so they can exhibit metamerism. For example, if one R is methyl and the other is ethyl, that's a different isomer compared to both being propyl 