# Installing Packages

In [1]:
%%capture
!pip install vllm

# Import Libs

In [2]:
import vllm
import kagglehub

import numpy as np
import pandas as pd

import os, re, string, ast
import random
from collections import Counter
from tqdm.auto import tqdm

INFO 03-30 06:53:55 [__init__.py:239] Automatically detected platform cuda.


# Setup model

In [3]:
num_attempt = 6

class Model:
    def __init__(self):
        self.model_path = kagglehub.model_download('qwen-lm/qwen2.5/Transformers/32b-instruct-awq/1')
        self.llm = vllm.LLM(
            self.model_path,
            quantization="awq",
            tensor_parallel_size=2,
            gpu_memory_utilization=0.95,
            trust_remote_code=True,
            dtype="half",
            enforce_eager=True,
            max_model_len=5120,
            disable_log_stats=True
        )

        self.sampling_params = vllm.SamplingParams(
            n=1,  # Number of output sequences to return for each prompt.
            top_k=20,  # Float that controls the cumulative probability of the top tokens to consider.
            top_p=0.8,
            temperature=0.7,  # randomness of the sampling
            repetition_penalty=1.05,
            # seed=777, # Seed for reprodicibility
            skip_special_tokens=False,  # Whether to skip special tokens in the output.
            max_tokens=512,  # Maximum number of tokens to generate per output sequence.
        )
        self.tokenizer = self.llm.get_tokenizer()
        
    def predict(self, question, choices, prompt_template, max_new_tokens=512) -> str:
        def apply_template(prompt, tokenizer):
            messages = [
                {"role": "user", "content": prompt},
            ]
            text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
            return text

        prompt = prompt_template.format(question, choices)
        inputs = [apply_template(prompt, self.tokenizer)] * num_attempt
        responses = self.llm.generate(inputs, self.sampling_params, use_tqdm=False)
        responses = [x.outputs[0].text for x in responses]
        return responses

In [4]:
model = Model()

INFO 03-30 06:54:08 [config.py:585] This model supports multiple tasks: {'embed', 'reward', 'generate', 'score', 'classify'}. Defaulting to 'generate'.
INFO 03-30 06:54:10 [config.py:1519] Defaulting to use mp for distributed inference
INFO 03-30 06:54:10 [llm_engine.py:241] Initializing a V0 LLM engine (v0.8.2) with config: model='/kaggle/input/qwen2.5/transformers/32b-instruct-awq/1', speculative_config=None, tokenizer='/kaggle/input/qwen2.5/transformers/32b-instruct-awq/1', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, tokenizer_revision=None, trust_remote_code=True, dtype=torch.float16, max_seq_len=5120, download_dir=None, load_format=auto, tensor_parallel_size=2, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=awq, enforce_eager=True, kv_cache_dtype=auto,  device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='xgrammar', reasoning_backend=None), observability_config=ObservabilityConfig(show_

Loading safetensors checkpoint shards:   0% Completed | 0/5 [00:00<?, ?it/s]


[1;36m(VllmWorkerProcess pid=107)[0;0m INFO 03-30 07:00:17 [loader.py:447] Loading weights took 314.19 seconds
INFO 03-30 07:00:18 [loader.py:447] Loading weights took 314.44 seconds
[1;36m(VllmWorkerProcess pid=107)[0;0m INFO 03-30 07:00:18 [model_runner.py:1146] Model loading took 9.0925 GB and 314.482704 seconds
INFO 03-30 07:00:18 [model_runner.py:1146] Model loading took 9.0925 GB and 314.736209 seconds
[1;36m(VllmWorkerProcess pid=107)[0;0m INFO 03-30 07:00:34 [worker.py:267] Memory profiling takes 15.97 seconds
[1;36m(VllmWorkerProcess pid=107)[0;0m INFO 03-30 07:00:34 [worker.py:267] the current vLLM instance can use total_gpu_memory (14.74GiB) x gpu_memory_utilization (0.95) = 14.00GiB
[1;36m(VllmWorkerProcess pid=107)[0;0m INFO 03-30 07:00:34 [worker.py:267] model weights take 9.09GiB; non_torch_memory takes 0.12GiB; PyTorch activation peak memory takes 0.73GiB; the rest of the memory reserved for KV Cache is 4.06GiB.
INFO 03-30 07:00:34 [worker.py:267] Memory pro

# Load data and process

In [5]:
df_train = pd.read_csv("/kaggle/input/fpt-ai-residency-batch-6-entry-test/b6_train_data.csv")
df_train.head(3)

Unnamed: 0,task_id,question,choices,answer
0,k10168,Question: What will be output of the following...,"['8 4 2', '8 4 2', '8 4 4', '8 4 3']",C
1,k10173,Question: What will be output of the following...,"['-4', '-5', '10', '11']",A
2,k10174,Question: Match the following.\n Group 1 ...,"['P-4. Q-1, R-2, S-3', 'P-3, Q-1, R-4, S-2', '...",B


In [6]:
df_test = pd.read_csv("/kaggle/input/fpt-ai-residency-batch-6-entry-test/b6_test_data.csv")
df_test.head(3)

Unnamed: 0,task_id,question,choices
0,k10171,Question: What will be output of the following...,"['10', '9', '8', 'Error']"
1,k10182,Question: Consider line 3. Identify the compil...,"['No compilation error', 'Only a lexical error..."
2,k10184,Question: Assume the conflicts part (a) of thi...,['Equal precedence and left associativity; exp...


In [7]:
print(df_train['answer'].unique())
df_train.describe()

['C' 'A' 'B' 'D' 'ANSWER: C' 'ANSWER: B' 'ANSWER: D' 'ANSWER: A' 'E' 'G'
 nan 'ANSWER:  D']


Unnamed: 0,task_id,question,choices,answer
count,3963,3963,3963,3949
unique,3963,3826,3451,11
top,k00701,Question: What is Conditional Rendering?,"['False', 'True']",A
freq,1,7,22,988


In [8]:
df_train = df_train.dropna()
df_train = df_train.drop_duplicates()
df_train['answer'] = df_train['answer'].astype(str).str.strip().str[-1]
print(df_train['answer'].unique())
df_train.describe()

['C' 'A' 'B' 'D' 'E' 'G']


Unnamed: 0,task_id,question,choices,answer
count,3949,3949,3949,3949
unique,3949,3814,3450,6
top,k00701,Question: What is Conditional Rendering?,"['False', 'True']",A
freq,1,7,22,1066


In [9]:
df_test.describe()

Unnamed: 0,task_id,question,choices
count,1253,1253,1253
unique,1253,1242,1163
top,k00700,Question: Match the following:,"['(1)', '(2)', '(3)', '(4)']"
freq,1,2,7


# Inference

In [10]:
def answer_extract(response:list):
    # Lọc ra các phần tử hợp lệ (chỉ gồm một chữ cái in hoa từ A-Z)
    valid_answers = [ans for ans in response if re.fullmatch(r"[A-Z]", ans)]

    if not valid_answers:
        return None  # Không có câu trả lời hợp lệ
    
    # Đếm số lần xuất hiện của từng câu trả lời hợp lệ
    counter = Counter(valid_answers)
    max_freq = max(counter.values())
    
    # Lấy tất cả các câu trả lời có số lần xuất hiện nhiều nhất
    most_common_answers = [ans for ans, freq in counter.items() if freq == max_freq]
    
    # Nếu có nhiều hơn 1 đáp án cùng tần suất, chọn ngẫu nhiên 1
    return random.choice(most_common_answers)

In [11]:
prompt_tempalte = """You are a programming expert and will answer multiple-choice questions about code.  
Read the following question and options carefully and select the **best** answer.  

### Response Format:
- Reply with **only** the letter of the correct choice (A, B, C, or D).  
- Do **not** provide explanations.  

### {}

### Options:
{}

### Response:
"""

In [12]:
index2choice = {i: letter for i, letter in enumerate(string.ascii_uppercase)}
choice2index = {letter: i for i, letter in enumerate(string.ascii_uppercase)}

def choices2str(choices):
    choices_lst = ast.literal_eval(choices)
    result = ""
    for i in range(0, len(choices_lst)):
        result = result + index2choice[i] + ". " + str(choices_lst[i]) + "\n\n"
    return result.strip()

In [13]:
data_dict = {
    "task_id": [],
    "answer": []
}

In [14]:
for i in tqdm(range(0, df_test.shape[0]), desc="Inference on Test set"):
    task_id = df_test['task_id'].iloc[i]
    response = model.predict(
        df_test['question'].iloc[i],
        choices2str(df_test['choices'].iloc[i]),
        prompt_tempalte,
        1
    )
    answer = answer_extract(response)
    print("Task id:",task_id,"- Answer:",answer)
    data_dict['task_id'].append(task_id)
    data_dict['answer'].append(answer)

Inference on Test set:   0%|          | 0/1253 [00:00<?, ?it/s]

Task id: k10171 - Answer: D
Task id: k10182 - Answer: D
Task id: k10184 - Answer: A
Task id: k10206 - Answer: C
Task id: k10215 - Answer: D
Task id: k10231 - Answer: D
Task id: k10234 - Answer: D
Task id: k10238 - Answer: B
Task id: k10248 - Answer: D
Task id: k10249 - Answer: A
Task id: k10254 - Answer: A
Task id: k10264 - Answer: D
Task id: k10267 - Answer: D
Task id: k10272 - Answer: D
Task id: k10278 - Answer: D
Task id: k10281 - Answer: A
Task id: k10291 - Answer: C
Task id: k10296 - Answer: B
Task id: k10312 - Answer: C
Task id: k10317 - Answer: B
Task id: k10323 - Answer: A
Task id: k10325 - Answer: B
Task id: k10336 - Answer: C
Task id: k10345 - Answer: B
Task id: k10360 - Answer: B
Task id: k10361 - Answer: A
Task id: k10375 - Answer: D
Task id: k10377 - Answer: D
Task id: k10380 - Answer: D
Task id: k10385 - Answer: C
Task id: k10394 - Answer: D
Task id: k10400 - Answer: C
Task id: k10404 - Answer: C
Task id: k10408 - Answer: C
Task id: k10412 - Answer: A
Task id: k10423 - An

# Submission

In [15]:
submission = pd.DataFrame(data_dict)
submission.head()

Unnamed: 0,task_id,answer
0,k10171,D
1,k10182,D
2,k10184,A
3,k10206,C
4,k10215,D


In [16]:
submission.describe()

Unnamed: 0,task_id,answer
count,1253,1252
unique,1253,5
top,k00700,B
freq,1,355


In [17]:
submission.to_csv("submission.csv", index=False)