In [1]:
!pip install --no-index --find-links=/kaggle/input/wsdm-wheels/accelerate accelerate
!pip install --no-index --find-links=/kaggle/input/wsdm-wheels/bitsandbytes bitsandbytes
!pip install --no-index --find-links=/kaggle/input/wsdm-wheels/peft peft
!pip install --no-index --find-links=/kaggle/input/wsdm-wheels/transformers transformers
!pip install --no-index --find-links=/kaggle/input/ftfy-wheels/ftfy_wheels ftfy

Looking in links: /kaggle/input/wsdm-wheels/accelerate
Looking in links: /kaggle/input/wsdm-wheels/bitsandbytes
Processing /kaggle/input/wsdm-wheels/bitsandbytes/bitsandbytes-0.45.0-py3-none-manylinux_2_24_x86_64.whl
Installing collected packages: bitsandbytes
Successfully installed bitsandbytes-0.45.0
Looking in links: /kaggle/input/wsdm-wheels/peft
Looking in links: /kaggle/input/wsdm-wheels/transformers
Looking in links: /kaggle/input/ftfy-wheels/ftfy_wheels
Processing /kaggle/input/ftfy-wheels/ftfy_wheels/ftfy-6.3.1-py3-none-any.whl
Installing collected packages: ftfy
Successfully installed ftfy-6.3.1


In [2]:
import os
import time
from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass
from pathlib import Path

import numpy as np
import pandas as pd
import sklearn
import torch
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    BitsAndBytesConfig,
    PreTrainedTokenizerBase,
)
from transformers.data.data_collator import pad_without_fast_tokenizer_warning
from peft import PeftModel
import ftfy

In [3]:
@dataclass
class Config:
    """Settings shared by the cells of this notebook.

    Every attribute is annotated so that ``@dataclass`` registers it as a real
    field. Without annotations the decorator generates an ``__init__`` with no
    parameters, so overrides such as ``Config(max_length=1024)`` would raise
    ``TypeError``. ``Config()`` still yields exactly the values listed below.
    """

    # --- main paths ---
    gemma_dir: str = '/kaggle/input/wsdm-download-8bit-top5-model'
    data_dir: Path = Path('/kaggle/input/wsdm-cup-multilingual-chatbot-arena')
    # --- head params ---
    head_dropout: float = 0.1
    hdim: int = 3584
    num_labels: int = 2
    # --- inference params ---
    device: torch.device = torch.device('cuda')
    max_length: int = 2200
    batch_size: int = 2
    use_leftside: bool = True
    tta: bool = True
    do_merge: bool = False


cfg = Config()

In [4]:
# Load test.parquet from the configured competition data directory.
test = pd.read_parquet(cfg.data_dir / 'test.parquet')

In [5]:
def process_text(text: str) -> str:
    """Return ``text`` after passing it through ``ftfy.fix_text``."""
    cleaned = ftfy.fix_text(text)
    return cleaned

In [6]:
def tokenize(
    tokenizer, prompt, response_a, response_b, max_length=cfg.max_length
):
    """Turn prompt/response triples into token ids and attention masks.

    Each example becomes one string of the form
    ``<prompt>: ...`` + ``\\n\\n<response_a>: ...`` + ``\\n\\n<response_b>: ...``
    where every piece is first cleaned with ``process_text``. The strings are
    tokenized in a single call with truncation to ``max_length``.

    Args:
        tokenizer: callable tokenizer whose result exposes ``input_ids`` and
            ``attention_mask``.
        prompt, response_a, response_b: iterables of strings, consumed in
            parallel.
        max_length: truncation limit forwarded to the tokenizer.

    Returns:
        The pair ``(input_ids, attention_mask)`` taken from the tokenizer output.
    """
    def _tagged(tag, values):
        # Prefix every cleaned value with its section marker.
        return [tag + process_text(value) for value in values]

    prompt_parts = _tagged('<prompt>: ', prompt)
    first_parts = _tagged('\n\n<response_a>: ', response_a)
    second_parts = _tagged('\n\n<response_b>: ', response_b)

    texts = [''.join(parts) for parts in zip(prompt_parts, first_parts, second_parts)]
    encoded = tokenizer(texts, max_length=max_length, truncation=True)

    return encoded.input_ids, encoded.attention_mask

In [7]:
# Tokenizer files are read from the same directory as the model checkpoint.
tokenizer = AutoTokenizer.from_pretrained(cfg.gemma_dir)
tokenizer.add_eos_token = True

# With left-side handling enabled, both truncation and padding act on the
# start of the sequence.
if cfg.use_leftside:
    tokenizer.truncation_side = tokenizer.padding_side = 'left'

In [8]:
%%time

def _build_inputs(first_col, second_col):
    """Tokenize every test row with the two response columns in the given order.

    Returns a frame holding the row id, the token ids, the attention mask and
    the number of tokens of each row.
    """
    frame = pd.DataFrame()
    frame["id"] = test["id"]
    frame["input_ids"], frame["attention_mask"] = tokenize(
        tokenizer, test["prompt"], test[first_col], test[second_col]
    )
    frame["length"] = frame["input_ids"].apply(len)
    return frame


# Original order: response_a first, response_b second.
data = _build_inputs("response_a", "response_b")

# Swapped order: response_b first, response_a second.
aug_data = _build_inputs("response_b", "response_a")

CPU times: user 45.8 ms, sys: 1.65 ms, total: 47.5 ms
Wall time: 54.6 ms


In [9]:
# Persist the original-order inputs; run_model.py reads them back from data.parquet.
# NOTE(review): the script opens the absolute path /kaggle/working/data.parquet, so
# this relative write assumes the notebook's working directory is /kaggle/working.
data.to_parquet('data.parquet', index=False)

In [10]:
%%writefile run_model.py

import os
import time
from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass
from pathlib import Path

import numpy as np
import pandas as pd
import sklearn
import torch
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    BitsAndBytesConfig,
    PreTrainedTokenizerBase,
)
from transformers.data.data_collator import pad_without_fast_tokenizer_warning
from peft import PeftModel
import ftfy
import argparse

@dataclass
class Config:
    """Settings used by the inference script.

    Every attribute is annotated so that ``@dataclass`` registers it as a real
    field. Without annotations the decorator generates an ``__init__`` with no
    parameters, so overrides such as ``Config(batch_size=4)`` would raise
    ``TypeError``. ``Config()`` still yields exactly the values listed below.
    """

    # --- main paths ---
    gemma_dir: str = '/kaggle/input/wsdm-download-8bit-top5-model'
    data_dir: Path = Path('/kaggle/input/wsdm-cup-multilingual-chatbot-arena')
    # --- head params ---
    head_dropout: float = 0.1
    hdim: int = 3584
    num_labels: int = 2
    # --- inference params ---
    device: torch.device = torch.device('cuda')
    max_length: int = 2200
    batch_size: int = 2
    use_leftside: bool = True
    tta: bool = True
    do_merge: bool = False


cfg = Config()

def _load_model(device, lora_path):
    """Build one inference-ready classifier on ``device``.

    Loads the sequence-classification checkpoint from ``cfg.gemma_dir``,
    replaces its ``score`` head with a two-layer MLP, attaches the LoRA adapter
    stored at ``lora_path`` and, when ``cfg.do_merge`` is set, merges the
    adapter weights into the base model.
    """
    model = AutoModelForSequenceClassification.from_pretrained(
        cfg.gemma_dir,
        torch_dtype=torch.float16,
        num_labels=cfg.num_labels,
        device_map=device,
        use_cache=False,
    )
    # NOTE(review): this head is freshly initialised here; presumably the
    # adapter checkpoint restores its trained weights -- confirm.
    model.score = torch.nn.Sequential(
        torch.nn.Dropout(cfg.head_dropout),
        torch.nn.Linear(cfg.hdim, cfg.hdim // 2),
        torch.nn.Dropout(cfg.head_dropout),
        torch.nn.GELU(),
        torch.nn.Linear(cfg.hdim // 2, cfg.num_labels),
    ).to(device)

    model = PeftModel.from_pretrained(model, lora_path)
    if cfg.do_merge:
        model = model.merge_and_unload()

    # New nn.Module instances start in training mode; switch explicitly to eval
    # so the Dropout layers of the replaced head are inactive during scoring.
    model.eval()
    return model


def main(args):
    """Score ``data.parquet`` on two GPUs and write probabilities to ``result.csv``.

    One model replica is loaded per GPU (``cuda:0`` and ``cuda:1``); the rows are
    split between the replicas and processed in two threads. The output file has
    the columns ``id``, ``winner_model_a`` and ``winner_model_b`` and lists the
    rows in the same order as the input file.

    Args:
        args: parsed command-line namespace; only ``args.lora_path`` is read.
    """
    lora_path = args.lora_path

    device_0 = torch.device('cuda:0')
    device_1 = torch.device('cuda:1')
    model_0 = _load_model(device_0, lora_path)
    model_1 = _load_model(device_1, lora_path)

    tokenizer = AutoTokenizer.from_pretrained(cfg.gemma_dir)
    tokenizer.add_eos_token = True

    if cfg.use_leftside:
        tokenizer.truncation_side = 'left'
        tokenizer.padding_side = 'left'

    @torch.no_grad()
    @torch.autocast(device_type='cuda')
    def inference(df, model, device, batch_size=cfg.batch_size):
        """Run ``model`` over ``df`` in batches and add the two probability columns."""
        a_win, b_win = [], []

        for start_idx in range(0, len(df), batch_size):
            batch = df.iloc[start_idx:start_idx + batch_size]
            # Pad only up to the longest sequence of the current batch.
            inputs = pad_without_fast_tokenizer_warning(
                tokenizer,
                {
                    'input_ids': batch['input_ids'].to_list(),
                    'attention_mask': batch['attention_mask'].to_list(),
                },
                padding='longest',
                pad_to_multiple_of=None,
                return_tensors='pt',
            )
            outputs = model(**inputs.to(device))
            proba = outputs.logits.softmax(-1).cpu()

            a_win.extend(proba[:, 0].tolist())
            b_win.extend(proba[:, 1].tolist())

        df['winner_model_a'] = a_win
        df['winner_model_b'] = b_win

        return df

    st = time.time()

    data = pd.read_parquet("/kaggle/working/data.parquet")
    # Longest rows first: rows that share a batch then have similar lengths.
    data = data.sort_values('length', ascending=False)

    # Alternate rows between the GPUs so both get a similar mix of lengths.
    sub_1 = data.iloc[0::2].copy()
    sub_2 = data.iloc[1::2].copy()

    with ThreadPoolExecutor(max_workers=2) as executor:
        results = executor.map(inference, (sub_1, sub_2), (model_0, model_1), (device_0, device_1))

    # Restore the row order of the input file. The index labels survive the
    # length sort and the split, so sorting by index undoes both. Without this
    # the output order depends on the token lengths, and results of different
    # runs (e.g. original vs. swapped responses) cannot be combined row by row.
    result_df = pd.concat(list(results), axis=0).sort_index()

    print(f'elapsed time: {time.time() - st}')

    result_df[['id', 'winner_model_a', 'winner_model_b']].to_csv('result.csv', index=False)

if __name__ == "__main__":
    # Command-line entry point: the adapter directory is the only, mandatory, option.
    cli = argparse.ArgumentParser()
    cli.add_argument("--lora_path", type=str, required=True)
    main(cli.parse_args())

Writing run_model.py


In [11]:
%%writefile run.sh

# Abort on the first failing command so that "All done" is only printed after
# run_model.py finished successfully.
set -e

python run_model.py --lora_path '/kaggle/input/wsdm-download-top5-adapter-2200-ver2'

# No background jobs are started, so no `wait` is needed before reporting.
echo "All done"

Writing run.sh


In [12]:
# First pass: execute run.sh, which runs run_model.py with the adapter path given in the script.
!sh run.sh

2025-01-24 08:25:23.383142: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-01-24 08:25:23.404997: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-01-24 08:25:23.412468: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.
Loading checkpoint shards: 100%|██████████████████| 3/3 [01:02<00:00, 20.92s/it]
Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transforme

In [13]:
# Read back the predictions that run_model.py wrote for the original A/B order.
result_df = pd.read_csv('result.csv')
proba = result_df[['winner_model_a', 'winner_model_b']].to_numpy()

In [14]:
# Preview the first-pass probabilities (columns: winner_model_a, winner_model_b).
proba

array([[0.11182182, 0.88817811],
       [0.34792474, 0.65207529],
       [0.74073738, 0.25926262]])

In [15]:
# Careful: this overwrites data.parquet with the swapped-order inputs (response_b
# first), so the next run_model.py invocation scores the augmented view instead
# of the original one.
if cfg.tta:
    aug_data.to_parquet('data.parquet', index=False)

In [16]:
%%writefile run.sh

# Abort on the first failing command so that "All done" is only printed after
# run_model.py finished successfully.
set -e

python run_model.py --lora_path '/kaggle/input/wsdm-download-top5-adapter-2200-rev'

# No background jobs are started, so no `wait` is needed before reporting.
echo "All done"

Overwriting run.sh


In [17]:
# Second pass, only when test-time augmentation is enabled: execute the rewritten run.sh.
if cfg.tta:
    !sh run.sh

2025-01-24 08:27:20.559952: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-01-24 08:27:20.582391: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-01-24 08:27:20.588995: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.
Loading checkpoint shards: 100%|██████████████████| 3/3 [00:20<00:00,  6.73s/it]
Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transforme

In [18]:
if cfg.tta:
    # The second pass was fed the responses in swapped order, so its
    # winner_model_a column refers to the original response_b. Selecting the
    # columns as (b, a) puts the values back into the original A/B order.
    result_df = pd.read_csv('result.csv')
    rev_proba = result_df[['winner_model_b', 'winner_model_a']].to_numpy()

In [19]:
# Average the original-order and swapped-order predictions. The guard is needed
# because rev_proba is only created when TTA is enabled; without it this cell
# fails with a NameError for cfg.tta = False.
# The arrays are combined row by row, so both result.csv files must list the
# ids in the same order.
if cfg.tta:
    proba = (proba + rev_proba) / 2

In [20]:
# Preview the probabilities used for the final decision.
proba

array([[0.10555861, 0.89444137],
       [0.35472783, 0.6452722 ],
       [0.62740874, 0.37259124]])

In [21]:
# Label each row with the side that has the strictly higher probability;
# a tie falls to model_b.
result_df['winner'] = [
    'model_a' if p_a > p_b else 'model_b'
    for p_a, p_b in zip(proba[:, 0], proba[:, 1])
]

In [22]:
# Keep only the two columns that go into the submission file and preview them.
submission_df = result_df[['id', 'winner']]
display(submission_df)

Unnamed: 0,id,winner
0,327228,model_b
1,1139415,model_b
2,1235630,model_a


In [23]:
# Write the submission file without the DataFrame index column.
submission_df.to_csv('submission.csv', index=False)