In [3]:
import json

import pandas as pd
import torch

In [2]:
# Load the complete pickled model object from disk (the repr below shows it is
# a DistilBertForSequenceClassification module).
# NOTE(review): weights_only=False lets torch.load unpickle arbitrary Python
# objects, which can execute code -- only use this with a trusted "model.h5".
model = torch.load("model.h5", weights_only=False)
# Put the module in evaluation mode (e.g. disables the Dropout layers); the
# returned module is displayed as the cell output.
model.eval()

  from .autonotebook import tqdm as notebook_tqdm


DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): DistilBertSdpaAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)


In [56]:
# Read the test set, using the first CSV column as the DataFrame index.
test = pd.read_csv("test.csv", index_col = 0)
# Bare last expression so the notebook renders the frame.
test

Unnamed: 0_level_0,prompt,response_a,response_b
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
136060,"[""I have three oranges today, I ate an orange ...","[""You have two oranges today.""]","[""You still have three oranges. Eating an oran..."
211333,"[""You are a mediator in a heated political deb...","[""Thank you for sharing the details of the sit...","[""Mr Reddy and Ms Blue both have valid points ..."
1233961,"[""How to initialize the classification head wh...","[""When you want to initialize the classificati...","[""To initialize the classification head when p..."


In [57]:
def _first_turn(raw):
    """Return the first conversation turn from a JSON-encoded list of turns.

    The text columns hold JSON arrays such as ``["question", null]``. They are
    parsed with ``json.loads`` instead of ``eval`` because the CSV is external
    data (``eval`` would execute arbitrary expressions) and because JSON
    ``null`` is handled natively, without rewriting every occurrence of the
    word "null" inside the actual text.

    Args:
        raw: JSON string holding a list whose items are strings or null.

    Returns:
        The first item as a string, or ``""`` when the list is empty or its
        first item is null.
    """
    turns = json.loads(raw)
    first = turns[0] if turns else None
    return "" if first is None else first


# Keep only the first turn of each conversation, for all three text columns.
# NOTE(review): json.loads also decodes JSON escapes (e.g. "\/" and surrogate
# pairs) that eval left untouched -- confirm the training pipeline parsed these
# columns the same way.
for text_column in ("prompt", "response_a", "response_b"):
    test[text_column] = test[text_column].map(_first_turn)

In [58]:
def make_pairs(row):
    """Build the two "Prompt/Response" texts for one row.

    Each of ``prompt``, ``response_a`` and ``response_b`` is round-tripped
    through UTF-8. A field for which that raises (for example a string with a
    lone surrogate, or a value that is not a string) is replaced by ``""``.

    Args:
        row: Row with ``prompt``, ``response_a`` and ``response_b`` entries.

    Returns:
        The same row with two entries added: ``encode_fail`` (True when any
        field had to be blanked) and ``options`` (a two-item list, one text
        per response).
    """
    cleaned = {}
    any_failed = False
    for field in ("prompt", "response_a", "response_b"):
        try:
            cleaned[field] = getattr(row, field).encode("utf-8").decode("utf-8")
        except Exception:
            cleaned[field] = ""
            any_failed = True

    prompt = cleaned["prompt"]
    # encode_fail is written before options so the resulting column order
    # stays (..., encode_fail, options).
    row["encode_fail"] = any_failed
    row['options'] = [
        f"Prompt: {prompt}\n\nResponse: {reply}"
        for reply in (cleaned["response_a"], cleaned["response_b"])
    ]
    return row

# Run make_pairs on every row (axis=1), which adds the encode_fail and options
# columns; `test` is rebound to the result.
test = test.apply(make_pairs, axis=1)  
# Preview the first rows of the augmented frame.
test.head()

Unnamed: 0_level_0,prompt,response_a,response_b,encode_fail,options
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
136060,"I have three oranges today, I ate an orange ye...",You have two oranges today.,You still have three oranges. Eating an orange...,False,"[Prompt: I have three oranges today, I ate an ..."
211333,You are a mediator in a heated political debat...,Thank you for sharing the details of the situa...,Mr Reddy and Ms Blue both have valid points in...,False,[Prompt: You are a mediator in a heated politi...
1233961,How to initialize the classification head when...,When you want to initialize the classification...,To initialize the classification head when per...,False,[Prompt: How to initialize the classification ...


In [59]:
# Sanity check: (rows, columns) after encode_fail and options were added.
test.shape

(3, 5)

In [60]:
# Only "options" is needed from here on; drop the raw text columns and the
# encode_fail flag.
unused_columns = ['prompt', 'response_b', 'response_a', 'encode_fail']
test = test.drop(columns=unused_columns)
test

Unnamed: 0_level_0,options
id,Unnamed: 1_level_1
136060,"[Prompt: I have three oranges today, I ate an ..."
211333,[Prompt: You are a mediator in a heated politi...
1233961,[Prompt: How to initialize the classification ...


In [61]:
# Inspect the raw option lists (two texts per row) held in the options column.
test['options'].values

array([list(['Prompt: I have three oranges today, I ate an orange yesterday. How many oranges do I have?\n\nResponse: You have two oranges today.', 'Prompt: I have three oranges today, I ate an orange yesterday. How many oranges do I have?\n\nResponse: You still have three oranges. Eating an orange yesterday does not affect the number of oranges you have today.']),
       list(["Prompt: You are a mediator in a heated political debate between two opposing parties. Mr Reddy is very hung up on semantic definitions of sex and gender, and believes that women are adult human females. Meanwhile Ms Blue is extremely fluid with definitions and does not care about truth. He (Ms blue uses he\\/him pronouns) insists that anybody can be any gender, gametes don't mean anything, and that men can get pregnant. You, Mr Goddy are tasked with helping them both find a middle ground.\n\nResponse: Thank you for sharing the details of the situation. As a mediator, I understand the importance of finding a mid

In [62]:
from transformers import AutoTokenizer
# Hub id of the tokenizer to load.
# NOTE(review): presumably the checkpoint the loaded model was fine-tuned from -- confirm.
model_name = "distilbert/distilbert-base-uncased"
# use_fast=False requests the slow (pure-Python) tokenizer implementation.
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
# Prefer the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [66]:
from torch.utils.data import Dataset, DataLoader

class TextClassificationDataset(Dataset):
    """Tokenizes the ``options`` column of a DataFrame for model inference.

    Each row's ``options`` value is handed to the tokenizer unchanged. When it
    is a list of texts (``make_pairs`` builds one text per response), the
    tokenizer returns one padded sequence per text and those sequences are
    flattened into a single 1-D tensor, so an item holds
    ``len(options) * max_length`` entries.

    NOTE(review): the flattened length must fit the model's maximum sequence
    length (the loaded model's position embedding table has 512 rows, which is
    exactly 2 * 256) -- keep that in mind when changing ``max_length``.

    Args:
        dataSet: Frame with an ``options`` column.
        tokenizer: Callable taking the text plus ``max_length``, ``padding``,
            ``truncation`` and ``return_tensors`` keyword arguments and
            returning a mapping with ``input_ids`` and ``attention_mask``
            tensors.
        max_length: Length every individual text is padded/truncated to.
            Defaults to 256, the value that used to be hard-coded.
    """

    def __init__(self, dataSet: pd.DataFrame, tokenizer, max_length: int = 256):
        self.values = dataSet['options'].values
        self.dataSet = dataSet
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        """Return the number of rows in the wrapped frame."""
        return len(self.values)

    def __getitem__(self, index):
        """Tokenize the row at position ``index``.

        Returns:
            Dict with 1-D ``input_ids`` and ``attention_mask`` tensors.
        """
        encoding = self.tokenizer(
            self.values[index],
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt',
        )
        # Flatten (n_texts, max_length) into one sequence of n_texts * max_length.
        return {
            'input_ids': encoding['input_ids'].reshape(-1),
            'attention_mask': encoding['attention_mask'].reshape(-1),
        }

In [68]:
# Wrap the prepared frame so each row is tokenized when it is accessed.
test_dataSet = TextClassificationDataset(test, tokenizer)
# One row per batch; no shuffling is requested, so rows keep the frame's order.
test_loader = DataLoader(test_dataSet, batch_size=1)

In [83]:
import torch.nn.functional as F

# Every batch is moved to `device` below, so the model has to live there too;
# otherwise the forward pass fails with a device mismatch whenever the model
# was loaded onto a different device than the one selected.
model.to(device)

# Collects one probability vector (one entry per class) per test row.
res = []
with torch.no_grad():  # inference only: no gradients needed
    for batch in test_loader:
        batch = {k: v.to(device) for k, v in batch.items()}
        logits = model(**batch).logits
        probs = F.softmax(logits, dim=1)
        # extend() keeps every row of the batch, so the result stays correct
        # if batch_size is ever raised above 1.
        res.extend(probs.cpu().numpy())
res

[array([0.30991364, 0.45228857, 0.23779777], dtype=float32),
 array([0.16487609, 0.6027802 , 0.23234364], dtype=float32),
 array([0.1631489 , 0.62330884, 0.21354225], dtype=float32)]

In [84]:
# One row per test example, one column per softmax output.
# NOTE(review): assumes the model's class order is (model_a, model_b, tie) --
# confirm against the labels used in training.
df = pd.DataFrame(res, columns=['winner_model_a', 'winner_model_b', 'winner_tie'])
df

Unnamed: 0,winner_model_a,winner_model_b,winner_tie
0,0.309914,0.452289,0.237798
1,0.164876,0.60278,0.232344
2,0.163149,0.623309,0.213542


In [89]:
# Start from the sample submission and overwrite its three probability columns
# with the predictions.
# NOTE(review): both frames carry a default integer index, so values are
# matched by row position -- this assumes sample_submission.csv lists the rows
# in the same order as test.csv; confirm.
submission = pd.read_csv('sample_submission.csv')
for prob_column in ('winner_model_a', 'winner_model_b', 'winner_tie'):
    submission[prob_column] = df[prob_column]

In [91]:
# Write the submission file; index=False keeps the integer row index out of the CSV.
submission.to_csv('submit_1.csv', index=False)