In [2]:
import os

In [3]:
# Export CUDA_LAUNCH_BLOCKING=1 into this process's environment.
# NOTE(review): presumably a debugging aid so CUDA errors surface at the failing call — confirm it is still wanted.
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

In [1]:
import pandas as pd
import numpy as np
from scipy.stats import pearsonr
import krippendorff
import csv
from sklearn.metrics import accuracy_score

In [2]:
!pip install krippendorff

Defaulting to user installation because normal site-packages is not writeable


In [3]:
# Load the doubly-annotated sample and split it into one frame per annotator.
df = pd.read_csv("../alldata/zihui_thomas_annotations.csv")
df1 = df.loc[df['annotator'].eq('user1')]
df2 = df.loc[df['annotator'].eq('user2')]

# Pearson correlation between the two annotators' ratings.
# NOTE(review): pearsonr pairs values by position, so this assumes both
# annotators' rows appear in the same item order — confirm against the CSV.
corr, _ = pearsonr(df1['rating'], df2['rating'])
corr

0.5533994185360147

In [4]:
# One row of ratings per annotator, passed to krippendorff.alpha as reliability data.
arr = [df1['rating'].values, df2['rating'].values]

# Report alpha first treating ratings as nominal categories, then as ordinal values.
for level in ('nominal', 'ordinal'):
    print(krippendorff.alpha(reliability_data=arr, level_of_measurement=level))


0.1587553189685863
0.42413933141329196


In [2]:
# Question/reply text for every item, keyed by `question_id`.
data_all = pd.read_csv('../alldata/si630w22-hw3-data.csv')


def _load_split(path, filter_ratings):
    """Read one annotation split, key it by ``question_id`` and clean it.

    Args:
        path: CSV file of the split; its ``id`` column is renamed to
            ``question_id`` so it can be merged with ``data_all``.
        filter_ratings: when True, keep only rows whose ``rating`` is in
            the interval (0, 5].

    Returns:
        A new DataFrame with rows containing any missing value removed and,
        if requested, out-of-range ratings removed.
    """
    split = pd.read_csv(path).rename(columns={'id': 'question_id'}).dropna()
    if filter_ratings:
        in_range = (split['rating'] > 0) & (split['rating'] <= 5)
        split = split[in_range]
    return split


# Clean each split BEFORE merging. Previously the dropna/rating filters ran
# after the merges, so they never reached train_df/dev_df/test_df, and
# test_all could end up with fewer rows than test_df even though predictions
# made from test_df are later written back onto test_all by position.
train_all = _load_split('../alldata/si630w22-hw3-train.csv', filter_ratings=True)
dev_all = _load_split('../alldata/si630w22-hw3-dev.csv', filter_ratings=True)
test_all = _load_split('../alldata/si630w22-hw3-test.public.csv', filter_ratings=False)

# Attach the question/reply text to every annotation row.
train_df = pd.merge(train_all, data_all, on='question_id')
dev_df = pd.merge(dev_all, data_all, on='question_id')
test_df = pd.merge(test_all, data_all, on='question_id')


In [3]:
class Example:
    """Plain record holding one (annotator, question, answer) item.

    Attributes are stored exactly as passed in:
    ``question_id``, ``user_id``, ``question``, ``answer`` and an optional
    ``label`` that defaults to ``None``.
    """

    def __init__(self, question_id, user_id, question, answer, label=None):
        # Identifiers first, then the two text fields, then the optional target.
        self.question_id, self.user_id = question_id, user_id
        self.question, self.answer = question, answer
        self.label = label

In [4]:
def to_input(df):
    """Convert an annotation DataFrame into a list of ``Example`` objects.

    Every row must provide ``annotator_id``, ``question_id``,
    ``question_text`` and ``reply_text``.

    If the frame has a ``rating`` column, only rows whose rating equals one
    of 1, 2, 3, 4, 5 are kept and the label is ``rating - 1`` (0..4); rows
    with any other value (including NaN) are skipped. If the frame has no
    ``rating`` column, every row is kept with ``label=None``.

    Args:
        df: pandas DataFrame with the columns described above.

    Returns:
        list of ``Example`` in row order.
    """
    valid_ratings = (1, 2, 3, 4, 5)
    # The column set does not change while iterating, so check it once.
    has_rating = 'rating' in df.columns

    input_list = []
    for _, row in df.iterrows():
        if has_rating:
            if row['rating'] not in valid_ratings:
                continue
            # Shift 1..5 ratings to 0..4 class indices.
            rating = int(row['rating']) - 1
        else:
            rating = None

        input_list.append(
            Example(
                row['question_id'],
                row['annotator_id'],
                row['question_text'],
                row['reply_text'],
                rating,
            )
        )
    return input_list

In [5]:
import torch
from transformers import TrainingArguments, Trainer
from transformers import BertTokenizer, BertForSequenceClassification
from transformers import EarlyStoppingCallback


# Use the GPU when one is usable, otherwise fall back to the CPU.
# is_available has to be *called*: the bare function object is always truthy,
# which previously selected "cuda" even on machines without a GPU.
device=torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Local directory holding the MiniLM-L12-H384-uncased checkpoint; it is loaded
# through the BERT tokenizer/model classes.
model_path="../model/MiniLM-L12-H384-uncased/"
tokenizer = BertTokenizer.from_pretrained(model_path)
# Five output classes: to_input stores ratings 1..5 as labels 0..4.
model = BertForSequenceClassification.from_pretrained(model_path, num_labels=5)
# Trailing expression: moves the model to `device`; its repr becomes the cell output.
model.to(device)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at ../model/MiniLM-L12-H384-uncased/ and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 384, padding_idx=0)
      (position_embeddings): Embedding(512, 384)
      (token_type_embeddings): Embedding(2, 384)
      (LayerNorm): LayerNorm((384,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=384, out_features=384, bias=True)
              (key): Linear(in_features=384, out_features=384, bias=True)
              (value): Linear(in_features=384, out_features=384, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=384, out_features=384, bias=True)
              (LayerNorm): LayerNorm((384,), eps=1e-12, element

In [6]:
def to_sequence(input_list,tokenizer,max_length=128):
    """Build tokenizer inputs and the matching label list from examples.

    Each example contributes one string made of its ``user_id``,
    ``question`` and ``answer`` joined by the literal text ``['[SEP]']``.
    The strings are tokenized in a single call with padding and truncation
    enabled.

    NOTE(review): the separator is the literal text ``['[SEP]']`` (brackets
    and quotes included), not a bare ``[SEP]`` token — confirm this is intended.

    Args:
        input_list: iterable of objects exposing ``user_id``, ``question``,
            ``answer`` (all strings) and ``label``.
        tokenizer: callable accepting ``(texts, padding=, truncation=, max_length=)``.
        max_length: forwarded to the tokenizer.

    Returns:
        ``(encodings, labels)`` — the tokenizer's return value and the list
        of labels in input order.
    """
    separator = "['[SEP]']"
    # Single pass over the examples so text and label stay paired.
    pairs = [
        (separator.join((ex.user_id, ex.question, ex.answer)), ex.label)
        for ex in input_list
    ]
    texts = [text for text, _ in pairs]
    targets = [target for _, target in pairs]

    encoded = tokenizer(texts, padding=True, truncation=True, max_length=max_length)
    return encoded, targets


In [7]:
# Convert the merged train and dev frames into Example lists (same conversion for both).
train_input_list, val_input_list = (to_input(frame) for frame in (train_df, dev_df))

In [8]:
# Convert the merged test frame to Examples, then tokenize them.
test_input_list = to_input(test_df)
X_test, Y_test = to_sequence(
    input_list=test_input_list,
    tokenizer=tokenizer,
    max_length=128,
)

In [9]:
# Tokenize the training and validation examples with the same maximum length.
X_train, Y_train = to_sequence(
    input_list=train_input_list,
    tokenizer=tokenizer,
    max_length=128,
)
X_val, Y_val = to_sequence(
    input_list=val_input_list,
    tokenizer=tokenizer,
    max_length=128,
)

In [10]:
class Dataset(torch.utils.data.Dataset):
    """Torch dataset wrapping tokenizer encodings and optional labels.

    Args:
        encodings: mapping from feature name to a per-example sequence; it
            must contain ``"input_ids"``, whose length defines the dataset size.
        labels: optional per-example labels (list, tuple, numpy array or
            tensor). ``None`` or an empty container means "unlabeled".
    """

    def __init__(self, encodings, labels=None):
        self.encodings = encodings
        self.labels = labels

    def _has_labels(self):
        """Return True when a non-empty label container was supplied."""
        # An explicit None/length check instead of truthiness: `if labels:`
        # raises for numpy arrays and multi-element tensors.
        return self.labels is not None and len(self.labels) > 0

    def __getitem__(self, idx):
        """Return a dict of tensors for example ``idx`` (plus ``"labels"`` if labeled)."""
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        if self._has_labels():
            item["labels"] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        """Number of examples, taken from the ``"input_ids"`` entry."""
        return len(self.encodings["input_ids"])

In [11]:
# Wrap the tokenized train/validation data (with labels) for the Trainer.
train_dataset = Dataset(encodings=X_train, labels=Y_train)
val_dataset = Dataset(encodings=X_val, labels=Y_val)

In [12]:
def compute_metrics(p):
    """Compute accuracy from a ``(scores, labels)`` pair.

    Args:
        p: two-element iterable; the first element holds per-class scores
            with classes along axis 1, the second the true label ids.

    Returns:
        ``{"accuracy": <accuracy_score of argmax predictions vs. labels>}``.
    """
    scores, gold = p
    # The predicted class is the index of the highest score in each row.
    predicted = np.argmax(scores, axis=1)
    return {"accuracy": accuracy_score(y_true=gold, y_pred=predicted)}

In [13]:


# Fine-tuning configuration: evaluate every 500 steps, batch size 8 for both
# training and evaluation, 10 epochs, fixed seed; outputs go to ./output.
args = TrainingArguments(
    output_dir="output",
    evaluation_strategy="steps",
    eval_steps=500,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=10,
    seed=0,
    # No metric_for_best_model is given, so the library's default criterion
    # decides which checkpoint is "best" — TODO confirm that is the intended one.
    load_best_model_at_end=True,
)
# Trainer over the train/validation datasets; compute_metrics reports accuracy
# at each evaluation.
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
    # Early stopping is currently disabled; the line below would enable it.
#     callbacks=[EarlyStoppingCallback(early_stopping_patience=3)],
)

# Fine-tune the pre-trained model; the value returned by train() is displayed
# as the cell output.
trainer.train()

***** Running training *****
  Num examples = 17836
  Num Epochs = 10
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 22300


Step,Training Loss,Validation Loss,Accuracy
500,1.3306,1.281912,0.448502
1000,1.2183,1.214335,0.485549
1500,1.1784,1.141844,0.538098
2000,1.1437,1.1835,0.528376
2500,1.1223,1.198128,0.53258
3000,1.1053,1.126477,0.563847
3500,1.0945,1.113546,0.557278
4000,1.0743,1.132826,0.559643
4500,1.0805,1.132054,0.561219
5000,1.0174,1.142129,0.551498


***** Running Evaluation *****
  Num examples = 3806
  Batch size = 8
Saving model checkpoint to output/checkpoint-500
Configuration saved in output/checkpoint-500/config.json
Model weights saved in output/checkpoint-500/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 3806
  Batch size = 8
Saving model checkpoint to output/checkpoint-1000
Configuration saved in output/checkpoint-1000/config.json
Model weights saved in output/checkpoint-1000/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 3806
  Batch size = 8
Saving model checkpoint to output/checkpoint-1500
Configuration saved in output/checkpoint-1500/config.json
Model weights saved in output/checkpoint-1500/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 3806
  Batch size = 8
Saving model checkpoint to output/checkpoint-2000
Configuration saved in output/checkpoint-2000/config.json
Model weights saved in output/checkpoint-2000/pytorch_model.bin
***** Running Evaluation *****
  Nu

Model weights saved in output/checkpoint-17000/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 3806
  Batch size = 8
Saving model checkpoint to output/checkpoint-17500
Configuration saved in output/checkpoint-17500/config.json
Model weights saved in output/checkpoint-17500/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 3806
  Batch size = 8
Saving model checkpoint to output/checkpoint-18000
Configuration saved in output/checkpoint-18000/config.json
Model weights saved in output/checkpoint-18000/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 3806
  Batch size = 8
Saving model checkpoint to output/checkpoint-18500
Configuration saved in output/checkpoint-18500/config.json
Model weights saved in output/checkpoint-18500/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 3806
  Batch size = 8
Saving model checkpoint to output/checkpoint-19000
Configuration saved in output/checkpoint-19000/config.json
Model weights saved 

TrainOutput(global_step=22300, training_loss=0.9295498629856537, metrics={'train_runtime': 1505.0664, 'train_samples_per_second': 118.506, 'train_steps_per_second': 14.817, 'total_flos': 2937423248148480.0, 'train_loss': 0.9295498629856537, 'epoch': 10.0})

In [14]:
def to_sequence_test(input_list,tokenizer,max_length=128):
    """Tokenize examples for inference, returning only the encodings.

    Builds the same input text as ``to_sequence`` — ``user_id``,
    ``question`` and ``answer`` joined by the literal text ``['[SEP]']`` —
    but ignores labels.

    Args:
        input_list: iterable of objects exposing string attributes
            ``user_id``, ``question`` and ``answer``.
        tokenizer: callable accepting ``(texts, padding=, truncation=, max_length=)``.
        max_length: forwarded to the tokenizer.

    Returns:
        The tokenizer's return value for the list of built strings.
    """
    separator = "['[SEP]']"
    texts = [
        separator.join((ex.user_id, ex.question, ex.answer))
        for ex in input_list
    ]
    return tokenizer(texts, padding=True, truncation=True, max_length=max_length)


In [15]:
# Rebuild the test Example list (same call as in the earlier tokenization cell).
test_input_list = to_input(df=test_df)

In [16]:
# to_sequence returns (encodings, labels); only the encodings are needed for prediction.
X_test = to_sequence(test_input_list, tokenizer, max_length=128)[0]
# No labels are attached, so each dataset item carries only the encoded inputs.
test_dataset = Dataset(X_test, None)

In [17]:
# Run inference on the test dataset; only the first element of the returned
# triple is kept (presumably the raw per-class scores — confirm against the
# Trainer.predict documentation).
raw_test_result,_,_=trainer.predict(test_dataset)
# argmax over axis 1 picks a class index per example; +1 undoes the
# `rating - 1` shift applied in to_input, mapping indices back to ratings.
y_pred = np.argmax(raw_test_result, axis=1)+1

***** Running Prediction *****
  Num examples = 3821
  Batch size = 8


In [18]:
# Display the predicted ratings as the cell output for a quick sanity check.
y_pred

array([5, 5, 5, ..., 3, 3, 3])

In [19]:
# Collect the predictions as a plain list, applying round() to each value
# exactly as before (y_pred already holds integer class ids + 1).
pred_list = [round(item) for item in y_pred]

# Restore the original `id` column name and attach the predictions.
# The assignment is positional: pred_list needs exactly one entry per row.
test_result = test_all.rename(columns={'question_id': 'id'})
test_result['rating'] = pred_list
# NOTE(review): to_csv is called without index=False, so the DataFrame index is
# written as an extra leading column — confirm the expected submission format.
test_result.to_csv('test_result_classification_batch8.csv')