In [11]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import numpy as np
from helper import Transformer_Dataset, preprocess
import pandas as pd
import torch

## Load Fine-Tuned Model

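Load the fine-tuned DistilBERT sequence-classification model from `./my_fine_tuned_model` and move it to the GPU when one is available (otherwise the CPU).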

In [12]:
# Directory holding the fine-tuned checkpoint.
model_path = "./my_fine_tuned_model"

# Prefer the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Load the classification model from the checkpoint directory and place it on
# the chosen device; the trailing expression displays the model architecture.
model = AutoModelForSequenceClassification.from_pretrained(model_path)
model.to(device)

DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): MultiHeadSelfAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)
 

Load the tokenizer from the same directory as the fine-tuned model.

In [13]:
# Load the tokenizer from the same directory the model was loaded from (model_path).
tokenizer = AutoTokenizer.from_pretrained(model_path)

## Load Data for testing

In [14]:
# Read the held-out test split; the "text" and "label" columns are used below.
df_test = pd.read_csv("Datasets/test.csv")

# Tokenize/preprocess every test sentence with the model's tokenizer.
testing_input = preprocess(df_test["text"].tolist(), tokenizer=tokenizer)

# Wrap the preprocessed inputs and their labels in the project's dataset class,
# passing the device the model lives on.
testing_dataset = Transformer_Dataset(
    testing_input,
    df_test["label"].tolist(),
    device=device,
)


Testing single examples

In [15]:
def single_data_test(index):
    """Run the model on one test example and print its text, true label and prediction.

    Parameters
    ----------
    index : int
        Position of the example in ``df_test`` and ``testing_dataset``.

    Returns
    -------
    The predicted class index, i.e. element 0 of ``logits.argmax(-1)``
    (still a tensor, exactly as before).
    """
    # Positional lookup, so the printed row always corresponds to
    # ``testing_dataset[index]`` even if the frame's index is not 0..n-1.
    single_text = df_test["text"].iloc[index]
    real_label = df_test["label"].iloc[index]
    print("input_text: ", single_text)
    print("output_label: ", real_label)

    data = testing_dataset[index]

    # Inference only: disable autograd so no graph is built for the forward pass.
    with torch.no_grad():
        output = model(**data)
    label_ = output["logits"].argmax(-1)[0]
    print("predicted_label: ", label_)
    return label_

In [16]:
# Spot-check test example 10: prints its text, true label and predicted label.
index = 10
single_data_test(index)


input_text:  i don t feel particularly agitated
output_label:  4
predicted_label:  tensor(3, device='cuda:0')


tensor(3, device='cuda:0')

In [17]:
# Spot-check test example 100: prints its text, true label and predicted label.
index = 100
single_data_test(index)


input_text:  i feel needy but comfortable with it i feel vulnerable but secure i feel the urge to cum hard but i get no relief
output_label:  0
predicted_label:  tensor(0, device='cuda:0')


tensor(0, device='cuda:0')

In [18]:
# Spot-check test example 1022: prints its text, true label and predicted label.
index = 1022
single_data_test(index)


input_text:  i don t know if this helps at all but writing all of this has made me feel somewhat regretful of ashamed of who i was and while i have more to share i just don t think i can right now
output_label:  0
predicted_label:  tensor(0, device='cuda:0')


tensor(0, device='cuda:0')

In [19]:
# Spot-check test example 1245: prints its text, true label and predicted label.
index = 1245
single_data_test(index)


input_text:  i feel the hearts decision to stop caring can it be reversed
output_label:  2
predicted_label:  tensor(2, device='cuda:0')


tensor(2, device='cuda:0')

Whole Dataset Testing

In [20]:
# Evaluate the model on every test example and report plain accuracy.
real_label = np.array(df_test["label"])

# Size everything from the data instead of a hard-coded 2000 so the cell stays
# correct if the test split changes.
num_examples = len(real_label)
outputs = np.zeros(num_examples)

# Inference only: disable autograd so no graph is built for each forward pass.
with torch.no_grad():
    for i, data in enumerate(testing_dataset):
        # Leave the labels out of the forward pass without mutating the item
        # returned by the dataset (in case that dict is shared or cached).
        inputs = {key: value for key, value in data.items() if key != "labels"}
        output = model(**inputs)
        # .item() turns the single-element prediction tensor (possibly on the
        # GPU) into a Python number before it is stored in the NumPy array.
        outputs[i] = output["logits"].argmax(-1).item()

total_right = (outputs == real_label).sum()
acc = total_right / num_examples
print("accuracy: ", acc)

accuracy:  0.9315
