In [10]:
import torch
from transformers import BertTokenizer, BertForSequenceClassification, AdamW
from transformers import RobertaTokenizer, RobertaForSequenceClassification
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler, TensorDataset, random_split
from sklearn.metrics import matthews_corrcoef
import numpy as np
from datasets import load_dataset

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [2]:
dataset = load_dataset('glue', 'cola')
train_sentences = dataset['train']['sentence']
train_labels = dataset['train']['label']
val_sentences = dataset['validation']['sentence']
val_labels = dataset['validation']['label']

In [3]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')


def encode_sentences(sentences):
    return tokenizer.batch_encode_plus(
        sentences,
        add_special_tokens=True,
        max_length=64,
        padding='max_length',
        truncation=True,
        return_attention_mask=True,
        return_tensors='pt'
    )


train_encodings = encode_sentences(train_sentences)
val_encodings = encode_sentences(val_sentences)
train_labels = torch.tensor(train_labels)
val_labels = torch.tensor(val_labels)

train_dataset = TensorDataset(train_encodings['input_ids'], train_encodings['attention_mask'], train_labels)
val_dataset = TensorDataset(val_encodings['input_ids'], val_encodings['attention_mask'], val_labels)

In [4]:
batch_size = 32

train_dataloader = DataLoader(
    train_dataset,
    sampler=RandomSampler(train_dataset),
    batch_size=batch_size
)

validation_dataloader = DataLoader(
    val_dataset,
    sampler=SequentialSampler(val_dataset),
    batch_size=batch_size
)

In [5]:
model = BertForSequenceClassification.from_pretrained(
    'bert-base-uncased',
    num_labels=2,
    output_attentions=False,
    output_hidden_states=False
)
model.to(device)
optimizer = AdamW(model.parameters(), lr=2e-5, eps=1e-8)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Train

In [6]:
epochs = 10

for epoch in range(epochs):
    model.train()
    total_loss = 0

    for step, batch in enumerate(train_dataloader):
        b_input_ids = batch[0].to(device)
        b_input_mask = batch[1].to(device)
        b_labels = batch[2].to(device)

        model.zero_grad()
        outputs = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask, labels=b_labels)
        loss = outputs.loss
        total_loss += loss.item()
        loss.backward()
        optimizer.step()

    avg_train_loss = total_loss / len(train_dataloader)
    print(f'Epoch {epoch+1}, Loss: {avg_train_loss}')


Epoch 1, Loss: 0.5194935164789656
Epoch 2, Loss: 0.30841303422157446
Epoch 3, Loss: 0.17443934961486218
Epoch 4, Loss: 0.11307993568584267
Epoch 5, Loss: 0.08190644955359844
Epoch 6, Loss: 0.06785648609094544
Epoch 7, Loss: 0.05070989007658478
Epoch 8, Loss: 0.03857439695210231
Epoch 9, Loss: 0.036639563241928345
Epoch 10, Loss: 0.0355785085112482


Evaluation

In [7]:
def evaluate(model, validation_dataloader):
    model.eval()
    preds, true_labels = [], []

    for batch in validation_dataloader:
        b_input_ids = batch[0].to(device)
        b_input_mask = batch[1].to(device)
        b_labels = batch[2].to(device)

        with torch.no_grad():
            outputs = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask)

        logits = outputs.logits
        preds.append(logits.detach().cpu().numpy())
        true_labels.append(b_labels.cpu().numpy())

    preds = np.concatenate(preds, axis=0)
    true_labels = np.concatenate(true_labels, axis=0)

    return preds, true_labels

preds, true_labels = evaluate(model, validation_dataloader)
preds = np.argmax(preds, axis=1)

In [8]:
# MCC
mcc = matthews_corrcoef(true_labels, preds)
print(f'MCC: {mcc}')

MCC: 0.5608014461364136


Modification

Lets use more powerful transformer like roberta

In [11]:
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')


train_encodings = encode_sentences(train_sentences)
val_encodings = encode_sentences(val_sentences)
train_labels = torch.tensor(train_labels)
val_labels = torch.tensor(val_labels)

train_dataset = TensorDataset(train_encodings['input_ids'], train_encodings['attention_mask'], train_labels)
val_dataset = TensorDataset(val_encodings['input_ids'], val_encodings['attention_mask'], val_labels)

batch_size = 32

train_dataloader = DataLoader(
    train_dataset,
    sampler=RandomSampler(train_dataset),
    batch_size=batch_size
)

validation_dataloader = DataLoader(
    val_dataset,
    sampler=SequentialSampler(val_dataset),
    batch_size=batch_size
)

Downloading vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

  train_labels = torch.tensor(train_labels)
  val_labels = torch.tensor(val_labels)


In [14]:
model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=2)
model.to(device)
optimizer = AdamW(model.parameters(), lr=2e-5, eps=1e-8)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [15]:
epochs = 10

for epoch in range(epochs):
    model.train()
    total_loss = 0

    for step, batch in enumerate(train_dataloader):
        b_input_ids = batch[0].to(device)
        b_input_mask = batch[1].to(device)
        b_labels = batch[2].to(device)

        model.zero_grad()
        outputs = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask, labels=b_labels)
        loss = outputs.loss
        total_loss += loss.item()
        loss.backward()
        optimizer.step()

    avg_train_loss = total_loss / len(train_dataloader)
    print(f'Epoch {epoch+1}, Loss: {avg_train_loss}')


Epoch 1, Loss: 0.5057378503035254
Epoch 2, Loss: 0.34889491236031944
Epoch 3, Loss: 0.23757990260622394
Epoch 4, Loss: 0.18076101139382417
Epoch 5, Loss: 0.12386790446853682
Epoch 6, Loss: 0.10055121111009381
Epoch 7, Loss: 0.07865931370483238
Epoch 8, Loss: 0.07040167963844654
Epoch 9, Loss: 0.05574830389692823
Epoch 10, Loss: 0.049745468293758


In [16]:
preds, true_labels = evaluate(model, validation_dataloader)
preds = np.argmax(preds, axis=1)

# MCC
mcc = matthews_corrcoef(true_labels, preds)
print(f'MCC: {mcc}')

MCC: 0.6292829640607693


Conclusion: as we can see, roberta provides us with higher quality than the standard bert model (0.56 vs. 0.63).

Ex. 2 Inference model on dataset from PW7 

In [17]:
import pandas as pd

hotel_reviews_sentiment = pd.read_csv('../hw7/hotel_reviews_sentiment.csv')
restaurant_reviews_sentiment = pd.read_csv('../hw7/restaurant_reviews_sentiment.csv')
cafe_reviews_sentiment = pd.read_csv('../hw7/cafe_reviews_sentiment.csv')

Hotel

In [18]:
hotel_reviews_sentiment.head()

Unnamed: 0,review,sentiment
0,Lovely Hotel. Had tea and cake. Staff so amazi...,1
1,I had not stayed at the Halkin in quite some t...,1
2,COMO The Halkin is one of the most wonderful h...,1
3,We chose to celebrate my son's birthday at COM...,1
4,I had afternoon tea here with a Groupon. The s...,1


In [41]:
labels = []
for hotel_review in hotel_reviews_sentiment["review"]:
    hotel_review_encodings = encode_sentences([hotel_review])
    b_input_ids = hotel_review_encodings["input_ids"].to(device)
    b_input_mask = hotel_review_encodings["attention_mask"].to(device)
    
    with torch.no_grad():
        outputs = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask)
        # label = torch.argmax(outputs).item()
    
    logits = outputs.logits
    labels.append(logits.cpu().detach().numpy())

labels = np.concatenate(labels, axis=0)
labels = np.argmax(labels, axis=1)

hotel_reviews_sentiment["acceptability_judgements"] = labels

In [42]:
hotel_reviews_sentiment

Unnamed: 0,review,sentiment,acceptability_judgements
0,Lovely Hotel. Had tea and cake. Staff so amazi...,1,0
1,I had not stayed at the Halkin in quite some t...,1,1
2,COMO The Halkin is one of the most wonderful h...,1,0
3,We chose to celebrate my son's birthday at COM...,1,1
4,I had afternoon tea here with a Groupon. The s...,1,1
5,Great service in civilised surroundings. Quiet...,0,1
6,Had the tasting menu with wine pairing. Great ...,1,1
7,I had a meeting there. I really like the place...,1,0
8,"Had a really lovely meal here, which was bough...",1,1
9,Terrific place! Underrated exquisite tasting m...,1,1


Restaurant

In [45]:
restaurant_reviews_sentiment.head()

Unnamed: 0,review,sentiment
0,Pretty good not a long wait or any other probl...,1
1,Zizi - Victoria is an exceptional Italian rest...,1
2,Dining at Zizi - Victoria was an unforgettable...,1
3,Exceptional service and food. I had a wonderfu...,1
4,Zizi - Victoria is a delightful place for Ital...,1


In [46]:
labels = []
for review in restaurant_reviews_sentiment["review"]:
    review_encodings = encode_sentences([review])
    b_input_ids = review_encodings["input_ids"].to(device)
    b_input_mask = review_encodings["attention_mask"].to(device)
    
    with torch.no_grad():
        outputs = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask)
    
    logits = outputs.logits
    labels.append(logits.cpu().detach().numpy())

labels = np.concatenate(labels, axis=0)
labels = np.argmax(labels, axis=1)

restaurant_reviews_sentiment["acceptability_judgements"] = labels

In [47]:
restaurant_reviews_sentiment

Unnamed: 0,review,sentiment,acceptability_judgements
0,Pretty good not a long wait or any other probl...,1,0
1,Zizi - Victoria is an exceptional Italian rest...,1,1
2,Dining at Zizi - Victoria was an unforgettable...,1,1
3,Exceptional service and food. I had a wonderfu...,1,1
4,Zizi - Victoria is a delightful place for Ital...,1,1
5,Had a great time at Zizi - Victoria. The food ...,1,1
6,Delicious food and a lovely atmosphere. The sp...,1,1
7,Really enjoyed my meal at Zizi - Victoria. The...,1,1
8,Mixed feelings about Zizi - Victoria. The food...,1,1
9,Zizi - Victoria is an average Italian restaura...,1,1


Cafe

In [48]:
cafe_reviews_sentiment.head()

Unnamed: 0,review,sentiment
0,Starbucks never disappoints! This location is ...,1
1,I had a wonderful experience at this Starbucks...,1
2,"Starbucks has always been a favorite of mine, ...",1
3,Lovely outside seating area local to Buckingha...,1
4,I really enjoy visiting this Starbucks. The co...,1


In [49]:
labels = []
for review in cafe_reviews_sentiment["review"]:
    review_encodings = encode_sentences([review])
    b_input_ids = review_encodings["input_ids"].to(device)
    b_input_mask = review_encodings["attention_mask"].to(device)
    
    with torch.no_grad():
        outputs = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask)
    
    logits = outputs.logits
    labels.append(logits.cpu().detach().numpy())

labels = np.concatenate(labels, axis=0)
labels = np.argmax(labels, axis=1)

cafe_reviews_sentiment["acceptability_judgements"] = labels

In [50]:
cafe_reviews_sentiment

Unnamed: 0,review,sentiment,acceptability_judgements
0,Starbucks never disappoints! This location is ...,1,1
1,I had a wonderful experience at this Starbucks...,1,1
2,"Starbucks has always been a favorite of mine, ...",1,1
3,Lovely outside seating area local to Buckingha...,1,1
4,I really enjoy visiting this Starbucks. The co...,1,1
5,This Starbucks is a great spot for a quick cof...,1,1
6,A solid Starbucks location with friendly staff...,1,1
7,This Starbucks offers a great coffee experienc...,1,1
8,It's a good spot very close to the Victoria co...,1,1
9,"Good service,very small though and toilets wer...",1,1


Conclusion: the most acceptability judgements are contained in the reviews of the Starbucks cafe.