# Requirements

In [29]:
# Add as many imports as you need.
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from keras.api.models import Sequential
from keras.api.layers import Input, LSTM, Dense
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
import numpy as np
from transformers import pipeline
from datasets import load_dataset


# Laboratory Exercise - Run Mode (8 points)

## Introduction
The primary objective of this laboratory assignment is to fine-tune a pre-trained language model for binary classification on a dataset of wine reviews. The dataset contains two attributes: **description** and **points**. The description is a brief text describing the wine, and the points are a quality metric ranging from 1 to 100. A wine with at least 90 points is considered **exceptional**. Your task is to predict whether a wine is **exceptional** based on its review.

## The Wine Reviews Dataset

Load the dataset using the `datasets` library.

In [105]:
# Write your code here. Add as many boxes as you need.
# Read the CSV through the `datasets` CSV loader, keeping only the slice
# selected by the split expression, then display the resulting Dataset.
dataset = load_dataset(
    "csv",
    data_files="wine-reviews.csv",
    split='train[:200]',
)
dataset

Dataset({
    features: ['description', 'points'],
    num_rows: 200
})

In [106]:
# Convert the Hugging Face dataset to a pandas DataFrame for inspection.
df = dataset.to_pandas()

In [107]:
# Binarize the score: 1 when the wine has at least 90 points, 0 otherwise.
# `.tolist()` keeps `l` a plain Python list of ints.
l = (df['points'] >= 90).astype(int).tolist()
# The raw score column is replaced by the binary flag.
df['points'] = l
df

Unnamed: 0,description,points
0,"Translucent in color, silky in the mouth, this...",0
1,"On the palate, this wine is rich and complex, ...",1
2,The producer blends 57% Chardonnay from the Ma...,1
3,"Pure Baga in all its glory, packed with dry an...",1
4,Think of Subsídio as a contribution rather tha...,0
...,...,...
195,"Sour-orange juice, yellow-grapefruit flesh, ci...",1
196,Here's a solid value wine that should pair wit...,0
197,Whiffs of pollen and saffron lend shades of nu...,0
198,"A 100% varietal wine, this offers a depth of e...",1


### Target Extraction
Extract the target **exceptional** for each wine review. If some wine has at least 90 points it is considered **exceptional**.

In [108]:
# Write your code here. Add as many boxes as you need.
# Derive the binary target directly from the dataset's own `points` column so
# this cell does not depend on a list built in another cell.
# 1 = exceptional (at least 90 points), 0 = not exceptional; a missing score
# is treated as not exceptional.
is_exceptional = [
    int(points is not None and points >= 90) for points in dataset["points"]
]

# Attach the target, drop the raw score, and use the conventional `text` /
# `label` column names.
dataset = (
    dataset
    .add_column("label", is_exceptional)
    .remove_columns("points")
    .rename_column("description", "text")
)
dataset

Dataset({
    features: ['text', 'label'],
    num_rows: 200
})

## Dataset Splitting
Partition the dataset into training and testing sets with an 80:20 ratio.


In [109]:
# Show the dataset as a DataFrame to check the `text` and `label` columns.
dataset.to_pandas()

Unnamed: 0,text,label
0,"Translucent in color, silky in the mouth, this...",0
1,"On the palate, this wine is rich and complex, ...",1
2,The producer blends 57% Chardonnay from the Ma...,1
3,"Pure Baga in all its glory, packed with dry an...",1
4,Think of Subsídio as a contribution rather tha...,0
...,...,...
195,"Sour-orange juice, yellow-grapefruit flesh, ci...",1
196,Here's a solid value wine that should pair wit...,0
197,Whiffs of pollen and saffron lend shades of nu...,0
198,"A 100% varietal wine, this offers a depth of e...",1


In [110]:
# Write your code here. Add as many boxes as you need.
# Hold out 20% of the reviews for testing. The fixed seed makes the shuffled
# train/test partition identical across kernel restarts.
dataset = dataset.train_test_split(test_size=0.2, seed=42)
dataset

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 160
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 40
    })
})

## Tokenization
Tokenize the texts using the `AutoTokenizer` class.

In [111]:
# Write your code here. Add as many boxes as you need.
from transformers import AutoTokenizer, DataCollatorWithPadding

# Name of the pre-trained checkpoint; the tokenizer is loaded from it.
checkpoint = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

# Tokenize the raw training texts.
train_texts = dataset["train"]["text"]
tokenized_texts = tokenizer(train_texts)

## Fine-tuning a Pre-trained Language Model for Classification
Fine-tune a pre-trained language model for classification on the given dataset.

In [112]:
from transformers import AutoTokenizer

def tokenize(sample):
    """Tokenize the ``text`` entry of a batch of examples.

    ``truncation=True`` asks the tokenizer to cut over-long inputs. Padding is
    not requested here; batches are padded later by the data collator.

    Args:
        sample: Mapping with a ``"text"`` entry (a list of strings when called
            through ``Dataset.map(..., batched=True)``).

    Returns:
        The tokenizer output for ``sample["text"]``.
    """
    return tokenizer(sample["text"], truncation=True)


# Reuse the `tokenizer` already loaded from `checkpoint` in the Tokenization
# section rather than reloading it from a hard-coded model name, which could
# silently drift from the checkpoint the model is built from.
tokenized_dataset = dataset.map(tokenize, batched=True)

Map:   0%|          | 0/160 [00:00<?, ? examples/s]

Map:   0%|          | 0/40 [00:00<?, ? examples/s]

In [113]:
# Collator built from the tokenizer; it is handed to the Trainer as `data_collator`.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

In [114]:
# Inspect the splits and the columns present after tokenization.
tokenized_dataset

DatasetDict({
    train: Dataset({
        features: ['text', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 160
    })
    test: Dataset({
        features: ['text', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 40
    })
})

Define the model using the `AutoModelForSequenceClassification` class.

In [115]:
# !pip install transformers[torch]

In [116]:
# Write your code here. Add as many boxes as you need.
from transformers import TrainingArguments

# Evaluate and checkpoint once per epoch so that the epoch with the best "f1"
# score can be restored when training finishes. Without
# `load_best_model_at_end=True` (and a save strategy that matches the eval
# strategy) `metric_for_best_model` has no effect on the final model.
training_args = TrainingArguments(
    output_dir="trainer",
    eval_strategy="epoch",
    save_strategy="epoch",  # must match eval_strategy for best-model loading
    save_total_limit=1,  # limit how many checkpoints are kept on disk
    per_device_train_batch_size=8,  # batch size for training
    per_device_eval_batch_size=8,  # batch size for evaluation
    load_best_model_at_end=True,
    metric_for_best_model="f1",
    greater_is_better=True,  # a higher F1 is better
)

Define the training parameters using the `TrainingArguments` class.

In [117]:
# Write your code here. Add as many boxes as you need.
from transformers import AutoModelForSequenceClassification

# Load the pre-trained checkpoint with a two-class classification head.
model = AutoModelForSequenceClassification.from_pretrained(
    checkpoint,
    num_labels=2,
)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Define the training using the `Trainer` class.

In [118]:
# Write your code here. Add as many boxes as you need.
import evaluate
import numpy as np

# Load the "f1" metric from the `evaluate` library; `compute_metrics` uses it.
metric = evaluate.load("f1")

In [119]:
def compute_metrics(eval_pred):
    """Score one evaluation pass with the module-level ``metric``.

    Args:
        eval_pred: A ``(logits, labels)`` pair.

    Returns:
        The result of ``metric.compute`` for the arg-max class of each row of
        logits, computed with ``average="weighted"``.
    """
    scores, references = eval_pred
    # The predicted class is the index of the largest logit along the last axis.
    predicted_classes = np.argmax(scores, axis=-1)
    return metric.compute(
        predictions=predicted_classes,
        references=references,
        average="weighted",
    )

In [120]:
from transformers import Trainer

# Wire together the model, training arguments, tokenized splits, collator and
# metric callback.
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],
    compute_metrics=compute_metrics,
)

Fine-tune (train) the pre-trained language model.

In [121]:
import torch

In [122]:
# Write your code here. Add as many boxes as you need.
# Use a CUDA GPU when PyTorch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

'cpu'

In [123]:
# Move the model to the selected device.
# NOTE(review): the Trainer may already handle device placement — confirm
# whether this explicit call (and its long printed repr) is needed.
model.to(device)

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [124]:
# Run fine-tuning with the Trainer defined earlier in the notebook.
trainer.train()

Epoch,Training Loss,Validation Loss,F1
1,No log,0.634047,0.749373
2,No log,0.550637,0.720635
3,No log,0.524778,0.774859


TrainOutput(global_step=60, training_loss=0.5938518524169922, metrics={'train_runtime': 194.0828, 'train_samples_per_second': 2.473, 'train_steps_per_second': 0.309, 'total_flos': 20666551176480.0, 'train_loss': 0.5938518524169922, 'epoch': 3.0})

In [125]:
# Evaluate on the `eval_dataset` given to the Trainer (the test split).
trainer.evaluate()

{'eval_loss': 0.5247780680656433,
 'eval_f1': 0.7748592870544091,
 'eval_runtime': 4.5759,
 'eval_samples_per_second': 8.741,
 'eval_steps_per_second': 1.093,
 'epoch': 3.0}

In [126]:
# NOTE(review): the same prediction call is repeated in the next cell; one of
# the two is redundant.
predictions = trainer.predict(tokenized_dataset["test"])

Use the trained model to make predictions for the test set.

In [127]:
# Write your code here. Add as many boxes as you need.
# Run inference on the test split; the result's `.predictions` and
# `.label_ids` are read in the evaluation cells that follow.
predictions = trainer.predict(tokenized_dataset["test"])


Assess the performance of the model by using different metrics provided by the `scikit-learn` library.

In [128]:
# Write your code here. Add as many boxes as you need.
# Unpack the raw model outputs and the reference labels from the prediction result.
logits = predictions.predictions
labels = predictions.label_ids

# The predicted class is the index of the largest logit along the last axis.
preds = np.argmax(logits, axis=-1)

In [129]:
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

In [130]:
# Per-class precision, recall and F1 for the test-set predictions.
print(classification_report(labels, preds))

              precision    recall  f1-score   support

           0       0.76      0.80      0.78        20
           1       0.79      0.75      0.77        20

    accuracy                           0.78        40
   macro avg       0.78      0.78      0.77        40
weighted avg       0.78      0.78      0.77        40



# Laboratory Exercise - Bonus Task (+ 2 points)

Implement a simple machine learning pipeline to classify wine reviews as **exceptional** or not. Use TF-IDF vectorization to convert text into numerical features and train a logistic regression. Split the dataset into training and testing sets, fit the pipeline on the training data, and evaluate its performance using metrics such as precision, recall, and F1-score. Analyze the texts to find the most influential words or phrases associated with the **exceptional** wines. Use the coefficients from the logistic regression trained on TF-IDF features to identify the top positive and negative keywords for **exceptional** wines. Present these keywords in a simple table or visualization (e.g., bar chart).

In [None]:
# Write your code here. Add as many boxes as you need.