# Base Model

- Train: 75Agree_train
- Test: 75Agree_test

In [1]:
import pandas as pd
from sklearn.model_selection import StratifiedKFold
import numpy as np
import torch
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from datasets import Dataset
import matplotlib.pyplot as plt

def pandas_df_to_Dataset(df, tokenizer):
    dataset = Dataset.from_pandas(df)
    dataset = dataset.map(tokenizer, batched=True)
    dataset = dataset.rename_column("label", "labels")
    return dataset

  from .autonotebook import tqdm as notebook_tqdm


### Step 1: Prepate the data to feed the model

In [2]:
# load the training and test datasets
df_train = pd.read_csv('data/75Agree_train.csv')
df_test = pd.read_csv('data/75Agree_test.csv')

# label encoders
label2id = {"negative": 0, "neutral": 1, "positive": 2}
id2label = {v: k for k, v in label2id.items()}

# label encoding
df_train["label"] = df_train["label"].map(label2id)
df_test["label"] = df_test["label"].map(label2id)

# balance the training set: undersample to minority class
df_train = df_train.groupby("label").apply(lambda x: x.sample(df_train["label"].value_counts().min())).reset_index(drop=True)
df_train.value_counts("label")

  df_train = df_train.groupby("label").apply(lambda x: x.sample(df_train["label"].value_counts().min())).reset_index(drop=True)


label
0    336
1    336
2    336
Name: count, dtype: int64

### Step 2: Models tuning

In [3]:
# tokenization
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
def tokenize_function(examples):
    return tokenizer(examples["sentence"], truncation=True, padding="max_length", max_length=128)

# prepare for cv
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# set random search area
def get_hyperparameters():
    return {
        "num_train_epochs": int(np.random.choice([2, 3, 4, 5])),
        "learning_rate": float(np.random.choice([1e-5, 1e-4, 1e-3, 1e-2])),
        "weight_decay": float(np.random.choice([0.0, 0.01, 0.05, 0.1, 0.5])),
    }

# get the cv results dataframe or create a new one
try:
    all_results = pd.read_csv("results.csv")
except FileNotFoundError:
    all_results = pd.DataFrame(columns=["num_train_epochs", "learning_rate", "weight_decay", "eval_loss"])

# start the hyperparameter tuning
for _ in range(120):
    current_hyperparameters = get_hyperparameters()
    current_hyperparameters_eval_loss = 0
    current_results = {"num_train_epochs": [], "learning_rate": [], "weight_decay": [], "eval_loss": []}

    # make sure the hyperparameters are not already evaluated
    mask = (
        (all_results['num_train_epochs'] == current_hyperparameters['num_train_epochs']) &
        (all_results['learning_rate'] == current_hyperparameters['learning_rate']) &
        (all_results['weight_decay'] == current_hyperparameters['weight_decay'])
    )
    if mask.any():
        print("Hyperparameters already evaluated, skipping...")
        continue
    else:
        print(f"Evaluating hyperparameters: {current_hyperparameters}")
    
    # tuning hyperparameters
    for fold, (train_idx, val_idx) in enumerate(skf.split(df_train, df_train["label"])):

        # get the folds
        df_fold_train = df_train.iloc[train_idx]
        df_fold_val = df_train.iloc[val_idx]

        # dataset preparation for huggingface transformers
        train_dataset = pandas_df_to_Dataset(df_fold_train, tokenize_function)
        val_dataset = pandas_df_to_Dataset(df_fold_val, tokenize_function)

        train_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
        val_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])

        # the model
        model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=3, id2label=id2label, label2id=label2id)

        # hyperparameters
        training_args = TrainingArguments(
            num_train_epochs=current_hyperparameters["num_train_epochs"],
            learning_rate=current_hyperparameters["learning_rate"],
            weight_decay=current_hyperparameters["weight_decay"],
            eval_strategy="no",
            logging_strategy="no",
            report_to=None,
            dataloader_pin_memory=False,
            per_device_train_batch_size=16,
            per_device_eval_batch_size=16,
            save_total_limit=1,
            save_steps=500,
            output_dir=f"./results/fold_{fold}",
            disable_tqdm=True,
        )

        # training the model
        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=train_dataset,
            eval_dataset=val_dataset,
            tokenizer=tokenizer,
        )
        trainer.train()

        # update the evaluation loss
        current_hyperparameters_eval_loss += trainer.evaluate(eval_dataset=val_dataset)["eval_loss"]

    # update the results dataframe
    current_results["num_train_epochs"].append(current_hyperparameters["num_train_epochs"])
    current_results["learning_rate"].append(current_hyperparameters["learning_rate"])
    current_results["weight_decay"].append(current_hyperparameters["weight_decay"])
    current_results["eval_loss"].append(current_hyperparameters_eval_loss / skf.n_splits)
    all_results = pd.concat([all_results, pd.DataFrame(current_results)], ignore_index=True)

Evaluating hyperparameters: {'num_train_epochs': 3, 'learning_rate': 0.01, 'weight_decay': 0.5}


Map: 100%|██████████| 806/806 [00:00<00:00, 2880.81 examples/s]
Map: 100%|██████████| 202/202 [00:00<00:00, 2794.00 examples/s]
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


{'train_runtime': 181.4215, 'train_samples_per_second': 13.328, 'train_steps_per_second': 0.843, 'train_loss': 1.6740886214511845, 'epoch': 3.0}
{'eval_loss': 1.1167324781417847, 'eval_runtime': 3.897, 'eval_samples_per_second': 51.835, 'eval_steps_per_second': 3.336, 'epoch': 3.0}


Map: 100%|██████████| 806/806 [00:00<00:00, 2328.98 examples/s]
Map: 100%|██████████| 202/202 [00:00<00:00, 2422.18 examples/s]
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


{'train_runtime': 190.4712, 'train_samples_per_second': 12.695, 'train_steps_per_second': 0.803, 'train_loss': 1.5209170073465583, 'epoch': 3.0}
{'eval_loss': 1.1091772317886353, 'eval_runtime': 4.1677, 'eval_samples_per_second': 48.469, 'eval_steps_per_second': 3.119, 'epoch': 3.0}


Map: 100%|██████████| 806/806 [00:00<00:00, 1902.52 examples/s]
Map: 100%|██████████| 202/202 [00:00<00:00, 2120.58 examples/s]
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


{'train_runtime': 205.5304, 'train_samples_per_second': 11.765, 'train_steps_per_second': 0.744, 'train_loss': 1.6770897160947713, 'epoch': 3.0}
{'eval_loss': 1.1733763217926025, 'eval_runtime': 4.0946, 'eval_samples_per_second': 49.334, 'eval_steps_per_second': 3.175, 'epoch': 3.0}


Map: 100%|██████████| 807/807 [00:00<00:00, 2798.78 examples/s]
Map: 100%|██████████| 201/201 [00:00<00:00, 2719.81 examples/s]
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


{'train_runtime': 211.0826, 'train_samples_per_second': 11.469, 'train_steps_per_second': 0.725, 'train_loss': 1.5228304395488663, 'epoch': 3.0}
{'eval_loss': 1.1046470403671265, 'eval_runtime': 4.3414, 'eval_samples_per_second': 46.299, 'eval_steps_per_second': 2.994, 'epoch': 3.0}


Map: 100%|██████████| 807/807 [00:00<00:00, 2635.26 examples/s]
Map: 100%|██████████| 201/201 [00:00<00:00, 2890.63 examples/s]
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


{'train_runtime': 209.29, 'train_samples_per_second': 11.568, 'train_steps_per_second': 0.731, 'train_loss': 1.4886943343418095, 'epoch': 3.0}
{'eval_loss': 1.1115427017211914, 'eval_runtime': 4.742, 'eval_samples_per_second': 42.387, 'eval_steps_per_second': 2.741, 'epoch': 3.0}
Evaluating hyperparameters: {'num_train_epochs': 4, 'learning_rate': 0.01, 'weight_decay': 0.5}


Map: 100%|██████████| 806/806 [00:00<00:00, 2962.79 examples/s]
Map: 100%|██████████| 202/202 [00:00<00:00, 2696.93 examples/s]
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


{'train_runtime': 293.2736, 'train_samples_per_second': 10.993, 'train_steps_per_second': 0.696, 'train_loss': 1.5009478400735294, 'epoch': 4.0}
{'eval_loss': 1.1128426790237427, 'eval_runtime': 4.1314, 'eval_samples_per_second': 48.894, 'eval_steps_per_second': 3.147, 'epoch': 4.0}


Map: 100%|██████████| 806/806 [00:00<00:00, 2761.37 examples/s]
Map: 100%|██████████| 202/202 [00:00<00:00, 2755.61 examples/s]
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


{'train_runtime': 275.2576, 'train_samples_per_second': 11.713, 'train_steps_per_second': 0.741, 'train_loss': 1.5904029397403492, 'epoch': 4.0}
{'eval_loss': 1.1196560859680176, 'eval_runtime': 4.1525, 'eval_samples_per_second': 48.645, 'eval_steps_per_second': 3.131, 'epoch': 4.0}


Map: 100%|██████████| 806/806 [00:00<00:00, 2845.15 examples/s]
Map: 100%|██████████| 202/202 [00:00<00:00, 2814.56 examples/s]
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


{'train_runtime': 289.8811, 'train_samples_per_second': 11.122, 'train_steps_per_second': 0.704, 'train_loss': 1.5199000040690105, 'epoch': 4.0}
{'eval_loss': 1.108296275138855, 'eval_runtime': 4.4109, 'eval_samples_per_second': 45.795, 'eval_steps_per_second': 2.947, 'epoch': 4.0}


Map: 100%|██████████| 807/807 [00:00<00:00, 3055.63 examples/s]
Map: 100%|██████████| 201/201 [00:00<00:00, 2916.35 examples/s]
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


{'train_runtime': 316.0694, 'train_samples_per_second': 10.213, 'train_steps_per_second': 0.645, 'train_loss': 1.4710057576497395, 'epoch': 4.0}
{'eval_loss': 1.1502398252487183, 'eval_runtime': 4.6007, 'eval_samples_per_second': 43.689, 'eval_steps_per_second': 2.826, 'epoch': 4.0}


Map: 100%|██████████| 807/807 [00:00<00:00, 2904.77 examples/s]
Map: 100%|██████████| 201/201 [00:00<00:00, 2202.89 examples/s]
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


{'train_runtime': 299.7498, 'train_samples_per_second': 10.769, 'train_steps_per_second': 0.681, 'train_loss': 1.437822977701823, 'epoch': 4.0}
{'eval_loss': 1.1323347091674805, 'eval_runtime': 4.0687, 'eval_samples_per_second': 49.402, 'eval_steps_per_second': 3.195, 'epoch': 4.0}
Hyperparameters already evaluated, skipping...
Evaluating hyperparameters: {'num_train_epochs': 4, 'learning_rate': 0.001, 'weight_decay': 0.5}


Map: 100%|██████████| 806/806 [00:00<00:00, 3056.63 examples/s]
Map: 100%|██████████| 202/202 [00:00<00:00, 2183.69 examples/s]
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


KeyboardInterrupt: 

In [4]:
all_results = all_results.sort_values(by="eval_loss").reset_index(drop=True)
all_results.to_csv("results.csv", index=False)

In [5]:
all_results.head()

Unnamed: 0,num_train_epochs,learning_rate,weight_decay,eval_loss
0,2,0.0001,0.1,0.347579
1,4,0.0001,0.1,0.348092
2,3,0.0001,0.0,0.374654
3,5,0.0001,0.01,0.474633
4,2,1e-05,0.05,0.790321


### Step 3: Extract the best hyperparameters and retrain the model

### Step 4: Evaluate the model

---
---
---
# TO BE DONE YET FOR THE CV

In [None]:
"""train_losses, eval_losses = [], []
train_steps, eval_steps = [], []
for fold in logs:
    log_history = logs[fold]
    train_losses.append([x["loss"] for x in log_history if "loss" in x])
    eval_losses.append([x["eval_loss"] for x in log_history if "eval_loss" in x])
    train_steps.append([x["step"] for x in log_history if "loss" in x])
    eval_steps.append([x["step"] for x in log_history if "eval_loss" in x])

# get mean and std for train and eval losses
train_losses = np.array(train_losses)
train_loss = np.mean(train_losses, axis=0)
train_loss_std = np.std(train_losses, axis=0)

eval_losses = np.array(eval_losses)
eval_loss = np.mean(eval_losses, axis=0)
eval_loss_std = np.std(eval_losses, axis=0)
    
# make sure train_steps and eval_steps sublists are equal
if all(steps == train_steps[0] for steps in train_steps):
    train_steps = train_steps[0]
else:
    raise ValueError("Train steps are not consistent across folds.")

if all(steps == eval_steps[0] for steps in eval_steps):
    eval_steps = eval_steps[0]
else:
    raise ValueError("Eval steps are not consistent across folds.")

# draw the training and evaluation losses
plt.figure(figsize=(20, 6))
plt.plot(train_steps, train_loss, label="Train Loss")
plt.plot(eval_steps, eval_loss, label="Eval Loss")
plt.fill_between(train_steps, train_loss - train_loss_std, train_loss + train_loss_std, alpha=0.2, color='blue')
plt.fill_between(eval_steps, eval_loss - eval_loss_std, eval_loss + eval_loss_std, alpha=0.2, color='orange')
plt.xlabel("Steps")
plt.ylabel("Loss")
plt.title("Training and Evaluation Loss")
plt.legend()
plt.grid(True)
plt.show()"""

In [None]:
"""train_dataset = Dataset.from_pandas(df_fold_train)
val_dataset = Dataset.from_pandas(df_fold_val)

train_dataset = train_dataset.map(tokenize_function, batched=True)
val_dataset = val_dataset.map(tokenize_function, batched=True)

train_dataset = train_dataset.rename_column("label", "labels")
val_dataset = val_dataset.rename_column("label", "labels")

train_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
val_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])"""

In [None]:
"""from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

def evaluate_dataset(dataset, split_name):
    output = trainer.predict(dataset)
    preds = np.argmax(output.predictions, axis=1)
    y_true = output.label_ids

    print(f"\n=== Classification Report ({split_name}) ===")
    print(classification_report(y_true, preds, target_names=["negative", "neutral", "positive"]))

    cm = confusion_matrix(y_true, preds, normalize='true')
    plt.figure(figsize=(6, 5))
    sns.heatmap(cm, annot=True, fmt=".2f", cmap="Blues",
                xticklabels=["negative", "neutral", "positive"],
                yticklabels=["negative", "neutral", "positive"])
    plt.xlabel("Predicted")
    plt.ylabel("True")
    plt.title(f"Normalized Confusion Matrix ({split_name})")
    plt.show()

# evaluate the model on the training and test datasets
evaluate_dataset(train_dataset, "Train")
evaluate_dataset(test_dataset, "Test")"""