DistilBERT

In [None]:
pip install transformers[torch] datasets optuna scikit-learn



In [None]:
pip install --upgrade transformers datasets accelerate



In [None]:
import optuna
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer
)
from datasets import load_dataset
import numpy as np
from sklearn.metrics import accuracy_score

In [None]:
import pandas as pd
from datasets import Dataset, DatasetDict, ClassLabel
from transformers import AutoTokenizer
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# 1 Load dataset from local CSV
csv_file_path = 'Philippine_Business_TrustPilot_Reviews_Labeled.csv'
df_raw = pd.read_csv(csv_file_path, encoding='ISO-8859-1')

# Rename columns for consistency with DistilBERT pipeline and map labels
df_raw = df_raw.rename(columns={'User Review Body': 'sentence', 'Ground Label': 'label'})

# Map 'Positive' to 1 and 'Negative' to 0; any other value (e.g. 'Neutral') becomes NaN
label_mapping = {'Positive': 1, 'Negative': 0}
df_filtered = df_raw.copy()  # work on a copy so df_raw keeps the original string labels
df_filtered['label'] = df_filtered['label'].map(label_mapping)

# Drop rows whose label did not map, and rows without any review text to tokenize
df_filtered = df_filtered.dropna(subset=['label', 'sentence'])
df_filtered = df_filtered[df_filtered['sentence'].astype(str).str.strip() != ''].copy()

# Plain column assignment so the dtype really becomes int: on pandas >= 2.0,
# `df.loc[:, col] = values` writes into the existing float column and keeps it float.
df_filtered['label'] = df_filtered['label'].astype(int)
df_filtered = df_filtered.reset_index(drop=True)

# Convert DataFrame to Hugging Face Dataset
dataset = Dataset.from_pandas(df_filtered)

# Cast 'label' column to ClassLabel for stratification (index 0 = negative, 1 = positive,
# matching label_mapping above)
features = dataset.features.copy()
features['label'] = ClassLabel(names=['negative', 'positive'])
dataset = dataset.cast(features)

# Split into train and validation sets: 80/20, stratified on the label, fixed seed
train_test_split = dataset.train_test_split(test_size=0.2, stratify_by_column="label", seed=42)
dataset_dict = DatasetDict({
    'train': train_test_split['train'],
    'validation': train_test_split['test']  # Rename 'test' to 'validation' for consistency
})


# Tokenizer belonging to the multilingual DistilBERT checkpoint
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-multilingual-cased")


def preprocess(examples):
    """Tokenize the "sentence" field of a batch, truncating/padding each row to 128 tokens."""
    tokenizer_kwargs = dict(truncation=True, padding="max_length", max_length=128)
    return tokenizer(examples["sentence"], **tokenizer_kwargs)


# Tokenize both splits in batches, then rename the target column to 'labels'
# (the name the Trainer expects)
encoded_dataset = dataset_dict.map(preprocess, batched=True).rename_column("label", "labels")
encoded_dataset.set_format("torch")

# Handles for the two splits used by the training cells
train_dataset, eval_dataset = encoded_dataset["train"], encoded_dataset["validation"]

def compute_metrics(eval_pred):
    """Score a (logits, labels) pair.

    The predicted class is the argmax over axis 1 of the logits. Precision,
    recall and F1 are computed with ``average='weighted'``.

    Returns:
        dict with the keys "accuracy", "precision", "recall" and "f1".
    """
    scores, gold = eval_pred
    predicted = np.argmax(scores, axis=1)
    weighted = precision_recall_fscore_support(gold, predicted, average='weighted')
    return {
        "accuracy": accuracy_score(gold, predicted),
        "precision": weighted[0],
        "recall": weighted[1],
        "f1": weighted[2],
    }


Casting the dataset:   0%|          | 0/10698 [00:00<?, ? examples/s]

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/466 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/996k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.96M [00:00<?, ?B/s]

Map:   0%|          | 0/8558 [00:00<?, ? examples/s]

Map:   0%|          | 0/2140 [00:00<?, ? examples/s]

In [None]:
#  2 Define the Optuna objective
def objective(trial):
    """Fine-tune a fresh DistilBERT classifier with trial-suggested hyperparameters.

    Args:
        trial: Optuna trial used to sample learning rate, weight decay,
            batch size and number of epochs.

    Returns:
        The "eval_accuracy" value reported by ``trainer.evaluate()`` on ``eval_dataset``.
    """
    # A new model per trial so no weights carry over between trials
    model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-multilingual-cased", num_labels=2)

    # Optuna suggests hyperparameters for AdamW and training
    learning_rate = trial.suggest_float("learning_rate", 1e-6, 5e-4, log=True)
    weight_decay = trial.suggest_float("weight_decay", 0.0, 0.3)
    batch_size = trial.suggest_categorical("batch_size", [32, 48])
    num_train_epochs = trial.suggest_int("num_train_epochs", 2, 3)

    training_args = TrainingArguments(
        output_dir=f"./results/{trial.number}",
        learning_rate=learning_rate,
        weight_decay=weight_decay,
        per_device_train_batch_size=batch_size,
        num_train_epochs=num_train_epochs,
        # Trial checkpoints are never reloaded; without this the default strategy
        # writes a full checkpoint every 500 steps for each of the trials.
        save_strategy="no",
        report_to="none",
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        compute_metrics=compute_metrics,
    )

    # 3 Training — internally uses AdamW optimizer
    trainer.train()
    metrics = trainer.evaluate()

    # 4 Report evaluation metric back to Optuna
    return metrics["eval_accuracy"]


In [None]:
# Cell moved and integrated into 'bvikNaI_F2-s'

In [None]:
# Cell moved and integrated into 'bvikNaI_F2-s'

In [None]:
# 5 Run Optuna study
# Seed the sampler's random number generator so the search does not start from an
# arbitrary random state on every run.
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=20)

print("Best hyperparameters:", study.best_params)
print("Best validation accuracy:", study.best_value)


[I 2025-11-19 07:08:59,958] A new study created in memory with name: no-name-53d5d328-d673-42c5-b44a-334474ee35fe


model.safetensors:   0%|          | 0.00/542M [00:00<?, ?B/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
500,0.5243


[I 2025-11-19 07:14:04,634] Trial 0 finished with value: 0.844392523364486 and parameters: {'learning_rate': 1.309893876522793e-06, 'weight_decay': 0.19601533485220887, 'batch_size': 32, 'num_train_epochs': 3}. Best is trial 0 with value: 0.844392523364486.
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
500,0.1682


[I 2025-11-19 07:19:24,566] Trial 1 finished with value: 0.9542056074766355 and parameters: {'learning_rate': 7.689822577736719e-05, 'weight_decay': 0.2459323577535405, 'batch_size': 32, 'num_train_epochs': 3}. Best is trial 1 with value: 0.9542056074766355.
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
500,0.2798


[I 2025-11-19 07:24:37,909] Trial 2 finished with value: 0.9397196261682244 and parameters: {'learning_rate': 5.131825655383258e-06, 'weight_decay': 0.028090645424613526, 'batch_size': 48, 'num_train_epochs': 3}. Best is trial 1 with value: 0.9542056074766355.
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
500,0.4637


[I 2025-11-19 07:29:35,320] Trial 3 finished with value: 0.8724299065420561 and parameters: {'learning_rate': 1.904495944828081e-06, 'weight_decay': 0.297525460514176, 'batch_size': 48, 'num_train_epochs': 3}. Best is trial 1 with value: 0.9542056074766355.
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
500,0.1664


[I 2025-11-19 07:33:13,021] Trial 4 finished with value: 0.9490654205607477 and parameters: {'learning_rate': 4.125790679177224e-05, 'weight_decay': 0.037025069431009756, 'batch_size': 32, 'num_train_epochs': 2}. Best is trial 1 with value: 0.9542056074766355.
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
500,0.5009


[I 2025-11-19 07:38:26,307] Trial 5 finished with value: 0.8864485981308411 and parameters: {'learning_rate': 1.5208606297696022e-06, 'weight_decay': 0.29608579164516036, 'batch_size': 32, 'num_train_epochs': 3}. Best is trial 1 with value: 0.9542056074766355.
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
500,0.2721


[I 2025-11-19 07:43:34,701] Trial 6 finished with value: 0.9415887850467289 and parameters: {'learning_rate': 5.460983312075844e-06, 'weight_decay': 0.13649907604505654, 'batch_size': 48, 'num_train_epochs': 3}. Best is trial 1 with value: 0.9542056074766355.
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
500,0.2273


[I 2025-11-19 07:47:23,011] Trial 7 finished with value: 0.9476635514018692 and parameters: {'learning_rate': 9.84151735381519e-06, 'weight_decay': 0.2651169481144133, 'batch_size': 32, 'num_train_epochs': 2}. Best is trial 1 with value: 0.9542056074766355.
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
500,0.2793


[I 2025-11-19 07:51:27,349] Trial 8 finished with value: 0.9355140186915888 and parameters: {'learning_rate': 5.4514102449032525e-06, 'weight_decay': 0.10926559219702543, 'batch_size': 32, 'num_train_epochs': 2}. Best is trial 1 with value: 0.9542056074766355.
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
500,0.2481


[I 2025-11-19 07:56:35,641] Trial 9 finished with value: 0.9411214953271028 and parameters: {'learning_rate': 6.199646772918993e-06, 'weight_decay': 0.2976990283627304, 'batch_size': 48, 'num_train_epochs': 3}. Best is trial 1 with value: 0.9542056074766355.
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
500,0.1986


[I 2025-11-19 08:00:05,180] Trial 10 finished with value: 0.9514018691588785 and parameters: {'learning_rate': 0.0002327865736261099, 'weight_decay': 0.20665467046890953, 'batch_size': 32, 'num_train_epochs': 2}. Best is trial 1 with value: 0.9542056074766355.
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
500,0.1983


[I 2025-11-19 08:03:50,097] Trial 11 finished with value: 0.9467289719626168 and parameters: {'learning_rate': 0.0002691634924317859, 'weight_decay': 0.21766920398312126, 'batch_size': 32, 'num_train_epochs': 2}. Best is trial 1 with value: 0.9542056074766355.
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
500,0.1645


[I 2025-11-19 08:07:37,902] Trial 12 finished with value: 0.9546728971962617 and parameters: {'learning_rate': 0.0001545043070412705, 'weight_decay': 0.208528886844014, 'batch_size': 32, 'num_train_epochs': 2}. Best is trial 12 with value: 0.9546728971962617.
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
500,0.1552


[I 2025-11-19 08:11:30,510] Trial 13 finished with value: 0.9546728971962617 and parameters: {'learning_rate': 7.342226491341316e-05, 'weight_decay': 0.2426024112598425, 'batch_size': 32, 'num_train_epochs': 2}. Best is trial 12 with value: 0.9546728971962617.
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
500,0.1642


[I 2025-11-19 08:15:31,350] Trial 14 finished with value: 0.952803738317757 and parameters: {'learning_rate': 0.00011199666377499436, 'weight_decay': 0.1599362475680281, 'batch_size': 32, 'num_train_epochs': 2}. Best is trial 12 with value: 0.9546728971962617.
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
500,0.2795


[I 2025-11-19 08:19:11,049] Trial 15 finished with value: 0.9364485981308411 and parameters: {'learning_rate': 0.00045700890671544856, 'weight_decay': 0.16892643340943464, 'batch_size': 32, 'num_train_epochs': 2}. Best is trial 12 with value: 0.9546728971962617.
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
500,0.1848


[I 2025-11-19 08:22:56,203] Trial 16 finished with value: 0.947196261682243 and parameters: {'learning_rate': 2.021382658220752e-05, 'weight_decay': 0.07693619714834779, 'batch_size': 32, 'num_train_epochs': 2}. Best is trial 12 with value: 0.9546728971962617.
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
500,0.1521


[I 2025-11-19 08:26:47,492] Trial 17 finished with value: 0.9574766355140187 and parameters: {'learning_rate': 8.172900099370682e-05, 'weight_decay': 0.25206598822672727, 'batch_size': 32, 'num_train_epochs': 2}. Best is trial 17 with value: 0.9574766355140187.
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss


[I 2025-11-19 08:30:21,542] Trial 18 finished with value: 0.9495327102803738 and parameters: {'learning_rate': 2.9711857498884447e-05, 'weight_decay': 0.24453762774122134, 'batch_size': 48, 'num_train_epochs': 2}. Best is trial 17 with value: 0.9574766355140187.
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss
500,0.1544


[I 2025-11-19 08:34:09,074] Trial 19 finished with value: 0.95 and parameters: {'learning_rate': 0.00013662709363357087, 'weight_decay': 0.18306457877282356, 'batch_size': 32, 'num_train_epochs': 2}. Best is trial 17 with value: 0.9574766355140187.


Best hyperparameters: {'learning_rate': 8.172900099370682e-05, 'weight_decay': 0.25206598822672727, 'batch_size': 32, 'num_train_epochs': 2}
Best validation accuracy: 0.9574766355140187


In [None]:
print("Best hyperparameters:", study.best_params)
print("Best validation accuracy:", study.best_value)

# Start from the pretrained checkpoint. Its classification head is newly initialised,
# so the model has to be fine-tuned before its evaluation metrics mean anything.
best_model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-multilingual-cased", num_labels=2)

# Training configuration taken from the best Optuna trial
best_training_args = TrainingArguments(
    output_dir="./best_results",
    learning_rate=study.best_params["learning_rate"],
    weight_decay=study.best_params["weight_decay"],
    per_device_train_batch_size=study.best_params["batch_size"],
    num_train_epochs=study.best_params["num_train_epochs"],
    report_to="none",
)

best_trainer = Trainer(
    model=best_model,
    args=best_training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,
)

# Fine-tune with the best hyperparameters first: calling evaluate() on the freshly
# constructed trainer would score an untrained classifier head.
best_trainer.train()

# Evaluate the fine-tuned model on the validation split
final_metrics = best_trainer.evaluate()

print("\nFinal Evaluation Metrics with Best Hyperparameters:")
for key, value in final_metrics.items():
    print(f"{key}: {value:.4f}")

Best hyperparameters: {'learning_rate': 8.172900099370682e-05, 'weight_decay': 0.25206598822672727, 'batch_size': 32, 'num_train_epochs': 2}
Best validation accuracy: 0.9574766355140187


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Final Evaluation Metrics with Best Hyperparameters:
eval_loss: 0.7095
eval_model_preparation_time: 0.0013
eval_accuracy: 0.3304
eval_precision: 0.4913
eval_recall: 0.3304
eval_f1: 0.1730
eval_runtime: 7.4039
eval_samples_per_second: 289.0350
eval_steps_per_second: 36.1970


In [None]:
# Run the trainer over the evaluation split and keep the raw scores and gold labels
predictions = best_trainer.predict(eval_dataset)
logits, labels = predictions.predictions, predictions.label_ids

# Recompute the metric dictionary through compute_metrics
calculated_metrics = compute_metrics((logits, labels))

print("\nExplicitly Calculated Metrics on Evaluation Set:")
for display_name, metric_key in (
    ("Accuracy", "accuracy"),
    ("Precision", "precision"),
    ("Recall", "recall"),
    ("F1 Score", "f1"),
):
    print(f"{display_name}: {calculated_metrics[metric_key]:.4f}")

# The loss comes from the earlier evaluate() call (final_metrics), when it was recorded
if 'eval_loss' not in final_metrics:
    print("Loss not available directly from final_metrics. Please refer to eval_loss in previous output.")
else:
    print(f"Loss: {final_metrics['eval_loss']:.4f}")


Explicitly Calculated Metrics on Evaluation Set:
Accuracy: 0.3304
Precision: 0.4913
Recall: 0.3304
F1 Score: 0.1730
Loss: 0.7095


In [None]:
# Summary of the Optuna search: winning parameter set and its validation accuracy
print(f"Best hyperparameters: {study.best_params}")
print(f"Best validation accuracy: {study.best_value}")

Best hyperparameters: {'learning_rate': 8.172900099370682e-05, 'weight_decay': 0.25206598822672727, 'batch_size': 32, 'num_train_epochs': 2}
Best validation accuracy: 0.9574766355140187


In [None]:
# Report the four classification metrics stored in calculated_metrics
print("DistilBERT Metrics from calculated_metrics:")
for display_name, metric_key in (
    ("Accuracy", "accuracy"),
    ("Precision", "precision"),
    ("Recall", "recall"),
    ("F1 Score", "f1"),
):
    print(f"{display_name}: {calculated_metrics[metric_key]:.4f}")

DistilBERT Metrics from calculated_metrics:
Accuracy: 0.3304
Precision: 0.4913
Recall: 0.3304
F1 Score: 0.1730


## Corrected DistilBERT Model Evaluation



In [None]:
import torch
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer

# The tuned hyperparameters are read from the Optuna study, so it has to exist first
if 'study' in globals() and study is not None:
    print("Best hyperparameters:", study.best_params)
    print("Best validation accuracy from Optuna study:", study.best_value)

    # Fresh pretrained weights: none of the models fine-tuned inside the study are reused
    model_to_train = AutoModelForSequenceClassification.from_pretrained(
        "distilbert-base-multilingual-cased",
        num_labels=2,
    )

    # Training configuration built from the best trial; it writes to its own
    # output directory rather than "./best_results"
    best_training_args = TrainingArguments(
        output_dir="./best_model_trained_results",
        learning_rate=study.best_params["learning_rate"],
        weight_decay=study.best_params["weight_decay"],
        per_device_train_batch_size=study.best_params["batch_size"],
        num_train_epochs=study.best_params["num_train_epochs"],
        report_to="none",
    )

    best_trainer_retrained = Trainer(
        model=model_to_train,
        args=best_training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        compute_metrics=compute_metrics,
    )

    # Fine-tune before measuring anything
    print("\nTraining DistilBERT model with best hyperparameters...")
    best_trainer_retrained.train()
    print("Training complete.\n")

    # Metrics reported by the Trainer on the evaluation split
    final_retrained_metrics = best_trainer_retrained.evaluate()

    print("\nFinal Evaluation Metrics for **Retrained** DistilBERT Model with Best Hyperparameters:")
    for metric_name in final_retrained_metrics:
        print(f"{metric_name}: {final_retrained_metrics[metric_name]:.4f}")

    # Same split again through predict(), scored directly with compute_metrics
    predictions_retrained = best_trainer_retrained.predict(eval_dataset)
    logits_retrained, labels_retrained = (
        predictions_retrained.predictions,
        predictions_retrained.label_ids,
    )
    calculated_metrics_retrained = compute_metrics((logits_retrained, labels_retrained))

    print("\nExplicitly Calculated Metrics for Retrained DistilBERT on Evaluation Set:")
    for display_name, metric_key in (
        ("Accuracy", "accuracy"),
        ("Precision", "precision"),
        ("Recall", "recall"),
        ("F1 Score", "f1"),
    ):
        print(f"{display_name}: {calculated_metrics_retrained[metric_key]:.4f}")

    # Persist the model weights and the tokenizer side by side
    model_to_train.save_pretrained("./best_distilbert_model")
    tokenizer.save_pretrained("./best_distilbert_model")
    print("\nRetrained DistilBERT model and tokenizer saved to './best_distilbert_model'")
else:
    print("Error: Optuna study not found. Please run the Optuna optimization cells first.")


Best hyperparameters: {'learning_rate': 8.172900099370682e-05, 'weight_decay': 0.25206598822672727, 'batch_size': 32, 'num_train_epochs': 2}
Best validation accuracy from Optuna study: 0.9574766355140187


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Training DistilBERT model with best hyperparameters...


Step,Training Loss
500,0.1715


Training complete.




Final Evaluation Metrics for **Retrained** DistilBERT Model with Best Hyperparameters:
eval_loss: 0.1560
eval_accuracy: 0.9486
eval_precision: 0.9486
eval_recall: 0.9486
eval_f1: 0.9482
eval_runtime: 7.4627
eval_samples_per_second: 286.7600
eval_steps_per_second: 35.9120
epoch: 2.0000

Explicitly Calculated Metrics for Retrained DistilBERT on Evaluation Set:
Accuracy: 0.9486
Precision: 0.9486
Recall: 0.9486
F1 Score: 0.9482

Retrained DistilBERT model and tokenizer saved to './best_distilbert_model'


## Sentiment Prediction with DistilBERT

This code block defines a function `predict_sentiment` that takes a text input and uses the retrained DistilBERT model to classify its sentiment. Since the model was originally trained on binary (positive/negative) labels, a heuristic is applied to infer 'neutral' sentiment:

*   If neither the positive nor the negative probability exceeds `confidence_threshold` (default 0.6), the sentiment is classified as 'Neutral'.
*   Otherwise, the sentiment is classified as 'Positive' or 'Negative' based on the highest probability.

This approach provides an estimation for 'neutral' as the model was not explicitly trained on a 'neutral' class. For more accurate 'neutral' predictions, the model would need to be re-trained with a dataset that includes a dedicated 'neutral' class.

In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import torch.nn.functional as F

# Directory written by the retraining cell (model weights plus tokenizer files)
model_path = "./best_distilbert_model"
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Load the classifier and switch it to evaluation mode in one step
model = AutoModelForSequenceClassification.from_pretrained(model_path).eval()

def predict_sentiment(text, confidence_threshold=0.6):
    """Classify ``text`` as "Positive", "Negative" or "Neutral".

    The model emits two class probabilities (index 0 = negative, index 1 =
    positive). The larger one wins only if it is strictly greater than
    ``confidence_threshold``; in every other case the result is "Neutral".

    Returns:
        A ``(sentiment, probabilities)`` tuple, where ``probabilities`` is a dict
        with the keys "Negative_prob" and "Positive_prob".
    """
    encoded = tokenizer(text, return_tensors="pt", truncation=True, padding=True)

    # Inference only: no gradients needed
    with torch.no_grad():
        logits = model(**encoded).logits

    # Softmax over the class axis; only the first row of the batch is used
    class_probs = F.softmax(logits, dim=-1)[0].tolist()
    negative_prob = class_probs[0]
    positive_prob = class_probs[1]

    # Default to Neutral; upgrade only when the stronger class clears the threshold
    sentiment = "Neutral"
    if positive_prob > negative_prob:
        if positive_prob > confidence_threshold:
            sentiment = "Positive"
    elif negative_prob > positive_prob:
        if negative_prob > confidence_threshold:
            sentiment = "Negative"

    return sentiment, {"Negative_prob": negative_prob, "Positive_prob": positive_prob}

# Example usage: run the classifier over a few hand-written reviews.
# One loop replaces four copy-pasted stanzas; the printed output is unchanged.
example_texts = [
    "This product is absolutely fantastic! I love it.",
    "I am very disappointed with this service, it was terrible.",
    "The product is okay, nothing special, just average.",
    "This is neither good nor bad, just a product.",
]

for example_text in example_texts:
    example_sentiment, example_probs = predict_sentiment(example_text)
    print(f"Text: '{example_text}'\nSentiment: {example_sentiment}, Probabilities: {example_probs}\n")


## Interactive Sentiment Prediction

Use the input box below to enter text and see the DistilBERT model's sentiment prediction (Positive, Negative, or Neutral). Type `quit` to stop the interactive session.

In [None]:
# Keep prompting until the user types 'quit' (case-insensitive); classify everything else
while (user_input := input("\nEnter text (type 'quit' to exit): ")).lower() != 'quit':
    sentiment, probabilities = predict_sentiment(user_input)
    print(f"Sentiment: {sentiment}, Probabilities: {probabilities}")

print("Exiting interactive prediction.")

In [None]:
import pandas as pd
from datasets import Dataset, DatasetDict, ClassLabel
from transformers import AutoTokenizer

# 1. Load the labeled review CSV
# NOTE(review): the surrounding text calls this an "authenticity-labeled" dataset, but the
# 'Ground Label' column is mapped from 'Positive'/'Negative' below, i.e. sentiment labels.
# Confirm this is the intended file.
csv_file_path = 'Philippine_Business_TrustPilot_Reviews_Labeled.csv'
df = pd.read_csv(csv_file_path, encoding='ISO-8859-1')

# 2. Rename the column containing the text content to 'sentence' and labels to 'label'
df = df.rename(columns={'User Review Body': 'sentence', 'Ground Label': 'label'})

# 3. Map 'Positive' to 1 and 'Negative' to 0; any other value (e.g. 'Neutral') becomes NaN
label_mapping = {'Positive': 1, 'Negative': 0}
df['label'] = df['label'].map(label_mapping)

# 4. Remove rows where 'sentence' is missing/blank or 'label' could not be mapped
df = df.dropna(subset=['label', 'sentence'])
# .copy() so the dtype conversion below assigns into an independent frame rather than a
# filtered slice (avoids SettingWithCopyWarning)
df = df[df['sentence'].astype(str).str.strip() != ''].copy()
df['label'] = df['label'].astype(int)  # Convert labels to int after dropping NaNs

print(f"Dataset shape after initial cleaning and label mapping: {df.shape}")
print("Label distribution after cleaning:")
print(df['label'].value_counts())

# 5. Convert preprocessed DataFrame into a Hugging Face `Dataset` object
dataset = Dataset.from_pandas(df)

# 6. Cast the 'label' column to ClassLabel (index 0 = negative, 1 = positive, matching
# label_mapping) so the split below can stratify on it
features = dataset.features.copy()
features['label'] = ClassLabel(names=['negative', 'positive'])
dataset = dataset.cast(features)

# 7. Split the `Dataset` into training and validation sets (80/20, stratified, fixed seed)
train_test_split = dataset.train_test_split(test_size=0.2, stratify_by_column="label", seed=42)
dataset_dict = DatasetDict({
    'train': train_test_split['train'],
    'validation': train_test_split['test']  # Rename 'test' to 'validation' for consistency
})

print(f"Train dataset size: {len(dataset_dict['train'])}")
print(f"Validation dataset size: {len(dataset_dict['validation'])}")

# 8. Initialize the `AutoTokenizer`
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-multilingual-cased")

# 9. Define a preprocessing function and apply it
def preprocess_function(examples):
    """Tokenize the "sentence" field of a batch, truncating/padding each row to 128 tokens."""
    return tokenizer(examples["sentence"], truncation=True, padding="max_length", max_length=128)

encoded_dataset = dataset_dict.map(preprocess_function, batched=True)

# 10. Rename the 'label' column to 'labels' and set the format to 'torch'
encoded_dataset = encoded_dataset.rename_column("label", "labels")
encoded_dataset.set_format("torch")

print("Preprocessing complete. Encoded dataset example:")
print(encoded_dataset["train"][0])

## Train DistilBERT for Authenticity Classification

### Subtask:
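Initialize a new DistilBERT model and fine-tune it on the authenticity-labeled dataset using the best hyperparameters found by Optuna. Note: the preceding data-preparation cell loads `Philippine_Business_TrustPilot_Reviews_Labeled.csv` and maps its `Ground Label` column from Positive/Negative, i.e. sentiment labels; confirm that this is the intended dataset before treating the results as authenticity classification.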


In [None]:
import torch
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer

# Ensure the best hyperparameters are available from the Optuna study
if 'study' not in globals() or study is None:
    print("Error: Optuna study not found. Please run the Optuna optimization cells first.")
else:
    print("Best hyperparameters from Optuna study:", study.best_params)
    print("Best validation accuracy from Optuna study:", study.best_value)

    # 1. Initialize AutoModelForSequenceClassification
    # The checkpoint must match the tokenizer that produced encoded_dataset
    # ("distilbert-base-multilingual-cased"). Loading "distilbert-base-uncased" here would
    # pair the multilingual token ids with a different, smaller vocabulary/embedding table.
    model_for_authenticity_classification = AutoModelForSequenceClassification.from_pretrained(
        "distilbert-base-multilingual-cased",
        # Two classes, matching the ClassLabel(names=['negative', 'positive']) cast.
        # NOTE(review): an earlier comment described these as "Original vs. Computer-Generated";
        # confirm which labels the loaded data actually carries.
        num_labels=2
    )

    # 2. Create a TrainingArguments object using best hyperparameters
    best_training_args_authenticity = TrainingArguments(
        output_dir="./distilbert_authenticity_fine_tuned_model", # New output directory for this task
        learning_rate=study.best_params["learning_rate"],
        weight_decay=study.best_params["weight_decay"],
        per_device_train_batch_size=study.best_params["batch_size"],
        num_train_epochs=study.best_params["num_train_epochs"],
        report_to="none"
    )

    # 3. Instantiate a Trainer object on the splits of encoded_dataset
    authenticity_trainer = Trainer(
        model=model_for_authenticity_classification,
        args=best_training_args_authenticity,
        train_dataset=encoded_dataset["train"],
        eval_dataset=encoded_dataset["validation"],
        compute_metrics=compute_metrics,
    )

    # 4. Call the train() method to start fine-tuning
    print("\nStarting fine-tuning of DistilBERT for authenticity classification...")
    authenticity_trainer.train()
    print("Fine-tuning complete.\n")

    # Evaluate the fine-tuned model
    authenticity_metrics = authenticity_trainer.evaluate()

    print("\nEvaluation Metrics for Fine-tuned DistilBERT on Authenticity Dataset:")
    for key, value in authenticity_metrics.items():
        print(f"{key}: {value:.4f}")

    # Save the fine-tuned model and tokenizer
    authenticity_trainer.save_model("./distilbert_authenticity_fine_tuned_model")
    tokenizer.save_pretrained("./distilbert_authenticity_fine_tuned_model")
    print("\nFine-tuned DistilBERT model and tokenizer for authenticity saved to './distilbert_authenticity_fine_tuned_model'")