# Fine tuning a BERT model using a labeled Amazon Electronics review dataset

In this fine tuning, I used an already labeled dataset in Kaggle. https://www.kaggle.com/datasets/jimtsiobikas/amazon-reviews-2018-electronics 
This dataset contains the reviewText and all these texts are classified as POSITIVE, NEGATIVE or NEUTRAL.
This dataset have two csv files and here I use the 60k csv file.
Due to computational power constraints, here I chose a random sample of 1.5k data samples with 0.5k reviews from each target class.

# Hyper parameter tuning

### Preparing the dataset to fine tune 

In [10]:
import pandas as pd

df = pd.read_csv('/kaggle/input/amazon-reviews-2018-electronics/labeled_electronics_dataset_60k.csv')
df.head()

Unnamed: 0,overall,vote,verified,reviewTime,reviewText,summary,Label
0,5,0,True,2013-11-14,Have had this approx 6 months now - is much fa...,Great NAS Unit,POSITIVE
1,5,0,True,2013-04-23,I bought my first camera bag with my camera (a...,Great Bag,POSITIVE
2,4,0,True,2017-05-31,I use this case for a Panasonic Lumix ZS50 cam...,Decent Case for a Good Price -- With Some Caveats,POSITIVE
3,1,0,True,2014-11-25,Made no difference to my Sony camcorder.,One Star,NEGATIVE
4,5,0,True,2015-02-21,Raspbmc users will love this!,1424476800,POSITIVE


In [11]:
cols = ['reviewText','Label']
df = df[cols]
df = df.rename(columns = {'reviewText':'text'})
df.head()

Unnamed: 0,text,Label
0,Have had this approx 6 months now - is much fa...,POSITIVE
1,I bought my first camera bag with my camera (a...,POSITIVE
2,I use this case for a Panasonic Lumix ZS50 cam...,POSITIVE
3,Made no difference to my Sony camcorder.,NEGATIVE
4,Raspbmc users will love this!,POSITIVE


In [12]:
# Using only 1500 records from the dataset, containing 500 reviews from each sentiment
df_selected = df.groupby('Label', group_keys=False).apply(lambda x: x.sample(min(len(x), 500)))

# Reset index to clean up the dataframe
df_selected = df_selected.reset_index(drop=True)

# Converting NEGATIVE, NEUTRAL and POSITIVE labels to 0, 1 and 2 respectively.
df_selected['Label'] = df_selected['Label'].map({'NEGATIVE': 0, 'NEUTRAL': 1, 'POSITIVE': 2})

df_selected

  df_selected = df.groupby('Label', group_keys=False).apply(lambda x: x.sample(min(len(x), 500)))


Unnamed: 0,text,Label
0,Does not deliver.,0
1,This unit would not work with chromecast. It ...,0
2,The problem is not precisely the discs themsel...,0
3,After hearing and listening to reviews and vid...,0
4,Failed after 10 months,0
...,...,...
1495,Good Simple Speakers At A Good Price,2
1496,Small and Easy to Use,2
1497,Fits perfect,2
1498,Very nice range - 24-100mm. Like the clutch fo...,2


### Saving the prepared dataset

In [13]:
df_selected.to_csv('/kaggle/working/selected.csv')

### Loading the model, data and tuning hyper-parameters

In [8]:
import optuna
import numpy as np
from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments
from datasets import load_dataset

def objective(trial):
    # Define the hyperparameters to tune
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 5e-5, log=True)
    batch_size = trial.suggest_categorical('batch_size', [4, 8, 16])
    num_train_epochs = trial.suggest_int('num_train_epochs', 2, 5)  # Reduced max epochs
    weight_decay = trial.suggest_float('weight_decay', 1e-6, 1e-2, log=True)
    gradient_accumulation_steps = trial.suggest_int('gradient_accumulation_steps', 1, 4)

    model_name = 'bert-base-uncased'
    model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=3)
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    dataset_path = '/kaggle/working/selected.csv'
    # Load the CSV file as a dataset
    dataset = load_dataset('csv', data_files=dataset_path)
    
    # Since the dataset is loaded with a single key, we access it with 'train'
    dataset = dataset['train']

    # Split the dataset into train and test sets
    split_dataset = dataset.train_test_split(test_size=0.2)
    
    # Access the splits
    train_dataset = split_dataset['train']
    eval_dataset = split_dataset['test']

    def preprocess_function(examples):
        inputs = tokenizer(examples['text'], truncation=True, padding='max_length')
        inputs['labels'] = examples['Label']
        return inputs
    
    train_dataset = train_dataset.map(preprocess_function, batched=True)
    eval_dataset = eval_dataset.map(preprocess_function, batched=True)
    
    # Set format for PyTorch
    train_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])
    eval_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])
    
    args = TrainingArguments(
        output_dir='./results',
        eval_strategy="epoch",
        save_strategy="epoch",
        learning_rate=learning_rate,
        per_device_train_batch_size=batch_size,
        per_device_eval_batch_size=batch_size,
        num_train_epochs=num_train_epochs,
        weight_decay=weight_decay,
        save_total_limit=1,
        metric_for_best_model='accuracy',
        load_best_model_at_end=True,
        fp16=True,  # Enable mixed precision training
        gradient_accumulation_steps=gradient_accumulation_steps,
    )
    
    trainer = Trainer(
        model=model,
        args=args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        compute_metrics=compute_metrics
    )
    
    trainer.train()
    eval_result = trainer.evaluate()
    return eval_result['eval_accuracy']

from sklearn.metrics import accuracy_score, precision_recall_fscore_support

def compute_metrics(p):
    preds = np.argmax(p.predictions, axis=1)
    labels = p.label_ids
    accuracy = accuracy_score(labels, preds)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='macro')
    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1
    }

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=20)


[I 2024-07-09 04:10:41,622] A new study created in memory with name: no-name-d5044e11-0bfa-4954-9ab7-7040de9b14f0
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/1200 [00:00<?, ? examples/s]

Map:   0%|          | 0/300 [00:00<?, ? examples/s]

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

  ········································


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
0,No log,1.069563,0.43,0.516911,0.47013,0.414059
1,No log,1.026562,0.483333,0.540908,0.479186,0.467584
2,No log,0.965326,0.606667,0.610569,0.608747,0.609287
3,No log,0.948658,0.62,0.631711,0.618774,0.62323




[I 2024-07-09 04:16:43,531] Trial 0 finished with value: 0.62 and parameters: {'learning_rate': 1.1479499103671342e-05, 'batch_size': 8, 'num_train_epochs': 4, 'weight_decay': 3.536692045001775e-05, 'gradient_accumulation_steps': 4}. Best is trial 0 with value: 0.62.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/1200 [00:00<?, ? examples/s]

Map:   0%|          | 0/300 [00:00<?, ? examples/s]



Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,No log,0.759314,0.67,0.699338,0.669546,0.675384
2,No log,0.737977,0.713333,0.720848,0.709718,0.712101
3,No log,0.728723,0.7,0.711055,0.699009,0.702129




[I 2024-07-09 04:20:57,578] Trial 1 finished with value: 0.7133333333333334 and parameters: {'learning_rate': 3.656913249721955e-05, 'batch_size': 4, 'num_train_epochs': 3, 'weight_decay': 0.0009587435553273778, 'gradient_accumulation_steps': 2}. Best is trial 1 with value: 0.7133333333333334.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/300 [00:00<?, ? examples/s]



Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
0,No log,1.059446,0.49,0.488125,0.485704,0.483944
1,No log,1.030336,0.5,0.497608,0.497479,0.496752




[I 2024-07-09 04:23:18,799] Trial 2 finished with value: 0.5 and parameters: {'learning_rate': 1.1491064712924295e-05, 'batch_size': 8, 'num_train_epochs': 2, 'weight_decay': 6.747909306857522e-06, 'gradient_accumulation_steps': 4}. Best is trial 1 with value: 0.7133333333333334.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,No log,0.723392,0.67,0.674405,0.66646,0.668984
2,No log,0.77096,0.7,0.706121,0.69702,0.699806




[I 2024-07-09 04:26:15,491] Trial 3 finished with value: 0.7 and parameters: {'learning_rate': 4.253132899789529e-05, 'batch_size': 4, 'num_train_epochs': 2, 'weight_decay': 0.0015810486175104057, 'gradient_accumulation_steps': 1}. Best is trial 1 with value: 0.7133333333333334.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
0,No log,1.04187,0.526667,0.529206,0.522242,0.52168
1,No log,1.014362,0.56,0.573123,0.556834,0.558175




[I 2024-07-09 04:28:36,540] Trial 4 finished with value: 0.56 and parameters: {'learning_rate': 1.3302023688773855e-05, 'batch_size': 8, 'num_train_epochs': 2, 'weight_decay': 0.0008195787205256619, 'gradient_accumulation_steps': 4}. Best is trial 1 with value: 0.7133333333333334.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,No log,1.043436,0.536667,0.540304,0.530646,0.5264
2,No log,1.00495,0.566667,0.563739,0.560594,0.557567
3,No log,0.989148,0.56,0.544124,0.552399,0.543174




[I 2024-07-09 04:32:04,034] Trial 5 finished with value: 0.5666666666666667 and parameters: {'learning_rate': 1.0615549502952573e-05, 'batch_size': 16, 'num_train_epochs': 3, 'weight_decay': 1.3266096412488503e-05, 'gradient_accumulation_steps': 2}. Best is trial 1 with value: 0.7133333333333334.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,No log,0.960336,0.536667,0.511076,0.528725,0.503747
2,No log,0.848983,0.62,0.602821,0.611914,0.601385
3,No log,0.82392,0.63,0.623028,0.625289,0.621048
4,No log,0.795839,0.633333,0.613884,0.625241,0.61353
5,No log,0.788727,0.663333,0.653645,0.658024,0.65387




[I 2024-07-09 04:38:55,462] Trial 6 finished with value: 0.6633333333333333 and parameters: {'learning_rate': 1.4100424038183036e-05, 'batch_size': 4, 'num_train_epochs': 5, 'weight_decay': 0.0010351300755642454, 'gradient_accumulation_steps': 3}. Best is trial 1 with value: 0.7133333333333334.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,No log,0.93048,0.58,0.647841,0.583923,0.585671
2,No log,0.733714,0.683333,0.690366,0.680644,0.683341
3,No log,0.750346,0.7,0.708641,0.698763,0.700651
4,No log,0.803367,0.706667,0.723298,0.705964,0.709528
5,No log,0.815379,0.686667,0.688113,0.683579,0.685321




[I 2024-07-09 04:44:59,580] Trial 7 finished with value: 0.7066666666666667 and parameters: {'learning_rate': 2.014799787126068e-05, 'batch_size': 8, 'num_train_epochs': 5, 'weight_decay': 2.254259309194505e-06, 'gradient_accumulation_steps': 1}. Best is trial 1 with value: 0.7133333333333334.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,No log,1.05288,0.446667,0.528743,0.454042,0.427563
2,No log,0.887336,0.603333,0.622826,0.603917,0.602584
3,No log,0.843854,0.643333,0.661889,0.642909,0.646682
4,No log,0.815726,0.663333,0.668651,0.660853,0.663437




[I 2024-07-09 04:49:45,118] Trial 8 finished with value: 0.6633333333333333 and parameters: {'learning_rate': 2.139071590857557e-05, 'batch_size': 8, 'num_train_epochs': 4, 'weight_decay': 0.00011054333259594344, 'gradient_accumulation_steps': 3}. Best is trial 1 with value: 0.7133333333333334.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,No log,0.841525,0.65,0.644709,0.643571,0.64082
2,No log,0.755032,0.696667,0.688947,0.691502,0.689418




[I 2024-07-09 04:52:06,998] Trial 9 finished with value: 0.6966666666666667 and parameters: {'learning_rate': 3.462697320271094e-05, 'batch_size': 16, 'num_train_epochs': 2, 'weight_decay': 9.130901551907551e-06, 'gradient_accumulation_steps': 1}. Best is trial 1 with value: 0.7133333333333334.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,No log,0.7657,0.666667,0.691954,0.667497,0.671554
2,No log,0.734898,0.696667,0.707663,0.693834,0.696546
3,No log,0.765054,0.676667,0.678983,0.673809,0.675745




[I 2024-07-09 04:56:17,154] Trial 10 finished with value: 0.6966666666666667 and parameters: {'learning_rate': 3.0516483538413452e-05, 'batch_size': 4, 'num_train_epochs': 3, 'weight_decay': 0.006578914432965138, 'gradient_accumulation_steps': 2}. Best is trial 1 with value: 0.7133333333333334.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,No log,0.797924,0.633333,0.646907,0.632042,0.635921
2,No log,0.744568,0.696667,0.70352,0.695337,0.697493
3,No log,0.766105,0.72,0.734847,0.718165,0.72105
4,0.635200,0.881057,0.703333,0.701631,0.700326,0.700755
5,0.635200,0.980247,0.696667,0.699577,0.694966,0.696145




[I 2024-07-09 05:03:23,441] Trial 11 finished with value: 0.72 and parameters: {'learning_rate': 2.0826976349749076e-05, 'batch_size': 4, 'num_train_epochs': 5, 'weight_decay': 1.6916972103647168e-06, 'gradient_accumulation_steps': 1}. Best is trial 11 with value: 0.72.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,No log,0.827679,0.673333,0.69223,0.673103,0.677178
2,No log,0.744511,0.686667,0.693657,0.683567,0.685437
3,No log,0.746561,0.706667,0.704177,0.703381,0.703552
4,No log,0.779843,0.71,0.712068,0.707664,0.70917




[I 2024-07-09 05:08:53,982] Trial 12 finished with value: 0.71 and parameters: {'learning_rate': 2.850022220026283e-05, 'batch_size': 4, 'num_train_epochs': 4, 'weight_decay': 0.00015822018094287406, 'gradient_accumulation_steps': 2}. Best is trial 11 with value: 0.72.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,No log,0.758239,0.686667,0.696277,0.685065,0.688363
2,No log,0.85112,0.68,0.694356,0.675858,0.679457
3,No log,0.943539,0.693333,0.721165,0.694009,0.698793




[I 2024-07-09 05:13:12,970] Trial 13 finished with value: 0.6933333333333334 and parameters: {'learning_rate': 4.591095160998311e-05, 'batch_size': 4, 'num_train_epochs': 3, 'weight_decay': 1.6746572752506658e-06, 'gradient_accumulation_steps': 1}. Best is trial 11 with value: 0.72.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,No log,0.86482,0.593333,0.572281,0.584905,0.573074
2,No log,0.776956,0.633333,0.63521,0.627698,0.628558
3,No log,0.731661,0.683333,0.684177,0.679912,0.681587
4,No log,0.732464,0.696667,0.692958,0.692365,0.692626
5,No log,0.746916,0.686667,0.684552,0.682967,0.683541




[I 2024-07-09 05:20:03,772] Trial 14 finished with value: 0.6966666666666667 and parameters: {'learning_rate': 1.7585035670021412e-05, 'batch_size': 4, 'num_train_epochs': 5, 'weight_decay': 0.0003327332013322374, 'gradient_accumulation_steps': 2}. Best is trial 11 with value: 0.72.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,No log,0.911437,0.56,0.634485,0.561618,0.544012
2,No log,0.751941,0.686667,0.692891,0.683573,0.686223
3,No log,0.745417,0.696667,0.707585,0.695457,0.698607
4,No log,0.761108,0.713333,0.714519,0.711096,0.711683




[I 2024-07-09 05:25:29,584] Trial 15 finished with value: 0.7133333333333334 and parameters: {'learning_rate': 2.625564726740623e-05, 'batch_size': 4, 'num_train_epochs': 4, 'weight_decay': 0.004972972074366428, 'gradient_accumulation_steps': 3}. Best is trial 11 with value: 0.72.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,No log,0.846298,0.61,0.59409,0.603007,0.596956
2,No log,0.737816,0.706667,0.702802,0.702883,0.70283
3,No log,0.723683,0.696667,0.690721,0.692622,0.691448




[I 2024-07-09 05:28:58,724] Trial 16 finished with value: 0.7066666666666667 and parameters: {'learning_rate': 3.62767183974613e-05, 'batch_size': 16, 'num_train_epochs': 3, 'weight_decay': 3.094148561059414e-05, 'gradient_accumulation_steps': 1}. Best is trial 11 with value: 0.72.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,No log,0.870172,0.606667,0.57454,0.595134,0.567055
2,No log,0.793034,0.646667,0.631491,0.637808,0.627862
3,No log,0.751123,0.66,0.660117,0.65633,0.657357
4,No log,0.74207,0.696667,0.695977,0.693605,0.694515
5,No log,0.75138,0.71,0.71532,0.708287,0.709965




[I 2024-07-09 05:35:49,439] Trial 17 finished with value: 0.71 and parameters: {'learning_rate': 1.6980640349864236e-05, 'batch_size': 4, 'num_train_epochs': 5, 'weight_decay': 1.012692234850736e-06, 'gradient_accumulation_steps': 2}. Best is trial 11 with value: 0.72.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,No log,0.753883,0.67,0.67238,0.667088,0.669045
2,No log,0.759259,0.713333,0.70971,0.710233,0.708667
3,No log,0.829193,0.696667,0.705207,0.694714,0.697958
4,0.607900,0.953222,0.686667,0.687343,0.684082,0.685254




[I 2024-07-09 05:41:31,123] Trial 18 finished with value: 0.7133333333333334 and parameters: {'learning_rate': 2.5033923850550347e-05, 'batch_size': 4, 'num_train_epochs': 4, 'weight_decay': 0.00033485598494455913, 'gradient_accumulation_steps': 1}. Best is trial 11 with value: 0.72.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,No log,0.788092,0.666667,0.691792,0.663634,0.665994
2,No log,0.724573,0.696667,0.707384,0.69372,0.697521
3,No log,0.765732,0.703333,0.712283,0.701686,0.704905




[I 2024-07-09 05:45:40,164] Trial 19 finished with value: 0.7033333333333334 and parameters: {'learning_rate': 3.816459268604907e-05, 'batch_size': 4, 'num_train_epochs': 3, 'weight_decay': 0.0025262339383209406, 'gradient_accumulation_steps': 2}. Best is trial 11 with value: 0.72.


### Checking for the best set of hyperparameters

In [9]:
print("Best trial:")
trial = study.best_trial
print(f"  Accuracy: {trial.value}")
print("  Params: ")
for key, value in trial.params.items():
    print(f"    {key}: {value}")

Best trial:
  Accuracy: 0.72
  Params: 
    learning_rate: 2.0826976349749076e-05
    batch_size: 4
    num_train_epochs: 5
    weight_decay: 1.6916972103647168e-06
    gradient_accumulation_steps: 1


### Training the model with selected best set of hyperparameters

In [14]:
best_params = {
    'learning_rate' : 2.0826976349749076e-05,
    'batch_size' : 4,
    'num_train_epochs' : 5,
    'weight_decay' : 1.6916972103647168e-06,
    'gradient_accumulation_steps' : 1
}
model_name = 'bert-base-uncased'
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=3)
tokenizer = AutoTokenizer.from_pretrained(model_name)


dataset_path = '/kaggle/working/selected.csv'
# Load the CSV file as a dataset
dataset = load_dataset('csv', data_files=dataset_path)
    
# Since the dataset is loaded with a single key, we access it with 'train'
dataset = dataset['train']

# Split the dataset into train and test sets
split_dataset = dataset.train_test_split(test_size=0.2)
    
# Access the splits
train_dataset = split_dataset['train']
eval_dataset = split_dataset['test']

def preprocess_function(examples):
    inputs = tokenizer(examples['text'], truncation=True, padding='max_length')
    inputs['labels'] = examples['Label']
    return inputs
    
train_dataset = train_dataset.map(preprocess_function, batched=True)
eval_dataset = eval_dataset.map(preprocess_function, batched=True)
    
# Set format for PyTorch
train_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])
eval_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])

from sklearn.metrics import accuracy_score, precision_recall_fscore_support

def compute_metrics(p):
    preds = np.argmax(p.predictions, axis=1)
    labels = p.label_ids
    accuracy = accuracy_score(labels, preds)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='macro')
    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1
    }

args = TrainingArguments(
    output_dir='./results',
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=best_params['learning_rate'],
    per_device_train_batch_size=best_params['batch_size'],
    per_device_eval_batch_size=best_params['batch_size'],
    num_train_epochs=best_params['num_train_epochs'],
    weight_decay=best_params['weight_decay'],
    save_total_limit=1,
    metric_for_best_model='accuracy',
    load_best_model_at_end=True,
)

model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=3)
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics
)

trainer.train()


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/1200 [00:00<?, ? examples/s]

Map:   0%|          | 0/300 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

  ········································


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,No log,0.750022,0.68,0.668299,0.665951,0.655259
2,No log,0.670335,0.743333,0.736191,0.73956,0.734988
3,No log,0.770269,0.73,0.734226,0.7315,0.730262
4,0.614700,0.920943,0.733333,0.739932,0.736341,0.734422
5,0.614700,1.016352,0.73,0.729584,0.728029,0.72858




TrainOutput(global_step=750, training_loss=0.4631092681884766, metrics={'train_runtime': 458.9921, 'train_samples_per_second': 13.072, 'train_steps_per_second': 1.634, 'total_flos': 1578680506368000.0, 'train_loss': 0.4631092681884766, 'epoch': 5.0})

### Fine-tuned model evaluation

In [15]:
trainer.evaluate()



{'eval_loss': 0.670335054397583,
 'eval_accuracy': 0.7433333333333333,
 'eval_precision': 0.7361908666877257,
 'eval_recall': 0.7395597618539339,
 'eval_f1': 0.7349884325429757,
 'eval_runtime': 6.571,
 'eval_samples_per_second': 45.655,
 'eval_steps_per_second': 5.783,
 'epoch': 5.0}

In [16]:
trainer.save_model('/kaggle/working/models/bert-finetuned')

### Saving the fine-tuned model

In [18]:
model_save_path = './my_pretrained_model'
model.save_pretrained(model_save_path)
tokenizer.save_pretrained(model_save_path)

('./my_pretrained_model/tokenizer_config.json',
 './my_pretrained_model/special_tokens_map.json',
 './my_pretrained_model/vocab.txt',
 './my_pretrained_model/added_tokens.json',
 './my_pretrained_model/tokenizer.json')

In [19]:
import shutil
shutil.make_archive('/kaggle/working/my_pretrained_model', 'zip', '/kaggle/working/my_pretrained_model')

'/kaggle/working/my_pretrained_model.zip'