# BERT

## Importing the necessory libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
import torch
from datasets import Dataset

  from .autonotebook import tqdm as notebook_tqdm


### Reading the data

In [None]:
df = pd.read_csv('compiled_output.csv')  

# Step 2: Remove rows with empty or irrelevant content
df = df[df['Text'].str.strip().notna()]  # Remove rows with empty strings
df = df[df['Text'].str.strip() != '']  # Remove rows with only whitespace
df = df[~df['Text'].str.contains(r'\[.*\]')]  # Remove rows with non-textual content like '[2]'

In [None]:

# Step 3: Map labels to integers for 3 classes (Adjust this based on your exact label names)
label_mapping = {
    'background_claim': 0,  # Example: background_claim -> 0
    'own_claim': 1,         # Example: own_claim -> 1
    'data': 2               # Example: data -> 2
}
df['Label'] = df['Label'].map(label_mapping)

# Step 4: Split the dataset into train and test sets
train_texts, test_texts, train_labels, test_labels = train_test_split(
    df['Text'].tolist(), df['Label'].tolist(), test_size=0.3, random_state=42
)

# Step 5: Tokenize the texts using BERT tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Tokenize texts
train_encodings = tokenizer(train_texts, truncation=True, padding=True, max_length=128)
test_encodings = tokenizer(test_texts, truncation=True, padding=True, max_length=128)

# Convert to Dataset format
train_dataset = Dataset.from_dict({
    'input_ids': train_encodings['input_ids'],
    'attention_mask': train_encodings['attention_mask'],
    'labels': train_labels
})

test_dataset = Dataset.from_dict({
    'input_ids': test_encodings['input_ids'],
    'attention_mask': test_encodings['attention_mask'],
    'labels': test_labels
})

# Step 6: Load pre-trained BERT model for sequence classification (with 3 labels)
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=3)

# Step 7: Training arguments
training_args = TrainingArguments(
    output_dir='./results',          # output directory
    num_train_epochs=1,              # number of training epochs
    per_device_train_batch_size=8,   # batch size for training
    per_device_eval_batch_size=16,   # batch size for evaluation
    warmup_steps=500,                # number of warmup steps for learning rate scheduler
    weight_decay=0.01,               # strength of weight decay
    logging_dir='./logs',            # directory for storing logs
    logging_steps=10,
    evaluation_strategy="epoch",     # Evaluate after each epoch
    save_strategy="epoch",           # Save model after each epoch
)

# Step 8: Compute metrics function for multi-class classification
def compute_metrics(p):
    predictions = p.predictions.argmax(axis=-1)  # Convert logits to class predictions
    accuracy = accuracy_score(p.label_ids, predictions)
    return {'accuracy': accuracy}

# Step 9: Trainer setup
trainer = Trainer(
    model=model,                         # the model to train
    args=training_args,                  # training arguments, defined above
    train_dataset=train_dataset,         # training dataset
    eval_dataset=test_dataset,           # evaluation dataset
    compute_metrics=compute_metrics      # pass the compute metrics function
)

# Step 10: Train the model
trainer.train()

# Step 11: Evaluate the model
eval_results = trainer.evaluate()

print("\nEvaluation Results:")
print(eval_results)

# Step 12: Make predictions on the test set
test_preds = trainer.predict(test_dataset)
test_preds_labels = test_preds.predictions.argmax(axis=-1)

# Step 13: Accuracy and Classification Report
accuracy = accuracy_score(test_labels, test_preds_labels)
print(f"Accuracy: {accuracy * 100:.2f}%")

print("\nClassification Report:")
print(classification_report(test_labels, test_preds_labels))


  from .autonotebook import tqdm as notebook_tqdm
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  1%|          | 10/1178 [02:37<4:58:53, 15.35s/it]

{'loss': 1.1166, 'grad_norm': 6.849336624145508, 'learning_rate': 1.0000000000000002e-06, 'epoch': 0.01}


  2%|▏         | 20/1178 [05:03<4:38:02, 14.41s/it]

{'loss': 1.1446, 'grad_norm': 9.413037300109863, 'learning_rate': 2.0000000000000003e-06, 'epoch': 0.02}


  3%|▎         | 30/1178 [07:30<4:43:25, 14.81s/it]

{'loss': 1.119, 'grad_norm': 7.555083274841309, 'learning_rate': 3e-06, 'epoch': 0.03}


  3%|▎         | 40/1178 [10:00<4:44:49, 15.02s/it]

{'loss': 1.0878, 'grad_norm': 6.982838153839111, 'learning_rate': 4.000000000000001e-06, 'epoch': 0.03}


  4%|▍         | 50/1178 [11:48<2:32:17,  8.10s/it]

{'loss': 1.1144, 'grad_norm': 9.624858856201172, 'learning_rate': 5e-06, 'epoch': 0.04}


  5%|▌         | 60/1178 [12:38<1:34:57,  5.10s/it]

{'loss': 1.0682, 'grad_norm': 6.960295677185059, 'learning_rate': 6e-06, 'epoch': 0.05}


  6%|▌         | 70/1178 [13:45<2:41:56,  8.77s/it]

{'loss': 1.0316, 'grad_norm': 8.424686431884766, 'learning_rate': 7.000000000000001e-06, 'epoch': 0.06}


  7%|▋         | 80/1178 [16:04<4:07:11, 13.51s/it]

{'loss': 0.9555, 'grad_norm': 6.689379692077637, 'learning_rate': 8.000000000000001e-06, 'epoch': 0.07}


  8%|▊         | 90/1178 [18:10<4:12:04, 13.90s/it]

{'loss': 0.8832, 'grad_norm': 9.39930534362793, 'learning_rate': 9e-06, 'epoch': 0.08}


  8%|▊         | 100/1178 [20:24<3:07:38, 10.44s/it]

{'loss': 0.8456, 'grad_norm': 5.790355205535889, 'learning_rate': 1e-05, 'epoch': 0.08}


  9%|▉         | 110/1178 [22:20<3:46:27, 12.72s/it]

{'loss': 0.7929, 'grad_norm': 7.180288791656494, 'learning_rate': 1.1000000000000001e-05, 'epoch': 0.09}


 10%|█         | 120/1178 [24:20<3:35:31, 12.22s/it]

{'loss': 0.8222, 'grad_norm': 6.7989044189453125, 'learning_rate': 1.2e-05, 'epoch': 0.1}


 11%|█         | 130/1178 [26:50<4:16:38, 14.69s/it]

{'loss': 0.7822, 'grad_norm': 6.587442874908447, 'learning_rate': 1.3000000000000001e-05, 'epoch': 0.11}


 12%|█▏        | 140/1178 [29:12<3:47:31, 13.15s/it]

{'loss': 0.8216, 'grad_norm': 8.051600456237793, 'learning_rate': 1.4000000000000001e-05, 'epoch': 0.12}


 13%|█▎        | 150/1178 [30:40<3:08:25, 11.00s/it]

{'loss': 0.7434, 'grad_norm': 4.994478702545166, 'learning_rate': 1.5e-05, 'epoch': 0.13}


 14%|█▎        | 160/1178 [32:52<4:02:19, 14.28s/it]

{'loss': 0.7195, 'grad_norm': 6.608966827392578, 'learning_rate': 1.6000000000000003e-05, 'epoch': 0.14}


 14%|█▍        | 170/1178 [34:14<1:44:55,  6.25s/it]

{'loss': 0.6273, 'grad_norm': 5.22614049911499, 'learning_rate': 1.7000000000000003e-05, 'epoch': 0.14}


 15%|█▌        | 180/1178 [35:08<1:28:01,  5.29s/it]

{'loss': 0.7541, 'grad_norm': 11.086908340454102, 'learning_rate': 1.8e-05, 'epoch': 0.15}


 16%|█▌        | 190/1178 [36:03<1:34:23,  5.73s/it]

{'loss': 0.7863, 'grad_norm': 11.378976821899414, 'learning_rate': 1.9e-05, 'epoch': 0.16}


 17%|█▋        | 200/1178 [36:56<1:25:05,  5.22s/it]

{'loss': 0.8324, 'grad_norm': 8.694211959838867, 'learning_rate': 2e-05, 'epoch': 0.17}


 18%|█▊        | 210/1178 [37:52<1:33:32,  5.80s/it]

{'loss': 0.7534, 'grad_norm': 7.892892837524414, 'learning_rate': 2.1e-05, 'epoch': 0.18}


 19%|█▊        | 220/1178 [38:49<1:28:57,  5.57s/it]

{'loss': 0.7198, 'grad_norm': 7.51101541519165, 'learning_rate': 2.2000000000000003e-05, 'epoch': 0.19}


 20%|█▉        | 230/1178 [39:46<1:29:30,  5.66s/it]

{'loss': 0.6912, 'grad_norm': 15.751819610595703, 'learning_rate': 2.3000000000000003e-05, 'epoch': 0.2}


 20%|██        | 240/1178 [42:18<3:58:03, 15.23s/it]

{'loss': 0.8535, 'grad_norm': 4.963855743408203, 'learning_rate': 2.4e-05, 'epoch': 0.2}


 21%|██        | 250/1178 [44:55<4:01:13, 15.60s/it]

{'loss': 0.7282, 'grad_norm': 13.637601852416992, 'learning_rate': 2.5e-05, 'epoch': 0.21}


 22%|██▏       | 260/1178 [47:19<3:37:58, 14.25s/it]

{'loss': 0.7156, 'grad_norm': 6.738131046295166, 'learning_rate': 2.6000000000000002e-05, 'epoch': 0.22}


 23%|██▎       | 270/1178 [49:47<3:45:31, 14.90s/it]

{'loss': 0.6495, 'grad_norm': 6.954984664916992, 'learning_rate': 2.7000000000000002e-05, 'epoch': 0.23}


 24%|██▍       | 280/1178 [52:19<3:45:49, 15.09s/it]

{'loss': 0.7903, 'grad_norm': 6.628042221069336, 'learning_rate': 2.8000000000000003e-05, 'epoch': 0.24}


 25%|██▍       | 290/1178 [54:54<3:48:37, 15.45s/it]

{'loss': 0.68, 'grad_norm': 8.398133277893066, 'learning_rate': 2.9e-05, 'epoch': 0.25}


 25%|██▌       | 300/1178 [57:05<3:29:05, 14.29s/it]

{'loss': 0.5833, 'grad_norm': 4.903298854827881, 'learning_rate': 3e-05, 'epoch': 0.25}


 26%|██▋       | 310/1178 [59:14<2:18:00,  9.54s/it]

{'loss': 0.5896, 'grad_norm': 14.559911727905273, 'learning_rate': 3.1e-05, 'epoch': 0.26}


 27%|██▋       | 320/1178 [1:01:02<2:30:53, 10.55s/it]

{'loss': 0.8484, 'grad_norm': 13.889381408691406, 'learning_rate': 3.2000000000000005e-05, 'epoch': 0.27}


 28%|██▊       | 330/1178 [1:03:31<3:31:52, 14.99s/it]

{'loss': 0.8324, 'grad_norm': 5.82192850112915, 'learning_rate': 3.3e-05, 'epoch': 0.28}


 29%|██▉       | 340/1178 [1:05:07<1:32:59,  6.66s/it]

{'loss': 0.6914, 'grad_norm': 9.854321479797363, 'learning_rate': 3.4000000000000007e-05, 'epoch': 0.29}


 30%|██▉       | 350/1178 [1:06:03<1:20:13,  5.81s/it]

{'loss': 0.7147, 'grad_norm': 10.600592613220215, 'learning_rate': 3.5e-05, 'epoch': 0.3}


 31%|███       | 360/1178 [1:07:01<1:17:11,  5.66s/it]

{'loss': 0.7152, 'grad_norm': 12.568655014038086, 'learning_rate': 3.6e-05, 'epoch': 0.31}


 31%|███▏      | 370/1178 [1:08:01<1:19:56,  5.94s/it]

{'loss': 0.7902, 'grad_norm': 12.951045989990234, 'learning_rate': 3.7e-05, 'epoch': 0.31}


 32%|███▏      | 380/1178 [1:09:01<1:15:46,  5.70s/it]

{'loss': 0.713, 'grad_norm': 6.462077617645264, 'learning_rate': 3.8e-05, 'epoch': 0.32}


 33%|███▎      | 390/1178 [1:10:00<1:14:25,  5.67s/it]

{'loss': 0.7666, 'grad_norm': 11.885149955749512, 'learning_rate': 3.9000000000000006e-05, 'epoch': 0.33}


 34%|███▍      | 400/1178 [1:10:52<1:07:59,  5.24s/it]

{'loss': 0.6499, 'grad_norm': 7.188576698303223, 'learning_rate': 4e-05, 'epoch': 0.34}


 35%|███▍      | 410/1178 [1:11:47<1:06:16,  5.18s/it]

{'loss': 0.6596, 'grad_norm': 15.840916633605957, 'learning_rate': 4.1e-05, 'epoch': 0.35}


 36%|███▌      | 420/1178 [1:12:36<1:03:03,  4.99s/it]

{'loss': 0.6225, 'grad_norm': 9.719857215881348, 'learning_rate': 4.2e-05, 'epoch': 0.36}


 37%|███▋      | 430/1178 [1:13:26<1:01:50,  4.96s/it]

{'loss': 0.6431, 'grad_norm': 6.847768783569336, 'learning_rate': 4.3e-05, 'epoch': 0.37}


 37%|███▋      | 440/1178 [1:14:16<1:01:02,  4.96s/it]

{'loss': 0.6984, 'grad_norm': 15.863906860351562, 'learning_rate': 4.4000000000000006e-05, 'epoch': 0.37}


 38%|███▊      | 450/1178 [1:15:06<1:00:09,  4.96s/it]

{'loss': 0.5859, 'grad_norm': 17.198543548583984, 'learning_rate': 4.5e-05, 'epoch': 0.38}


 39%|███▉      | 460/1178 [1:15:56<59:55,  5.01s/it]  

{'loss': 0.6658, 'grad_norm': 8.197728157043457, 'learning_rate': 4.600000000000001e-05, 'epoch': 0.39}


 40%|███▉      | 470/1178 [1:16:46<59:21,  5.03s/it]

{'loss': 0.7544, 'grad_norm': 7.307081699371338, 'learning_rate': 4.7e-05, 'epoch': 0.4}


 41%|████      | 480/1178 [1:17:37<1:00:05,  5.17s/it]

{'loss': 0.7917, 'grad_norm': 10.596160888671875, 'learning_rate': 4.8e-05, 'epoch': 0.41}


 42%|████▏     | 490/1178 [1:18:29<59:24,  5.18s/it]  

{'loss': 0.7736, 'grad_norm': 4.703081130981445, 'learning_rate': 4.9e-05, 'epoch': 0.42}


 42%|████▏     | 500/1178 [1:19:21<57:53,  5.12s/it]

{'loss': 0.867, 'grad_norm': 5.217000484466553, 'learning_rate': 5e-05, 'epoch': 0.42}


 43%|████▎     | 510/1178 [1:20:18<1:07:08,  6.03s/it]

{'loss': 0.7217, 'grad_norm': 5.476141452789307, 'learning_rate': 4.926253687315635e-05, 'epoch': 0.43}


 44%|████▍     | 520/1178 [1:21:22<1:10:08,  6.40s/it]

{'loss': 0.642, 'grad_norm': 11.194867134094238, 'learning_rate': 4.8525073746312687e-05, 'epoch': 0.44}


 45%|████▍     | 530/1178 [1:22:24<1:07:36,  6.26s/it]

{'loss': 0.6518, 'grad_norm': 5.179981708526611, 'learning_rate': 4.778761061946903e-05, 'epoch': 0.45}


 46%|████▌     | 540/1178 [1:23:30<1:11:56,  6.77s/it]

{'loss': 0.5948, 'grad_norm': 15.25527572631836, 'learning_rate': 4.705014749262537e-05, 'epoch': 0.46}


 47%|████▋     | 550/1178 [1:24:36<1:10:38,  6.75s/it]

{'loss': 0.6957, 'grad_norm': 5.857075214385986, 'learning_rate': 4.631268436578171e-05, 'epoch': 0.47}


 48%|████▊     | 560/1178 [1:25:41<1:06:23,  6.45s/it]

{'loss': 0.6532, 'grad_norm': 7.131637096405029, 'learning_rate': 4.5575221238938055e-05, 'epoch': 0.48}


 48%|████▊     | 570/1178 [1:26:43<1:01:35,  6.08s/it]

{'loss': 0.7151, 'grad_norm': 4.3493218421936035, 'learning_rate': 4.48377581120944e-05, 'epoch': 0.48}


 49%|████▉     | 580/1178 [1:27:54<1:13:38,  7.39s/it]

{'loss': 0.6513, 'grad_norm': 7.796670436859131, 'learning_rate': 4.410029498525074e-05, 'epoch': 0.49}


 50%|█████     | 590/1178 [1:28:51<53:16,  5.44s/it]  

{'loss': 0.73, 'grad_norm': 17.34234619140625, 'learning_rate': 4.3362831858407084e-05, 'epoch': 0.5}


 51%|█████     | 600/1178 [1:30:17<1:27:47,  9.11s/it]

{'loss': 0.5382, 'grad_norm': 4.76526403427124, 'learning_rate': 4.262536873156342e-05, 'epoch': 0.51}


 52%|█████▏    | 610/1178 [1:31:42<1:18:35,  8.30s/it]

{'loss': 0.5542, 'grad_norm': 4.099661827087402, 'learning_rate': 4.188790560471977e-05, 'epoch': 0.52}


 53%|█████▎    | 620/1178 [1:32:55<1:06:45,  7.18s/it]

{'loss': 0.756, 'grad_norm': 6.6526994705200195, 'learning_rate': 4.115044247787611e-05, 'epoch': 0.53}


 53%|█████▎    | 630/1178 [1:34:05<1:04:50,  7.10s/it]

{'loss': 0.6075, 'grad_norm': 9.813131332397461, 'learning_rate': 4.0412979351032446e-05, 'epoch': 0.53}


 54%|█████▍    | 640/1178 [1:35:15<1:01:29,  6.86s/it]

{'loss': 0.6209, 'grad_norm': 9.095086097717285, 'learning_rate': 3.967551622418879e-05, 'epoch': 0.54}


 55%|█████▌    | 650/1178 [1:36:23<51:11,  5.82s/it]  

{'loss': 0.9251, 'grad_norm': 6.784060955047607, 'learning_rate': 3.893805309734514e-05, 'epoch': 0.55}


 56%|█████▌    | 660/1178 [1:37:13<43:43,  5.07s/it]

{'loss': 0.6111, 'grad_norm': 5.175957202911377, 'learning_rate': 3.8200589970501475e-05, 'epoch': 0.56}


 57%|█████▋    | 670/1178 [1:38:36<1:10:54,  8.37s/it]

{'loss': 0.5258, 'grad_norm': 5.842252731323242, 'learning_rate': 3.746312684365782e-05, 'epoch': 0.57}


 58%|█████▊    | 680/1178 [1:39:57<1:01:59,  7.47s/it]

{'loss': 0.4845, 'grad_norm': 9.489477157592773, 'learning_rate': 3.672566371681416e-05, 'epoch': 0.58}


 59%|█████▊    | 690/1178 [1:40:55<48:00,  5.90s/it]  

{'loss': 0.8779, 'grad_norm': 10.41603946685791, 'learning_rate': 3.5988200589970505e-05, 'epoch': 0.59}


 59%|█████▉    | 700/1178 [1:41:54<46:58,  5.90s/it]

{'loss': 0.6043, 'grad_norm': 10.657732963562012, 'learning_rate': 3.5250737463126844e-05, 'epoch': 0.59}


 60%|██████    | 710/1178 [1:42:52<45:35,  5.84s/it]

{'loss': 0.7289, 'grad_norm': 7.927719593048096, 'learning_rate': 3.451327433628319e-05, 'epoch': 0.6}


 61%|██████    | 720/1178 [1:43:51<45:03,  5.90s/it]

{'loss': 0.6818, 'grad_norm': 7.498287200927734, 'learning_rate': 3.377581120943953e-05, 'epoch': 0.61}


 62%|██████▏   | 730/1178 [1:44:50<44:25,  5.95s/it]

{'loss': 0.7134, 'grad_norm': 7.016335487365723, 'learning_rate': 3.303834808259587e-05, 'epoch': 0.62}


 63%|██████▎   | 740/1178 [1:45:49<42:43,  5.85s/it]

{'loss': 0.6538, 'grad_norm': 10.367898941040039, 'learning_rate': 3.230088495575221e-05, 'epoch': 0.63}


 64%|██████▎   | 750/1178 [1:46:48<42:09,  5.91s/it]

{'loss': 0.617, 'grad_norm': 8.410455703735352, 'learning_rate': 3.156342182890856e-05, 'epoch': 0.64}


 65%|██████▍   | 760/1178 [1:47:47<41:15,  5.92s/it]

{'loss': 0.4908, 'grad_norm': 4.406312465667725, 'learning_rate': 3.08259587020649e-05, 'epoch': 0.65}


 65%|██████▌   | 770/1178 [1:48:46<40:05,  5.89s/it]

{'loss': 1.0283, 'grad_norm': 11.605901718139648, 'learning_rate': 3.008849557522124e-05, 'epoch': 0.65}


 66%|██████▌   | 780/1178 [1:49:45<39:31,  5.96s/it]

{'loss': 0.5745, 'grad_norm': 8.774942398071289, 'learning_rate': 2.935103244837758e-05, 'epoch': 0.66}


 67%|██████▋   | 790/1178 [1:50:44<37:57,  5.87s/it]

{'loss': 0.6685, 'grad_norm': 6.7293901443481445, 'learning_rate': 2.8613569321533922e-05, 'epoch': 0.67}


 68%|██████▊   | 800/1178 [1:51:42<36:36,  5.81s/it]

{'loss': 0.665, 'grad_norm': 10.872639656066895, 'learning_rate': 2.7876106194690264e-05, 'epoch': 0.68}


 69%|██████▉   | 810/1178 [1:52:41<36:07,  5.89s/it]

{'loss': 0.7044, 'grad_norm': 17.491865158081055, 'learning_rate': 2.713864306784661e-05, 'epoch': 0.69}


 70%|██████▉   | 820/1178 [1:53:39<35:22,  5.93s/it]

{'loss': 0.5624, 'grad_norm': 10.540190696716309, 'learning_rate': 2.640117994100295e-05, 'epoch': 0.7}


 70%|███████   | 830/1178 [1:54:38<33:48,  5.83s/it]

{'loss': 0.7637, 'grad_norm': 6.610258102416992, 'learning_rate': 2.5663716814159294e-05, 'epoch': 0.7}


 71%|███████▏  | 840/1178 [1:55:36<33:09,  5.89s/it]

{'loss': 0.6971, 'grad_norm': 6.731423854827881, 'learning_rate': 2.4926253687315636e-05, 'epoch': 0.71}


 72%|███████▏  | 850/1178 [1:56:35<32:00,  5.85s/it]

{'loss': 0.7731, 'grad_norm': 5.638117790222168, 'learning_rate': 2.4188790560471978e-05, 'epoch': 0.72}


 73%|███████▎  | 860/1178 [1:57:33<30:49,  5.81s/it]

{'loss': 0.5402, 'grad_norm': 6.015178203582764, 'learning_rate': 2.345132743362832e-05, 'epoch': 0.73}


 74%|███████▍  | 870/1178 [1:58:32<30:16,  5.90s/it]

{'loss': 0.6901, 'grad_norm': 7.6337738037109375, 'learning_rate': 2.2713864306784662e-05, 'epoch': 0.74}


 75%|███████▍  | 880/1178 [1:59:31<29:18,  5.90s/it]

{'loss': 0.8343, 'grad_norm': 8.293607711791992, 'learning_rate': 2.1976401179941004e-05, 'epoch': 0.75}


 76%|███████▌  | 890/1178 [2:00:30<28:20,  5.90s/it]

{'loss': 0.6281, 'grad_norm': 5.961369037628174, 'learning_rate': 2.1238938053097346e-05, 'epoch': 0.76}


 76%|███████▋  | 900/1178 [2:01:28<26:51,  5.80s/it]

{'loss': 0.5878, 'grad_norm': 4.103511333465576, 'learning_rate': 2.0501474926253688e-05, 'epoch': 0.76}


 77%|███████▋  | 910/1178 [2:02:26<25:53,  5.80s/it]

{'loss': 0.4909, 'grad_norm': 3.6223106384277344, 'learning_rate': 1.976401179941003e-05, 'epoch': 0.77}


 78%|███████▊  | 920/1178 [2:03:25<25:13,  5.86s/it]

{'loss': 0.5893, 'grad_norm': 7.3555803298950195, 'learning_rate': 1.9026548672566372e-05, 'epoch': 0.78}


 79%|███████▉  | 930/1178 [2:04:24<24:17,  5.88s/it]

{'loss': 0.801, 'grad_norm': 13.141008377075195, 'learning_rate': 1.8289085545722714e-05, 'epoch': 0.79}


 80%|███████▉  | 940/1178 [2:05:23<23:24,  5.90s/it]

{'loss': 0.5431, 'grad_norm': 6.437355041503906, 'learning_rate': 1.7551622418879056e-05, 'epoch': 0.8}


 81%|████████  | 950/1178 [2:06:22<22:21,  5.88s/it]

{'loss': 0.5976, 'grad_norm': 8.953352928161621, 'learning_rate': 1.6814159292035402e-05, 'epoch': 0.81}


 81%|████████▏ | 960/1178 [2:07:21<21:24,  5.89s/it]

{'loss': 0.6642, 'grad_norm': 5.320005893707275, 'learning_rate': 1.607669616519174e-05, 'epoch': 0.81}


 82%|████████▏ | 970/1178 [2:08:19<20:20,  5.87s/it]

{'loss': 0.6554, 'grad_norm': 13.947515487670898, 'learning_rate': 1.5339233038348082e-05, 'epoch': 0.82}


 83%|████████▎ | 980/1178 [2:09:18<19:23,  5.87s/it]

{'loss': 0.8134, 'grad_norm': 9.120600700378418, 'learning_rate': 1.4601769911504426e-05, 'epoch': 0.83}


 84%|████████▍ | 990/1178 [2:10:17<18:32,  5.92s/it]

{'loss': 0.4882, 'grad_norm': 12.959755897521973, 'learning_rate': 1.3864306784660768e-05, 'epoch': 0.84}


 85%|████████▍ | 1000/1178 [2:11:16<17:33,  5.92s/it]

{'loss': 0.5305, 'grad_norm': 5.356115818023682, 'learning_rate': 1.3126843657817109e-05, 'epoch': 0.85}


 86%|████████▌ | 1010/1178 [2:12:15<16:36,  5.93s/it]

{'loss': 0.6226, 'grad_norm': 8.040938377380371, 'learning_rate': 1.2389380530973452e-05, 'epoch': 0.86}


 87%|████████▋ | 1020/1178 [2:13:13<15:26,  5.87s/it]

{'loss': 0.3607, 'grad_norm': 3.2543091773986816, 'learning_rate': 1.1651917404129794e-05, 'epoch': 0.87}


 87%|████████▋ | 1030/1178 [2:14:12<14:37,  5.93s/it]

{'loss': 0.5987, 'grad_norm': 7.370449066162109, 'learning_rate': 1.0914454277286137e-05, 'epoch': 0.87}


 88%|████████▊ | 1040/1178 [2:15:11<13:32,  5.89s/it]

{'loss': 0.7063, 'grad_norm': 10.006916999816895, 'learning_rate': 1.0176991150442479e-05, 'epoch': 0.88}


 89%|████████▉ | 1050/1178 [2:16:10<12:37,  5.92s/it]

{'loss': 0.6238, 'grad_norm': 6.64994478225708, 'learning_rate': 9.43952802359882e-06, 'epoch': 0.89}


 90%|████████▉ | 1060/1178 [2:17:09<11:30,  5.85s/it]

{'loss': 0.4863, 'grad_norm': 7.216398239135742, 'learning_rate': 8.702064896755163e-06, 'epoch': 0.9}


 91%|█████████ | 1070/1178 [2:18:08<10:41,  5.94s/it]

{'loss': 0.5511, 'grad_norm': 9.978235244750977, 'learning_rate': 7.964601769911505e-06, 'epoch': 0.91}


 92%|█████████▏| 1080/1178 [2:19:06<09:32,  5.85s/it]

{'loss': 0.448, 'grad_norm': 9.662606239318848, 'learning_rate': 7.227138643067848e-06, 'epoch': 0.92}


 93%|█████████▎| 1090/1178 [2:20:05<08:33,  5.84s/it]

{'loss': 0.5323, 'grad_norm': 4.5190582275390625, 'learning_rate': 6.489675516224189e-06, 'epoch': 0.93}


 93%|█████████▎| 1100/1178 [2:21:03<07:38,  5.88s/it]

{'loss': 0.7299, 'grad_norm': 7.808739185333252, 'learning_rate': 5.752212389380531e-06, 'epoch': 0.93}


 94%|█████████▍| 1110/1178 [2:22:02<06:37,  5.85s/it]

{'loss': 0.5878, 'grad_norm': 7.241094589233398, 'learning_rate': 5.014749262536873e-06, 'epoch': 0.94}


 95%|█████████▌| 1120/1178 [2:23:01<05:42,  5.90s/it]

{'loss': 0.4547, 'grad_norm': 5.9413743019104, 'learning_rate': 4.277286135693216e-06, 'epoch': 0.95}


 96%|█████████▌| 1130/1178 [2:24:00<04:42,  5.89s/it]

{'loss': 0.6355, 'grad_norm': 13.74876880645752, 'learning_rate': 3.5398230088495575e-06, 'epoch': 0.96}


 97%|█████████▋| 1140/1178 [2:24:59<03:48,  6.00s/it]

{'loss': 0.7298, 'grad_norm': 7.763744831085205, 'learning_rate': 2.8023598820059e-06, 'epoch': 0.97}


 98%|█████████▊| 1150/1178 [2:25:58<02:45,  5.90s/it]

{'loss': 0.487, 'grad_norm': 8.622570991516113, 'learning_rate': 2.064896755162242e-06, 'epoch': 0.98}


 98%|█████████▊| 1160/1178 [2:26:57<01:46,  5.93s/it]

{'loss': 0.5171, 'grad_norm': 6.4297027587890625, 'learning_rate': 1.3274336283185841e-06, 'epoch': 0.98}


 99%|█████████▉| 1170/1178 [2:27:55<00:46,  5.86s/it]

{'loss': 0.7059, 'grad_norm': 11.373796463012695, 'learning_rate': 5.899705014749263e-07, 'epoch': 0.99}


                                                     
100%|██████████| 1178/1178 [2:49:25<00:00,  4.52s/it]

{'eval_loss': 0.6162757873535156, 'eval_accuracy': 0.7389150359177608, 'eval_runtime': 1245.4576, 'eval_samples_per_second': 3.241, 'eval_steps_per_second': 0.203, 'epoch': 1.0}


100%|██████████| 1178/1178 [2:49:27<00:00,  8.63s/it]


{'train_runtime': 10167.3228, 'train_samples_per_second': 0.926, 'train_steps_per_second': 0.116, 'train_loss': 0.7029295143733, 'epoch': 1.0}


100%|██████████| 253/253 [22:50<00:00,  5.42s/it]



Evaluation Results:
{'eval_loss': 0.6162757873535156, 'eval_accuracy': 0.7389150359177608, 'eval_runtime': 1377.2063, 'eval_samples_per_second': 2.931, 'eval_steps_per_second': 0.184, 'epoch': 1.0}


100%|██████████| 253/253 [16:00<00:00,  3.80s/it]

Accuracy: 73.89%

Classification Report:
              precision    recall  f1-score   support

           0       0.71      0.55      0.62      1039
           1       0.70      0.85      0.77      1733
           2       0.85      0.74      0.79      1265

    accuracy                           0.74      4037
   macro avg       0.75      0.71      0.72      4037
weighted avg       0.75      0.74      0.74      4037




