In [1]:
!pip install datasets
!pip install update transformers

Collecting update
  Downloading update-0.0.1-py2.py3-none-any.whl (2.9 kB)
Collecting style==1.1.0 (from update)
  Downloading style-1.1.0-py2.py3-none-any.whl (6.4 kB)
Installing collected packages: style, update
Successfully installed style-1.1.0 update-0.0.1


In [3]:
import torch
# Ask PyTorch to release cached GPU memory it is no longer using before the run starts.
torch.cuda.empty_cache()

In [4]:
import os
import numpy as np
from collections import Counter
import torch
import datasets
datasets.logging.set_verbosity_error()
from datasets import load_metric
# from google.colab import drive
from transformers import Trainer, TrainingArguments, AutoTokenizer, AutoModelForSequenceClassification
from sklearn.metrics import cohen_kappa_score
from sklearn.metrics import f1_score
import pandas as pd
import random

from sklearn.model_selection import train_test_split
from transformers import BertTokenizer, BertForSequenceClassification, AdamW, get_linear_schedule_with_warmup
import torch
from torch.utils.data import DataLoader, TensorDataset, RandomSampler, SequentialSampler
from tqdm import tqdm
from sklearn.metrics import classification_report, accuracy_score
from transformers import BertTokenizer, BertModel


# # uncomment if CAN'T CONNECT TO GPU
# import psutil
# import platform

In [5]:
# Upgrade accelerate: occasionally an error is raised that requires a newer version.
# Comment this line out if the error does not occur.
!pip install accelerate -U

Collecting accelerate
  Downloading accelerate-0.24.0-py3-none-any.whl (260 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m261.0/261.0 kB[0m [31m12.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: accelerate
  Attempting uninstall: accelerate
    Found existing installation: accelerate 0.22.0
    Uninstalling accelerate-0.22.0:
      Successfully uninstalled accelerate-0.22.0
Successfully installed accelerate-0.24.0


In [6]:
def enforce_reproducibility(seed=42):
    """Seed every random number generator used in this notebook.

    Seeds PyTorch (CPU and all CUDA devices), Python's ``random`` module and
    NumPy with the same value, and configures cuDNN for deterministic
    behaviour with autotuning (benchmark mode) switched off.

    Args:
        seed: Integer seed shared by all generators. Defaults to 42.

    Returns:
        None.
    """
    # Same seed for PyTorch (CPU, then CUDA) and the system-level generators.
    for seed_fn in (torch.manual_seed, torch.cuda.manual_seed_all, random.seed, np.random.seed):
        seed_fn(seed)

    # cuDNN settings. Atomic operations can still be non-deterministic, because
    # the order of parallel operations is not known.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

# Use the GPU when PyTorch can see one; otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Seed all generators once, with the default seed.
enforce_reproducibility()

In [7]:
# Preamble: add the parent directory to the module search path so that
# modules located there can be imported from this notebook.
import sys

sys.path.append('..')

# Preprocessing

In [8]:
from datasets import load_dataset
import pandas as pd

# Load the answerable TyDiQA dataset and keep its two splits at hand.
dataset = load_dataset("copenlu/answerable_tydiqa")
train_set, validation_set = dataset["train"], dataset["validation"]

# pandas views of both splits for the filtering and labelling steps below.
df_train, df_val = train_set.to_pandas(), validation_set.to_pandas()

Downloading:   0%|          | 0.00/2.47k [00:00<?, ?B/s]

Downloading and preparing dataset None/None (download: 75.43 MiB, generated: 131.78 MiB, post-processed: Unknown size, total: 207.21 MiB) to /root/.cache/huggingface/datasets/parquet/copenlu--nlp_course_tydiqa-42333912ea665dd0/0.0.0/0b6d5799bb726b24ad7fc7be720c170d8e497f575d02d47537de9a5bac074901...


Downloading data files:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/71.6M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/7.49M [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/2 [00:00<?, ?it/s]

Dataset parquet downloaded and prepared to /root/.cache/huggingface/datasets/parquet/copenlu--nlp_course_tydiqa-42333912ea665dd0/0.0.0/0b6d5799bb726b24ad7fc7be720c170d8e497f575d02d47537de9a5bac074901. Subsequent calls will reuse this data.


  0%|          | 0/2 [00:00<?, ?it/s]

In [9]:
# Split the train and validation frames by language
def _rows_for_language(frame, language):
    """Return the rows of `frame` whose 'language' column equals `language`."""
    return frame[frame['language'] == language]


df_train_bengali = _rows_for_language(df_train, 'bengali')
df_val_bengali = _rows_for_language(df_val, 'bengali')

df_train_arabic = _rows_for_language(df_train, 'arabic')
df_val_arabic = _rows_for_language(df_val, 'arabic')

df_train_indonesian = _rows_for_language(df_train, 'indonesian')
df_val_indonesian = _rows_for_language(df_val, 'indonesian')

# For testing
df_train_english = _rows_for_language(df_train, 'english')
df_val_english = _rows_for_language(df_val, 'english')


In [10]:
# Create a new dataframe per language and split with the document, the question
# and a binary label telling whether the question is answerable.
def _is_answerable(annotation):
    """Return 0 if the annotation marks the question as unanswerable, else 1.

    A question counts as unanswerable when ``annotation['answer_start']`` holds
    exactly one entry equal to -1. The check is written element-wise so it works
    for both lists and arrays without relying on the truth value of an array
    comparison (which raises for arrays with more than one element).
    """
    starts = annotation['answer_start']
    return 0 if (len(starts) == 1 and starts[0] == -1) else 1


def build_answerability_frame(df):
    """Build the (text, question, answerable) frame used for classification.

    Args:
        df: Frame with 'document_plaintext', 'question_text' and 'annotations'
            columns, as read by the cells above.

    Returns:
        A new DataFrame, indexed like `df`, with columns 'text', 'question'
        and 'answerable' (0 = not answerable, 1 = answerable).
    """
    return pd.DataFrame({
        'text': df["document_plaintext"],
        'question': df["question_text"],
        'answerable': df["annotations"].apply(_is_answerable),
    })


df_train_bengali_merged = build_answerability_frame(df_train_bengali)
df_train_arabic_merged = build_answerability_frame(df_train_arabic)
df_train_indonesian_merged = build_answerability_frame(df_train_indonesian)
df_train_english_merged = build_answerability_frame(df_train_english)


## Same for validation data
df_val_bengali_merged = build_answerability_frame(df_val_bengali)
df_val_arabic_merged = build_answerability_frame(df_val_arabic)
df_val_indonesian_merged = build_answerability_frame(df_val_indonesian)
df_val_english_merged = build_answerability_frame(df_val_english)

# Tokenization of text

In [11]:
tokenizer = AutoTokenizer.from_pretrained("bert-base-multilingual-uncased")

def tokenize_text(df, max_length=128):
    """Tokenize the question/document pairs of `df` with the notebook's tokenizer.

    Each row is encoded as a (question, text) pair. Only the document (the
    second sequence) is truncated, and every example is padded to `max_length`.

    Args:
        df: Frame with 'question' and 'text' columns.
        max_length: Fixed token length of every encoded example. Defaults to 128.

    Returns:
        The tokenizer's encoding with PyTorch tensors; the cells below read its
        "input_ids" and "attention_mask" entries.

    NOTE(review): the encoding presumably also contains "token_type_ids" for the
    sentence pair; the training cells do not forward them to the model - confirm
    that this is intended.
    """
    encoded_text = tokenizer(
        df["question"].tolist(),
        df["text"].tolist(),
        max_length=max_length,
        padding="max_length",
        truncation='only_second',
        return_attention_mask=True,
        return_tensors="pt"
    )

    return encoded_text


Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/625 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/872k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.72M [00:00<?, ?B/s]

# Defining the model (optimizer, scheduler and training loop are set up per language below)

In [12]:
# Multilingual BERT with a 2-class sequence classification head
# (0 = not answerable, 1 = answerable).
model = AutoModelForSequenceClassification.from_pretrained("bert-base-multilingual-uncased", num_labels=2)
# Move the model to the device selected above (GPU when available) instead of
# calling .cuda() unconditionally, which fails on CPU-only machines.
model.to(device)


Downloading model.safetensors:   0%|          | 0.00/672M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(105879, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12

### Indonesian

In [20]:
# For Indonesian

# Tokenize the question/document pairs of both splits.
train_tokenized_text_indonesian = tokenize_text(df_train_indonesian_merged)
val_tokenized_text_indonesian = tokenize_text(df_val_indonesian_merged)

# The tokenizer already returns one stacked tensor per field, so the tensors are
# used directly (concatenating a single tensor would only copy it).
train_input_ids_indonesian = train_tokenized_text_indonesian["input_ids"]
train_attention_masks_indonesian = train_tokenized_text_indonesian["attention_mask"]
val_input_ids_indonesian = val_tokenized_text_indonesian["input_ids"]
val_attention_masks_indonesian = val_tokenized_text_indonesian["attention_mask"]

train_labels_indonesian = torch.tensor(df_train_indonesian_merged["answerable"].tolist())
val_labels_indonesian = torch.tensor(df_val_indonesian_merged["answerable"].tolist())


batch_size = 16

# Each dataset item is (input_ids, attention_mask, label). The tensors are moved
# to `device` (GPU when available) rather than to a hard-coded 'cuda'.
train_data_indonesian = TensorDataset(
    train_input_ids_indonesian.to(device),
    train_attention_masks_indonesian.to(device),
    train_labels_indonesian.to(device),
)
# Training batches are drawn in random order.
train_sampler_indonesian = RandomSampler(train_data_indonesian)
train_dataloader_indonesian = DataLoader(train_data_indonesian, sampler=train_sampler_indonesian, batch_size=batch_size)

val_data_indonesian = TensorDataset(
    val_input_ids_indonesian.to(device),
    val_attention_masks_indonesian.to(device),
    val_labels_indonesian.to(device),
)
# Validation batches keep the original order.
val_sampler_indonesian = SequentialSampler(val_data_indonesian)
val_dataloader_indonesian = DataLoader(val_data_indonesian, sampler=val_sampler_indonesian, batch_size=batch_size)



In [15]:
# Reload the pretrained checkpoint so Indonesian is fine-tuned from the original
# multilingual BERT weights, not from whatever a previously run language section
# left in `model`. Re-seeding first keeps the result independent of cell order.
enforce_reproducibility()
model = AutoModelForSequenceClassification.from_pretrained("bert-base-multilingual-uncased", num_labels=2).to(device)

optimizer = AdamW(model.parameters(), lr=2e-5, eps=1e-8)
epochs = 4
# The scheduler is stepped once per batch, so it needs the total number of batches.
total_steps = len(train_dataloader_indonesian) * epochs
# Linear decay of the learning rate, without warmup.
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)




In [16]:
# Keep the model on the selected device (GPU when available) instead of a
# hard-coded "cuda".
model = model.to(device)


# Training loop: one pass over the training data per epoch, followed by an
# evaluation on the validation data.
for epoch in range(epochs):
    model.train()
    total_loss = 0  # Initialize the total loss for the epoch

    for batch in tqdm(train_dataloader_indonesian, desc=f"Epoch {epoch + 1}"):
        # A batch is (input_ids, attention_mask, labels).
        inputs = batch[:2]
        labels = batch[2]

        model.zero_grad()  # Clear gradients from the previous step
        outputs = model(*inputs, labels=labels)  # With labels, the output carries the loss
        loss = outputs.loss
        total_loss += loss.item()  # Accumulate the loss
        loss.backward()
        # Clip the gradient norm to 1.0 before the update.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        scheduler.step()  # Learning rate is updated after every batch

    average_loss = total_loss / len(train_dataloader_indonesian)  # Compute the average loss for the epoch

    model.eval()
    predictions = []
    true_labels = []
    for batch in tqdm(val_dataloader_indonesian, desc=f"Evaluating Epoch {epoch + 1}"):
        inputs = batch[:2]
        labels = batch[2]
        with torch.no_grad():  # No gradients are needed for evaluation
            outputs = model(*inputs)
        logits = outputs.logits
        # Predicted class = index of the largest logit.
        predictions.extend(logits.argmax(dim=1).tolist())
        true_labels.extend(labels.tolist())

    accuracy = accuracy_score(true_labels, predictions)
    report = classification_report(true_labels, predictions, target_names=["Not Answerable", "Answerable"], digits=5)
    print(f"Epoch {epoch + 1} - Accuracy: {accuracy:.4f} - Average Loss: {average_loss:.4f}")
    print(report)

Epoch 1: 100%|██████████| 713/713 [02:25<00:00,  4.91it/s]
Evaluating Epoch 1: 100%|██████████| 75/75 [00:04<00:00, 17.62it/s]


Epoch 1 - Accuracy: 0.8724 - Average Loss: 0.4121
                precision    recall  f1-score   support

Not Answerable    0.91386   0.82155   0.86525       594
    Answerable    0.83866   0.92295   0.87879       597

      accuracy                        0.87238      1191
     macro avg    0.87626   0.87225   0.87202      1191
  weighted avg    0.87616   0.87238   0.87204      1191



Epoch 2: 100%|██████████| 713/713 [02:23<00:00,  4.97it/s]
Evaluating Epoch 2: 100%|██████████| 75/75 [00:04<00:00, 17.60it/s]


Epoch 2 - Accuracy: 0.8783 - Average Loss: 0.2536
                precision    recall  f1-score   support

Not Answerable    0.87731   0.87879   0.87805       594
    Answerable    0.87919   0.87772   0.87846       597

      accuracy                        0.87825      1191
     macro avg    0.87825   0.87825   0.87825      1191
  weighted avg    0.87826   0.87825   0.87825      1191



Epoch 3: 100%|██████████| 713/713 [02:23<00:00,  4.97it/s]
Evaluating Epoch 3: 100%|██████████| 75/75 [00:04<00:00, 17.66it/s]


Epoch 3 - Accuracy: 0.8757 - Average Loss: 0.1695
                precision    recall  f1-score   support

Not Answerable    0.86438   0.89057   0.87728       594
    Answerable    0.88774   0.86097   0.87415       597

      accuracy                        0.87573      1191
     macro avg    0.87606   0.87577   0.87571      1191
  weighted avg    0.87609   0.87573   0.87571      1191



Epoch 4: 100%|██████████| 713/713 [02:23<00:00,  4.97it/s]
Evaluating Epoch 4: 100%|██████████| 75/75 [00:04<00:00, 17.56it/s]

Epoch 4 - Accuracy: 0.8732 - Average Loss: 0.1067
                precision    recall  f1-score   support

Not Answerable    0.88522   0.85690   0.87083       594
    Answerable    0.86201   0.88945   0.87552       597

      accuracy                        0.87322      1191
     macro avg    0.87362   0.87317   0.87317      1191
  weighted avg    0.87359   0.87322   0.87318      1191






In [17]:
# Save the model fine-tuned on Indonesian into a directory next to the notebook.
model.save_pretrained("./indonesian_classification")

In [None]:
from pathlib import Path
from IPython.display import FileLink

# Point at the directory written by save_pretrained() relative to the working
# directory instead of a hard-coded /kaggle/working path. Depending on the
# transformers version the weights are stored as pytorch_model.bin or
# model.safetensors, so link to whichever file exists.
weights_dir = Path("indonesian_classification")
weights_file = next(
    (candidate
     for candidate in (weights_dir / "pytorch_model.bin", weights_dir / "model.safetensors")
     if candidate.exists()),
    weights_dir / "pytorch_model.bin",
)
FileLink(str(weights_file))

### Bengali

In [19]:
# For Bengali

# Tokenize the question/document pairs of both splits.
train_tokenized_text_bengali = tokenize_text(df_train_bengali_merged)
val_tokenized_text_bengali = tokenize_text(df_val_bengali_merged)

# The tokenizer already returns one stacked tensor per field, so the tensors are
# used directly (concatenating a single tensor would only copy it).
train_input_ids_bengali = train_tokenized_text_bengali["input_ids"]
train_attention_masks_bengali = train_tokenized_text_bengali["attention_mask"]
val_input_ids_bengali = val_tokenized_text_bengali["input_ids"]
val_attention_masks_bengali = val_tokenized_text_bengali["attention_mask"]

train_labels_bengali = torch.tensor(df_train_bengali_merged["answerable"].tolist())
val_labels_bengali = torch.tensor(df_val_bengali_merged["answerable"].tolist())

batch_size = 16

# Each dataset item is (input_ids, attention_mask, label). The tensors are moved
# to `device` (GPU when available) rather than to a hard-coded 'cuda'.
train_data_bengali = TensorDataset(
    train_input_ids_bengali.to(device),
    train_attention_masks_bengali.to(device),
    train_labels_bengali.to(device),
)
# Training batches are drawn in random order.
train_sampler_bengali = RandomSampler(train_data_bengali)
train_dataloader_bengali = DataLoader(train_data_bengali, sampler=train_sampler_bengali, batch_size=batch_size)

val_data_bengali = TensorDataset(
    val_input_ids_bengali.to(device),
    val_attention_masks_bengali.to(device),
    val_labels_bengali.to(device),
)
# Validation batches keep the original order.
val_sampler_bengali = SequentialSampler(val_data_bengali)
val_dataloader_bengali = DataLoader(val_data_bengali, sampler=val_sampler_bengali, batch_size=batch_size)


In [14]:
# Reload the pretrained checkpoint so Bengali is fine-tuned from the original
# multilingual BERT weights. Without this, running the notebook top to bottom
# would continue training the model already fine-tuned on another language.
# Re-seeding first keeps the result independent of cell order.
enforce_reproducibility()
model = AutoModelForSequenceClassification.from_pretrained("bert-base-multilingual-uncased", num_labels=2).to(device)

optimizer = AdamW(model.parameters(), lr=2e-5, eps=1e-8)
epochs = 4
# The scheduler is stepped once per batch, so it needs the total number of batches.
total_steps = len(train_dataloader_bengali) * epochs
# Linear decay of the learning rate, without warmup.
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)




In [15]:
# Make sure the model is on the selected device (GPU when available), as the
# other language sections do before training.
model = model.to(device)

# Training loop: one pass over the training data per epoch, followed by an
# evaluation on the validation data.
for epoch in range(epochs):
    model.train()
    total_loss = 0  # Initialize the total loss for the epoch

    for batch in tqdm(train_dataloader_bengali, desc=f"Epoch {epoch + 1}"):
        # A batch is (input_ids, attention_mask, labels).
        inputs = batch[:2]
        labels = batch[2]

        model.zero_grad()  # Clear gradients from the previous step
        outputs = model(*inputs, labels=labels)  # With labels, the output carries the loss
        loss = outputs.loss
        total_loss += loss.item()  # Accumulate the loss
        loss.backward()
        # Clip the gradient norm to 1.0 before the update.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        scheduler.step()  # Learning rate is updated after every batch

    average_loss = total_loss / len(train_dataloader_bengali)  # Compute the average loss for the epoch

    model.eval()
    predictions = []
    true_labels = []
    for batch in tqdm(val_dataloader_bengali, desc=f"Evaluating Epoch {epoch + 1}"):
        inputs = batch[:2]
        labels = batch[2]
        with torch.no_grad():  # No gradients are needed for evaluation
            outputs = model(*inputs)
        logits = outputs.logits
        # Predicted class = index of the largest logit.
        predictions.extend(logits.argmax(dim=1).tolist())
        true_labels.extend(labels.tolist())

    accuracy = accuracy_score(true_labels, predictions)
    report = classification_report(true_labels, predictions, target_names=["Not Answerable", "Answerable"], digits=5)
    print(f"Epoch {epoch + 1} - Accuracy: {accuracy:.4f} - Average Loss: {average_loss:.4f}")
    print(report)


Epoch 1: 100%|██████████| 299/299 [01:01<00:00,  4.83it/s]
Evaluating Epoch 1: 100%|██████████| 14/14 [00:00<00:00, 17.49it/s]


Epoch 1 - Accuracy: 0.7723 - Average Loss: 0.5368
                precision    recall  f1-score   support

Not Answerable    0.76068   0.79464   0.77729       112
    Answerable    0.78505   0.75000   0.76712       112

      accuracy                        0.77232       224
     macro avg    0.77287   0.77232   0.77221       224
  weighted avg    0.77287   0.77232   0.77221       224



Epoch 2: 100%|██████████| 299/299 [01:00<00:00,  4.97it/s]
Evaluating Epoch 2: 100%|██████████| 14/14 [00:00<00:00, 17.50it/s]


Epoch 2 - Accuracy: 0.7723 - Average Loss: 0.3969
                precision    recall  f1-score   support

Not Answerable    0.82105   0.69643   0.75362       112
    Answerable    0.73643   0.84821   0.78838       112

      accuracy                        0.77232       224
     macro avg    0.77874   0.77232   0.77100       224
  weighted avg    0.77874   0.77232   0.77100       224



Epoch 3: 100%|██████████| 299/299 [01:00<00:00,  4.97it/s]
Evaluating Epoch 3: 100%|██████████| 14/14 [00:00<00:00, 17.51it/s]


Epoch 3 - Accuracy: 0.7589 - Average Loss: 0.2739
                precision    recall  f1-score   support

Not Answerable    0.79592   0.69643   0.74286       112
    Answerable    0.73016   0.82143   0.77311       112

      accuracy                        0.75893       224
     macro avg    0.76304   0.75893   0.75798       224
  weighted avg    0.76304   0.75893   0.75798       224



Epoch 4: 100%|██████████| 299/299 [01:00<00:00,  4.97it/s]
Evaluating Epoch 4: 100%|██████████| 14/14 [00:00<00:00, 17.50it/s]

Epoch 4 - Accuracy: 0.7768 - Average Loss: 0.1784
                precision    recall  f1-score   support

Not Answerable    0.83696   0.68750   0.75490       112
    Answerable    0.73485   0.86607   0.79508       112

      accuracy                        0.77679       224
     macro avg    0.78590   0.77679   0.77499       224
  weighted avg    0.78590   0.77679   0.77499       224






In [16]:
# Save the model fine-tuned on Bengali into a directory next to the notebook.
model.save_pretrained("./bengali_classification")

In [None]:
from pathlib import Path
from IPython.display import FileLink

# Point at the directory written by save_pretrained() relative to the working
# directory instead of a hard-coded /kaggle/working path. Depending on the
# transformers version the weights are stored as pytorch_model.bin or
# model.safetensors, so link to whichever file exists.
weights_dir = Path("bengali_classification")
weights_file = next(
    (candidate
     for candidate in (weights_dir / "pytorch_model.bin", weights_dir / "model.safetensors")
     if candidate.exists()),
    weights_dir / "pytorch_model.bin",
)
FileLink(str(weights_file))

### Arabic

In [13]:
# For Arabic

# Tokenize the question/document pairs of both splits.
train_tokenized_text_arabic = tokenize_text(df_train_arabic_merged)
val_tokenized_text_arabic = tokenize_text(df_val_arabic_merged)

# The tokenizer already returns one stacked tensor per field, so the tensors are
# used directly (concatenating a single tensor would only copy it).
train_input_ids_arabic = train_tokenized_text_arabic["input_ids"]
train_attention_masks_arabic = train_tokenized_text_arabic["attention_mask"]
val_input_ids_arabic = val_tokenized_text_arabic["input_ids"]
val_attention_masks_arabic = val_tokenized_text_arabic["attention_mask"]

train_labels_arabic = torch.tensor(df_train_arabic_merged["answerable"].tolist())
val_labels_arabic = torch.tensor(df_val_arabic_merged["answerable"].tolist())

batch_size = 16

# Each dataset item is (input_ids, attention_mask, label). The tensors are moved
# to `device` (GPU when available) rather than to a hard-coded 'cuda'.
train_data_arabic = TensorDataset(
    train_input_ids_arabic.to(device),
    train_attention_masks_arabic.to(device),
    train_labels_arabic.to(device),
)
# Training batches are drawn in random order.
train_sampler_arabic = RandomSampler(train_data_arabic)
train_dataloader_arabic = DataLoader(train_data_arabic, sampler=train_sampler_arabic, batch_size=batch_size)

val_data_arabic = TensorDataset(
    val_input_ids_arabic.to(device),
    val_attention_masks_arabic.to(device),
    val_labels_arabic.to(device),
)
# Validation batches keep the original order.
val_sampler_arabic = SequentialSampler(val_data_arabic)
val_dataloader_arabic = DataLoader(val_data_arabic, sampler=val_sampler_arabic, batch_size=batch_size)


In [14]:
# Reload the pretrained checkpoint so Arabic is fine-tuned from the original
# multilingual BERT weights. Without this, running the notebook top to bottom
# would continue training the model already fine-tuned on other languages.
# Re-seeding first keeps the result independent of cell order.
enforce_reproducibility()
model = AutoModelForSequenceClassification.from_pretrained("bert-base-multilingual-uncased", num_labels=2).to(device)

optimizer = AdamW(model.parameters(), lr=2e-5, eps=1e-8)
epochs = 4
# The scheduler is stepped once per batch, so it needs the total number of batches.
total_steps = len(train_dataloader_arabic) * epochs
# Linear decay of the learning rate, without warmup.
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)




In [15]:
# Keep the model on the selected device (GPU when available) instead of a
# hard-coded "cuda".
model = model.to(device)

# Training loop: one pass over the training data per epoch, followed by an
# evaluation on the validation data.
for epoch in range(epochs):
    model.train()
    total_loss = 0  # Initialize the total loss for the epoch

    for batch in tqdm(train_dataloader_arabic, desc=f"Epoch {epoch + 1}"):
        # A batch is (input_ids, attention_mask, labels).
        inputs = batch[:2]
        labels = batch[2]

        model.zero_grad()  # Clear gradients from the previous step
        outputs = model(*inputs, labels=labels)  # With labels, the output carries the loss
        loss = outputs.loss
        total_loss += loss.item()  # Accumulate the loss
        loss.backward()
        # Clip the gradient norm to 1.0 before the update.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        scheduler.step()  # Learning rate is updated after every batch

    average_loss = total_loss / len(train_dataloader_arabic)  # Compute the average loss for the epoch

    model.eval()
    predictions = []
    true_labels = []
    for batch in tqdm(val_dataloader_arabic, desc=f"Evaluating Epoch {epoch + 1}"):
        inputs = batch[:2]
        labels = batch[2]
        with torch.no_grad():  # No gradients are needed for evaluation
            outputs = model(*inputs)
        logits = outputs.logits
        # Predicted class = index of the largest logit.
        predictions.extend(logits.argmax(dim=1).tolist())
        true_labels.extend(labels.tolist())

    accuracy = accuracy_score(true_labels, predictions)
    report = classification_report(true_labels, predictions, target_names=["Not Answerable", "Answerable"], digits=5)
    print(f"Epoch {epoch + 1} - Accuracy: {accuracy:.4f} - Average Loss: {average_loss:.4f}")
    print(report)

Epoch 1: 100%|██████████| 1850/1850 [06:13<00:00,  4.95it/s]
Evaluating Epoch 1: 100%|██████████| 119/119 [00:06<00:00, 17.58it/s]


Epoch 1 - Accuracy: 0.9238 - Average Loss: 0.2977
                precision    recall  f1-score   support

Not Answerable    0.92332   0.92429   0.92380       951
    Answerable    0.92421   0.92324   0.92372       951

      accuracy                        0.92376      1902
     macro avg    0.92376   0.92376   0.92376      1902
  weighted avg    0.92376   0.92376   0.92376      1902



Epoch 2: 100%|██████████| 1850/1850 [06:12<00:00,  4.97it/s]
Evaluating Epoch 2: 100%|██████████| 119/119 [00:06<00:00, 17.58it/s]


Epoch 2 - Accuracy: 0.9196 - Average Loss: 0.1972
                precision    recall  f1-score   support

Not Answerable    0.94333   0.89274   0.91734       951
    Answerable    0.89820   0.94637   0.92166       951

      accuracy                        0.91956      1902
     macro avg    0.92077   0.91956   0.91950      1902
  weighted avg    0.92077   0.91956   0.91950      1902



Epoch 3: 100%|██████████| 1850/1850 [06:12<00:00,  4.97it/s]
Evaluating Epoch 3: 100%|██████████| 119/119 [00:06<00:00, 17.54it/s]


Epoch 3 - Accuracy: 0.9301 - Average Loss: 0.1413
                precision    recall  f1-score   support

Not Answerable    0.94264   0.91588   0.92907       951
    Answerable    0.91820   0.94427   0.93105       951

      accuracy                        0.93007      1902
     macro avg    0.93042   0.93007   0.93006      1902
  weighted avg    0.93042   0.93007   0.93006      1902



Epoch 4: 100%|██████████| 1850/1850 [06:12<00:00,  4.97it/s]
Evaluating Epoch 4: 100%|██████████| 119/119 [00:06<00:00, 17.57it/s]

Epoch 4 - Accuracy: 0.9311 - Average Loss: 0.0939
                precision    recall  f1-score   support

Not Answerable    0.94372   0.91693   0.93013       951
    Answerable    0.91922   0.94532   0.93209       951

      accuracy                        0.93113      1902
     macro avg    0.93147   0.93113   0.93111      1902
  weighted avg    0.93147   0.93113   0.93111      1902






In [16]:
# Save the model fine-tuned on Arabic into a directory next to the notebook.
model.save_pretrained("./arabic_classification")

In [None]:
from pathlib import Path
from IPython.display import FileLink

# Point at the directory written by save_pretrained() relative to the working
# directory instead of a hard-coded /kaggle/working path. Depending on the
# transformers version the weights are stored as pytorch_model.bin or
# model.safetensors, so link to whichever file exists.
weights_dir = Path("arabic_classification")
weights_file = next(
    (candidate
     for candidate in (weights_dir / "pytorch_model.bin", weights_dir / "model.safetensors")
     if candidate.exists()),
    weights_dir / "pytorch_model.bin",
)
FileLink(str(weights_file))