In [1]:
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print(torch.cuda.get_device_name(device))

NVIDIA GeForce RTX 3060 Ti


In [2]:
import pandas as pd
df = pd.read_csv('data/data_4000_selected.csv')
df = df.iloc[:, 2:4]
df

Unnamed: 0,Comment,Majority_Label
0,@User.IDX في فترة الصغر والمراهقة يكون من الصع...,Non-Offensive
1,"""ردا على معظم الردود .. أحب اوضحلكم ان عمليات ...",Non-Offensive
2,@User.IDX يجب ان تذكروا ان لكل سنة ثيم للحفل و...,Non-Offensive
3,بتعمل حلقة صغيرة عشان عندي امتحان بكرة ومتضيعل...,Non-Offensive
4,على طاري السطحيه مدري ليه تذكرت فيحان,Non-Offensive
...,...,...
3995,والله ما اعرف ابكي على حالنا لي وصلنا ليه ام ا...,Non-Offensive
3996,انا الحين ملخبط ذي كلها في البطاقه الائتمانية ...,Non-Offensive
3997,أنا متأكد لو كان ستيفن_هوكينج مسلما وثبتت تجار...,Non-Offensive
3998,على فكرة في الدين حكم الاراهبين يطبق عليهم حكم...,Non-Offensive


In [3]:
test_data = df.sample(frac=0.2,random_state=200)
test_data.shape
data=df.drop(test_data.index)
data.shape,test_data.shape

((3200, 2), (800, 2))

In [4]:
test_data

Unnamed: 0,Comment,Majority_Label
501,ممكن تعمل فيديو عن crispr ابغى اعرف رأيك في هذ...,Non-Offensive
3118,بقول لأمي كدسوا ذهب للأزمة القادمة تقولي أصلا ...,Non-Offensive
228,الي جاي من خلف لايك ❤️👍🏻,Non-Offensive
2879,@User.IDX عباسي مدني. رحمك الله يا رجل ورحم أم...,Offensive
1220,أكرم مخلوق عند الله هو سيدنا محمد ﷺ ومن يحبه أ...,Non-Offensive
...,...,...
2200,تره العثمانين احسن من الشريف بس الشريف بس الشر...,Non-Offensive
2370,النصارى الي دينهم غير ديننا لا يقومون بهده الف...,Non-Offensive
479,"""@User.IDX مجتمع الحريات """"الغير اسلامية"""" مجت...",Non-Offensive
1437,@User.IDX @User.IDX ربنا ينولها ألفي بالها يار...,Non-Offensive


In [5]:
import numpy as np

In [6]:
import emoji
import nltk
import re
import string

arabic_stopwords = set(nltk.corpus.stopwords.words("arabic"))

arabic_diacritics = re.compile("""
                             ّ    | # Tashdid
                             َ    | # Fatha
                             ً    | # Tanwin Fath
                             ُ    | # Damma
                             ٌ    | # Tanwin Damm
                             ِ    | # Kasra
                             ٍ    | # Tanwin Kasr
                             ْ    | # Sukun
                             ـ     # Tatwil/Kashida
                         """, re.VERBOSE)

arabic_punctuations = '''`÷×؛<>_()*&^%][ـ،/:"؟.,'{}~¦+|!”…“–ـ'''
english_punctuations = string.punctuation
punctuations = arabic_punctuations + english_punctuations


def remove_urls (text):
    text = re.sub(r'(https|http)?:\/\/(\w|\.|\/|\?|\=|\&|\%)*\b', '', text, flags=re.MULTILINE)
    return text


def remove_emails(text):
    text = re.sub(r"(^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$)", "",  text, flags=re.MULTILINE)
    return text

def remove_emoji(text):
    return emoji.get_emoji_regexp().sub(u'', text)

def normalization(text):
    text = re.sub("[إأآا]", "ا", text)
    text = re.sub("ى", "ي", text)
    text = re.sub("ؤ", "ء", text)
    text = re.sub("ئ", "ء", text)
    text = re.sub("ة", "ه", text)
    text = re.sub("گ", "ك", text)
    return text

def remove_diacritics(text):
    text = re.sub(arabic_diacritics, '', text)
    return text

def remove_stopwords(text):
    filtered_sentence = [w for w in text.split() if not w in arabic_stopwords]
    return ' '.join(filtered_sentence)

def cleaning_content(line):
    if (isinstance(line, float)):
        return None
    line.replace('\n', ' ')
    line = remove_emails(line)
    line = remove_urls(line)
    line = remove_emoji(line)
    nline = [w if '@' not in w else 'USERID' for w in line.split()]
    line = ' '.join(nline)
    line = line.replace('RT', '').replace('<LF>', '').replace('<br />','').replace('&quot;', '').replace('<url>', '').replace('USERID', '')


    # add spaces between punc,
    line = line.translate(str.maketrans({key: " {0} ".format(key) for key in punctuations}))

    # then remove punc,
    translator = str.maketrans('', '', punctuations)
    line = line.translate(translator)

    line = remove_stopwords(line)
    line=remove_diacritics(normalization(line))
    return line

def hasDigits(s):
    return any( 48 <= ord(char) <= 57  or 1632 <= ord(char) <= 1641 for char in s)


In [7]:
df.Comment = df.Comment.apply(cleaning_content)

In [8]:
comments = ' '.join(list(df.Comment))
words = comments.split(' ')

In [9]:
words

['فتره',
 'الصغر',
 'والمراهقه',
 'يكون',
 'الصعب',
 'تقبل',
 'تعليقات',
 'الاصدقاء',
 'حول',
 'شعرك',
 'المجعد',
 'وهنا',
 'ياتي',
 'دور',
 'الوالدين',
 'ادخال',
 'الثقه',
 'الي',
 'نفسيه',
 'طفلهما',
 'تقدم',
 'بالسن',
 'يصبح',
 'الامر',
 'عاديا',
 'اتكلم',
 'تجربتي',
 'شخصيه',
 'ردا',
 'معظم',
 'الردود',
 'احب',
 'اوضحلكم',
 'ان',
 'عمليات',
 'زرع',
 'الكلي',
 'تتم',
 'الا',
 'في',
 'حالات',
 'معينه',
 'تضمن',
 'عدم',
 'تكرار',
 'الفشل',
 'في',
 'الكليه',
 'المزروعه',
 'وهذا',
 'يحدث',
 'الامراض',
 'الوراثيه',
 'لابد',
 'استبعادها',
 'قبل',
 'بدء',
 'اجراءات',
 'الزرع',
 'فارجو',
 'التواصل',
 'الاطباء',
 'قبل',
 'تجميع',
 'اي',
 'مبلغ',
 'مالي',
 'لعمليه',
 'الزرع',
 'لضمان',
 'وصول',
 'المال',
 'الي',
 'مستحقيه',
 'التواصل',
 'تليفونيا',
 'متاح',
 'حاليا',
 'ويرجي',
 'الذهاب',
 'شخصيا',
 'لمكان',
 'تواجد',
 'الطفل',
 'الطفل',
 'اسمه',
 'محمد',
 'سعد',
 'النادي',
 'موجود',
 'ايام',
 'الاحد',
 'والتلات',
 'والخميس',
 'مستشفي',
 'الدمرداش',
 'مستشفيات',
 'جامعه',
 'عين',
 'شمس',
 'مبن

In [10]:
unigram = (pd.Series(nltk.ngrams(words, 1)).value_counts())[:20]
unigram

(و,)           1232
(الله,)         900
(ان,)           456
(الي,)          383
(انا,)          319
(اللي,)         298
(او,)           245
(مش,)           206
(والله,)        200
(كان,)          193
(انت,)          189
(عمر,)          172
(الا,)          157
(يعني,)         154
(العرب,)        148
(انه,)          146
(علي,)          134
(شي,)           133
(السعوديه,)     133
(قطر,)          131
dtype: int64

In [11]:
from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained('aubmindlab/bert-base-arabertv02', do_lower_case=True)

MAX_LEN = 256

def preprocessing_for_bert(data):
    """Perform required preprocessing steps for pretrained BERT.
    @param    data (np.array): Array of texts to be processed.
    @return   input_ids (torch.Tensor): Tensor of token ids to be fed to a model.
    @return   attention_masks (torch.Tensor): Tensor of indices specifying which
                  tokens should be attended to by the model.
    """
    # Create empty lists to store outputs
    input_ids = []
    attention_masks = []
    for sent in data:
        encoded_sent = tokenizer.encode_plus(
            text=cleaning_content(sent),  # Preprocess sentence
            add_special_tokens=True,        # Add `[CLS]` and `[SEP]`
            max_length=MAX_LEN,             # Max length to truncate/pad
            pad_to_max_length=True,         # Pad sentence to max length
            #return_tensors='pt',           # Return PyTorch tensor
            return_attention_mask=True      # Return attention mask
        )
        # Add the outputs to the lists
        input_ids.append(encoded_sent.get('input_ids'))
        attention_masks.append(encoded_sent.get('attention_mask'))
    # Convert lists to tensors
    input_ids = torch.tensor(input_ids)
    attention_masks = torch.tensor(attention_masks)

    return input_ids, attention_masks

In [12]:
data

Unnamed: 0,Comment,Majority_Label
0,@User.IDX في فترة الصغر والمراهقة يكون من الصع...,Non-Offensive
1,"""ردا على معظم الردود .. أحب اوضحلكم ان عمليات ...",Non-Offensive
2,@User.IDX يجب ان تذكروا ان لكل سنة ثيم للحفل و...,Non-Offensive
3,بتعمل حلقة صغيرة عشان عندي امتحان بكرة ومتضيعل...,Non-Offensive
5,الإنسانة الوحيدة اللي ممكن أتابع جميع حلقاتها ...,Non-Offensive
...,...,...
3994,@User.IDX غبيه بس تستاهل اللي صار لها,Offensive
3995,والله ما اعرف ابكي على حالنا لي وصلنا ليه ام ا...,Non-Offensive
3996,انا الحين ملخبط ذي كلها في البطاقه الائتمانية ...,Non-Offensive
3997,أنا متأكد لو كان ستيفن_هوكينج مسلما وثبتت تجار...,Non-Offensive


In [13]:
data["Majority_Label"].replace({"Non-Offensive":"0","Offensive":"1"},inplace=True)
test_data["Majority_Label"].replace({"Non-Offensive":"0","Offensive":"1"},inplace=True)

In [14]:
data

Unnamed: 0,Comment,Majority_Label
0,@User.IDX في فترة الصغر والمراهقة يكون من الصع...,0
1,"""ردا على معظم الردود .. أحب اوضحلكم ان عمليات ...",0
2,@User.IDX يجب ان تذكروا ان لكل سنة ثيم للحفل و...,0
3,بتعمل حلقة صغيرة عشان عندي امتحان بكرة ومتضيعل...,0
5,الإنسانة الوحيدة اللي ممكن أتابع جميع حلقاتها ...,0
...,...,...
3994,@User.IDX غبيه بس تستاهل اللي صار لها,1
3995,والله ما اعرف ابكي على حالنا لي وصلنا ليه ام ا...,0
3996,انا الحين ملخبط ذي كلها في البطاقه الائتمانية ...,0
3997,أنا متأكد لو كان ستيفن_هوكينج مسلما وثبتت تجار...,0


In [15]:
test_data

Unnamed: 0,Comment,Majority_Label
501,ممكن تعمل فيديو عن crispr ابغى اعرف رأيك في هذ...,0
3118,بقول لأمي كدسوا ذهب للأزمة القادمة تقولي أصلا ...,0
228,الي جاي من خلف لايك ❤️👍🏻,0
2879,@User.IDX عباسي مدني. رحمك الله يا رجل ورحم أم...,1
1220,أكرم مخلوق عند الله هو سيدنا محمد ﷺ ومن يحبه أ...,0
...,...,...
2200,تره العثمانين احسن من الشريف بس الشريف بس الشر...,0
2370,النصارى الي دينهم غير ديننا لا يقومون بهده الف...,0
479,"""@User.IDX مجتمع الحريات """"الغير اسلامية"""" مجت...",0
1437,@User.IDX @User.IDX ربنا ينولها ألفي بالها يار...,0


In [16]:
max_len = 512

# Concatenate train data and test data
all_tweets = np.concatenate([data.Comment.values, test_data.Comment.values])

# Encode our concatenated data
encoded_comments = [tokenizer.encode(sent, add_special_tokens=True) for sent in all_tweets]

# Find the maximum length
max_len1 = max([len(sent) for sent in encoded_comments])
print('Max length: ', max_len1)

Token indices sequence length is longer than the specified maximum sequence length for this model (776 > 512). Running this sequence through the model will result in indexing errors


Max length:  823


In [17]:
X_train = data.Comment.values
Y_train = data.Majority_Label.values

X_val= test_data.Comment.values
Y_val = test_data.Majority_Label.values

In [18]:
test_data

Unnamed: 0,Comment,Majority_Label
501,ممكن تعمل فيديو عن crispr ابغى اعرف رأيك في هذ...,0
3118,بقول لأمي كدسوا ذهب للأزمة القادمة تقولي أصلا ...,0
228,الي جاي من خلف لايك ❤️👍🏻,0
2879,@User.IDX عباسي مدني. رحمك الله يا رجل ورحم أم...,1
1220,أكرم مخلوق عند الله هو سيدنا محمد ﷺ ومن يحبه أ...,0
...,...,...
2200,تره العثمانين احسن من الشريف بس الشريف بس الشر...,0
2370,النصارى الي دينهم غير ديننا لا يقومون بهده الف...,0
479,"""@User.IDX مجتمع الحريات """"الغير اسلامية"""" مجت...",0
1437,@User.IDX @User.IDX ربنا ينولها ألفي بالها يار...,0


In [19]:
token_ids = list(preprocessing_for_bert([X_train[0]])[0].squeeze().numpy())
print('Original: ', X_train[0])
print('Token IDs: ', token_ids)

# Run function `preprocessing_for_bert` on the train set and the validation set
print('Tokenizing data...')
train_inputs, train_masks = preprocessing_for_bert(X_train)
val_inputs, val_masks = preprocessing_for_bert(X_val)

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


Original:  @User.IDX في فترة الصغر والمراهقة يكون من الصعب تقبل تعليقات الأصدقاء حول شعرك المجعد😔 وهنا يأتي دور الوالدين في ادخال الثقة الى نفسية طفلهما لكن مع تقدم بالسن يصبح الأمر عاديا (اتكلم عن تجربتي شخصية) 
Token IDs:  [2, 47639, 195, 21835, 34848, 2389, 887, 6742, 8007, 23706, 33387, 795, 4730, 209, 2527, 339, 2339, 43978, 193, 1150, 20004, 17854, 427, 2389, 487, 6599, 195, 51854, 321, 1541, 47248, 5520, 3253, 26188, 682, 8144, 40752, 18884, 195, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [20]:
Y_val

array(['0', '0', '0', '1', '0', '0', '0', '0', '0', '0', '1', '0', '0',
       '0', '0', '0', '1', '0', '0', '1', '0', '1', '1', '0', '0', '0',
       '0', '0', '0', '0', '0', '0', '0', '0', '1', '0', '0', '0', '0',
       '0', '1', '0', '0', '0', '0', '0', '0', '1', '0', '0', '0', '0',
       '0', '0', '0', '0', '0', '0', '0', '1', '0', '0', '0', '0', '1',
       '0', '0', '1', '0', '1', '1', '0', '0', '0', '0', '1', '0', '0',
       '0', '0', '0', '0', '0', '0', '0', '0', '0', '1', '0', '0', '0',
       '0', '0', '0', '1', '0', '0', '1', '0', '0', '0', '0', '0', '0',
       '1', '0', '1', '0', '0', '0', '1', '0', '0', '0', '0', '0', '0',
       '0', '0', '0', '1', '0', '0', '0', '1', '0', '1', '1', '0', '0',
       '0', '0', '0', '0', '0', '1', '0', '0', '0', '0', '0', '0', '0',
       '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0',
       '0', '0', '0', '0', '0', '1', '0', '0', '0', '0', '0', '0', '1',
       '0', '0', '0', '1', '0', '0', '0', '0', '0', '0', '0', '0

In [21]:
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler

train_labels = torch.tensor(Y_train.astype(float))
val_labels = torch.tensor(Y_val.astype(float))

In [22]:
batch_size = 32

train_data = TensorDataset(train_inputs, train_masks, train_labels)
train_sampler = RandomSampler(train_data)
train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=batch_size)

val_data = TensorDataset(val_inputs, val_masks, val_labels)
val_sampler = SequentialSampler(val_data)
val_dataloader = DataLoader(val_data, sampler=val_sampler, batch_size=batch_size)


In [23]:
%%time
import torch
import torch.nn as nn
from transformers import BertModel

# Create the BertClassfier class
class BertClassifier(nn.Module):
    """Bert Model for Classification Tasks.
    """
    def __init__(self, freeze_bert=False):
        """
        @param    bert: a BertModel object
        @param    classifier: a torch.nn.Module classifier
        @param    freeze_bert (bool): Set `False` to fine-tune the BERT model
        """
        super(BertClassifier, self).__init__()
        # Specify hidden size of BERT, hidden size of our classifier, and number of labels
        D_in, H, D_out = 768, 50, 2

        # Instantiate BERT model
        self.bert = BertModel.from_pretrained('aubmindlab/bert-base-arabertv02')

        # Instantiate an one-layer feed-forward classifier
        self.classifier = nn.Sequential(
            nn.Linear(D_in, H),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(H, D_out)
        )

        # Freeze the BERT model
        if freeze_bert:
            for param in self.bert.parameters():
                param.requires_grad = False
        
    def forward(self, input_ids, attention_mask):
        """
        Feed input to BERT and the classifier to compute logits.
        @param    input_ids (torch.Tensor): an input tensor with shape (batch_size,
                      max_length)
        @param    attention_mask (torch.Tensor): a tensor that hold attention mask
                      information with shape (batch_size, max_length)
        @return   logits (torch.Tensor): an output tensor with shape (batch_size,
                      num_labels)
        """
        # Feed input to BERT
        outputs = self.bert(input_ids=input_ids,
                            attention_mask=attention_mask)
        
        # Extract the last hidden state of the token `[CLS]` for classification task
        last_hidden_state_cls = outputs[0][:, 0, :]

        # Feed input to classifier to compute logits
        logits = self.classifier(last_hidden_state_cls)

        return logits

Wall time: 21.5 ms


In [24]:
from transformers import AdamW, get_linear_schedule_with_warmup

def initialize_model(epochs=4):
    """Initialize the Bert Classifier, the optimizer and the learning rate scheduler.
    """
    # Instantiate Bert Classifier
    bert_classifier = BertClassifier(freeze_bert=False)

    # Tell PyTorch to run the model on GPU
    bert_classifier.to(device)

    # Create the optimizer
    optimizer = AdamW(bert_classifier.parameters(),
                      lr=0.0001,    # Default learning rate
                      eps=1e-8    # Default epsilon value
                      )

    # Total number of training steps
    total_steps = len(train_dataloader) * epochs

    # Set up the learning rate scheduler
    scheduler = get_linear_schedule_with_warmup(optimizer,
                                                num_warmup_steps=0, # Default value
                                                num_training_steps=total_steps)
    return bert_classifier, optimizer, scheduler

In [26]:
import random
import time
import torch
from torch import nn
# Specify loss function
loss_fn = nn.CrossEntropyLoss()

def set_seed(seed_value=42):
    """Set seed for reproducibility.
    """
    random.seed(seed_value)
    np.random.seed(seed_value)
    torch.manual_seed(seed_value)
    torch.cuda.manual_seed_all(seed_value)

def train(model, train_dataloader, val_dataloader=None, epochs=4, evaluation=False):
    """Train the BertClassifier model.
    """
    # Start training loop
    print("Start training...\n")
    for epoch_i in range(epochs):
        # =======================================
        #               Training
        # =======================================
        # Print the header of the result table
        print(f"{'Epoch':^7} | {'Train Loss':^12} | {'Val Loss':^10} | {'Val Acc':^9}")
        print("-"*70)

        # Measure the elapsed time of each epoch
        t0_epoch, t0_batch = time.time(), time.time()

        # Reset tracking variables at the beginning of each epoch
        total_loss, batch_loss, batch_counts = 0, 0, 0

        # Put the model into the training mode
        model.train()

        # For each batch of training data...
        for step, batch in enumerate(train_dataloader):
            batch_counts +=1
            # Load batch to GPU
            b_input_ids, b_attn_mask, b_labels = tuple(t.to(device) for t in batch)
            
            # Always clear any previously calculated gradients before performing a
            # backward pass. PyTorch doesn't do this automatically because 
            # accumulating the gradients is "convenient while training RN
            # Zero out any previously calculated gradients
            model.zero_grad()

            # Perform a forward pass. This will return logits.
            logits = model(b_input_ids, b_attn_mask)
            
            # Accumulate the training loss over all of the batches so that we can
            # calculate the average loss at the end. `loss` is a Tensor containing a
            # single value; the `.item()` function just returns the Python value 
            # from the tensor.

            # Compute loss and accumulate the loss values
            loss = loss_fn(logits, b_labels.long())
            batch_loss += loss.item()
            total_loss += loss.item()

            # Perform a backward pass to calculate gradients
            loss.backward()

            # Clip the norm of the gradients to 1.0 to prevent "exploding gradients"
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

            # Update parameters and the learning rate
            optimizer.step()
            scheduler.step()

            # Print the loss values and time elapsed for every 20 batches
            if (step % 20 == 0 and step != 0) or (step == len(train_dataloader) - 1):
                # Calculate time elapsed for 20 batches
                time_elapsed = time.time() - t0_batch

                # Print training results
                print(f"{epoch_i + 1:^7} | {step:^7} | {batch_loss / batch_counts:^12.6f} | {'-':^10} | {'-':^9} | {time_elapsed:^9.2f}")

                # Reset batch tracking variables
                batch_loss, batch_counts = 0, 0
                t0_batch = time.time()

        # Calculate the average loss over the entire training data
        avg_train_loss = total_loss / len(train_dataloader)
        

        print("-"*70)
        # =======================================
        #               Evaluation
        # =======================================
        if evaluation == True:
            # After the completion of each training epoch, measure the model's performance
            # on our validation set.
            val_loss, val_accuracy = evaluate(model, val_dataloader)

            # Print performance over the entire training data
            time_elapsed = time.time() - t0_epoch
            
            print(f"{epoch_i + 1:^7} | {'-':^7} | {avg_train_loss:^12.6f} | {val_loss:^10.6f} | {val_accuracy:^9.2f}")
            print("-"*70)
        print("\n")
        #torch.save(model.state_dict(), './mymodel.h5')
        #model.save_pretrained('./mymodel.h5',model)
        torch.save(model, './mymodel1.h5')
        
    torch.save(model, './mymodel1.h5')
    print("Training complete!")


def evaluate(model, val_dataloader):
    """After the completion of each training epoch, measure the model's performance
    on our validation set.
    """
    # Put the model into the evaluation mode. The dropout layers are disabled during
    # the test time.
    model.eval()

    # Tracking variables
    val_accuracy = []
    val_loss = []

    # For each batch in our validation set...
    for batch in val_dataloader:
        # Load batch to GPU
        b_input_ids, b_attn_mask, b_labels = tuple(t.to(device) for t in batch)

        # Compute logits
        with torch.no_grad():
            logits = model(b_input_ids, b_attn_mask)

        # Compute loss
        loss = loss_fn(logits, b_labels.long())
        val_loss.append(loss.item())

        # Get the predictions
        preds = torch.argmax(logits, dim=1).flatten()

        # Calculate the accuracy rate
        accuracy = (preds == b_labels).cpu().numpy().mean() * 100
        val_accuracy.append(accuracy)

    # Compute the average accuracy and loss over the validation set.
    val_loss = np.mean(val_loss)
    val_accuracy = np.mean(val_accuracy)

    return val_loss, val_accuracy

In [27]:
import gc

gc.collect()

torch.cuda.empty_cache()

In [54]:
set_seed(32)    # Set seed for reproducibility
bert_classifier, optimizer, scheduler = initialize_model(epochs=3)
train(bert_classifier, train_dataloader, val_dataloader, epochs=3, evaluation=True)

RuntimeError: CUDA out of memory. Tried to allocate 188.00 MiB (GPU 0; 8.00 GiB total capacity; 6.39 GiB already allocated; 0 bytes free; 6.44 GiB reserved in total by PyTorch)