# Import Data

In [1]:
from google.colab import drive

# Mount Google Drive inside the Colab VM so the dataset files can be read
# from paths below /content/drive.
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd

# Locations of the three CSV splits on the mounted Google Drive.
train_path = '/content/drive/MyDrive/NNDL_HW5/Train.csv'
val_path = '/content/drive/MyDrive/NNDL_HW5/Val.csv'
test_path = '/content/drive/MyDrive/NNDL_HW5/Test.csv'

# Read every split into its own DataFrame.
train_df, val_df, test_df = (
    pd.read_csv(csv_path) for csv_path in (train_path, val_path, test_path)
)


# train_df.head()
# val_df.head()
# test_df.sample(20)

In [3]:
# Encode the string labels as integers: "real" -> 1, "fake" -> 0.
#
# `Series.map` replaces every value that is missing from the mapping with NaN,
# so mapping a column that has already been encoded would wipe all labels.
# Only columns that are not numeric yet are converted, which keeps this cell
# safe to run more than once.
label_to_id = {"real" : 1 , "fake" : 0}

for split_df in (train_df, val_df):
    if not pd.api.types.is_numeric_dtype(split_df['label']):
        split_df['label'] = split_df['label'].map(label_to_id)

train_df

Unnamed: 0,id,tweet,label
0,1,The CDC currently reports 99031 deaths. In gen...,1
1,2,States reported 1121 deaths a small rise from ...,1
2,3,Politically Correct Woman (Almost) Uses Pandem...,0
3,4,#IndiaFightsCorona: We have 1524 #COVID testin...,1
4,5,Populous states can generate large case counts...,1
...,...,...,...
6415,6416,A tiger tested positive for COVID-19 please st...,0
6416,6417,???Autopsies prove that COVID-19 is??� a blood...,0
6417,6418,_A post claims a COVID-19 vaccine has already ...,0
6418,6419,Aamir Khan Donate 250 Cr. In PM Relief Cares Fund,0


# Preprocess Data

In [4]:
# !pip install transformers

In [5]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as functional
import matplotlib.pyplot as plt
from transformers import BertForSequenceClassification, AdamW, BertConfig
import gc
from transformers import BertModel
from sklearn.metrics import roc_auc_score,f1_score
import time
import datetime

KeyboardInterrupt: 

In [None]:
# Stack the train and validation frames row-wise with a fresh 0..n-1 index,
# then discard the "id" column.
data = (
    pd.concat([train_df, val_df], ignore_index=True)
    .drop(columns=["id"])
)
data

In [None]:
from transformers import BertTokenizer

# Load the tokenizer of the uncased BERT checkpoint; lower-casing is requested
# explicitly.
tokenizer = BertTokenizer.from_pretrained(
    'bert-base-uncased',
    do_lower_case=True,
)

In [None]:
import re
from string import punctuation

def preprocess(data):
    """Normalise an array of tweets in place and return it.

    Every entry is lower-cased, its whitespace runs are collapsed to single
    spaces, and hashtags, URLs, @-mentions and ASCII punctuation are removed
    (in that order).

    Args:
        data: Mutable, integer-indexable sequence of ``str`` that exposes
            ``.shape[0]`` (for example a 1-D NumPy object array).  The entries
            are overwritten with their cleaned versions.

    Returns:
        The same ``data`` object.  A tweet containing at least one word ends
        with a single trailing space, because a space is appended after every
        word before the final whitespace collapse.
    """
    # Raw strings keep regex escapes such as \w and \s out of Python's own
    # string-escape handling (plain literals trigger invalid-escape warnings).
    hashtag_re = re.compile(r'#[\w\-]+')
    url_re = re.compile(
        r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|'
        r'[!*,]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
    )
    mention_re = re.compile(r'@[\w\-]+')
    # The punctuation set must be escaped: left raw, its "\]" is read as an
    # escaped bracket, so the backslash itself would never be stripped.
    punctuation_re = re.compile('[{}]+'.format(re.escape(punctuation)))
    whitespace_re = re.compile(r'\s+')

    for i in range(data.shape[0]):
        # Lower-case and re-join the words, each followed by one space.
        text = ''.join(word + ' ' for word in data[i].lower().split())
        # Hashtags are removed before URLs and mentions, as in the original order.
        text = hashtag_re.sub('', text)
        text = url_re.sub('', text)
        text = mention_re.sub('', text)
        text = punctuation_re.sub('', text)
        # The removals above can leave gaps; collapse them again.
        data[i] = whitespace_re.sub(' ', text)

    return data

# Clean a private, writable copy of the tweet column and store the result back
# in the frame explicitly.  The array returned by `.values` is not guaranteed
# to be a writable view of the DataFrame (with pandas Copy-on-Write it is
# read-only), so later reads of `data.tweet` must not depend on preprocess()
# having modified the frame through such a view.
tweets = preprocess(data.tweet.to_numpy(copy=True))
data['tweet'] = tweets
print(tweets)

In [None]:
# Show one cleaned tweet, its WordPiece tokens and the matching vocabulary ids.
sample_tweet = tweets[10]

print(' Original: \n', sample_tweet)
print('Tokenized: \n ', tokenizer.tokenize(sample_tweet))
print('Token IDs: \n',
      tokenizer.convert_tokens_to_ids(tokenizer.tokenize(sample_tweet)))

In [None]:
# Pull the tweet texts and their labels out of the frame as arrays.
tweets, labels = data.tweet.values, data.label.values

In [None]:
# Tokenise all tweets in one batched call.  Every row gets the special tokens
# and is padded or truncated to exactly `max_length` ids, so the tokenizer
# returns rectangular tensors directly.
#
# `padding='max_length'` replaces the deprecated `pad_to_max_length=True`, and
# `truncation=True` states explicitly what previously happened only through a
# fallback that emits a warning for every call.
max_length = 128

encoded = tokenizer(
    list(tweets),
    add_special_tokens = True,
    max_length = max_length,
    padding = 'max_length',
    truncation = True,
    return_attention_mask = True,
    return_tensors = 'pt',
)

input_ids = encoded['input_ids']
attention_masks = encoded['attention_mask']
labels = torch.tensor(labels)

print('Original: ', tweets[10])
print('Token IDs:', input_ids[10])

# Train and Test

In [None]:
from torch.utils.data import TensorDataset, random_split

# One dataset item = (token ids, attention mask, label).
dataset = TensorDataset(input_ids, attention_masks, labels)

# 90 % of the items are used for training, the remainder for validation.
train_size = int(0.9 * len(dataset))
val_size = len(dataset) - train_size

# A seeded generator makes the random split reproducible.
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset = random_split(
    dataset, [train_size, val_size], generator=split_generator
)

print('{} training   samples'.format(train_size))
print('{} validation samples'.format(val_size))

In [None]:
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler

batch_size = 4

# Training batches are reshuffled on every pass; validation keeps a fixed order.
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
validation_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

In [None]:
def format_time(elapsed):
    """Format a duration in seconds as the string form of a ``timedelta``.

    Args:
        elapsed: Duration in seconds; rounded to a whole second first.

    Returns:
        ``str(datetime.timedelta(...))`` for the rounded value, e.g.
        ``'0:01:05'`` for 65 seconds.
    """
    whole_seconds = int(round(elapsed))
    duration = datetime.timedelta(seconds=whole_seconds)
    return str(duration)

# (1) BERT Model

In [None]:
# AdamW is taken from PyTorch, the same import the BiGRU section of this
# notebook uses.  The optimizer class bundled with transformers is deprecated
# in favour of torch.optim.AdamW.
# NOTE(review): torch's AdamW defaults to weight_decay=0.01 - pass
# weight_decay explicitly where a different value is wanted.
from torch.optim import AdamW
from transformers import BertForSequenceClassification, BertConfig
from transformers import BertModel

# Prefer the GPU when one is available, otherwise run on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Bare BERT encoder without a task head; the classifier classes defined later
# wrap it and add their own output layer.
model = BertModel.from_pretrained("bert-base-uncased", torch_dtype=torch.float32)

model.to(device)

In [None]:
# Training hyper-parameters.
epochs = 3
criterion = nn.CrossEntropyLoss()

# The optimizer tracks the parameters of `model` only.  10e-5 equals 1e-4.
optimizer = AdamW(model.parameters(), lr=10e-5, eps=1e-8)

# JavaScript (Colab keep-alive snippet)

In [None]:
# function ConnectButton(){
#   console.log("Connect pushed");
#   document.querySelector("#top-toolbar > colab-connectbutton").shadowRoot.querySelector("#connect").click()
# }
# setInterval(ConnectButton , 40000);

# Bert Model

In [None]:
class BertClassifier(nn.Module):
    """Binary classifier: encoder pooled output -> linear layer -> sigmoid.

    Args:
        model_tune: Encoder module.  It is called with the keyword arguments
            ``input_ids``, ``attention_mask`` and ``token_type_ids`` and must
            return an object with a ``pooler_output`` attribute whose last
            dimension is 768.
    """

    def __init__(self, model_tune):
        super().__init__()
        self.bert = model_tune
        self.classifier = nn.Linear(768 , 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, input_ids, attention_mask, token_type_ids=None):
        """Return the positive-class probability of every sample.

        Args:
            input_ids: Token-id tensor; cast to ``torch.long``.
            attention_mask: Mask tensor; cast to ``torch.float32``.
            token_type_ids: Optional segment-id tensor; cast to ``torch.long``
                when given.

        Returns:
            Float tensor of shape ``(batch,)`` holding sigmoid probabilities.
            The values are differentiable, so they can be passed to
            ``nn.BCELoss`` together with a ``(batch,)`` float target.  Apply
            ``>= 0.5`` to obtain hard predictions.
        """
        input_ids = input_ids.to(torch.long)
        attention_mask = attention_mask.to(torch.float32)

        if token_type_ids is not None:
            # Segment ids are looked up in an embedding table and therefore
            # have to be integers, not floats.
            token_type_ids = token_type_ids.to(torch.long)

        bert_output = self.bert(input_ids = input_ids,
                                attention_mask = attention_mask,
                                token_type_ids = token_type_ids)

        logits = self.classifier(bert_output.pooler_output)

        # Thresholding here would produce a boolean tensor that carries no
        # gradient, so the raw probabilities are returned instead.  The
        # trailing size-1 dimension is dropped to line up with the labels.
        return self.sigmoid(logits).squeeze(-1)

In [None]:
# Wrap the encoder with the classification head, keep the weights in float32
# and move the whole module to the selected device.
model1 = BertClassifier(model).to(torch.float32)
model1.to(device)

# Every parameter (encoder and head) is trainable.
for weight in model1.parameters():
    weight.requires_grad = True

In [None]:
import copy
import random
import numpy as np
import torch
import time
import matplotlib.pyplot as plt

seed_val = 42
random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)

# model1 produces one sigmoid probability per tweet, so it is trained with
# binary cross-entropy.  The optimizer is created here, over model1's own
# parameters, so the classification head is updated together with the encoder.
loss_function = nn.BCELoss()
optimizer = torch.optim.AdamW(model1.parameters(), lr=10e-5, eps=1e-8)

training_stats = []
total_t0 = time.time()
best_accuracy = 0

train_losses = []
val_losses = []
train_accuracies = []
val_accuracies = []

for epoch_i in range(0, epochs):
    print('Epoch {}'.format(epoch_i + 1))

    t0 = time.time()
    total_train_loss = 0
    total_train_accuracy = 0
    model1.train()

    # Batch tensors get their own names so the dataset-level `input_ids` and
    # `labels` created earlier in the notebook are not overwritten.
    for step, batch in enumerate(train_dataloader):
        b_input_ids = batch[0].to(device)
        b_input_mask = batch[1].to(device)
        b_labels = batch[2].to(device)

        optimizer.zero_grad()

        probs = model1(b_input_ids, attention_mask=b_input_mask, token_type_ids=None)
        # Flatten so that probabilities and labels line up one-to-one.
        probs = probs.float().reshape(-1)
        loss = loss_function(probs, b_labels.float())
        total_train_loss += loss.item()

        loss.backward()
        torch.nn.utils.clip_grad_norm_(model1.parameters(), 1.0)
        optimizer.step()

        pred = (probs >= 0.5).long()
        total_train_accuracy += torch.sum(pred == b_labels.long()).item()

    avg_train_accuracy = total_train_accuracy / len(train_dataloader.dataset)
    # Each batch loss is already a mean over its samples: average over batches.
    avg_train_loss = total_train_loss / len(train_dataloader)
    train_losses.append(avg_train_loss)
    train_accuracies.append(avg_train_accuracy)

    print("Training Accuracy : {}".format(avg_train_accuracy))
    print("Training loss : {}".format(avg_train_loss))
    print('------------------------------------------')

    model1.eval()
    total_eval_accuracy = 0
    total_eval_loss = 0
    y_true = []
    y_pred = []
    y_score = []

    with torch.no_grad():
        for batch in validation_dataloader:
            b_input_ids = batch[0].to(device)
            b_input_mask = batch[1].to(device)
            b_labels = batch[2].to(device)

            probs = model1(b_input_ids, attention_mask=b_input_mask, token_type_ids=None)
            probs = probs.float().reshape(-1)
            total_eval_loss += loss_function(probs, b_labels.float()).item()

            pred = (probs >= 0.5).long()
            total_eval_accuracy += torch.sum(pred == b_labels.long()).item()
            y_true.append(b_labels.flatten())
            y_pred.append(pred)
            y_score.append(probs)

    avg_val_accuracy = total_eval_accuracy / len(validation_dataloader.dataset)
    avg_val_loss = total_eval_loss / len(validation_dataloader)
    val_losses.append(avg_val_loss)
    val_accuracies.append(avg_val_accuracy)

    print("Validation Accuracy: {}".format(avg_val_accuracy))
    print("Validation loss: {}".format(avg_val_loss))
    training_time = format_time(time.time() - t0)
    print('---------------------------------------------------\n\n')

    y_true = torch.cat(y_true).tolist()
    y_pred = torch.cat(y_pred).tolist()
    y_score = torch.cat(y_score).tolist()
    print("This epoch took: {:}".format(training_time))
    # ROC-AUC is a ranking metric, so it is computed from the probabilities.
    print('roc_auc score: ', roc_auc_score(y_true, y_score))
    print('F1 score : ', f1_score(y_true, y_pred))
    print()

    training_stats.append(
        {
            'epoch': epoch_i + 1,
            'Train Accur.': avg_train_accuracy,
            'Training Loss': avg_train_loss,
            'Valid_Loss': avg_val_loss,
            'Valid_Accur.': avg_val_accuracy,
            'Training Time': training_time,
        }
    )
    print()

    if avg_val_accuracy > best_accuracy:
        best_accuracy = avg_val_accuracy
        # Snapshot the classifier itself; a plain reference would keep
        # changing during the following epochs.
        best_model = copy.deepcopy(model1)

print()
print("-----------------------------------------------------")
print("Total time {:} (h:mm:ss)".format(format_time(time.time()-total_t0)))

In [None]:
# Two side-by-side panels: loss on the left, accuracy on the right.
epochs_range = range(1, epochs + 1)
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 5))

loss_ax.plot(epochs_range, train_losses, label='Training Loss')
loss_ax.plot(epochs_range, val_losses, label='Validation Loss')
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss')
loss_ax.set_title('Training and Validation Loss')
loss_ax.legend()

acc_ax.plot(epochs_range, train_accuracies, label='Training Accuracy')
acc_ax.plot(epochs_range, val_accuracies, label='Validation Accuracy')
acc_ax.set_xlabel('Epochs')
acc_ax.set_ylabel('Accuracy')
acc_ax.set_title('Training and Validation Accuracy')
acc_ax.legend()

plt.show()

# Test

In [None]:
import copy
import random
import numpy as np
import torch
import time
import matplotlib.pyplot as plt

# Wrap the shared encoder `model` in a new BertClassifier instance and move it
# to the selected device.
model1 = BertClassifier(model)
model1 = model1.to(device)

# Set all the parameters to require gradients
for param in model1.parameters():
    param.requires_grad = True

seed_val = 42
random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)

training_stats = []
total_t0 = time.time()
best_accuracy = 0

train_losses = []
val_losses = []
train_accuracies = []
val_accuracies = []

# Binary cross-entropy on the sigmoid probabilities returned by the model.
loss_function = nn.BCELoss()
optimizer = torch.optim.Adam(model1.parameters(), lr=2e-5)

# Training loop
for epoch_i in range(epochs):
    print('Epoch {}'.format(epoch_i + 1))

    t0 = time.time()
    total_train_loss = 0
    total_train_accuracy = 0
    model1.train()

    # Batch tensors get their own names so the dataset-level `input_ids` and
    # `labels` created earlier in the notebook are not overwritten.
    for step, batch in enumerate(train_dataloader):
        b_input_ids = batch[0].to(device).to(torch.long)
        b_input_mask = batch[1].to(device).to(torch.float32)
        b_labels = batch[2].to(device).to(torch.float32)

        optimizer.zero_grad()

        outputs = model1(b_input_ids, attention_mask=b_input_mask, token_type_ids=None)
        # BCELoss needs input and target of identical shape.  Flattening also
        # prevents `preds == labels` from broadcasting a (batch, 1) tensor
        # against a (batch,) tensor into a (batch, batch) comparison.
        probs = outputs.float().reshape(-1)

        loss = loss_function(probs, b_labels)
        total_train_loss += loss.item()

        loss.backward()
        torch.nn.utils.clip_grad_norm_(model1.parameters(), 1.0)
        optimizer.step()

        preds = (probs >= 0.5).long()
        total_train_accuracy += torch.sum(preds == b_labels.long()).item()

    avg_train_accuracy = total_train_accuracy / len(train_dataloader.dataset)
    # Each batch loss is already a mean over its samples: average over batches.
    avg_train_loss = total_train_loss / len(train_dataloader)
    train_losses.append(avg_train_loss)
    train_accuracies.append(avg_train_accuracy)

    print("Training Accuracy: {}".format(avg_train_accuracy))
    print("Training Loss: {}".format(avg_train_loss))
    print('------------------------------------------')

    model1.eval()
    total_eval_accuracy = 0
    total_eval_loss = 0
    y_true = []
    y_pred = []
    y_score = []

    with torch.no_grad():
        for batch in validation_dataloader:
            b_input_ids = batch[0].to(device).to(torch.long)
            b_input_mask = batch[1].to(device).to(torch.float32)
            b_labels = batch[2].to(device).to(torch.float32)

            outputs = model1(b_input_ids, attention_mask=b_input_mask, token_type_ids=None)
            probs = outputs.float().reshape(-1)

            total_eval_loss += loss_function(probs, b_labels).item()

            preds = (probs >= 0.5).long()
            total_eval_accuracy += torch.sum(preds == b_labels.long()).item()
            y_true.append(b_labels.long().flatten())
            y_pred.append(preds)
            y_score.append(probs)

    avg_val_accuracy = total_eval_accuracy / len(validation_dataloader.dataset)
    avg_val_loss = total_eval_loss / len(validation_dataloader)
    val_losses.append(avg_val_loss)
    val_accuracies.append(avg_val_accuracy)

    print("Validation Accuracy: {}".format(avg_val_accuracy))
    print("Validation Loss: {}".format(avg_val_loss))
    training_time = format_time(time.time() - t0)
    print('---------------------------------------------------\n\n')

    y_true = torch.cat(y_true).tolist()
    y_pred = torch.cat(y_pred).tolist()
    y_score = torch.cat(y_score).tolist()
    print("This epoch took: {:}".format(training_time))
    # ROC-AUC is a ranking metric, so it is computed from the probabilities.
    print('roc_auc score: ', roc_auc_score(y_true, y_score))
    print('F1 score: ', f1_score(y_true, y_pred))
    print()

    training_stats.append(
        {
            'epoch': epoch_i + 1,
            'Train Accur.': avg_train_accuracy,
            'Training Loss': avg_train_loss,
            'Valid_Loss': avg_val_loss,
            'Valid_Accur.': avg_val_accuracy,
            'Training Time': training_time,
        }
    )
    print()

    if avg_val_accuracy > best_accuracy:
        best_accuracy = avg_val_accuracy
        # Keep a snapshot; a plain reference to model1 would keep changing
        # during the following epochs.
        best_model = copy.deepcopy(model1)

print()
print("-----------------------------------------------------")
print("Total time {:} (h:mm:ss)".format(format_time(time.time()-total_t0)))


# Plot the per-epoch curves: loss on the left, accuracy on the right.
epochs_range = range(1, epochs + 1)
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 5))

loss_ax.plot(epochs_range, train_losses, label='Training Loss')
loss_ax.plot(epochs_range, val_losses, label='Validation Loss')
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss')
loss_ax.set_title('Training and Validation Loss')
loss_ax.legend()

acc_ax.plot(epochs_range, train_accuracies, label='Training Accuracy')
acc_ax.plot(epochs_range, val_accuracies, label='Validation Accuracy')
acc_ax.set_xlabel('Epochs')
acc_ax.set_ylabel('Accuracy')
acc_ax.set_title('Training and Validation Accuracy')
acc_ax.legend()

plt.show()


# Test 2

In [None]:
# Debugging pass: run one sweep of plain training steps over the training data
# and report the tensor dtypes that are fed to the model.
# NOTE(review): uses whichever `optimizer` is currently defined - confirm it
# was built over model1's parameters before running this cell.
loss_function = nn.BCELoss()
total_train_loss = 0  # fresh accumulator instead of a value left by another cell
model1.train()

for step, batch in enumerate(train_dataloader):
    # Transfer the batch tensors to the device (GPU or CPU)
    b_input_ids = batch[0].to(device)
    b_input_mask = batch[1].to(device)
    b_labels = batch[2].to(device)

    if step == 0:
        # type() is torch.Tensor for every tensor; the dtype is what matters
        # here.  Printed once to avoid flooding the cell output.
        print(b_input_ids.dtype, b_input_mask.dtype, b_labels.dtype)

    # Zero the gradients of all model parameters
    optimizer.zero_grad()

    # Forward pass.  The model returns a single tensor of probabilities, not
    # a (loss, logits) pair, so the loss is computed explicitly.
    outputs = model1(b_input_ids, attention_mask=b_input_mask, token_type_ids=None)
    probs = outputs.float().reshape(-1)
    loss = loss_function(probs, b_labels.float())

    # Accumulate the loss for logging or other purposes
    total_train_loss += loss.item()

    # Perform backpropagation: Compute gradients
    loss.backward()

    # Clip the gradient norm to avoid exploding gradients
    torch.nn.utils.clip_grad_norm_(model1.parameters(), 1.0)

    # Update the model parameters
    optimizer.step()

    # If needed, print the requires_grad status of model parameters
    # for param in model1.parameters():
    #     print(param.requires_grad)


# (2) Bert With Freezing Parameters

In [None]:
# Continue from the best classifier found so far and train only its head.
# `.to(device)` is used instead of `.cuda()` so the cell also runs without a
# GPU.  model2 is the same object as best_model, so freezing affects both.
model2 = best_model.to(device)
for param in model2.bert.parameters():
    param.requires_grad = False

epochs = 3
learning_rate = 10e-5

# Optimise the parameters of model2 that are still trainable.  An optimizer
# over `model.parameters()` would track the encoder weights, which have just
# been frozen, and would never update the classifier layer.
optimizer = AdamW([p for p in model2.parameters() if p.requires_grad],
                  lr = learning_rate)

# The classifier emits one sigmoid probability per sample, which calls for
# binary cross-entropy rather than multi-class cross-entropy.
criterion = nn.BCELoss()

In [None]:
import copy
import random
import numpy as np
import torch
import time
import matplotlib.pyplot as plt

seed_val = 42
random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)

# model2 returns one sigmoid probability per tweet (no loss, no logits pair),
# so the loss is computed here.  The optimizer is built over model2's trainable
# parameters so that the unfrozen layers are really updated.
loss_function = nn.BCELoss()
optimizer = torch.optim.AdamW(
    [p for p in model2.parameters() if p.requires_grad], lr=learning_rate
)

training_stats = []
total_t0 = time.time()
best_accuracy = 0

train_losses = []
val_losses = []
train_accuracies = []
val_accuracies = []

for epoch_i in range(0, epochs):
    print('Epoch {}'.format(epoch_i + 1))

    t0 = time.time()
    total_train_loss = 0
    total_train_accuracy = 0
    model2.train()

    # Batch tensors get their own names so the dataset-level `input_ids` and
    # `labels` created earlier in the notebook are not overwritten.
    for step, batch in enumerate(train_dataloader):
        b_input_ids = batch[0].to(device)
        b_input_mask = batch[1].to(device)
        b_labels = batch[2].to(device)

        optimizer.zero_grad()

        # forward() takes no `labels` argument.
        probs = model2(b_input_ids, attention_mask=b_input_mask, token_type_ids=None)
        probs = probs.float().reshape(-1)
        loss = loss_function(probs, b_labels.float())
        total_train_loss += loss.item()

        loss.backward()
        torch.nn.utils.clip_grad_norm_(model2.parameters(), 1.0)
        optimizer.step()

        pred = (probs >= 0.5).long()
        total_train_accuracy += torch.sum(pred == b_labels.long()).item()

    avg_train_accuracy = total_train_accuracy / len(train_dataloader.dataset)
    # Each batch loss is already a mean over its samples: average over batches.
    avg_train_loss = total_train_loss / len(train_dataloader)
    train_losses.append(avg_train_loss)
    train_accuracies.append(avg_train_accuracy)

    print("Training Accuracy : {}".format(avg_train_accuracy))
    print("Training loss : {}".format(avg_train_loss))
    print('------------------------------------------')

    model2.eval()
    total_eval_accuracy = 0
    total_eval_loss = 0
    y_true = []
    y_pred = []
    y_score = []

    with torch.no_grad():
        for batch in validation_dataloader:
            b_input_ids = batch[0].to(device)
            b_input_mask = batch[1].to(device)
            b_labels = batch[2].to(device)

            probs = model2(b_input_ids, attention_mask=b_input_mask, token_type_ids=None)
            probs = probs.float().reshape(-1)
            total_eval_loss += loss_function(probs, b_labels.float()).item()

            pred = (probs >= 0.5).long()
            total_eval_accuracy += torch.sum(pred == b_labels.long()).item()
            y_true.append(b_labels.flatten())
            y_pred.append(pred)
            y_score.append(probs)

    avg_val_accuracy = total_eval_accuracy / len(validation_dataloader.dataset)
    avg_val_loss = total_eval_loss / len(validation_dataloader)
    val_losses.append(avg_val_loss)
    val_accuracies.append(avg_val_accuracy)

    print("Validation Accuracy : {}".format(avg_val_accuracy))
    print("Validation Loss : {}".format(avg_val_loss))
    training_time = format_time(time.time() - t0)
    print('---------------------------------------------------\n\n')

    y_true = torch.cat(y_true).tolist()
    y_pred = torch.cat(y_pred).tolist()
    y_score = torch.cat(y_score).tolist()
    print("This epoch took: {:}".format(training_time))
    # ROC-AUC is a ranking metric, so it is computed from the probabilities.
    print('roc_auc score: ', roc_auc_score(y_true, y_score))
    print('F1 score : ', f1_score(y_true, y_pred))
    print()

    training_stats.append(
        {
            'epoch': epoch_i + 1,
            'Train Accur.': avg_train_accuracy,
            'Training Loss': avg_train_loss,
            'Valid_Loss': avg_val_loss,
            'Valid_Accur.': avg_val_accuracy,
            'Training Time': training_time,
        }
    )
    print()

    if avg_val_accuracy > best_accuracy:
        best_accuracy = avg_val_accuracy
        # Snapshot the model trained in this cell; a plain reference would
        # keep changing during the following epochs.
        best_model = copy.deepcopy(model2)

print()
print("-----------------------------------------------------")
print("Total time {:} (h:mm:ss)".format(format_time(time.time()-total_t0)))


# Per-epoch curves for the frozen-encoder run: loss left, accuracy right.
epochs_range = range(1, epochs + 1)
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 5))

loss_ax.plot(epochs_range, train_losses, label='Training Loss')
loss_ax.plot(epochs_range, val_losses, label='Validation Loss')
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss')
loss_ax.set_title('Training and Validation Loss')
loss_ax.legend()

acc_ax.plot(epochs_range, train_accuracies, label='Training Accuracy')
acc_ax.plot(epochs_range, val_accuracies, label='Validation Accuracy')
acc_ax.set_xlabel('Epochs')
acc_ax.set_ylabel('Accuracy')
acc_ax.set_title('Training and Validation Accuracy')
acc_ax.legend()

plt.show()

# (3) Bert BiGRU

In [None]:
class BertGRUClassifier(nn.Module):
    """Binary classifier: encoder token states -> bidirectional GRU -> sigmoid.

    Args:
        model_tune: Encoder module.  It is called with the keyword arguments
            ``input_ids``, ``attention_mask`` and ``token_type_ids`` and must
            return an object with a ``last_hidden_state`` attribute of shape
            ``(batch, seq_len, 768)``.
    """

    def __init__(self, model_tune):
        super().__init__()
        self.bert = model_tune
        self.gru = nn.GRU(  input_size = 768,
                            hidden_size = 128,
                            num_layers = 1,
                            batch_first = True,
                            bidirectional = True)
        self.classifier = nn.Linear(128 * 2, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, input_ids, attention_mask, token_type_ids=None):
        """Return the positive-class probability of every sample.

        Returns:
            Float tensor of shape ``(batch, 1)`` holding sigmoid probabilities.
        """
        bert_output = self.bert(input_ids = input_ids,
                                attention_mask = attention_mask,
                                token_type_ids = token_type_ids)

        # The GRU consumes the per-token hidden states, not the encoder's
        # output container.
        _, hidden = self.gru(bert_output.last_hidden_state)

        # `hidden` is (2, batch, 128) for this single bidirectional layer:
        # index 0 is the forward direction, index 1 the backward direction.
        # torch.cat takes the tensors as one sequence argument.
        # NOTE(review): padded positions are fed to the GRU as well - consider
        # packing the sequences with the attention mask.
        concatenated = torch.cat((hidden[0 , : , :] , hidden[1 , : , :]) , dim = 1)

        logits = self.classifier(concatenated)
        return self.sigmoid(logits)

In [None]:
import copy
import random
import numpy as np
import time
import torch
import matplotlib.pyplot as plt
from torch.optim import AdamW
from torch import nn
from sklearn.metrics import roc_auc_score, f1_score

seed_val = 42

random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)

training_stats = []
total_t0 = time.time()
best_accuracy = 0

# `.to(device)` instead of `.cuda()` so the cell also runs without a GPU.
model4 = BertGRUClassifier(model).to(device)
epochs = 3
learning_rate = 10e-5
optimizer = AdamW(model4.parameters(), lr=learning_rate)
# The model emits a single sigmoid probability per sample.  Multi-class
# cross-entropy with argmax cannot be used on a one-column output, so binary
# cross-entropy and a 0.5 threshold are used instead.
criterion = nn.BCELoss()

train_losses = []
val_losses = []
train_accuracies = []
val_accuracies = []

for epoch_i in range(0, epochs):
    print('Epoch {}'.format(epoch_i + 1))

    t0 = time.time()
    total_train_loss = 0
    total_train_accuracy = 0
    model4.train()

    # Batch tensors get their own names so the dataset-level `input_ids` and
    # `labels` created earlier in the notebook are not overwritten.
    for step, batch in enumerate(train_dataloader):
        b_input_ids = batch[0].to(device)
        b_input_mask = batch[1].to(device)
        b_labels = batch[2].to(device)

        model4.zero_grad()
        out = model4(input_ids=b_input_ids, attention_mask=b_input_mask, token_type_ids=None)
        # Flatten (batch, 1) -> (batch,) to line up with the labels.
        probs = out.float().reshape(-1)

        loss = criterion(probs, b_labels.float())
        total_train_loss += loss.item()

        loss.backward()
        torch.nn.utils.clip_grad_norm_(model4.parameters(), 1.0)
        optimizer.step()

        pred = (probs >= 0.5).long()
        total_train_accuracy += torch.sum(pred == b_labels.long()).item()

    avg_train_accuracy = total_train_accuracy / len(train_dataloader.dataset)
    # Each batch loss is already a mean over its samples: average over batches.
    avg_train_loss = total_train_loss / len(train_dataloader)
    train_losses.append(avg_train_loss)
    train_accuracies.append(avg_train_accuracy)

    print("Training Accuracy : {}".format(avg_train_accuracy))
    print("Training Loss : {}".format(avg_train_loss))
    print('-----------------------------------------------')

    model4.eval()
    total_eval_accuracy = 0
    total_eval_loss = 0
    y_true = []
    y_pred = []
    y_score = []

    with torch.no_grad():
        for batch in validation_dataloader:
            b_input_ids = batch[0].to(device)
            b_input_mask = batch[1].to(device)
            b_labels = batch[2].to(device)

            out = model4(input_ids=b_input_ids, attention_mask=b_input_mask, token_type_ids=None)
            probs = out.float().reshape(-1)

            total_eval_loss += criterion(probs, b_labels.float()).item()
            pred = (probs >= 0.5).long()
            total_eval_accuracy += torch.sum(pred == b_labels.long()).item()
            y_true.append(b_labels.flatten())
            y_pred.append(pred)
            y_score.append(probs)

    avg_val_accuracy = total_eval_accuracy / len(validation_dataloader.dataset)
    avg_val_loss = total_eval_loss / len(validation_dataloader)
    val_losses.append(avg_val_loss)
    val_accuracies.append(avg_val_accuracy)

    print("Validation Accuracy : {}".format(avg_val_accuracy))
    print("Validation Loss : {}".format(avg_val_loss))

    training_time = format_time(time.time() - t0)
    print("This epoch took: {:}".format(training_time))
    print('-------------------------------------------------')

    y_true = torch.cat(y_true).tolist()
    y_pred = torch.cat(y_pred).tolist()
    y_score = torch.cat(y_score).tolist()
    # ROC-AUC is a ranking metric, so it is computed from the probabilities.
    print('roc_auc score : ', roc_auc_score(y_true, y_score))
    print('F1 score : ', f1_score(y_true, y_pred))

    training_stats.append(
        {
            'epoch': epoch_i + 1,
            'Train Accur.': avg_train_accuracy,
            'Training Loss': avg_train_loss,
            'Valid. Loss': avg_val_loss,
            'Valid. Accur.': avg_val_accuracy,
            'Training Time': training_time,
        }
    )

    if avg_val_accuracy > best_accuracy:
        best_accuracy = avg_val_accuracy
        # Keep a snapshot; a plain reference to model4 would keep changing
        # during the following epochs.
        best_model = copy.deepcopy(model4)

print("-----------------------------------------------------------------------")
print("Total time {:} (h:mm:ss)".format(format_time(time.time()-total_t0)))
print('Best Accuracy : ', best_accuracy)

# Per-epoch curves for the BERT + BiGRU run: loss left, accuracy right.
epochs_range = range(1, epochs + 1)
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 5))

loss_ax.plot(epochs_range, train_losses, label='Training Loss')
loss_ax.plot(epochs_range, val_losses, label='Validation Loss')
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss')
loss_ax.set_title('Training and Validation Loss')
loss_ax.legend()

acc_ax.plot(epochs_range, train_accuracies, label='Training Accuracy')
acc_ax.plot(epochs_range, val_accuracies, label='Validation Accuracy')
acc_ax.set_xlabel('Epochs')
acc_ax.set_ylabel('Accuracy')
acc_ax.set_title('Training and Validation Accuracy')
acc_ax.legend()

plt.show()

# (4) Bert BiGRU With Freeze Parameters

In [None]:
# class BertGRUClassifierWithFreeze(nn.Module):
#     def __init__(self, model_tune):
#         super().__init__()
#         self.bert = model_tune.bert
#         self.gru = nn.LSTM(input_size = 768,
#                             hidden_size = 768,
#                             num_layers = 2,
#                             batch_first = True,
#                             bidirectional = True)
#         self.classifier = nn.Linear(768 * 2, 2)
#         self.softmax = nn.Softmax(dim = 1)

#     def forward(self, input_ids, attention_mask, token_type_ids):
#         bert_output = self.bert(input_ids = input_ids,
#                                 attention_mask = attention_mask,
#                                 token_type_ids = token_type_ids)
#         out, _ = self.gru(bert_output[0])
#         logits = self.classifier(out[:, 1, :])
#         return self.softmax(logits)

In [None]:
# The only definition of BertGRUClassifierWithFreeze in this notebook is
# commented out, so the name is defined here to keep this cell runnable.
# NOTE(review): the layer sizes mirror BertGRUClassifier (hidden size 128, one
# bidirectional layer) and the head has two outputs to match the
# CrossEntropyLoss/argmax training loop below - confirm the intended design.
class BertGRUClassifierWithFreeze(nn.Module):
    """Encoder token states -> bidirectional GRU -> two-class linear head.

    Args:
        model_tune: Either a wrapper module that stores its encoder in a
            ``bert`` attribute, or the encoder itself.
    """

    def __init__(self, model_tune):
        super().__init__()
        self.bert = getattr(model_tune, 'bert', model_tune)
        self.gru = nn.GRU(input_size = 768,
                          hidden_size = 128,
                          num_layers = 1,
                          batch_first = True,
                          bidirectional = True)
        self.classifier = nn.Linear(128 * 2, 2)

    def forward(self, input_ids, attention_mask, token_type_ids=None):
        """Return raw class scores of shape ``(batch, 2)``."""
        bert_output = self.bert(input_ids = input_ids,
                                attention_mask = attention_mask,
                                token_type_ids = token_type_ids)
        # Element 0 of the encoder output holds the per-token hidden states.
        _, hidden = self.gru(bert_output[0])
        # Final forward-direction and backward-direction states, side by side.
        features = torch.cat((hidden[0], hidden[1]), dim = 1)
        # No softmax here: CrossEntropyLoss applies log-softmax itself.
        return self.classifier(features)


# `.to(device)` instead of `.cuda()` so the cell also runs without a GPU.
model5 = BertGRUClassifierWithFreeze(best_model).to(device)
for param in model5.bert.parameters():
    param.requires_grad = False

epochs = 3
learning_rate = 10e-5
# Only the parameters left trainable (GRU and linear head) are optimised.
optimizer = AdamW([p for p in model5.parameters() if p.requires_grad],
                  lr = learning_rate)
criterion = nn.CrossEntropyLoss()

In [None]:
import copy
import random
import numpy as np
import time
import torch
import matplotlib.pyplot as plt
from sklearn.metrics import roc_auc_score, f1_score

seed_val = 42
random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)

training_stats = []
total_t0 = time.time()
best_accuracy = 0

train_losses = []
val_losses = []
train_accuracies = []
val_accuracies = []

for epoch_i in range(0, epochs):
    print('Epoch {}'.format(epoch_i + 1))

    t0 = time.time()
    total_train_loss = 0
    total_train_accuracy = 0
    model5.train()

    # Batch tensors get their own names so the dataset-level `input_ids` and
    # `labels` created earlier in the notebook are not overwritten.
    for step, batch in enumerate(train_dataloader):
        b_input_ids = batch[0].to(device)
        b_input_mask = batch[1].to(device)
        b_labels = batch[2].to(device)

        model5.zero_grad()
        out = model5(input_ids=b_input_ids, attention_mask=b_input_mask, token_type_ids=None)
        loss = criterion(out, b_labels)
        total_train_loss += loss.item()

        loss.backward()
        torch.nn.utils.clip_grad_norm_(model5.parameters(), 1.0)
        optimizer.step()

        pred = torch.argmax(out, dim=1)
        total_train_accuracy += torch.sum(pred == b_labels).item()

    avg_train_accuracy = total_train_accuracy / len(train_dataloader.dataset)
    # Each batch loss is already a mean over its samples: average over batches.
    avg_train_loss = total_train_loss / len(train_dataloader)
    train_losses.append(avg_train_loss)
    train_accuracies.append(avg_train_accuracy)

    print("Training Accuracy : {}".format(avg_train_accuracy))
    print("Training Loss : {}".format(avg_train_loss))

    model5.eval()
    total_eval_accuracy = 0
    total_eval_loss = 0
    y_true = []
    y_pred = []
    y_score = []

    with torch.no_grad():
        for batch in validation_dataloader:
            b_input_ids = batch[0].to(device)
            b_input_mask = batch[1].to(device)
            b_labels = batch[2].to(device)

            out = model5(input_ids=b_input_ids, attention_mask=b_input_mask, token_type_ids=None)
            total_eval_loss += criterion(out, b_labels).item()
            pred = torch.argmax(out , dim = 1)
            total_eval_accuracy += torch.sum(pred == b_labels).item()
            y_true.append(b_labels.flatten())
            y_pred.append(pred.flatten())
            # Score of class 1, used to rank the samples for ROC-AUC.
            y_score.append(torch.softmax(out.float(), dim=1)[:, 1])

    avg_val_accuracy = total_eval_accuracy / len(validation_dataloader.dataset)
    avg_val_loss = total_eval_loss / len(validation_dataloader)
    val_losses.append(avg_val_loss)
    val_accuracies.append(avg_val_accuracy)

    print("Validation Accuracy : {}".format(avg_val_accuracy))
    print("Validation Loss : {}".format(avg_val_loss))
    training_time = format_time(time.time() - t0)

    print("This epoch took: {}".format(training_time))
    print('------------------------------------------------------')
    y_true = torch.cat(y_true).tolist()
    y_pred = torch.cat(y_pred).tolist()
    y_score = torch.cat(y_score).tolist()
    # ROC-AUC is a ranking metric, so it is computed from the class-1 scores.
    print('roc_auc score : ', roc_auc_score(y_true, y_score))
    print('F1 score : ', f1_score(y_true, y_pred))

    training_stats.append(
        {
            'epoch': epoch_i + 1,
            'Train Accur.': avg_train_accuracy,
            'Training Loss': avg_train_loss,
            'Valid. Loss': avg_val_loss,
            'Valid. Accur.': avg_val_accuracy,
            'Training Time': training_time,
        }
    )

    if avg_val_accuracy > best_accuracy:
        best_accuracy = avg_val_accuracy
        # Keep a snapshot; a plain reference to model5 would keep changing
        # during the following epochs.
        best_model = copy.deepcopy(model5)

print("-------------------------------------------------------")
print("Total time {:} (h:mm:ss)".format(format_time(time.time() - total_t0)))
print('best acc:', best_accuracy)

# Per-epoch curves for the frozen BERT + BiGRU run: loss left, accuracy right.
epochs_range = range(1, epochs + 1)
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 5))

loss_ax.plot(epochs_range, train_losses, label='Training Loss')
loss_ax.plot(epochs_range, val_losses, label='Validation Loss')
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss')
loss_ax.set_title('Training and Validation Loss')
loss_ax.legend()

acc_ax.plot(epochs_range, train_accuracies, label='Training Accuracy')
acc_ax.plot(epochs_range, val_accuracies, label='Validation Accuracy')
acc_ax.set_xlabel('Epochs')
acc_ax.set_ylabel('Accuracy')
acc_ax.set_title('Training and Validation Accuracy')
acc_ax.legend()

plt.show()