In [1]:
!pip install tensorflow_io
!pip install keras-tuner
!pip install print_schema
!pip install pydub
!pip install opensmile
!pip install tqdm boto3 requests regex sentencepiece sacremoses
!pip install transformers
!pip install wandb

Collecting tensorflow_io
  Using cached tensorflow_io-0.26.0-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (25.9 MB)
Collecting tensorflow-io-gcs-filesystem==0.26.0
  Using cached tensorflow_io_gcs_filesystem-0.26.0-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (2.4 MB)
Installing collected packages: tensorflow-io-gcs-filesystem, tensorflow_io
Successfully installed tensorflow-io-gcs-filesystem-0.26.0 tensorflow_io-0.26.0
Collecting keras-tuner
  Using cached keras_tuner-1.1.3-py3-none-any.whl (135 kB)
Collecting kt-legacy
  Using cached kt_legacy-1.0.4-py3-none-any.whl (9.6 kB)
Collecting tensorboard
  Using cached tensorboard-2.10.0-py3-none-any.whl (5.9 MB)
Collecting absl-py>=0.4
  Using cached absl_py-1.2.0-py3-none-any.whl (123 kB)
Collecting tensorboard-plugin-wit>=1.6.0
  Using cached tensorboard_plugin_wit-1.8.1-py3-none-any.whl (781 kB)
Collecting werkzeug>=1.0.1
  Using cached Werkzeug-2.2.2-py3-none-any.whl (232 kB)
Collecting protobuf<3.20,>=3.9.2
 

In [2]:
import numpy as np
import pandas as pd
import gc

In [3]:
import torch
import torchvision
import torch.nn as nn
from transformers import BertModel
from transformers import AdamW, get_linear_schedule_with_warmup
from torch.utils.data import DataLoader
import random
import time
import wandb

class ADdataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels
    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item
    def __len__(self):
        return len(self.labels)
    
# Create the BertClassfier class
class BertClassifier(nn.Module):
    """Bert Model for Classification Tasks.
    """
    def __init__(self, archs,dropout,freeze_bert=False):
        """
        @param    bert: a BertModel object
        @param    classifier: a torch.nn.Module classifier
        @param    freeze_bert (bool): Set `False` to fine-tune the BERT model
        """
        super(BertClassifier, self).__init__()
        # Specify hidden size of BERT, hidden size of our classifier, and number of labels
        D_in, H, D_out = 768, 256, 2

        # Instantiate BERT model
        self.bert = BertModel.from_pretrained('bert-base-uncased')

        # Instantiate an one-layer feed-forward classifier
        
        layers = []          
        layers.append(nn.Linear(D_in, archs[0]))
        layers.append(nn.BatchNorm1d(num_features=archs[0]))
        layers.append(nn.ReLU())
        layers.append(nn.Dropout(dropout))
          
        for i in range(1,len(archs)):
          layers.append(nn.Linear(archs[i-1], archs[i]))
          layers.append(nn.BatchNorm1d(num_features=archs[i]))
          layers.append(nn.ReLU())
          layers.append(nn.Dropout(dropout))

        layers.append(nn.Linear(archs[-1], 1))
        self.classifier  = nn.Sequential(*layers)

        """
        for i in range(number_of_layers-1):
          layers.append(nn.Linear(fc_layer_size, fc_layer_size))
          layers.append(nn.BatchNorm1d(num_features=fc_layer_size))
          layers.append(nn.ReLU())
          layers.append(nn.Dropout(dropout))

        layers.append(nn.Linear(fc_layer_size, 1))
        self.classifier  = nn.Sequential(*layers)
        """

        """
        self.classifier = nn.Sequential(
            

            nn.Linear(D_in, fc_layer_size),
            nn.BatchNorm1d(num_features=fc_layer_size), ## kk
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(fc_layer_size, 1),
            #nn.BatchNorm1d(num_features=128),
            #nn.ReLU(),
            #nn.Linear(128, 1)
            
            #nn.BatchNorm1d(num_features=128), ###
            #nn.ReLU(),
            
            #nn.Dropout(0.5),

            #nn.Linear(H, D_out)
            #nn.Linear(128, 2)
            #nn.Linear(128, 1)
        )
        """
        # Freeze the BERT model
        if freeze_bert:
            for param in self.bert.parameters():
                param.requires_grad = False
        
    def forward(self, input_ids, attention_mask):
        """
        Feed input to BERT and the classifier to compute logits.
        @param    input_ids (torch.Tensor): an input tensor with shape (batch_size,
                      max_length)
        @param    attention_mask (torch.Tensor): a tensor that hold attention mask
                      information with shape (batch_size, max_length)
        @return   logits (torch.Tensor): an output tensor with shape (batch_size,
                      num_labels)
        """
        # Feed input to BERT
        outputs = self.bert(input_ids=input_ids,
                            attention_mask=attention_mask)
        
        # Extract the last hidden state of the token `[CLS]` for classification task
        last_hidden_state_cls = outputs[0][:, 0, :]

        # Feed input to classifier to compute logits
        logits = self.classifier(last_hidden_state_cls)

        return logits
    
def initialize_model(archs,dropout,train_loader,epochs):
    """Initialize the Bert Classifier, the optimizer and the learning rate scheduler.
    """
    # Instantiate Bert Classifier
    bert_classifier = BertClassifier(archs,dropout,freeze_bert=False)

    # Tell PyTorch to run the model on GPU
    bert_classifier.to(device)

    # Create the optimizer
    optimizer = torch.optim.Adam(bert_classifier.parameters(),
                      lr=5e-5,    # Default learning rate
                      eps=1e-8    # Default epsilon value
                      )

    # Total number of training steps
    total_steps = len(train_loader) * epochs

    # Set up the learning rate scheduler
    scheduler = get_linear_schedule_with_warmup(optimizer,
                                                num_warmup_steps=0, # Default value
                                                num_training_steps=total_steps)
    return bert_classifier, optimizer, scheduler


import random
import time

# Specify loss function
#loss_fn = nn.CrossEntropyLoss()
#loss_fn = nn.BCELoss()
loss_fn =  torch.nn.BCEWithLogitsLoss()
def set_seed(seed_value=42):
    """Set seed for reproducibility.
    """
    random.seed(seed_value)
    np.random.seed(seed_value)
    torch.manual_seed(seed_value)
    torch.cuda.manual_seed_all(seed_value)

def train_epoch(model, train_loader,val_loader, optim,scheduler):
      model.train()
      # For each batch of training data...
      train_loss_sum = 0
      train_accuracy_epoch = 0
      for batch in (train_loader):
          optim.zero_grad()
          input_ids = batch['input_ids'].to(device)
          attention_mask = batch['attention_mask'].to(device)
          labels = batch['labels'].to(device)

          # Perform a forward pass. This will return logits.
          logits = model(input_ids, attention_mask)

          # Compute loss and accumulate the loss values
          logits = logits.reshape(-1) #silebilirsin
          
          loss = loss_fn(logits, labels.float())
          train_loss_sum += loss.item()

          logits_class = logits > 0.5

          train_acc = (labels == logits_class).sum().item() / labels.size(0)
          train_accuracy_epoch += train_acc

          # Perform a backward pass to calculate gradients
          loss.backward()
          # Clip the norm of the gradients to 1.0 to prevent "exploding gradients"
          torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
          # Update parameters and the learning rate
          optim.step()
          scheduler.step()
          #wandb.log({"batch loss": loss.item()})

      model.eval()
      # Tracking variables
      val_loss_sum = 0
      val_accuracy_epoch = 0
      # For each batch in our validation set...
      for batch in val_loader:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        
        # Compute logits
        with torch.no_grad():
          logits = model(input_ids, attention_mask)

          # Compute loss
          logits = logits.reshape(-1)
          loss = loss_fn(logits, labels.float())
          val_loss_sum +=loss.item()

          logits_class = logits > 0.5
          val_acc = (labels == logits_class).sum().item() / labels.size(0)
          val_accuracy_epoch += val_acc


      return model,(train_loss_sum / len(train_loader)),(train_accuracy_epoch/len(train_loader)),(val_loss_sum / len(val_loader)),(val_accuracy_epoch/len(val_loader))



def train(model, train_loader, optim, val_loader=None, epochs=50, evaluation=False):
    """Train the BertClassifier model.
    """
    # Start training loop
    print("Start training...\n")
    train_loss_list = []
    val_loss_list = []
    train_acc_list = []
    val_acc_list = [] 
    for epoch_i in range(epochs):
        train_loss_sum = 0
        train_accuracy_epoch = 0
        # Put the model into the training mode
        model.train()

        # For each batch of training data...
        for batch in (train_loader):
            optim.zero_grad()
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            # Perform a forward pass. This will return logits.
            logits = model(input_ids, attention_mask)

            # Compute loss and accumulate the loss values
            logits = logits.reshape(-1) #silebilirsin
            
            loss = loss_fn(logits, labels.float())
            train_loss_sum += loss.item()

            logits_class = logits > 0.5
            train_acc = (labels == logits_class).sum().item() / labels.size(0)
            train_accuracy_epoch += train_acc

            # Perform a backward pass to calculate gradients
            loss.backward()


            # Clip the norm of the gradients to 1.0 to prevent "exploding gradients"
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)


            # Update parameters and the learning rate
            optimizer.step()
            scheduler.step()

        avg_train_loss = np.round(train_loss_sum/len(train_loader),2)
        avg_train_acc = np.round(train_accuracy_epoch/len(train_loader),2)

        if evaluation == True:
            avg_val_loss, avg_val_acc = evaluate(model, val_loader)
        print('Epoch {}, train loss {} , val loss is {}, train acc is {}, val acc is {} '.format(epoch_i,avg_train_loss,avg_val_loss,avg_train_acc,avg_val_acc))
        train_loss_list.append(avg_train_loss)
        val_loss_list.append(avg_val_loss)
        val_acc_list.append(avg_val_acc)
        train_acc_list.append(avg_train_acc)

    print("Training complete!")
    return model,train_loss_list,val_loss_list,train_acc_list,val_acc_list


def evaluate(model, val_dataloader):
    """After the completion of each training epoch, measure the model's performance
    on our validation set.
    """
    # Put the model into the evaluation mode. The dropout layers are disabled during
    # the test time.
    model.eval()

    # Tracking variables
    val_loss_sum = 0
    val_accuracy_epoch = 0
    # For each batch in our validation set...
    for batch in val_dataloader:
      input_ids = batch['input_ids'].to(device)
      attention_mask = batch['attention_mask'].to(device)
      labels = batch['labels'].to(device)
      
      # Compute logits
      with torch.no_grad():
        logits = model(input_ids, attention_mask)

        # Compute loss
        logits = logits.reshape(-1)
        loss = loss_fn(logits, labels.float())
        val_loss_sum +=loss.item()
        avg_val_loss = np.round(val_loss_sum/len(val_dataloader),2)

        logits_class = logits > 0.5
        val_acc = (labels == logits_class).sum().item() / labels.size(0)
        val_accuracy_epoch += val_acc
        avg_val_acc = np.round(val_accuracy_epoch/len(val_dataloader),2)

    return avg_val_loss, avg_val_acc


def evaluate_test(model, test_dataloader):
    # Put the model into the evaluation mode. The dropout layers are disabled during
    # the test time.
    model.eval()

    # Tracking variables
    test_loss_sum = 0
    test_accuracy_epoch = 0
    predictions = []
    labels_list = []
    for batch in test_dataloader:
      input_ids = batch['input_ids'].to(device)
      attention_mask = batch['attention_mask'].to(device)
      labels = batch['labels'].to(device)
      
      # Compute logits
      with torch.no_grad():
        logits = model(input_ids, attention_mask)
        labels_list.append(labels.cpu())

        # Compute loss
        logits = logits.reshape(-1)
        loss = loss_fn(logits, labels.float())
        test_loss_sum +=loss.item()
        avg_test_loss = np.round(test_loss_sum/len(test_dataloader),5)

        logits_class = logits > 0.5
        predictions.append(logits.cpu())
        test_acc = (labels == logits_class).sum().item() / labels.size(0)
        test_accuracy_epoch += test_acc
        avg_test_acc = np.round(test_accuracy_epoch/len(test_dataloader),5)

    return avg_test_loss, avg_test_acc, predictions, labels_list


def evaluate_ensemble(models, test_dataloader):
    test_loss_sum = 0
    test_accuracy_epoch = 0
    predictions = []
    labels_list = []
    for batch in test_dataloader:
      input_ids = batch['input_ids'].to(device)
      attention_mask = batch['attention_mask'].to(device)
      labels = batch['labels'].to(device)

      prediction = []
      for model in models:
        model.eval()
        # Compute logits
        with torch.no_grad():
          logits = model(input_ids, attention_mask)
          labels_list.append(labels.cpu())

          logits = logits.reshape(-1)
          logits_class = logits > 0.5
          prediction.append(logits_class)

      prediction_ensemble = sum(prediction) > 0.5*len(prediction)
      predictions.append(prediction_ensemble.cpu())

      test_acc = (labels == prediction_ensemble).sum().item() / labels.size(0)
      test_accuracy_epoch += test_acc
      avg_test_acc = np.round(test_accuracy_epoch/len(test_dataloader),5)

    return avg_test_acc, predictions, labels_list

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'tokenizer', 'bert-base-uncased')

Using cache found in /home/jupyter/.cache/torch/hub/huggingface_pytorch-transformers_main


In [5]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [23]:
df = pd.read_csv("data/test.csv")
df["Content"].fillna("",inplace=True)
test_texts = list(df.loc[:,"Content"])
test_labels = list(df.loc[:,"Label"])

In [24]:
test_encodings = tokenizer(test_texts, truncation=True, padding=True)
test_dataset = ADdataset(test_encodings, test_labels)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

In [6]:
from transformers import logging
logging.set_verbosity_error()

In [28]:
from glob import glob
from tqdm import tqdm
filenames = glob("weights/*.pt")

In [30]:
results = []
for i in tqdm(range(0,100,10)):
    model = []
    for filename in filenames:
        seed = filename.split("seed-")[-1].split("_")[0]
        # index = filename.split("index-")[-1].split("_")[0]
        if int(seed) == i:
            model.append(torch.load(filename))
            if len(model) == 10:
                break
    # print(len(model))
    avg_test_acc, predictions, labels_list = evaluate_ensemble(model,test_loader)
    results.append(avg_test_acc)

100%|██████████| 10/10 [08:57<00:00, 53.73s/it]


In [31]:
print(min(results))
print(max(results))
print(np.mean(results))
print(np.std(results))

0.81786
0.88393
0.855002
0.01967152195433795


In [32]:
index_results = []

In [33]:
for i in tqdm(range(1,4)):
    model = []
    for filename in filenames:
        # seed = filename.split("seed-")[-1].split("_")[0]
        index = filename.split("index-")[-1].split("_")[0]
        if int(index) == i:
            model.append(torch.load(filename))
            if len(model) == 10:
                break
    # print(len(model))
    avg_test_acc, predictions, labels_list = evaluate_ensemble(model,test_loader)
    index_results.append(avg_test_acc)

100%|██████████| 3/3 [02:22<00:00, 47.35s/it]


In [35]:
for i in tqdm(range(1,4)):
    model = []
    for filename in reversed(filenames):
        # seed = filename.split("seed-")[-1].split("_")[0]
        index = filename.split("index-")[-1].split("_")[0]
        if int(index) == i:
            model.append(torch.load(filename))
            if len(model) == 10:
                break
    # print(len(model))
    avg_test_acc, predictions, labels_list = evaluate_ensemble(model,test_loader)
    index_results.append(avg_test_acc)

100%|██████████| 3/3 [02:39<00:00, 53.02s/it]


In [34]:
print(min(index_results))
print(max(index_results))
print(np.mean(index_results))
print(np.std(index_results))

0.85536
0.88393
0.8744066666666667
0.013468027158999767


In [36]:
print(min(index_results))
print(max(index_results))
print(np.mean(index_results))
print(np.std(index_results))

0.84286
0.88393
0.8675616666666667
0.016890335912059937


In [37]:
seed_results = []
for i in tqdm(range(0,100,10)):
    model = []
    for filename in filenames:
        seed = filename.split("seed-")[-1].split("_")[0]
        # index = filename.split("index-")[-1].split("_")[0]
        if int(seed) == i:
            model.append(torch.load(filename))
    # print(len(model))
    avg_test_acc, predictions, labels_list = evaluate_ensemble(model,test_loader)
    seed_results.append(avg_test_acc)

100%|██████████| 10/10 [23:50<00:00, 143.02s/it]


In [38]:
print(min(seed_results))
print(max(seed_results))
print(np.mean(seed_results))
print(np.std(seed_results))

0.84643
0.88393
0.873573
0.012000293371413881
