# Imports

In [1]:
import torch
from torch.utils.data import DataLoader
from torch import nn

from transformers import BertTokenizer, BertModel

import pandas as pd

import numpy as np

from tqdm import tqdm

from NNs import BERTFinetune, TextDataSet

from sklearn.model_selection import train_test_split

%load_ext autoreload
%autoreload 2

# Loading Tweets

In [2]:
data = pd.read_csv('data/train.csv')

dataset = data[['text', 'target']]

print(dataset.head())

                                                text  target
0  Our Deeds are the Reason of this #earthquake M...       1
1             Forest fire near La Ronge Sask. Canada       1
2  All residents asked to 'shelter in place' are ...       1
3  13,000 people receive #wildfires evacuation or...       1
4  Just got sent this photo from Ruby #Alaska as ...       1


# Creating Datasets

In [3]:
max_len = 512
encoder = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)

train_x, test_x, train_y, test_y = train_test_split(dataset['text'].tolist(), dataset['target'].tolist(), test_size=0.2)

full_dataset = TextDataSet(dataset['text'].tolist(), dataset['target'].tolist(), encoder, max_len)
train_dataset = TextDataSet(train_x, train_y, encoder, max_len)
test_dataset = TextDataSet(test_x, test_y, encoder, max_len)

full_loader = DataLoader(full_dataset, batch_size=64, shuffle=False, num_workers=4, pin_memory=True)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers=4, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=4, pin_memory=True)

## Setting which layers are Fine Tuned

In [4]:
model = BERTFinetune('bert-base-uncased', 2, 0.1)

## Turning off gradients except for the last layer of BERT

attention_heads = ['pool', '8', '9', '10', '11']

for name, param in model.named_parameters():
    if name.startswith('bert'):
        if  not any(x in name for x in attention_heads):
            param.requires_grad = False
    print(name, param.requires_grad)

#for name, param in model.named_parameters():
    #if name.startswith('bert'):
        #if 'pooler' not in name:
            #param.requires_grad = False
    #print(name, param.requires_grad)

bert.embeddings.word_embeddings.weight False
bert.embeddings.position_embeddings.weight False
bert.embeddings.token_type_embeddings.weight False
bert.embeddings.LayerNorm.weight False
bert.embeddings.LayerNorm.bias False
bert.encoder.layer.0.attention.self.query.weight False
bert.encoder.layer.0.attention.self.query.bias False
bert.encoder.layer.0.attention.self.key.weight False
bert.encoder.layer.0.attention.self.key.bias False
bert.encoder.layer.0.attention.self.value.weight False
bert.encoder.layer.0.attention.self.value.bias False
bert.encoder.layer.0.attention.output.dense.weight False
bert.encoder.layer.0.attention.output.dense.bias False
bert.encoder.layer.0.attention.output.LayerNorm.weight False
bert.encoder.layer.0.attention.output.LayerNorm.bias False
bert.encoder.layer.0.intermediate.dense.weight False
bert.encoder.layer.0.intermediate.dense.bias False
bert.encoder.layer.0.output.dense.weight False
bert.encoder.layer.0.output.dense.bias False
bert.encoder.layer.0.output.Lay

## Training

In [5]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = model.to(device)
model.finetune()

torch.cuda.empty_cache()

epochs = 5

for epoch in range(epochs):
    model.train()
    total_loss = 0
    correct = 0
    total = 0
    for batch in tqdm(train_loader):
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        y = batch['labels'].to(device)
        optimizer.zero_grad()
        output = model(input_ids, attention_mask)
        preds = torch.argmax(output, dim=1)
        correct += (preds == y).sum().item()
        total += y.size(0)
        loss = criterion(output, y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f'Epoch: {epoch}, Loss: {total_loss}, Accuracy: {correct/total}')

    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for batch in test_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            y = batch['labels'].to(device)
            output = model(input_ids, attention_mask)
            preds = torch.argmax(output, dim=1)
            correct += (preds == y).sum().item()
            total += y.size(0)
        print(f'Accuracy: {correct/total}')

100%|██████████| 381/381 [03:26<00:00,  1.85it/s]

Epoch: 0, Loss: 181.18653206527233, Accuracy: 0.7794745484400657





Accuracy: 0.8384766907419566


100%|██████████| 381/381 [03:35<00:00,  1.77it/s]

Epoch: 1, Loss: 147.16738249361515, Accuracy: 0.8343185550082102





Accuracy: 0.8305975049244911


100%|██████████| 381/381 [03:37<00:00,  1.76it/s]

Epoch: 2, Loss: 135.4189364463091, Accuracy: 0.8474548440065681





Accuracy: 0.8279711096520026


100%|██████████| 381/381 [03:37<00:00,  1.76it/s]

Epoch: 3, Loss: 124.8157116100192, Accuracy: 0.8666666666666667





Accuracy: 0.8358502954694682


100%|██████████| 381/381 [03:35<00:00,  1.77it/s]

Epoch: 4, Loss: 114.08158492296934, Accuracy: 0.8770114942528736





Accuracy: 0.8430728824688115


## Creating Embeddings for fine tuned output

In [6]:
model.feature_extractor()
model.eval()

data_features = []

with torch.no_grad():
    for batch in tqdm(full_loader):
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        output = model(input_ids, attention_mask)
        data_features.append(output)

data_features = torch.cat(data_features, dim=0)

new_data = pd.DataFrame(data_features.cpu().numpy())

new_data['target'] = dataset['target']
new_data['text'] = dataset['text']

print(new_data.head())

new_data.to_csv('data/processed_fine_encode4_pool_bert.csv', index=False)



100%|██████████| 119/119 [02:37<00:00,  1.32s/it]


          0         1         2         3         4         5         6  \
0 -0.706173 -0.479326  0.695775  0.105923  0.223339 -0.020102  0.166236   
1 -0.292061 -0.203339 -0.012935 -0.440923  0.521172  0.085015  0.007540   
2 -0.131100  0.053226 -0.548904 -0.121370  0.658592 -0.089726 -0.297121   
3  0.128701  0.005418  0.173510 -0.634204  0.039833  0.456838 -0.580948   
4 -0.220905 -0.434828 -0.033333 -0.451370  0.842869  0.286226 -0.604444   

          7         8         9  ...       760       761       762       763  \
0 -0.118495  0.227647 -0.517053  ... -0.092164  0.139747 -0.496852  0.922823   
1 -0.240759 -0.673398  0.952635  ... -0.586681 -0.748257 -0.970952  0.007169   
2 -0.192829 -0.740907 -0.001359  ...  0.929475 -0.510744 -0.698830  0.517608   
3 -0.463875 -0.434695  0.994325  ... -0.331720 -0.740078 -0.848269  0.427743   
4 -0.219417 -0.062626  0.475039  ...  0.892794 -0.146139 -0.736431  0.861435   

        764       765       766       767  target  \
0 -0.391260  0.