<a href="https://colab.research.google.com/github/Jacob-Lewis/athena/blob/flags/lab_gpu.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install transformers
# Importing the libraries needed
import pandas as pd
import torch
import transformers
from torch.utils.data import Dataset, DataLoader
from transformers import DistilBertModel, DistilBertTokenizer



In [47]:
!pip install wandb --upgrade
import wandb

wandb.login()
wandb.init(project="pytorch-bert")

Requirement already up-to-date: wandb in /usr/local/lib/python3.7/dist-packages (0.10.25)




VBox(children=(Label(value=' 0.01MB of 0.01MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epoch,0.0
loss,1.41122
accuracy,86.65418
_runtime,774.0
_timestamp,1618289899.0
_step,1101.0


0,1
epoch,▁▁▁▁▁▁▁▁▁▁▁▁
loss,█▄▃▃▂▂▂▂▁▁▁▁
accuracy,▁███████████
_runtime,▁▁▂▂▃▃▃▄▄▅▅█
_timestamp,▁▁▂▂▃▃▃▄▄▅▅█
_step,▁▂▂▃▄▄▅▅▆▇▇█


In [25]:
from torch import cuda
device = 'cuda' if cuda.is_available() else 'cpu'

In [27]:
# Import the csv into pandas dataframe and add the headers
df = pd.read_csv('https://raw.githubusercontent.com/ak-tpsy/telemeet/main/Data/telemeet_dataset.csv?token=ADCTZ3EMQY6H5MTTBIUEH6LAOURC6', names = ['label', 'utterance'])
df.head()

Unnamed: 0,label,utterance
0,8,I am the psychiatrist here in this department.
1,8,I came to see you because my GP sent me to see...
2,8,"Yeah, yeah."
3,8,He said come and see a trick cyclist cuz then ...
4,8,Come and see us a trick cyclist a psychiatrist.


In [41]:
MAX_LEN = 128
TRAIN_BATCH_SIZE = 16
VALID_BATCH_SIZE = 4
EPOCHS = 5
LEARNING_RATE = 1e-05
WEIGHTS = torch.tensor([.3,.3,.3,.3,.3,.3,.3,.3,.004]) #Weights for the classes. set to 1/number of samples in class
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-cased')

In [42]:
class Triage(Dataset):
    def __init__(self, dataframe, tokenizer, max_len):
        self.len = len(dataframe)
        self.data = dataframe
        self.tokenizer = tokenizer
        self.max_len = max_len
        
    def __getitem__(self, index):
        title = str(self.data.utterance[index])
        title = " ".join(title.split()) #Concerns here
        inputs = self.tokenizer.encode_plus(
            title,
            None,
            add_special_tokens=True,
            max_length=self.max_len,
            pad_to_max_length=True,
            return_token_type_ids=True,
            truncation=True
        )
        ids = inputs['input_ids']
        mask = inputs['attention_mask']

        return {
            'ids': torch.tensor(ids, dtype=torch.long),
            'mask': torch.tensor(mask, dtype=torch.long),
            'targets': torch.tensor(self.data.label[index], dtype=torch.long)
        } 
    
    def __len__(self):
        return self.len

In [43]:
# Creating the dataset and dataloader for the neural network

train_size = 0.86
train_dataset=df.sample(frac=train_size,random_state=200)
test_dataset=df.drop(train_dataset.index).reset_index(drop=True)
train_dataset = train_dataset.reset_index(drop=True)

#Holdout validation isn't necessary
print("FULL Dataset: {}".format(df.shape))
print("TRAIN Dataset: {}".format(train_dataset.shape))
print("TEST Dataset: {}".format(test_dataset.shape))


training_set = Triage(train_dataset, tokenizer, MAX_LEN)
testing_set = Triage(test_dataset, tokenizer, MAX_LEN)


FULL Dataset: (22018, 2)
TRAIN Dataset: (18935, 2)
TEST Dataset: (3083, 2)


In [48]:
train_params = {'batch_size': TRAIN_BATCH_SIZE,
                'shuffle': True,
                'num_workers': 0
                }

test_params = {'batch_size': VALID_BATCH_SIZE,
                'shuffle': True,
                'num_workers': 0
                }

training_loader = DataLoader(training_set, **train_params)
testing_loader = DataLoader(testing_set, **test_params)

In [49]:
# Creating the customized model, by adding a drop out and a dense layer on top of distil bert to get the final output for the model. 
class DistillBERTClass(torch.nn.Module):
    def __init__(self):
        super(DistillBERTClass, self).__init__()
        self.l1 = DistilBertModel.from_pretrained("distilbert-base-uncased")
        self.pre_classifier = torch.nn.Linear(768, 768)
        self.dropout = torch.nn.Dropout(0.3)
        self.classifier = torch.nn.Linear(768, 9) #For 9 classes

    def forward(self, input_ids, attention_mask):
        output_1 = self.l1(input_ids=input_ids, attention_mask=attention_mask)
        hidden_state = output_1[0]
        pooler = hidden_state[:, 0]
        pooler = self.pre_classifier(pooler)
        pooler = torch.nn.ReLU()(pooler)
        pooler = self.dropout(pooler)
        output = self.classifier(pooler)
        return output

In [50]:
model = DistillBERTClass()
model.to(device)

DistillBERTClass(
  (l1): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0): TransformerBlock(
          (attention): MultiHeadSelfAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)
            (lin1): Linear(in_feat

In [51]:
# Creating the loss function and optimizer
loss_function = torch.nn.CrossEntropyLoss(weight=WEIGHTS).cuda()
optimizer = torch.optim.Adam(params =  model.parameters(), lr=LEARNING_RATE)

In [52]:
# Function to calcuate the accuracy of the model

def calcuate_accu(big_idx, targets):
    n_correct = (big_idx==targets).sum().item()
    return n_correct

In [53]:
# Defining the training function on the 80% of the dataset for tuning the distilbert model
wandb.watch(model, loss_function, log="all", log_freq=10) #Does this need to be in the train loop?

def train(epoch):
    tr_loss = 0
    n_correct = 0
    nb_tr_steps = 0
    nb_tr_examples = 0
    model.train()
    for _,data in enumerate(training_loader, 0):
        ids = data['ids'].to(device, dtype = torch.long)
        mask = data['mask'].to(device, dtype = torch.long)
        targets = data['targets'].to(device, dtype = torch.long)

        outputs = model(ids, mask)
        loss = loss_function(outputs, targets)
        tr_loss += loss.item()
        big_val, big_idx = torch.max(outputs.data, dim=1)
        n_correct += calcuate_accu(big_idx, targets)

        nb_tr_steps += 1
        nb_tr_examples+=targets.size(0)
        
        if _%100==0:
            loss_step = tr_loss/nb_tr_steps
            accu_step = (n_correct*100)/nb_tr_examples 
            wandb.log({"epoch": epoch, "loss": loss_step}, step=nb_tr_steps)
            wandb.log({"epoch": epoch, "accuracy": accu_step}, step=nb_tr_steps)
            print(f"Training Loss per 100 steps: {loss_step}")
            print(f"Training Accuracy per 100 steps: {accu_step}")

        optimizer.zero_grad()
        loss.backward()
        # # When using GPU
        optimizer.step()

    print(f'The Total Accuracy for Epoch {epoch}: {(n_correct*100)/nb_tr_examples}')
    epoch_loss = tr_loss/nb_tr_steps
    epoch_accu = (n_correct*100)/nb_tr_examples
    print(f"Training Loss Epoch: {epoch_loss}")
    print(f"Training Accuracy Epoch: {epoch_accu}")

    return 

In [54]:
for epoch in range(EPOCHS):
    train(epoch)



Training Loss per 100 steps: 2.2780652046203613
Training Accuracy per 100 steps: 6.25
Training Loss per 100 steps: 1.9918599164131845
Training Accuracy per 100 steps: 48.886138613861384
Training Loss per 100 steps: 1.9042255896240918
Training Accuracy per 100 steps: 44.309701492537314
Training Loss per 100 steps: 1.8625335828014387
Training Accuracy per 100 steps: 45.6187707641196
Training Loss per 100 steps: 1.827962028118143
Training Accuracy per 100 steps: 44.17082294264339
Training Loss per 100 steps: 1.8004087183527842
Training Accuracy per 100 steps: 42.365269461077844
Training Loss per 100 steps: 1.789513750203239
Training Accuracy per 100 steps: 40.45341098169717
Training Loss per 100 steps: 1.7798692355312395
Training Accuracy per 100 steps: 41.07524964336662
Training Loss per 100 steps: 1.7734280448132538
Training Accuracy per 100 steps: 40.09051186017478
Training Loss per 100 steps: 1.7650297968818927
Training Accuracy per 100 steps: 40.885127635960046
Training Loss per 100 



The Total Accuracy for Epoch 0: 43.18986004753103
Training Loss Epoch: 1.7370708092443041
Training Accuracy Epoch: 43.18986004753103
Training Loss per 100 steps: 1.570237636566162
Training Accuracy per 100 steps: 68.75




Training Loss per 100 steps: 1.6864500972304013
Training Accuracy per 100 steps: 54.20792079207921




Training Loss per 100 steps: 1.6054721287826994
Training Accuracy per 100 steps: 59.82587064676617




Training Loss per 100 steps: 1.6473695617973607
Training Accuracy per 100 steps: 55.357142857142854




Training Loss per 100 steps: 1.6503316580208758
Training Accuracy per 100 steps: 56.95137157107232




Training Loss per 100 steps: 1.638720361534469
Training Accuracy per 100 steps: 56.599301397205586




Training Loss per 100 steps: 1.6385017050284514
Training Accuracy per 100 steps: 56.260399334442596




Training Loss per 100 steps: 1.6236884580359139
Training Accuracy per 100 steps: 56.94543509272468




Training Loss per 100 steps: 1.6226979337083862
Training Accuracy per 100 steps: 56.31242197253433




Training Loss per 100 steps: 1.6254175229289556
Training Accuracy per 100 steps: 56.132075471698116




Training Loss per 100 steps: 1.625068387249252
Training Accuracy per 100 steps: 56.52472527472528
Training Loss per 100 steps: 1.6230210549283526
Training Accuracy per 100 steps: 55.98319709355132




The Total Accuracy for Epoch 1: 56.04964351729601
Training Loss Epoch: 1.617159241810441
Training Accuracy Epoch: 56.04964351729601
Training Loss per 100 steps: 1.6107500791549683
Training Accuracy per 100 steps: 81.25




Training Loss per 100 steps: 1.531242328115029
Training Accuracy per 100 steps: 53.40346534653465




Training Loss per 100 steps: 1.521575667223527
Training Accuracy per 100 steps: 57.493781094527364




Training Loss per 100 steps: 1.5597579202976735
Training Accuracy per 100 steps: 54.56810631229236




Training Loss per 100 steps: 1.5608484482676013
Training Accuracy per 100 steps: 53.78740648379053




Training Loss per 100 steps: 1.5667166156088284
Training Accuracy per 100 steps: 54.815369261477045




Training Loss per 100 steps: 1.5665859609296833
Training Accuracy per 100 steps: 54.64850249584027




Training Loss per 100 steps: 1.56240630485702
Training Accuracy per 100 steps: 55.13552068473609




Training Loss per 100 steps: 1.5566580556379572
Training Accuracy per 100 steps: 55.74282147315855




Training Loss per 100 steps: 1.5488785562187135
Training Accuracy per 100 steps: 56.51359600443951




Training Loss per 100 steps: 1.5471377017852905
Training Accuracy per 100 steps: 56.56843156843157
Training Loss per 100 steps: 1.5434497007990187
Training Accuracy per 100 steps: 55.79586739327884




The Total Accuracy for Epoch 2: 56.02851861631898
Training Loss Epoch: 1.5375288217454344
Training Accuracy Epoch: 56.02851861631898
Training Loss per 100 steps: 1.889532446861267
Training Accuracy per 100 steps: 50.0




Training Loss per 100 steps: 1.4462061164402726
Training Accuracy per 100 steps: 64.48019801980197




Training Loss per 100 steps: 1.4710713613092603
Training Accuracy per 100 steps: 60.72761194029851




Training Loss per 100 steps: 1.4956428866053737
Training Accuracy per 100 steps: 58.45099667774086




Training Loss per 100 steps: 1.4590779421038165
Training Accuracy per 100 steps: 59.02431421446384




Training Loss per 100 steps: 1.456423037066431
Training Accuracy per 100 steps: 58.93213572854292




Training Loss per 100 steps: 1.458864074776851
Training Accuracy per 100 steps: 58.537853577371045




Training Loss per 100 steps: 1.4475780221262944
Training Accuracy per 100 steps: 59.04065620542083




Training Loss per 100 steps: 1.4477286719055509
Training Accuracy per 100 steps: 59.332084893882644




Training Loss per 100 steps: 1.4526607799080182
Training Accuracy per 100 steps: 59.586570477247506




Training Loss per 100 steps: 1.455918636593547
Training Accuracy per 100 steps: 59.49050949050949
Training Loss per 100 steps: 1.44474210417563
Training Accuracy per 100 steps: 59.59355131698456




The Total Accuracy for Epoch 3: 59.83100079218379
Training Loss Epoch: 1.4332581676693785
Training Accuracy Epoch: 59.83100079218379
Training Loss per 100 steps: 1.778192400932312
Training Accuracy per 100 steps: 62.5




Training Loss per 100 steps: 1.3375031142541678
Training Accuracy per 100 steps: 60.45792079207921




Training Loss per 100 steps: 1.3383960909214778
Training Accuracy per 100 steps: 61.1318407960199




Training Loss per 100 steps: 1.3126121469312333
Training Accuracy per 100 steps: 61.93936877076412




Training Loss per 100 steps: 1.307458645462098
Training Accuracy per 100 steps: 61.408977556109726




Training Loss per 100 steps: 1.3094916093849136
Training Accuracy per 100 steps: 61.93862275449102




Training Loss per 100 steps: 1.3082575282816482
Training Accuracy per 100 steps: 62.13602329450915




Training Loss per 100 steps: 1.2874806803065257
Training Accuracy per 100 steps: 62.339514978602




Training Loss per 100 steps: 1.307275832145848
Training Accuracy per 100 steps: 60.8458177278402




Training Loss per 100 steps: 1.3075276643997027
Training Accuracy per 100 steps: 61.175083240843506




Training Loss per 100 steps: 1.315791315578676
Training Accuracy per 100 steps: 60.67682317682318
Training Loss per 100 steps: 1.318253854017275
Training Accuracy per 100 steps: 61.205722070844686
The Total Accuracy for Epoch 4: 61.40480591497227
Training Loss Epoch: 1.321406525610065
Training Accuracy Epoch: 61.40480591497227


In [56]:
def valid(model, testing_loader):
    model.eval()
    tr_loss = 0
    nb_tr_steps = 0
    nb_tr_examples = 0
    n_correct = 0; n_wrong = 0; total = 0
    with torch.no_grad():
        for _, data in enumerate(testing_loader, 0):
            ids = data['ids'].to(device, dtype = torch.long)
            mask = data['mask'].to(device, dtype = torch.long)
            targets = data['targets'].to(device, dtype = torch.long)
            outputs = model(ids, mask).squeeze()
            print(outputs)
            loss = loss_function(outputs, targets)
            tr_loss += loss.item()
            big_val, big_idx = torch.max(outputs.data, dim=1)
            n_correct += calcuate_accu(big_idx, targets)

            nb_tr_steps += 1
            nb_tr_examples+=targets.size(0)
            
            if _%5000==0:
                loss_step = tr_loss/nb_tr_steps
                accu_step = (n_correct*100)/nb_tr_examples
                print(f"Validation Loss per 100 steps: {loss_step}")
                print(f"Validation Accuracy per 100 steps: {accu_step}")
    epoch_loss = tr_loss/nb_tr_steps
    epoch_accu = (n_correct*100)/nb_tr_examples
    print(f"Validation Loss Epoch: {epoch_loss}")
    print(f"Validation Accuracy Epoch: {epoch_accu}")
    
    return epoch_accu

In [55]:
print('This is the validation section to print the accuracy and see how it performs')
print('Here we are leveraging on the dataloader crearted for the validation dataset, the approcah is using more of pytorch')

acc = valid(model, testing_loader)
print("Accuracy on test data = %0.2f%%" % acc)

This is the validation section to print the accuracy and see how it performs
Here we are leveraging on the dataloader crearted for the validation dataset, the approcah is using more of pytorch
tensor([[-0.9802,  0.7446,  1.1349, -0.2877, -0.6729,  1.5529, -2.5706, -2.5612,
          0.9788],
        [-0.9784, -0.0087,  0.4417,  0.9758, -0.2500,  1.3492, -2.2974, -2.3836,
         -0.1530],
        [-0.6368, -0.0602,  0.1471, -0.1343, -0.6845,  1.5253, -1.6925, -1.7211,
          0.6395],
        [-1.1885,  0.4394,  0.2476, -1.7722, -1.5775,  0.9669, -2.5763, -2.4244,
          3.7281]], device='cuda:0')
Validation Loss per 100 steps: 1.0766730308532715
Validation Accuracy per 100 steps: 50.0
tensor([[-0.8137,  0.6803,  0.5204,  0.2808, -0.6425,  1.2909, -2.1158, -2.3506,
          0.3586],
        [-1.3967,  1.6983,  2.6558, -0.2708, -1.1030,  0.0594, -2.9843, -2.7702,
          0.3993],
        [-1.1378,  0.3026,  0.4512, -2.0596, -1.5672,  0.5005, -2.4557, -2.2320,
          3.9735],



[1;30;43mStreaming output truncated to the last 5000 lines.[0m
          0.8752],
        [-1.0476,  0.3745, -0.0916, -1.9069, -1.6729,  0.1890, -2.3267, -1.9881,
          4.3923],
        [-1.0706,  1.1691,  1.0917, -1.5093, -1.5614,  1.2258, -2.6752, -2.3427,
          2.6612],
        [-1.0281, -0.2889, -0.2641, -1.4685, -1.4304,  1.7761, -1.9229, -2.1823,
          2.8513]], device='cuda:0')
tensor([[-0.9935,  0.6778,  0.1625, -1.9908, -1.7603,  0.7238, -2.3583, -2.1353,
          3.8792],
        [-0.6031, -0.0812, -0.1633, -1.1161, -1.0092,  0.7225, -1.7245, -1.5746,
          2.2343],
        [-0.9724, -0.0536, -0.0262, -1.5426, -1.4096,  1.6488, -1.9431, -2.2627,
          3.1947],
        [-0.3215, -0.0354, -0.5870,  0.8899, -0.0767,  1.3719, -1.4327, -1.9080,
         -0.6029]], device='cuda:0')
tensor([[-1.2087,  1.0584,  0.7519, -1.2127, -1.1898,  1.6747, -2.6488, -2.5880,
          2.0272],
        [-0.9928,  1.5625,  1.6969, -1.0118, -1.1551,  0.7415, -2.7711, -2.5401,

In [None]:
#See what the model outputs for specific utterances
