# 1.import packages

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# genral packages
import torch
import numpy as np
import pandas as pd

from tqdm.notebook import trange, tqdm

from torch import nn
from torch.optim import Adam

In [None]:
from sklearn import metrics

In [None]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
from transformers import BertModel, BertTokenizer

# 2.functions

In [None]:
class Dataset(torch.utils.data.Dataset):
    def __init__(self, df):
        self.labels = [labels[label] for label in df['Category']]
        self.texts = [tokenizer(text, 
                                padding='max_length', 
                                max_length = 512, 
                                truncation=True,
                                return_tensors="pt") 
                      for text in df['Text']]

    def classes(self):
        return self.labels

    def __len__(self):
        return len(self.labels)

    def get_batch_labels(self, idx):
        # Fetch a batch of labels
        return np.array(self.labels[idx])

    def get_batch_texts(self, idx):
        # Fetch a batch of inputs
        return self.texts[idx]

    def __getitem__(self, idx):
        batch_texts = self.get_batch_texts(idx)
        batch_y = self.get_batch_labels(idx)
        return batch_texts, batch_y

In [None]:
class BertClassifierEN(nn.Module):
    def __init__(self, dropout=0.5):
        super(BertClassifierEN, self).__init__()
        self.bert = BertModel.from_pretrained('bert-base-cased')
        self.dropout = nn.Dropout(dropout)
        self.linear = nn.Linear(768, 3)
        self.relu = nn.ReLU()

    def forward(self, input_id, mask):
        _, pooled_output = self.bert(input_ids= input_id, attention_mask=mask,return_dict=False)
        dropout_output = self.dropout(pooled_output)
        linear_output = self.linear(dropout_output)
        final_layer = self.relu(linear_output)
        return final_layer

In [None]:
class BertClassifierES(nn.Module):
    def __init__(self, dropout=0.5):
        super(BertClassifierES, self).__init__()
        self.bert = BertModel.from_pretrained('dccuchile/bert-base-spanish-wwm-cased')
        self.dropout = nn.Dropout(dropout)
        self.linear = nn.Linear(768, 3)
        self.relu = nn.ReLU()

    def forward(self, input_id, mask):
        _, pooled_output = self.bert(input_ids=input_id, attention_mask=mask,return_dict=False)
        dropout_output = self.dropout(pooled_output)
        linear_output = self.linear(dropout_output)
        final_layer = self.relu(linear_output)
        return final_layer

In [None]:
def train(model, train_data, val_data, learning_rate, epochs):
  # get data for training and validation using Dataset
    train, val = Dataset(train_data), Dataset(val_data)
    # get data by batch using DataLoader
    # shuffle the order of samples
    train_dataloader = torch.utils.data.DataLoader(train, batch_size=2, shuffle=True)
    val_dataloader = torch.utils.data.DataLoader(val, batch_size=2)
  # use GPU or not 
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    # loss function and optimizer 
    criterion = nn.CrossEntropyLoss()
    optimizer = Adam(model.parameters(), lr=learning_rate)

    if use_cuda:
            model = model.cuda()
            criterion = criterion.cuda()
    # train 
    for epoch_num in range(epochs):
      # accuracy and loss for train data 
            total_acc_train = 0
            total_loss_train = 0

            for train_input, train_label in tqdm(train_dataloader):

                train_label = train_label.to(device)
                mask = train_input['attention_mask'].to(device)
                input_id = train_input['input_ids'].squeeze(1).to(device)
        # get ouput from model 
                output = model(input_id, mask)
                # compute batch loss
                batch_loss = criterion(output, train_label)
                total_loss_train += batch_loss.item()
                # compute batch accuracy 
                acc = (output.argmax(dim=1) == train_label).sum().item()
                total_acc_train += acc
        # update model 
                model.zero_grad()
                batch_loss.backward()
                optimizer.step()

            # ------ validate model -----------
            # accuracy and loss for validation  data 
            total_acc_val = 0
            total_loss_val = 0

            with torch.no_grad():

                for val_input, val_label in val_dataloader:
        
                    val_label = val_label.to(device)
                    mask = val_input['attention_mask'].to(device)
                    input_id = val_input['input_ids'].squeeze(1).to(device)
  
                    output = model(input_id, mask)

                    batch_loss = criterion(output, val_label)
                    total_loss_val += batch_loss.item()
                    
                    acc = (output.argmax(dim=1) == val_label).sum().item()
                    total_acc_val += acc
            
            print(
                f'''Epochs: {epoch_num + 1} 
              | Train Loss: {total_loss_train / len(train_data): .3f} 
              | Train Accuracy: {total_acc_train / len(train_data): .3f} 
              | Val Loss: {total_loss_val / len(val_data): .3f} 
              | Val Accuracy: {total_acc_val / len(val_data): .3f}''')

In [None]:
def evaluate(model, test_data):

    test = Dataset(test_data)
    test_dataloader = torch.utils.data.DataLoader(test, batch_size=2)
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    if use_cuda:
        model = model.cuda()
    
    trues = []
    preds = []

    with torch.no_grad():
        for test_input, test_label in tqdm(test_dataloader):
              test_label = test_label.to(device)
              mask = test_input['attention_mask'].to(device)
              input_id = test_input['input_ids'].squeeze(1).to(device)
              output = model(input_id, mask)
              true = test_label.cpu().numpy().tolist()
              trues.extend(true)
              pred = output.argmax(dim=1).cpu().numpy().tolist()
              preds.extend(pred)

    f1 = metrics.f1_score(preds, trues, average='macro')
    p = metrics.precision_score(preds, trues, average='macro')
    r = metrics.recall_score(preds, trues, average='macro')
    a = metrics.accuracy_score(preds, trues)

    
    return preds, trues, f1, p, r, a

#3. Data 

## 3.1 English data





In [None]:
# read data
df = pd.read_csv('/content/drive/MyDrive/EM/Episodicity.csv')
df = df[df['diag'] != 'INV']

In [None]:
# English data
df = df[df['Source'] == 'Pitt']

In [None]:
# define label
# labels = {'EP':1, 'NONEP':2, 'OtherOther':3, 'OtherMess': 4, 'OtherInj': 5}
labels = {'EP':0,
       'NONEP':1,
       'OtherOther':2,
       'OtherMess':2,
       'OtherInj':2}

In [None]:
np.random.seed(112)
df_train, df_val, df_test = np.split(df.sample(frac=1, random_state=42), 
                                     [int(.8*len(df)), int(.9*len(df))])

print(len(df_train),len(df_val), len(df_test))

2247 281 281


In [None]:
# train 
EPOCHS = 5
LR = 1e-6
model = BertClassifierEN()
tokenizer = BertTokenizer.from_pretrained('bert-base-cased')

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [None]:
train(model, df_train, df_val, LR, EPOCHS)

  0%|          | 0/1124 [00:00<?, ?it/s]

Epochs: 1 
              | Train Loss:  0.466 
              | Train Accuracy:  0.559 
              | Val Loss:  0.385 
              | Val Accuracy:  0.698


  0%|          | 0/1124 [00:00<?, ?it/s]

Epochs: 2 
              | Train Loss:  0.320 
              | Train Accuracy:  0.762 
              | Val Loss:  0.278 
              | Val Accuracy:  0.811


  0%|          | 0/1124 [00:00<?, ?it/s]

Epochs: 3 
              | Train Loss:  0.207 
              | Train Accuracy:  0.871 
              | Val Loss:  0.215 
              | Val Accuracy:  0.872


  0%|          | 0/1124 [00:00<?, ?it/s]

Epochs: 4 
              | Train Loss:  0.132 
              | Train Accuracy:  0.944 
              | Val Loss:  0.179 
              | Val Accuracy:  0.897


  0%|          | 0/1124 [00:00<?, ?it/s]

Epochs: 5 
              | Train Loss:  0.091 
              | Train Accuracy:  0.960 
              | Val Loss:  0.179 
              | Val Accuracy:  0.886


In [None]:
# test
performance_en = []
num = 0
while num<10: 
  preds, trues, f1, p, r, a = evaluate(model, df_test)
  performance_en.append([f1, p, r, a])
  num += 1
# print(f"""\nTest precision\t{p: .3f}\nTest recall\t{r: .3f}\nTest F1-score\t{f1: .3f}\nTest accuracy\t{a: .3f}""")

  0%|          | 0/141 [00:00<?, ?it/s]

  0%|          | 0/141 [00:00<?, ?it/s]

  0%|          | 0/141 [00:00<?, ?it/s]

  0%|          | 0/141 [00:00<?, ?it/s]

  0%|          | 0/141 [00:00<?, ?it/s]

  0%|          | 0/141 [00:00<?, ?it/s]

  0%|          | 0/141 [00:00<?, ?it/s]

  0%|          | 0/141 [00:00<?, ?it/s]

  0%|          | 0/141 [00:00<?, ?it/s]

  0%|          | 0/141 [00:00<?, ?it/s]

In [None]:
for x in performance_en:
  print(f'{x[0]}\t{x[1]}\t{x[2]}\t{x[3]}\t')

0.9166301693475606	0.9099439285575358	0.9245396566139726	0.9181494661921709	
0.9162940379403794	0.9115027035437818	0.921832915056252	0.9181494661921709	
0.9025438708578243	0.8967633422062177	0.9094220594220594	0.9074733096085409	
0.9085759805105561	0.902349184120686	0.9160753971200695	0.9145907473309609	
0.9113743439504202	0.9048934235070307	0.9191417964596144	0.9145907473309609	
0.9120558395945405	0.905428760370994	0.9208951819489771	0.9181494661921709	
0.9041696467783424	0.895739904083935	0.9148422178641469	0.9110320284697508	
0.9039573855193068	0.8963132146700824	0.9145078026248953	0.9110320284697508	
0.8973084604212055	0.8917128371557127	0.9038843119053145	0.9039145907473309	
0.8992608437314319	0.8922481740196759	0.9084791794907958	0.9074733096085409	


In [None]:
f1, p, r, a = np.mean(np.array(performance_en), axis=0)
print(f"""Test precision\t{p: .3f}\nTest recall\t{r: .3f}\nTest F1-score\t{f1: .3f}\nTest accuracy\t{a: .3f}""")

Test precision	 0.901
Test recall	 0.915
Test F1-score	 0.907
Test accuracy	 0.912


## 3.2 Spanish data




In [None]:
# read data
df = pd.read_csv('/content/drive/MyDrive/EM/Episodicity.csv')
df = df[df['diag'] != 'INV']

In [None]:
# English data
df = df[df['Source'] == 'ACE']

In [None]:
# define label
# labels = {'EP':1, 'NONEP':2, 'OtherOther':3, 'OtherMess': 4, 'OtherInj': 5}
labels = {'EP':0,
       'NONEP':1,
       'OtherOther':2,
       'OtherMess':2,
       'OtherInj':2}

In [None]:
np.random.seed(112)
df_train, df_val, df_test = np.split(df.sample(frac=1, random_state=42), 
                                     [int(.8*len(df)), int(.9*len(df))])

print(len(df_train),len(df_val), len(df_test))

600 75 75


In [None]:
# train 
EPOCHS = 7
LR = 1e-6
model = BertClassifierES()
tokenizer = BertTokenizer.from_pretrained('dccuchile/bert-base-spanish-wwm-cased')

Downloading:   0%|          | 0.00/648 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/419M [00:00<?, ?B/s]

Some weights of the model checkpoint at dccuchile/bert-base-spanish-wwm-cased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint at dccuchile/bert-base-spanish-wwm-cased and are newly initialized: ['bert.pooler.dense.we

Downloading:   0%|          | 0.00/236k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/134 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/364 [00:00<?, ?B/s]

In [None]:
train(model, df_train, df_val, LR, EPOCHS)

  0%|          | 0/300 [00:00<?, ?it/s]

Epochs: 1 
              | Train Loss:  0.480 
              | Train Accuracy:  0.598 
              | Val Loss:  0.460 
              | Val Accuracy:  0.573


  0%|          | 0/300 [00:00<?, ?it/s]

Epochs: 2 
              | Train Loss:  0.401 
              | Train Accuracy:  0.658 
              | Val Loss:  0.397 
              | Val Accuracy:  0.640


  0%|          | 0/300 [00:00<?, ?it/s]

Epochs: 3 
              | Train Loss:  0.322 
              | Train Accuracy:  0.777 
              | Val Loss:  0.369 
              | Val Accuracy:  0.653


  0%|          | 0/300 [00:00<?, ?it/s]

Epochs: 4 
              | Train Loss:  0.237 
              | Train Accuracy:  0.850 
              | Val Loss:  0.358 
              | Val Accuracy:  0.707


  0%|          | 0/300 [00:00<?, ?it/s]

Epochs: 5 
              | Train Loss:  0.176 
              | Train Accuracy:  0.887 
              | Val Loss:  0.339 
              | Val Accuracy:  0.760


  0%|          | 0/300 [00:00<?, ?it/s]

Epochs: 6 
              | Train Loss:  0.133 
              | Train Accuracy:  0.915 
              | Val Loss:  0.315 
              | Val Accuracy:  0.787


  0%|          | 0/300 [00:00<?, ?it/s]

Epochs: 7 
              | Train Loss:  0.109 
              | Train Accuracy:  0.928 
              | Val Loss:  0.354 
              | Val Accuracy:  0.787


In [None]:
# test
performance_es = []
num = 0
while num<10: 
  preds, trues, f1, p, r, a = evaluate(model, df_test)
  performance_es.append([f1, p, r, a])
  num += 1

  0%|          | 0/38 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

  0%|          | 0/38 [00:00<?, ?it/s]

In [None]:
for x in performance_es:
  print(f'{x[0]}\t{x[1]}\t{x[2]}\t{x[3]}\t')

0.7230158730158731	0.6789321789321789	0.8738615664845173	0.7466666666666667	
0.7374296435272045	0.6908369408369408	0.8799145299145299	0.76	
0.7513330251194329	0.7027417027417027	0.8857949959644875	0.7733333333333333	
0.7230158730158731	0.6789321789321789	0.8738615664845173	0.7466666666666667	
0.6348044909020519	0.5797258297258298	0.8759983186212694	0.7466666666666667	
0.7513330251194329	0.7027417027417027	0.8857949959644875	0.7733333333333333	
0.7513330251194329	0.7027417027417027	0.8857949959644875	0.7733333333333333	
0.7080470891791647	0.6670274170274171	0.8675464320625611	0.7333333333333333	
0.8067724366564054	0.801948051948052	0.8839634941329857	0.7733333333333333	
0.7374296435272045	0.6908369408369408	0.8799145299145299	0.76	


In [None]:
f1, p, r, a = np.mean(np.array(performance_es), axis=0)
print(f"""Test precision\t{p: .3f}\nTest recall\t{r: .3f}\nTest F1-score\t{f1: .3f}\nTest accuracy\t{a: .3f}""")

Test precision	 0.690
Test recall	 0.879
Test F1-score	 0.732
Test accuracy	 0.759
