<a href="https://colab.research.google.com/github/Shanthi17/Recognizing-human-values-in-arguments/blob/main/Human_Values_in_Arguments.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Identifying the human values behind arguments

In [None]:
# Mount Google Drive at /content/gdrive so the project files stored there are reachable.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
cd 'gdrive/MyDrive/Colab Notebooks/NLP/Final Project'

/content/gdrive/MyDrive/Colab Notebooks/NLP/Final Project


In [None]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.25.1-py3-none-any.whl (5.8 MB)
[K     |████████████████████████████████| 5.8 MB 17.6 MB/s 
Collecting huggingface-hub<1.0,>=0.10.0
  Downloading huggingface_hub-0.11.1-py3-none-any.whl (182 kB)
[K     |████████████████████████████████| 182 kB 78.7 MB/s 
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[K     |████████████████████████████████| 7.6 MB 54.2 MB/s 
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.11.1 tokenizers-0.13.2 transformers-4.25.1


## Import Libraries

In [None]:
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer, BertModel
from torch.utils.data import DataLoader, Dataset, TensorDataset
from torch.autograd import Variable
import numpy as np
from sklearn.metrics import f1_score

## Load Level-2 data from the Human Values Dataset
The training and validation splits are loaded separately from the provided TSV files.

In [None]:
# Training split: the argument texts and their labels live in separate TSV files
# and are joined on the shared 'Argument ID' column.
df_train_args = pd.read_csv(
    "final-data/arguments-training.tsv",
    sep='\t',
    encoding='utf-8',
)
df_train_labels = pd.read_csv(
    "final-data/labels-training.tsv",
    sep='\t',
    encoding='utf-8',
)

df_train = pd.merge(left=df_train_args, right=df_train_labels, on='Argument ID')

In [None]:
# Validation split: the argument texts and their labels live in separate TSV files
# and are joined on the shared 'Argument ID' column.
df_valid_args = pd.read_csv(
    "final-data/arguments-validation.tsv",
    sep='\t',
    encoding='utf-8',
)
df_valid_labels = pd.read_csv(
    "final-data/labels-validation.tsv",
    sep='\t',
    encoding='utf-8',
)

df_valid = pd.merge(left=df_valid_args, right=df_valid_labels, on='Argument ID')

In [None]:
# Sanity check: show (rows, columns) of the merged training and validation frames.
df_train.shape, df_valid.shape

((5393, 24), (1896, 24))

## Load BERT tokenizer to calculate the maximum length of sentences

In [None]:
# # Load the BERT tokenizer

# tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)

# bert_model = BertModel.from_pretrained("bert-base-uncased")
# if torch.cuda.is_available():
#     bert_model = bert_model.to('cuda')

In [None]:
# max_len = 0

# for sent in conclusion_text:
#     input_ids = tokenizer.encode(sent, add_special_tokens=True)
#     max_len = max(max_len, len(input_ids))

# print('Max sentence length: ', max_len)

# Creating textual representations

1. Load the BERT base uncased tokenizer
2. Load BERT base uncased model
3. Create Dataset class for Dataloader



In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Pretrained 'bert-base-uncased' tokenizer (lower-casing enabled) and encoder;
# the encoder is moved onto `device`.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)
bert_model = BertModel.from_pretrained("bert-base-uncased").to(device)

Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


## Creating textual representation of the data
1. Create textual representation of Conclusion with BERT tokenizer and BERT model.
2. Create textual representation for Premise just like Conclusion
3. Stance takes only two values, 'in favor of' or 'against', so it is encoded as a single binary feature.

In [None]:
class MyDataset(Dataset):
    """Dataset that turns one argument into a BERT feature vector plus its labels.

    Each item is a pair ``(features, labels)``:

    * ``features`` is the concatenation, along dimension 1, of the BERT pooled
      output for the conclusion, a single stance flag, and the BERT pooled
      output for the premise.
    * ``labels`` is the row of label columns for that argument as a float tensor.

    Args:
        df_data: DataFrame with 'Conclusion', 'Stance' and 'Premise' columns;
            every column from the fifth one onwards is treated as a label.
        device: torch device the tokenized inputs and returned tensors live on.
        tokenizer: tokenizer called on the raw text.
        bert_model: encoder whose ``pooler_output`` is used as the representation.
    """

    def __init__(self,df_data,device=device, tokenizer=tokenizer, bert_model=bert_model):
        self.conclusion = df_data['Conclusion'].values
        self.stance = df_data['Stance'].values
        self.premise = df_data['Premise'].values
        # Everything after the first four columns is a label column.
        self.y = df_data.iloc[:,4:].values
        self.device = device
        self.tokenizer = tokenizer
        self.bert_model = bert_model

    def __len__(self):
        return len(self.y)

    def _encode(self, text, max_length):
        """Tokenize `text` (padded/truncated to `max_length`) and return BERT's pooled output."""
        tokens = self.tokenizer(
            text,
            max_length=max_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt',
        ).to(self.device)
        return self.bert_model(**tokens).pooler_output

    def __getitem__(self,idx):
        # The encoder is only used as a fixed feature extractor here, so skip
        # autograd bookkeeping: no graph is built or kept alive per item.
        with torch.no_grad():
            conclusion = self._encode(self.conclusion[idx], 32)
            premise = self._encode(self.premise[idx], 128)

        # Stance as a (1, 1) tensor: 1 for 'in favor of', 0 for anything else.
        stance = torch.tensor(
            [[1.0 if self.stance[idx] == 'in favor of' else 0.0]],
            device=self.device,
        )

        features = torch.cat([conclusion, stance, premise], dim=1)
        labels = torch.Tensor(self.y[idx]).to(self.device)
        return features, labels

## Save representations to file
Create a Dataset object for each split and wrap it in a DataLoader for batch-wise loading. The final representations are saved to NumPy (`.npy`) files.

In [None]:
# Wrap each merged DataFrame in a MyDataset, using the module-level device, tokenizer and BERT model.
train_dataset = MyDataset(df_train)
valid_dataset = MyDataset(df_valid)

In [None]:
# Batches of 8 items; shuffling is not requested, so rows are visited in DataFrame order.
train_dataloader = DataLoader(train_dataset, batch_size=8)
valid_dataloader = DataLoader(valid_dataset, batch_size=8)

In [None]:
# Collect the features and labels of every validation batch as NumPy arrays.
x = []
y = []

# Pure feature extraction: disable autograd so the BERT forward passes triggered
# by the dataloader do not build a computation graph for every batch.
with torch.no_grad():
    for batch in valid_dataloader:
        data, label = batch
        x.append(data.detach().cpu().numpy())
        y.append(label.detach().cpu().numpy())

In [None]:
# NOTE(review): only the validation split is written by this cell as it stands.
# The train_*.npy files are loaded later in this notebook, so to regenerate them
# the extraction loop has to be run over train_dataloader and the two lines below
# used instead of the validation ones (x and y are reused for both splits).
# np.save('final-data/train_data.npy', np.concatenate(x))
# np.save('final-data/train_label.npy', np.concatenate(y))

# Stack the per-batch arrays along the first axis and write them to disk.
np.save('final-data/valid_data.npy', np.concatenate(x))
np.save('final-data/valid_label.npy', np.concatenate(y))

# Load the Data 

The dataset is imbalanced when each label is considered on its own, so before training each binary classifier we balance the data for that label.


In [None]:
import os
from keras.models import Sequential, load_model
from keras.layers import Dropout, Dense, LSTM, GRU, RNN
from keras.utils import to_categorical

from keras.callbacks import ModelCheckpoint
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import f1_score
from keras.optimizers import SGD

In [None]:
# Precomputed training features and labels (presumably written by the np.save
# calls in the feature-extraction section — confirm the files exist before running).
train_data = np.load('final-data/train_data.npy')
train_label = np.load('final-data/train_label.npy')

In [None]:
# Precomputed validation features and labels saved by the feature-extraction section.
valid_data = np.load('final-data/valid_data.npy')
valid_label = np.load('final-data/valid_label.npy')

In [None]:
def get_data(data, label, label_number):
    """Build a class-balanced subset of (data, label) for a single label column.

    Rows are grouped by whether column `label_number` of `label` equals 1 or 0.
    From each group the first `k` rows are kept, where `k` is the size of the
    smaller group; the positive rows come first, followed by the negative rows.

    Args:
        data: array of samples, indexed along its first axis.
        label: 2-D array of labels with one column per label.
        label_number: index of the label column to balance on.

    Returns:
        A pair (balanced_data, balanced_label): the selected rows of `data`, and
        the matching values of the chosen column as a float32 array of shape
        (2 * k, 1).
    """
    target = label[:, label_number]
    positives = np.where(target == 1)[0]
    negatives = np.where(target == 0)[0]

    # Undersample the larger class by truncating it to the size of the smaller one.
    k = min(len(positives), len(negatives))
    keep = np.concatenate((positives[:k], negatives[:k]))

    return data[keep], target[keep].astype('float32').reshape((-1,1))


# Train the model
We create 20 separate binary classifiers, one per label. Each classifier is trained on the balanced data for its own label, and the classifiers are combined at the end. Together, the ensemble of these 20 models returns the prediction for a data point.

In [None]:
# Optimizer
# SGD with Nesterov momentum.
# NOTE(review): the training log further down shows val_loss moving only from
# 0.70143 to 0.69985 over eight epochs, so this learning rate may be too small — confirm.
sgd = SGD(learning_rate=0.00001, momentum=0.8, nesterov=True)

In [None]:
def get_model():
    """Build and compile one binary classifier.

    The network takes inputs of shape (1, 1537) — a single timestep of 1537
    features — and stacks an LSTM layer, three ReLU dense layers (with dropout
    after the first two) and a one-unit sigmoid output. It is compiled with
    binary cross-entropy loss and an accuracy metric.

    Returns:
        A freshly initialised, compiled Keras `Sequential` model.
    """
    model = Sequential()
    model.add(LSTM(300, input_shape = (1, 1537)))
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.3))
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.3))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    # Every model gets its own optimizer instance, cloned from the settings of
    # the module-level `sgd`. Reusing one instance across models would share its
    # internal state (step counter, momentum slots) between unrelated networks,
    # and newer Keras releases refuse an optimizer that was already built for a
    # different model's variables.
    optimizer = type(sgd).from_config(sgd.get_config())

    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    return model

In [None]:
batch_size=128
n_epochs = 30
models = []
histories = []

# ModelCheckpoint writes into this folder; create it up front so a fresh run
# does not depend on the directory already being there.
os.makedirs("Models", exist_ok=True)

# Train one binary classifier per label column (20 for this dataset); the count
# is taken from the label array instead of being hard-coded.
for i in range(train_label.shape[1]):
    # Balanced training and validation subsets for label column i.
    x_train, y_train = get_data(train_data, train_label, i)
    x_val, y_val =  get_data(valid_data, valid_label, i)
    print("--------------------------------------------------------------------------------------------")
    print(x_train.shape)
    print("--------------------------------------------------------------------------------------------")
    model = get_model()

    # Early stopping on validation loss; restore_best_weights keeps the
    # in-memory model (the one appended to `models` and evaluated later)
    # consistent with the best checkpoint written to disk.
    early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    # Checkpoint files are numbered from 1, i.e. label column i -> model_binary_{i+1}.h5.
    fpath = "Models/model_binary_{}.h5".format(i+1)
    checkpoint = ModelCheckpoint(filepath=fpath, monitor="val_loss",
                                verbose=1, 
                                save_best_only=True,
                                mode="min")
    callbacks = [checkpoint,early_stop]

    history = model.fit(x_train, y_train, batch_size=batch_size, epochs=n_epochs, validation_data=(x_val, y_val), callbacks=callbacks)

    histories.append(history)
    models.append(model)

--------------------------------------------------------------------------------------------
(1976, 1, 1537)
--------------------------------------------------------------------------------------------
Epoch 1/30
Epoch 1: val_loss improved from inf to 0.70143, saving model to Models/model_binary_1.h5
Epoch 2/30
Epoch 2: val_loss improved from 0.70143 to 0.70118, saving model to Models/model_binary_1.h5
Epoch 3/30
Epoch 3: val_loss improved from 0.70118 to 0.70094, saving model to Models/model_binary_1.h5
Epoch 4/30
Epoch 4: val_loss improved from 0.70094 to 0.70072, saving model to Models/model_binary_1.h5
Epoch 5/30
Epoch 5: val_loss improved from 0.70072 to 0.70049, saving model to Models/model_binary_1.h5
Epoch 6/30
Epoch 6: val_loss improved from 0.70049 to 0.70027, saving model to Models/model_binary_1.h5
Epoch 7/30
Epoch 7: val_loss improved from 0.70027 to 0.70006, saving model to Models/model_binary_1.h5
Epoch 8/30
Epoch 8: val_loss improved from 0.70006 to 0.69985, saving mode

In [None]:
# Uncomment to load the saved models
# NOTE(review): os.listdir returns entries in arbitrary order, and the file names
# written during training (model_binary_1.h5 ... model_binary_20.h5) do not sort
# numerically as strings, while calc_f1 / calc_acc pair models[i] with label
# column i. Before relying on this cell, load by index instead, e.g.
# load_model(os.path.join(models_path, "model_binary_{}.h5".format(i + 1))) for each i.

# models=[]
# models_path = "Models"
# for f in os.listdir(models_path):
#     models.append(load_model(os.path.join(models_path, f)))

In [None]:
def calc_f1(models, x_val, y_val):
    """Return the F1 score of every binary classifier on the given data.

    Args:
        models: sequence of trained models; models[i] is scored against
            column i of `y_val`.
        x_val: features, passed unchanged to each model's `predict`.
        y_val: 2-D array of ground-truth labels, one column per model.

    Returns:
        List of F1 scores, in the same order as `models`.
    """
    f1_scores = []
    for i, model in enumerate(models):
        # Threshold the predicted probabilities at 0.5 and flatten the (n, 1)
        # prediction column to the 1-D shape of the label column.
        y_pred = (model.predict(x_val)>0.5).astype(int).ravel()
        # scikit-learn's signature is f1_score(y_true, y_pred): ground truth first.
        f1_scores.append(f1_score(y_val[:,i], y_pred))

    return f1_scores

In [None]:
# Per-label F1 on the full (unbalanced) validation arrays loaded from disk.
f1_scores = calc_f1(models, valid_data, valid_label)

In [None]:
f1_scores

[0.4389157985238512,
 0.3939857923520334,
 0.1070811744386874,
 0.10225563909774436,
 0.4611590628853268,
 0.11057692307692307,
 0.1301775147928994,
 0.5823985729572054,
 0.002600780234070221,
 0.21063394683026587,
 0.1663442940038685,
 0.3870693321990642,
 0.06134969325153375,
 0.12288786482334871,
 0.4673216574113627,
 0.2229580239523595,
 0.5052192066805846,
 0.1271745650869826,
 0.20869565217391303,
 0.2978494623655914]

In [None]:
np.mean(f1_scores)

0.2553327478568808

In [None]:
def calc_acc(models, x_val, y_val):
    """Evaluate every model on its own label column and collect the results.

    For models[i], `evaluate` is called with `x_val` and column i of `y_val`,
    and element 1 of its result is kept (the accuracy, for models compiled
    with a single 'accuracy' metric).

    Returns:
        A pair (per_model, mean): the list of per-model values and their mean.
    """
    per_model = [
        model.evaluate(x_val, y_val[:, col])[1]
        for col, model in enumerate(models)
    ]
    return per_model, np.mean(per_model)

In [None]:
# Per-label accuracy and its mean on the full validation arrays loaded from disk.
accs, avg = calc_acc(models, valid_data, valid_label)



In [None]:
avg

0.41249999925494196