In [None]:
# Run the `nvidia-smi` shell command and show its output in the cell.
!nvidia-smi

In [None]:
# # Cloud Storage
# from google.cloud import storage
# storage_client = storage.Client(project='YOUR PROJECT ID')

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere below the Kaggle input directory.
for root_dir, _subdirs, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        full_path = os.path.join(root_dir, file_name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# import pandas as pd
# import ast

# import torch
# from tqdm.notebook import tqdm

# from transformers import BertTokenizer
# from torch.utils.data import TensorDataset

# from transformers import BertForSequenceClassification

In [None]:
import wandb

import torch
import torch.nn as nn
from transformers.file_utils import is_tf_available, is_torch_available, is_torch_tpu_available
from transformers import BertTokenizer, BertTokenizerFast, BertForSequenceClassification, BertModel
from transformers import Trainer, TrainingArguments, EarlyStoppingCallback
import numpy as np
import random
from sklearn.model_selection import train_test_split

In [None]:
# Experiment configuration shared by the rest of the notebook.
LEVEL = 'post'            # granularity tag; used in the run name, the input file name and the metrics
epochs = 5                # passed to the trainer as the number of training epochs
valid_strategy = 1        # selects the `{valid_strategy}_fold` column used for cross-validation
column_used = 'text'      # name of the dataframe column fed to the tokenizer

# One W&B run for the whole notebook; the run name encodes the configuration above.
wandb_run_name = f"{LEVEL}_level-valid_strategy_{valid_strategy}-column_used_{column_used}"
wandb.init(project="bmnlp-project", name=wandb_run_name, entity="andreig")

# Print metrics every time they are computed.
verbose = True

# Use the GPU when one is visible, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [None]:
def set_seed(seed: int):
    """
    Seed every random number generator this notebook may touch.

    ``random`` and ``numpy`` are always seeded. ``torch`` (CPU and all CUDA devices)
    and ``tensorflow`` are seeded only when the corresponding availability check
    reports them as installed.

    Args:
        seed (:obj:`int`): The seed to set.
    """
    seeders = [random.seed, np.random.seed]
    if is_torch_available():
        # manual_seed_all is safe to call even when CUDA is not available
        seeders.extend([torch.manual_seed, torch.cuda.manual_seed_all])
    for apply_seed in seeders:
        apply_seed(seed)

    if is_tf_available():
        # Imported lazily so the notebook does not require tensorflow.
        import tensorflow as tf

        tf.random.set_seed(seed)


set_seed(1)

In [None]:
# Class names listed in the order of their integer ids (position == id).
_LABEL_NAMES = ("Supportive", "Indicator", "Ideation", "Behavior", "Attempt")

# name -> id
labels_map = {name: idx for idx, name in enumerate(_LABEL_NAMES)}
# id -> name, derived from the forward map so the two can never disagree
labels_map_inverse = {idx: name for name, idx in labels_map.items()}

class SuicideDataset(torch.utils.data.Dataset):
    """Torch dataset pairing tokenizer encodings with integer class ids.

    Args:
        encodings: mapping of field name -> indexable collection with one entry per sample
            (only ``.items()`` and integer indexing are used).
        labels: iterable of label names; each one is translated through ``labels_map``,
            so an unknown name raises ``KeyError``.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        label_ids = []
        for label_name in labels:
            label_ids.append(labels_map[label_name])
        self.labels = label_ids

    def __getitem__(self, idx):
        sample = {}
        for field, rows in self.encodings.items():
            sample[field] = torch.tensor(rows[idx])
        # The label is wrapped in a list, so each sample carries a tensor of shape (1,).
        sample["labels"] = torch.tensor([self.labels[idx]])
        return sample

    def __len__(self):
        return len(self.labels)

In [None]:
class BertClassifier(nn.Module):
    """BERT encoder followed by dropout, a linear head and a ReLU.

    Args:
        model_name: checkpoint identifier handed to ``BertModel.from_pretrained``.
        num_labels: number of output units of the linear head.
        dropout: dropout probability applied to the pooled encoder output.
    """

    def __init__(self, model_name, num_labels, dropout=0.5):

        super().__init__()

        self.bert = BertModel.from_pretrained(model_name)
        self.dropout = nn.Dropout(dropout)
        # Size the head from the loaded encoder instead of assuming 768, so checkpoints
        # with a different hidden size work as well.
        self.linear = nn.Linear(self.bert.config.hidden_size, num_labels)
        self.relu = nn.ReLU()

    def forward(self, input_id, mask):
        """Return the ReLU-activated head output for a batch of token ids and attention mask."""

        # With return_dict=False the encoder returns a tuple; the second element is the pooled output.
        _, pooled_output = self.bert(input_ids=input_id, attention_mask=mask, return_dict=False)
        dropout_output = self.dropout(pooled_output)
        linear_output = self.linear(dropout_output)
        # NOTE(review): ReLU clamps negative scores to zero before they leave the model;
        # kept for backward compatibility - confirm this is intended if the output is
        # used as logits for a cross-entropy loss.
        final_layer = self.relu(linear_output)

        return final_layer

In [None]:
def metric_evaluation(results: np.ndarray, level: str = "post", verbose: bool = False) -> tuple:
    """
    Metric Evaluation function, returns accuracy, precision, recall, ordinal error (check paper) at user or post level

    Counting rules (labels and predictions are compared as ordered values):
      * tp: label == prediction
      * fp: label  < prediction
      * fn: label  > prediction
      * oe: (label - prediction) > 1

    accuracy = tp / n, ordinal_error = oe / n, precision = tp / (tp + fp), recall = tp / (tp + fn),
    each rounded to 3 decimals. A ratio whose denominator is zero (empty input, or no
    tp/fp resp. tp/fn at all) is reported as 0.0 instead of NaN / ZeroDivisionError.

    :param results: numpy array of shape (test_size, 3), results[:, 0] -> user_id, results[:, 1] -> labels, results[:, 2] -> prediction,
    :param level  : 'post' or 'user', if 'user' we group predictions by user_id and select the most voted prediction
                    (ties go to the smallest predicted value; the user's label is taken from its first row)
    :param verbose: if True display metrics before return
    :return: (accuracy, precision, recall, ordinal_error)
    """
    assert level in ["post", "user"], "Level should be in ['post', 'user']"

    def safe_ratio(numerator, denominator):
        # Rounded ratio that degrades to 0.0 when there is nothing to divide by.
        if denominator == 0:
            return np.round(0.0, 3)
        return np.round(numerator / denominator, 3)

    if level == "user":
        frame = pd.DataFrame(results, columns=['user', 'labels', 'predictions'])
        labels, predictions = [], []
        for _, group in frame.groupby("user"):
            # Majority vote over the user's posts; np.unique sorts, so argmax picks the
            # smallest value among equally frequent predictions.
            values, counts = np.unique(group["predictions"].to_numpy(), return_counts=True)
            labels.append(group["labels"].iloc[0])
            predictions.append(values[np.argmax(counts)])

        labels = np.array(labels)
        predictions = np.array(predictions)
    else:
        results = np.asarray(results)
        labels, predictions = results[:, 1], results[:, 2]

    # Shared by both levels: count outcomes once, then derive the four ratios.
    n_samples = labels.shape[0]
    tp = int(np.sum(labels == predictions))
    fp = int(np.sum(labels < predictions))
    fn = int(np.sum(labels > predictions))
    oe = int(np.sum((labels - predictions) > 1))

    accuracy = safe_ratio(tp, n_samples)
    ord_error = safe_ratio(oe, n_samples)
    precision = safe_ratio(tp, tp + fp)
    recall = safe_ratio(tp, tp + fn)

    if verbose: print(f"[Level: {level}] Accuracy: {accuracy}, Precision: {precision}, Recall: {recall}, Ordinal Error: {ord_error}")
    return (accuracy, precision, recall, ord_error)

In [None]:
# from sklearn.metrics import accuracy_score

def compute_metrics(pred):
    """Score one evaluation pass with ``metric_evaluation`` at the configured ``LEVEL``.

    ``pred`` must expose ``label_ids`` and ``predictions``; the predicted class is the
    argmax over the last axis of ``predictions``.

    NOTE(review): the user ids are read from a global ``valid_df`` that is not defined
    anywhere in the visible notebook, so calling this function as-is raises
    ``NameError``. ``make_compute_metrics`` binds the user ids explicitly instead.
    """
    true_ids = pred.label_ids
    predicted_ids = pred.predictions.argmax(-1)

    # One row per sample: (user, label, prediction); object dtype keeps the user ids untouched.
    stacked = (valid_df.user.values, np.squeeze(true_ids), predicted_ids)
    results = np.array(stacked, dtype=object).T

    accuracy, precision, recall, ord_error = metric_evaluation(results, level=LEVEL, verbose=verbose)
    return dict(accuracy=accuracy, precision=precision, recall=recall, ord_error=ord_error)

def make_compute_metrics(valid_users):
    """Build a metrics callback bound to the user ids of one validation split.

    ``valid_users`` must have one entry per evaluated sample and be convertible to
    float, because the ids are written into a float array. The returned callable
    takes an object exposing ``label_ids`` and ``predictions`` and returns a dict
    with the keys 'accuracy', 'precision', 'recall' and 'ord_error'.
    """
    def compute_metrics(pred):
        true_ids = pred.label_ids
        predicted_ids = pred.predictions.argmax(-1)

        # Columns: 0 -> user id, 1 -> label, 2 -> predicted class.
        table = np.zeros((len(valid_users), 3))
        for col_idx, column in enumerate((valid_users, np.squeeze(true_ids), predicted_ids)):
            table[:, col_idx] = column

        accuracy, precision, recall, ord_error = metric_evaluation(table, level=LEVEL, verbose=verbose)
        return dict(accuracy=accuracy, precision=precision, recall=recall, ord_error=ord_error)

    return compute_metrics

In [None]:
# Load the pre-processed CSV whose file name matches the configured LEVEL.
# Alternative locations (commented out):
# datapath = f'data/suicide_squad.csv'
# datapath = '/kaggle/input/suicide-squad/suicide_squad.csv'
datapath = f"/kaggle/input/suicide-squad-processed/suicide_{LEVEL}_preprocessed.csv"
dataset = pd.read_csv(datapath)
# Preview the first rows as the cell output.
dataset.head()

In [None]:
# The checkpoint to fine-tune: base cased BERT.
# NOTE(review): this comment previously said "uncased" while the value is the cased
# checkpoint - confirm which variant is intended.
# check text classification models here: https://huggingface.co/models?filter=text-classification
model_name = "bert-base-cased"
# max sequence length for each document/sentence sample
max_length = 512

In [None]:
# 5-fold cross-validation: fine-tune a fresh classifier per fold and collect the
# out-of-fold (user, label, prediction) triples for a final pooled evaluation.
columns = ['user', column_used, 'label']
fold_column = f'{valid_strategy}_fold'
n_folds = 5
oof_users, oof_labels, oof_predictions = [], [], []

# The tokenizer does not depend on the fold, so it is loaded once instead of per iteration.
# NOTE(review): `do_lower_case=True` is applied regardless of `model_name`; confirm that
# lower-casing is intended if a cased checkpoint is configured.
tokenizer = BertTokenizerFast.from_pretrained(model_name, do_lower_case=True)

# Log / checkpoint / evaluate every N optimizer steps; a shorter interval is used at user level.
logging_steps = save_steps = eval_steps = 100 if LEVEL == 'user' else 500

for fold in range(n_folds):
    # Rows whose fold id equals `fold` are held out; everything else is used for training.
    is_valid_row = dataset[fold_column] == fold
    X_train = dataset[~is_valid_row][columns]
    X_valid = dataset[is_valid_row][columns]
    y_valid = X_valid['label'].values

    print("Label Distribution: {} => {}".format(*np.unique(y_valid, return_counts = True)))
    print(f"Train Samples: {X_train.shape[0]}, Valid Sample: {X_valid.shape[0]}")
    print(f"Train Users: {np.unique(X_train['user'])[:18]}")
    print(f"Valid Users: {np.unique(X_valid['user'])[:18]}")
    print()

    train_texts = list(X_train[column_used].values)
    val_texts = list(X_valid[column_used].values)

    train_labels = list(X_train.label.values)
    valid_labels = list(X_valid.label.values)

    valid_users = list(X_valid.user.values)

    # Truncate to the configured maximum length and pad each split to its longest sequence.
    train_encodings = tokenizer(train_texts, truncation=True, padding=True, max_length=max_length)
    valid_encodings = tokenizer(val_texts, truncation=True, padding=True, max_length=max_length)

    # convert our tokenized data into a torch Dataset
    train_dataset = SuicideDataset(train_encodings, train_labels)
    valid_dataset = SuicideDataset(valid_encodings, valid_labels)

    # Size the head from the full label vocabulary: class ids come from `labels_map`, so
    # counting only the classes present in this fold's training split could produce a head
    # that is too small for the ids it is asked to predict.
    model = BertForSequenceClassification.from_pretrained(model_name, num_labels=len(labels_map)).to(device)

    # Metrics callback bound to this fold's validation users.
    metric = make_compute_metrics(valid_users)

    training_args = TrainingArguments(
        output_dir='./results',          # output directory
        num_train_epochs=epochs,              # total number of training epochs
        per_device_train_batch_size=16,  # batch size per device during training
        per_device_eval_batch_size=16,   # batch size for evaluation
        warmup_steps=500,                # number of warmup steps for learning rate scheduler
        weight_decay=0.01,               # strength of weight decay
        logging_dir='./logs',            # directory for storing logs
        load_best_model_at_end=True,     # load the best model when finished training
        metric_for_best_model='accuracy',  # best checkpoint = highest validation accuracy
        greater_is_better=True,
        logging_steps=logging_steps,               # log & save weights each logging_steps
        save_steps=save_steps,
        eval_steps=eval_steps,
        evaluation_strategy="steps",     # evaluate each `eval_steps`
        fp16=(device.type == "cuda"),    # mixed precision only when running on a CUDA device
        report_to="wandb",
        run_name=f"{LEVEL}_level-valid_strategy_{valid_strategy}-column_used_{column_used}"  # name of the W&B run (optional)
    )

    trainer = Trainer(
        model=model,                         # the instantiated Transformers model to be trained
        args=training_args,                  # training arguments, defined above
        train_dataset=train_dataset,         # training dataset
        eval_dataset=valid_dataset,          # evaluation dataset
        compute_metrics=metric,     # the callback that computes metrics of interest
        callbacks = [EarlyStoppingCallback(early_stopping_patience=3)],
    )

    # train the model
    trainer.train()

    # evaluate the current model after training
    trainer.evaluate()

    # Out-of-fold predictions for this fold's validation split.
    results_valid = trainer.predict(valid_dataset)
    y_labels = results_valid.label_ids
    y_preds = results_valid.predictions.argmax(-1)
    oof_users.extend(valid_users)
    # reshape(-1) flattens the (n, 1) label column and, unlike squeeze, stays iterable for n == 1.
    oof_labels.extend(np.asarray(y_labels).reshape(-1))
    oof_predictions.extend(y_preds)


# Pool the out-of-fold triples: column 0 -> user id, 1 -> label, 2 -> prediction.
all_results = np.zeros((len(oof_users), 3))
all_results[:, 0] = oof_users
all_results[:, 1] = oof_labels
all_results[:, 2] = oof_predictions

In [None]:
# Score the pooled out-of-fold predictions of all folds at the configured LEVEL.
accuracy, precision, recall, ord_error = metric_evaluation(all_results, level=LEVEL, verbose=verbose)

In [None]:
# Summary record: the four pooled metrics together with the configuration that produced them.
results_json = dict(
    accuracy=accuracy,
    precision=precision,
    recall=recall,
    ord_error=ord_error,
    level=LEVEL,
    valid_strategy=valid_strategy,
    column_used=column_used,
    epochs=epochs,
)

In [None]:
# Display the summary record as the cell output.
results_json