In [2]:
# Import packages
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import wandb
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import transformers
from transformers import AutoModelForSequenceClassification, AutoTokenizer, TrainingArguments, Trainer
from datasets import Dataset, DatasetDict, load_metric
     




# Load Dataset

In [3]:
# Load dataset
df = pd.read_csv('suicide_detection_final_cleaned.csv', header=0, names=['text', 'label', 'cleaned_text'])
df['label'] = df['label'].map({'suicide': 1, 'non-suicide': 0})
df.drop(columns=['cleaned_text'], inplace = True)
df.head()

Unnamed: 0,text,label
0,Ex Wife Threatening SuicideRecently I left my ...,1
1,Am I weird I don't get affected by compliments...,0
2,Finally 2020 is almost over... So I can never ...,0
3,i need helpjust help me im crying so hard,1
4,It ends tonight.I can’t do it anymore. \nI quit.,1


In [4]:
# Define constants
EPOCHS = 1
BATCH_SIZE = 6
LEARNING_RATE = 1e-5
SEED = 90

MODEL_SAVE_PATH = "Models/RoBERTa"
MODEL_CHECKPOINT_PATH = "Models/RoBERTa_checkpoint"
MODEL_LOGGING_PATH = "Models/RoBERT_checkpoint/logs"

#WANDB_ENTITY = "irshad-shariq-liverpool-john-moores-university"
#WANDB_PROJECT = "suicide-detection"
#WANDB_RUN = "bert"

In [5]:
# Split dataset into train, validation and test sets
train, temp = train_test_split(df,
                               random_state=SEED,
                               test_size=0.2,
                               stratify=df['label'])

val, test = train_test_split(temp,
                             random_state=SEED,
                             test_size=0.5,
                             stratify=temp['label'])

In [6]:
# Load RoBERTa tokenizer
tokenizer = AutoTokenizer.from_pretrained("j-hartmann/emotion-english-distilroberta-base")

In [7]:
def dataset_conversion(train, test, val):
  """Converts pandas dataframe to Dataset."""

  train.reset_index(drop=True, inplace=True)
  test.reset_index(drop=True, inplace=True)
  val.reset_index(drop=True, inplace=True)

  train_dataset = Dataset.from_pandas(train)
  test_dataset = Dataset.from_pandas(test)
  val_dataset = Dataset.from_pandas(val)

  return DatasetDict({"train": train_dataset,
                      "test": test_dataset,
                      "val": val_dataset})

raw_datasets = dataset_conversion(train, test, val)

In [8]:
def tokenize_function(dataset):
    return tokenizer(dataset["text"], padding="max_length", truncation=True)

tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)

Map:   0%|          | 0/140523 [00:00<?, ? examples/s]

Map:   0%|          | 0/17566 [00:00<?, ? examples/s]

Map:   0%|          | 0/17565 [00:00<?, ? examples/s]

In [9]:
# Tokenise datasets
#SAMPLE_SIZE = 20
#small_train_dataset = tokenized_datasets["train"].shuffle(seed=SEED).select(range(SAMPLE_SIZE))
#small_test_dataset = tokenized_datasets["test"].shuffle(seed=SEED).select(range(SAMPLE_SIZE))
#small_val_dataset = tokenized_datasets["val"].shuffle(seed=SEED).select(range(SAMPLE_SIZE))

full_train_dataset = tokenized_datasets["train"]
full_test_dataset = tokenized_datasets["test"]
full_val_dataset = tokenized_datasets["val"]

In [10]:
# Import RoBERTa -base pretrained model
model = AutoModelForSequenceClassification.from_pretrained("j-hartmann/emotion-english-distilroberta-base", num_labels=7)

In [11]:
# Define custom metrics for computation

In [12]:
#!pip install evaluate

In [13]:
from evaluate import load 
def compute_metrics(eval_pred):
    metric_acc = load("accuracy")
    metric_rec = load("recall")
    metric_pre = load("precision")
    metric_f1 = load("f1")

    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)

    accuracy = metric_acc.compute(predictions=predictions, references=labels)["accuracy"]
    recall = metric_rec.compute(predictions=predictions, references=labels, average='macro',zero_division=0)["recall"]
    precision = metric_pre.compute(predictions=predictions, references=labels, average='macro',zero_division=0)["precision"]
    f1 = metric_f1.compute(predictions=predictions, references=labels, average='macro')["f1"]

    return {
        "accuracy": accuracy,
        "recall": recall,
        "precision": precision,
        "f1": f1,
    }


In [14]:
# Define model and training parameters
training_args = TrainingArguments(
    output_dir=MODEL_CHECKPOINT_PATH,
    overwrite_output_dir = True,
    eval_strategy="epoch",
    learning_rate=LEARNING_RATE,
    num_train_epochs=EPOCHS,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    seed=SEED,
    logging_dir=MODEL_LOGGING_PATH,
    save_strategy="steps",
    save_steps=1500,
    report_to=["none"]
)
# 
    #run_name=WANDB_RUN,
   #report_to = 'wandb',
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=full_train_dataset,
    eval_dataset=full_val_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics)

# PRE-Trained RoBERTa

In [15]:
# Predict before fine-tuning
trainer.predict(full_test_dataset).metrics

  0%|          | 0/2928 [00:00<?, ?it/s]

{'test_loss': 3.734851837158203,
 'test_accuracy': 0.09985198679266766,
 'test_recall': 0.025748948353415575,
 'test_precision': 0.12465439114451837,
 'test_f1': 0.04259237648688736,
 'test_runtime': 791.5275,
 'test_samples_per_second': 22.193,
 'test_steps_per_second': 3.699}

# Fine Tuned RoBERTa

In [16]:
# To observe training progress live
#%%wandb 

# Fine-tune model
trainer.train()

# Resume fine-tuning from checkpoint
trainer.train(MODEL_CHECKPOINT_PATH + "/" + "checkpoint-10500")

  0%|          | 0/23421 [00:00<?, ?it/s]

{'loss': 0.4665, 'grad_norm': 16.310157775878906, 'learning_rate': 9.7865163741941e-06, 'epoch': 0.02}
{'loss': 0.2787, 'grad_norm': 26.21689796447754, 'learning_rate': 9.573032748388199e-06, 'epoch': 0.04}
{'loss': 0.1348, 'grad_norm': 2.0396459102630615, 'learning_rate': 9.3595491225823e-06, 'epoch': 0.06}
{'loss': 0.0832, 'grad_norm': 0.0051724230870604515, 'learning_rate': 9.146065496776399e-06, 'epoch': 0.09}
{'loss': 0.0995, 'grad_norm': 0.4950341582298279, 'learning_rate': 8.932581870970498e-06, 'epoch': 0.11}
{'loss': 0.0944, 'grad_norm': 0.023596251383423805, 'learning_rate': 8.719098245164597e-06, 'epoch': 0.13}
{'loss': 0.0861, 'grad_norm': 0.7253497838973999, 'learning_rate': 8.505614619358696e-06, 'epoch': 0.15}
{'loss': 0.0639, 'grad_norm': 0.005385054741054773, 'learning_rate': 8.292130993552795e-06, 'epoch': 0.17}
{'loss': 0.0797, 'grad_norm': 0.3268435299396515, 'learning_rate': 8.078647367746895e-06, 'epoch': 0.19}
{'loss': 0.0857, 'grad_norm': 0.01800481788814068, 'l

  0%|          | 0/2928 [00:00<?, ?it/s]

{'eval_loss': 0.03629041463136673, 'eval_accuracy': 0.9926558497011102, 'eval_recall': 0.9917323966951974, 'eval_precision': 0.9928161785353395, 'eval_f1': 0.9922679992598877, 'eval_runtime': 196.4235, 'eval_samples_per_second': 89.424, 'eval_steps_per_second': 14.907, 'epoch': 1.0}
{'train_runtime': 7664.9822, 'train_samples_per_second': 18.333, 'train_steps_per_second': 3.056, 'train_loss': 0.07083109718877506, 'epoch': 1.0}


  0%|          | 0/23421 [00:00<?, ?it/s]

{'loss': 0.0662, 'grad_norm': 1.4798390865325928, 'learning_rate': 5.3033602322701845e-06, 'epoch': 0.47}
{'loss': 0.0581, 'grad_norm': 0.03646569699048996, 'learning_rate': 5.089876606464284e-06, 'epoch': 0.49}
{'loss': 0.0386, 'grad_norm': 0.0023171466309577227, 'learning_rate': 4.8763929806583834e-06, 'epoch': 0.51}
{'loss': 0.0515, 'grad_norm': 0.005401731934398413, 'learning_rate': 4.662909354852483e-06, 'epoch': 0.53}
{'loss': 0.0473, 'grad_norm': 0.0025137714110314846, 'learning_rate': 4.449425729046582e-06, 'epoch': 0.56}
{'loss': 0.057, 'grad_norm': 0.0035329782404005527, 'learning_rate': 4.235942103240681e-06, 'epoch': 0.58}
{'loss': 0.0619, 'grad_norm': 0.010108714923262596, 'learning_rate': 4.022458477434781e-06, 'epoch': 0.6}
{'loss': 0.0441, 'grad_norm': 0.005381495226174593, 'learning_rate': 3.8089748516288807e-06, 'epoch': 0.62}
{'loss': 0.0494, 'grad_norm': 0.1864854246377945, 'learning_rate': 3.5954912258229797e-06, 'epoch': 0.64}
{'loss': 0.0421, 'grad_norm': 0.05627

  0%|          | 0/2928 [00:00<?, ?it/s]

{'eval_loss': 0.03629041463136673, 'eval_accuracy': 0.9926558497011102, 'eval_recall': 0.9917323966951974, 'eval_precision': 0.9928161785353395, 'eval_f1': 0.9922679992598877, 'eval_runtime': 197.996, 'eval_samples_per_second': 88.714, 'eval_steps_per_second': 14.788, 'epoch': 1.0}
{'train_runtime': 2809.7963, 'train_samples_per_second': 50.012, 'train_steps_per_second': 8.335, 'train_loss': 0.025746483841681836, 'epoch': 1.0}


TrainOutput(global_step=23421, training_loss=0.025746483841681836, metrics={'train_runtime': 2809.7963, 'train_samples_per_second': 50.012, 'train_steps_per_second': 8.335, 'total_flos': 1.861637609652941e+16, 'train_loss': 0.025746483841681836, 'epoch': 1.0})

In [17]:
# Save fine-tuned model
trainer.save_model(MODEL_SAVE_PATH)

In [18]:
# Evaluate fine-tuned model
trainer.evaluate()

  0%|          | 0/2928 [00:00<?, ?it/s]

{'eval_loss': 0.03629041463136673,
 'eval_accuracy': 0.9926558497011102,
 'eval_recall': 0.9917323966951974,
 'eval_precision': 0.9928161785353395,
 'eval_f1': 0.9922679992598877,
 'eval_runtime': 217.0816,
 'eval_samples_per_second': 80.914,
 'eval_steps_per_second': 13.488,
 'epoch': 1.0}

In [19]:
# Predict after fine-tuning
trainer.predict(full_test_dataset).metrics

  0%|          | 0/2928 [00:00<?, ?it/s]

{'test_loss': 0.034782037138938904,
 'test_accuracy': 0.9931116930433792,
 'test_recall': 0.9922380937059818,
 'test_precision': 0.9932695982054867,
 'test_f1': 0.9927481543095249,
 'test_runtime': 209.3322,
 'test_samples_per_second': 83.914,
 'test_steps_per_second': 13.987}

In [20]:
# Load fine-tuned model
saved_model = AutoModelForSequenceClassification.from_pretrained(MODEL_SAVE_PATH)

# Load trainer after fine-tune
saved_trainer = Trainer(
    model=saved_model,
    args=training_args,
    train_dataset=full_train_dataset,
    eval_dataset=full_test_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

# Predict after fine-tuning
saved_trainer.predict(full_test_dataset).metrics

  0%|          | 0/2928 [00:00<?, ?it/s]

{'test_loss': 0.034782037138938904,
 'test_accuracy': 0.9931116930433792,
 'test_recall': 0.9922380937059818,
 'test_precision': 0.9932695982054867,
 'test_f1': 0.9927481543095249,
 'test_runtime': 210.6437,
 'test_samples_per_second': 83.392,
 'test_steps_per_second': 13.9}