In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from datasets import Dataset
from transformers import RobertaTokenizerFast, RobertaForSequenceClassification
from transformers import TrainingArguments, Trainer, EarlyStoppingCallback
import torch
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Load and preprocess the data
df = pd.read_csv(r"Suicide_Detection.csv")
df = df[['text', 'class']].dropna()
df = df[df['class'].isin(['suicide', 'non-suicide'])]
df['label'] = df['class'].map({'non-suicide': 0, 'suicide': 1})

In [4]:
# Train-test split
train_df, test_df = train_test_split(df, test_size=0.3, random_state=42)

In [5]:
# Convert to Hugging Face Dataset
train_dataset = Dataset.from_pandas(train_df)
test_dataset = Dataset.from_pandas(test_df)

In [6]:
from transformers import ElectraTokenizer, ElectraForSequenceClassification

model_name = "google/electra-base-discriminator"
tokenizer = ElectraTokenizer.from_pretrained(model_name)

In [7]:
# Tokenization function
def tokenize(batch):
    return tokenizer(batch["text"], padding="max_length", truncation=True, max_length=512)

In [8]:
train_dataset = train_dataset.map(tokenize, batched=True)
test_dataset = test_dataset.map(tokenize, batched=True)

Map: 100%|██████████| 162451/162451 [03:39<00:00, 739.51 examples/s]
Map: 100%|██████████| 69623/69623 [01:37<00:00, 712.55 examples/s]


In [9]:
# Set format for PyTorch
train_dataset = train_dataset.remove_columns(['text', 'class', '__index_level_0__'])
test_dataset = test_dataset.remove_columns(['text', 'class', '__index_level_0__'])
train_dataset.set_format("torch")
test_dataset.set_format("torch")

In [10]:
# Load model
model = ElectraForSequenceClassification.from_pretrained(model_name, num_labels=2)

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at google/electra-base-discriminator and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [11]:
import torch

# Check if CUDA is available
if torch.cuda.is_available():
    print("CUDA is available.")
    print("Device name:", torch.cuda.get_device_name(0))
    print("Current device:", torch.cuda.current_device())
    print("Device count:", torch.cuda.device_count())
else:
    print("CUDA is NOT available.")

CUDA is available.
Device name: NVIDIA GeForce RTX 4050 Laptop GPU
Current device: 0
Device count: 1


In [12]:
# Create a tensor
x = torch.tensor([1.0, 2.0])

# Move it to CUDA if available
if torch.cuda.is_available():
    x = x.to("cuda")
    print("Tensor is on GPU:", x.device)
else:
    print("Tensor is on CPU:", x.device)


Tensor is on GPU: cuda:0


In [13]:
import torch
print("CUDA Available:", torch.cuda.is_available())
print("GPU Name:", torch.cuda.get_device_name(0))

CUDA Available: True
GPU Name: NVIDIA GeForce RTX 4050 Laptop GPU


In [14]:
# Metrics function
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=-1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='binary')
    acc = accuracy_score(labels, preds)
    return {"accuracy": acc, "precision": precision, "recall": recall, "f1": f1}

In [15]:
# Training arguments
training_args = TrainingArguments(
    output_dir="./roberta_model",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    logging_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=4,
    weight_decay=0.01,
    load_best_model_at_end=True,
    metric_for_best_model="f1",
    save_total_limit=1,
    fp16=torch.cuda.is_available()  # Enable if GPU supports it
)



In [16]:
# Trainer setup
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=2)]
)

In [17]:
# Train
trainer.train()

 25%|██▌       | 20307/81228 [3:39:27<9:13:25,  1.83it/s] 

{'loss': 0.1152, 'grad_norm': 0.008539312519133091, 'learning_rate': 1.5002708425666028e-05, 'epoch': 1.0}


                                                         
 25%|██▌       | 20307/81228 [3:54:33<9:13:25,  1.83it/s]

{'eval_loss': 0.08908629417419434, 'eval_accuracy': 0.9774930698188817, 'eval_precision': 0.9804811751778382, 'eval_recall': 0.9743670795137791, 'eval_f1': 0.9774145659474496, 'eval_runtime': 905.7973, 'eval_samples_per_second': 76.864, 'eval_steps_per_second': 9.608, 'epoch': 1.0}


 50%|█████     | 40614/81228 [7:55:59<7:08:18,  1.58it/s]    

{'loss': 0.0634, 'grad_norm': 0.0016644755378365517, 'learning_rate': 1.0004678189786774e-05, 'epoch': 2.0}


                                                         
 50%|█████     | 40614/81228 [8:11:11<7:08:18,  1.58it/s]

{'eval_loss': 0.10015127062797546, 'eval_accuracy': 0.9792740904586128, 'eval_precision': 0.9773052486978422, 'eval_recall': 0.981321302336274, 'eval_f1': 0.9793091581709468, 'eval_runtime': 912.6287, 'eval_samples_per_second': 76.288, 'eval_steps_per_second': 9.536, 'epoch': 2.0}


 75%|███████▌  | 60921/81228 [12:28:40<3:53:06,  1.45it/s]   

{'loss': 0.0312, 'grad_norm': 0.0015790299512445927, 'learning_rate': 5.006155512877333e-06, 'epoch': 3.0}


                                                          
 75%|███████▌  | 60921/81228 [12:43:58<3:53:06,  1.45it/s]

{'eval_loss': 0.1137465164065361, 'eval_accuracy': 0.9796044410611436, 'eval_precision': 0.9838237425713872, 'eval_recall': 0.9752291732521049, 'eval_f1': 0.9795076052760701, 'eval_runtime': 918.5383, 'eval_samples_per_second': 75.798, 'eval_steps_per_second': 9.475, 'epoch': 3.0}


 88%|████████▊ | 71358/81228 [15:44:10<32:17:14, 11.78s/it]   

RuntimeError: unique_by_key: failed to synchronize: cudaErrorMemoryAllocation: out of memory

In [20]:
model.save_pretrained("TRAINING/roberta_model")
tokenizer.save_pretrained("TRAINING/roberta_model")


RuntimeError: CUDA error: out of memory
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [None]:
print("✅ Model training complete and saved at ./roberta_model")