In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
import torch





In [2]:
dataset_path = "GAD-7.csv"
data = pd.read_csv(dataset_path)

In [3]:
print(data.head())

   year_1 state_1 general_health                      phq9_1  \
0    2000      MD      Very Good                Several days   
1    2001      SC           Good  More than half of the days   
2    1999      NJ           Good                Several days   
3    1998      NY           Good                Several days   
4    2000      PA      Very Good                  Not at all   

                       phq9_2                      phq9_3  \
0                Several days                  Not at all   
1  More than half of the days            Nearly every day   
2                Several days  More than half of the days   
3                Several days            Nearly every day   
4                Several days                Several days   

                       phq9_4            phq9_5        phq9_6  \
0                Several days        Not at all    Not at all   
1            Nearly every day  Nearly every day  Several days   
2  More than half of the days      Several days  Seve

In [4]:
texts = data['general_health'].tolist()
labels = data['phq9_severity'].tolist()

train_texts, temp_texts, train_labels, temp_labels = train_test_split(texts, labels, test_size=0.3, random_state=42)
val_texts, test_texts, val_labels, test_labels = train_test_split(temp_texts, temp_labels, test_size=0.5, random_state=42)

In [5]:
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

# Convert labels to numerical values
label_mapping = {label: idx for idx, label in enumerate(set(labels))}
train_labels = [label_mapping[label] for label in train_labels]
val_labels = [label_mapping[label] for label in val_labels]
test_labels = [label_mapping[label] for label in test_labels]

def tokenize_data(texts, labels):
    encodings = tokenizer(texts, truncation=True, padding=True, max_length=512, return_tensors="pt")
    labels_tensor = torch.tensor(labels)
    return encodings, labels_tensor

train_encodings, train_labels = tokenize_data(train_texts, train_labels)
val_encodings, val_labels = tokenize_data(val_texts, val_labels)
test_encodings, test_labels = tokenize_data(test_texts, test_labels)


In [6]:
class GAD7Dataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = self.labels[idx]
        return item
train_dataset = GAD7Dataset(train_encodings, train_labels)
val_dataset = GAD7Dataset(val_encodings, val_labels)
test_dataset = GAD7Dataset(test_encodings, test_labels)

In [7]:
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=len(set(labels)))

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [8]:
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    logging_dir="./logs",
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
    save_strategy="epoch",
    load_best_model_at_end=True,
)



In [9]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    tokenizer=tokenizer,
    compute_metrics=lambda p: {
        "accuracy": accuracy_score(p.label_ids, np.argmax(p.predictions, axis=1))
    },
)

  trainer = Trainer(


In [10]:
trainer.train()

  0%|          | 0/153 [00:00<?, ?it/s]

  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


  0%|          | 0/11 [00:00<?, ?it/s]

{'eval_loss': 1.543938159942627, 'eval_accuracy': 0.2413793103448276, 'eval_runtime': 22.097, 'eval_samples_per_second': 3.937, 'eval_steps_per_second': 0.498, 'epoch': 1.0}


  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


  0%|          | 0/11 [00:00<?, ?it/s]

{'eval_loss': 1.5141088962554932, 'eval_accuracy': 0.3103448275862069, 'eval_runtime': 2.135, 'eval_samples_per_second': 40.749, 'eval_steps_per_second': 5.152, 'epoch': 2.0}


  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


  0%|          | 0/11 [00:00<?, ?it/s]

{'eval_loss': 1.5102463960647583, 'eval_accuracy': 0.27586206896551724, 'eval_runtime': 6.063, 'eval_samples_per_second': 14.349, 'eval_steps_per_second': 1.814, 'epoch': 3.0}
{'train_runtime': 853.4689, 'train_samples_per_second': 1.424, 'train_steps_per_second': 0.179, 'train_loss': 1.5030642241434333, 'epoch': 3.0}


TrainOutput(global_step=153, training_loss=1.5030642241434333, metrics={'train_runtime': 853.4689, 'train_samples_per_second': 1.424, 'train_steps_per_second': 0.179, 'total_flos': 2497566742920.0, 'train_loss': 1.5030642241434333, 'epoch': 3.0})

In [11]:
test_results = trainer.evaluate(test_dataset)
print("Test Results:", test_results)


  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


  0%|          | 0/11 [00:00<?, ?it/s]

Test Results: {'eval_loss': 1.4416767358779907, 'eval_accuracy': 0.2988505747126437, 'eval_runtime': 47.3716, 'eval_samples_per_second': 1.837, 'eval_steps_per_second': 0.232, 'epoch': 3.0}


In [12]:
test_preds = np.argmax(trainer.predict(test_dataset).predictions, axis=1)
print(classification_report(test_labels, test_preds))


  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


  0%|          | 0/11 [00:00<?, ?it/s]

              precision    recall  f1-score   support

           0       0.00      0.00      0.00        13
           1       0.33      0.04      0.07        27
           2       0.33      0.62      0.43        29
           3       0.23      0.41      0.30        17
           4       0.00      0.00      0.00         1

    accuracy                           0.30        87
   macro avg       0.18      0.21      0.16        87
weighted avg       0.26      0.30      0.22        87



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [13]:
model.save_pretrained("./gad7_bert_model")
tokenizer.save_pretrained("./gad7_bert_model")


('./gad7_bert_model\\tokenizer_config.json',
 './gad7_bert_model\\special_tokens_map.json',
 './gad7_bert_model\\vocab.txt',
 './gad7_bert_model\\added_tokens.json')

In [15]:
from sklearn.metrics import recall_score, f1_score

accuracy = accuracy_score(test_labels, test_preds)
recall = recall_score(test_labels, test_preds, average="macro")
f1 = f1_score(test_labels, test_preds, average="macro")

print(f"Accuracy: {accuracy:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")

Accuracy: 0.30
Recall: 0.21
F1 Score: 0.16
