In [1]:
from datasets import load_dataset
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer

import evaluate
import numpy as np
from transformers import DataCollatorWithPadding

  from .autonotebook import tqdm as notebook_tqdm





In [2]:
dataset = load_dataset("csv", data_files="AI_Human.csv")
dataset = dataset["train"]

dataset = dataset.shuffle(seed=42).select(range(10000))

split_dataset = dataset.train_test_split(test_size=0.2)
test_valid = split_dataset["test"].train_test_split(test_size=0.5)

final_splits = {
    "train": split_dataset["train"],
    "validation": test_valid["train"],
    "test": test_valid["test"],
}

dataset_dict = final_splits

In [3]:
dataset_dict

{'train': Dataset({
     features: ['text', 'generated'],
     num_rows: 8000
 }),
 'validation': Dataset({
     features: ['text', 'generated'],
     num_rows: 1000
 }),
 'test': Dataset({
     features: ['text', 'generated'],
     num_rows: 1000
 })}

In [4]:
from datasets import DatasetDict
dataset_dict = DatasetDict(dataset_dict)  # Convert it to DatasetDict

In [5]:
dataset_dict = dataset_dict.rename_column("generated", "labels")


In [6]:
dataset_dict['train'].features

{'text': Value(dtype='string', id=None),
 'labels': Value(dtype='float64', id=None)}

In [7]:
dataset_dict

DatasetDict({
    train: Dataset({
        features: ['text', 'labels'],
        num_rows: 8000
    })
    validation: Dataset({
        features: ['text', 'labels'],
        num_rows: 1000
    })
    test: Dataset({
        features: ['text', 'labels'],
        num_rows: 1000
    })
})

In [8]:
def convert_labels(example):
    example["labels"] = int(example["labels"])
    return example

dataset_dict = dataset_dict.map(convert_labels)


Map: 100%|██████████| 8000/8000 [00:00<00:00, 22209.96 examples/s]
Map: 100%|██████████| 1000/1000 [00:00<00:00, 21471.59 examples/s]
Map: 100%|██████████| 1000/1000 [00:00<00:00, 21715.94 examples/s]


In [9]:
from collections import Counter
print(Counter(dataset_dict["train"]["labels"]))  # Count occurrences of each label

Counter({0.0: 5038, 1.0: 2962})


In [34]:
np.shape(dataset_dict["train"]["labels"])

(8000,)

In [35]:
np.shape(dataset_dict["train"]["text"])

(8000,)

In [39]:
dataset_dict["train"]["text"][0]

"Dear Principal,\n\nI Think Rear you should give us community service because we don'tR want RO live in a diary community. We also don'tR want viruses. If we keep This community clean Then maybe a for of people won'tR be sick and live longer. We should also have more Rumors because we want The kids RO has a bearer life and nor be poor in The streets. Some people don'tR have They right mind RO learn and They need a little help, and we could do Rear. We could also help Rumor special ed kids IR might be frustrating bur i BER we could still do IR.\n\nI Think Rear everybody should clean up The environment. The environment helps us by giving us air RO breathe i Think IR's Rime RO give back RO The earth. Yes, IR might Rake Too much Rime away bur we are doing IR for a good reason and IR is RO live in a healthier, bearer community. They could do all They want during The weekends bur IR is bearer RO help now while we can Than RO later on when we can't do anything about IR. Also people Rear have 

In [10]:
model_path = "google-bert/bert-base-uncased"

tokenizer = AutoTokenizer.from_pretrained(model_path)

id2label = {0: "Human", 1: "AI"}
label2id = {"Human": 0, "AI": 1}
model = AutoModelForSequenceClassification.from_pretrained(model_path, num_labels=2, id2label=id2label, label2id=label2id)
model.config.problem_type = "single_label_classification"

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [11]:
#layers
for name, param in model.named_parameters():
   print(name, param.requires_grad)

bert.embeddings.word_embeddings.weight True
bert.embeddings.position_embeddings.weight True
bert.embeddings.token_type_embeddings.weight True
bert.embeddings.LayerNorm.weight True
bert.embeddings.LayerNorm.bias True
bert.encoder.layer.0.attention.self.query.weight True
bert.encoder.layer.0.attention.self.query.bias True
bert.encoder.layer.0.attention.self.key.weight True
bert.encoder.layer.0.attention.self.key.bias True
bert.encoder.layer.0.attention.self.value.weight True
bert.encoder.layer.0.attention.self.value.bias True
bert.encoder.layer.0.attention.output.dense.weight True
bert.encoder.layer.0.attention.output.dense.bias True
bert.encoder.layer.0.attention.output.LayerNorm.weight True
bert.encoder.layer.0.attention.output.LayerNorm.bias True
bert.encoder.layer.0.intermediate.dense.weight True
bert.encoder.layer.0.intermediate.dense.bias True
bert.encoder.layer.0.output.dense.weight True
bert.encoder.layer.0.output.dense.bias True
bert.encoder.layer.0.output.LayerNorm.weight True


In [12]:
# freeze base model params
for name, param in model.base_model.named_parameters():
    param.requires_grad = False

# unfreeze base model pooling layers
for name, param in model.base_model.named_parameters():
    if "pooler" in name:
        param.requires_grad = True

In [13]:
for name, param in model.named_parameters():
   print(name, param.requires_grad)

bert.embeddings.word_embeddings.weight False
bert.embeddings.position_embeddings.weight False
bert.embeddings.token_type_embeddings.weight False
bert.embeddings.LayerNorm.weight False
bert.embeddings.LayerNorm.bias False
bert.encoder.layer.0.attention.self.query.weight False
bert.encoder.layer.0.attention.self.query.bias False
bert.encoder.layer.0.attention.self.key.weight False
bert.encoder.layer.0.attention.self.key.bias False
bert.encoder.layer.0.attention.self.value.weight False
bert.encoder.layer.0.attention.self.value.bias False
bert.encoder.layer.0.attention.output.dense.weight False
bert.encoder.layer.0.attention.output.dense.bias False
bert.encoder.layer.0.attention.output.LayerNorm.weight False
bert.encoder.layer.0.attention.output.LayerNorm.bias False
bert.encoder.layer.0.intermediate.dense.weight False
bert.encoder.layer.0.intermediate.dense.bias False
bert.encoder.layer.0.output.dense.weight False
bert.encoder.layer.0.output.dense.bias False
bert.encoder.layer.0.output.Lay

In [14]:
def preprocess_function(examples):
    encoding = tokenizer(examples["text"], padding="max_length", truncation=True)
    encoding["labels"] = [int(label) for label in examples["labels"]]
    return encoding

tokenized_data = dataset_dict.map(preprocess_function, batched=True)



Map: 100%|██████████| 8000/8000 [00:02<00:00, 3071.83 examples/s]
Map: 100%|██████████| 1000/1000 [00:00<00:00, 3189.15 examples/s]
Map: 100%|██████████| 1000/1000 [00:00<00:00, 3027.07 examples/s]


In [15]:
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

In [16]:
# load metrics
accuracy = evaluate.load("accuracy")
auc_score = evaluate.load("roc_auc")

def compute_metrics(eval_pred):
    # get predictions
    predictions, labels = eval_pred
    
    # apply softmax to get probabilities
    probabilities = np.exp(predictions) / np.exp(predictions).sum(-1, keepdims=True)
    # use probabilities of the positive class for ROC AUC
    positive_class_probs = probabilities[:, 1]
    # compute auc
    auc = np.round(auc_score.compute(prediction_scores=positive_class_probs, references=labels)['roc_auc'],3)
    
    # predict most probable class
    predicted_classes = np.argmax(predictions, axis=1)
    # compute accuracy
    acc = np.round(accuracy.compute(predictions=predicted_classes, references=labels)['accuracy'],3)
    
    return {"Accuracy": acc, "AUC": auc}

Training Using CUDA

In [17]:
import torch.nn.functional as F

class CustomTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        labels = inputs.pop("labels").long()
        outputs = model(**inputs)
        logits = outputs.logits
        loss_fct = torch.nn.CrossEntropyLoss()
        loss = loss_fct(logits, labels)
        return (loss, outputs) if return_outputs else loss


In [18]:

def collate_fn(batch):
    batch = {k: torch.tensor([d[k] for d in batch]) for k in batch[0]}
    batch["labels"] = batch["labels"].long()
    return batch


In [19]:
import torch
print(torch.cuda.is_available())
print(torch.cuda.get_device_name(0))

True
NVIDIA GeForce RTX 3050 Ti Laptop GPU


In [20]:
lr = 2e-4
batch_size = 50
num_epochs = 5

training_args = TrainingArguments(
    output_dir="bert-ai-classifier_teacher",
    learning_rate=lr,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=num_epochs,
    logging_strategy="epoch",
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
)
model.to("cuda")

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [21]:
trainer = CustomTrainer(
    model=model.to("cuda"),
    args=training_args,
    train_dataset=tokenized_data["train"],
    eval_dataset=tokenized_data["validation"],
    tokenizer=tokenizer,
    data_collator=collate_fn,
    compute_metrics=compute_metrics,
)


trainer.train()

  trainer = CustomTrainer(


Epoch,Training Loss,Validation Loss,Accuracy,Auc
1,0.2279,0.219595,0.909,0.987
2,0.1132,0.099334,0.966,0.993
3,0.0972,0.098786,0.967,0.994
4,0.0853,0.098476,0.96,0.994
5,0.0817,0.103418,0.959,0.995


TrainOutput(global_step=800, training_loss=0.121067453622818, metrics={'train_runtime': 1429.9834, 'train_samples_per_second': 27.972, 'train_steps_per_second': 0.559, 'total_flos': 1.05244422144e+16, 'train_loss': 0.121067453622818, 'epoch': 5.0})

In [22]:
# apply model to validation dataset
predictions = trainer.predict(tokenized_data["train"])

logits = predictions.predictions
labels = predictions.label_ids

metrics = compute_metrics((logits, labels))
print(metrics)

{'Accuracy': np.float64(0.968), 'AUC': np.float64(0.996)}


In [23]:
import torch
from transformers import AutoTokenizer

model.eval()

sentence = "The stars flickered as the ancient ship drifted back into known space, its hull covered in markings no human had ever seen"
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

inputs = tokenizer(sentence, padding=True, truncation=True, return_tensors="pt")
inputs = {key: val.to("cuda") for key, val in inputs.items()}  # Move tensors to CUDA

with torch.no_grad():
    outputs = model(**inputs)

logits = outputs.logits
probs = torch.nn.functional.softmax(logits, dim=-1)
predicted_label = torch.argmax(probs, dim=-1).item()

print(f"Predicted Label: {predicted_label}")
print(outputs)

Predicted Label: 1
SequenceClassifierOutput(loss=None, logits=tensor([[-2.1378,  2.2903]], device='cuda:0'), hidden_states=None, attentions=None)


In [51]:
import torch
from transformers import AutoTokenizer

model.eval()

sentence = "alright this is looking good so far, we have to look at more testing to check the reliability of this product."
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

inputs = tokenizer(sentence, padding=True, truncation=True, return_tensors="pt")
inputs = {key: val.to("cuda") for key, val in inputs.items()}  # Move tensors to CUDA

with torch.no_grad():
    outputs = model(**inputs)

logits = outputs.logits
probs = torch.nn.functional.softmax(logits, dim=-1)
predicted_label = torch.argmax(probs, dim=-1).item()

print(f"Predicted Label: {predicted_label}")
print(outputs)

Predicted Label: 0
SequenceClassifierOutput(loss=None, logits=tensor([[ 0.6452, -0.8488]], device='cuda:0'), hidden_states=None, attentions=None)
