In [1]:
from datasets import load_dataset

from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer

import evaluate
import numpy as np
from transformers import DataCollatorWithPadding

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# One seed for the subsample shuffle and for both splits, so the exact same
# train/validation/test partition is produced on every run.
SEED = 42

# Load the CSV and take the "train" split that holds its rows.
dataset = load_dataset("csv", data_files="AI_Human.csv")
dataset = dataset["train"]

# Work on a random subsample of at most 100k rows (fewer if the file is smaller).
dataset = dataset.shuffle(seed=SEED).select(range(min(100000, len(dataset))))

# 80% train, then the remaining 20% is halved into validation and test.
split_dataset = dataset.train_test_split(test_size=0.2, seed=SEED)
test_valid = split_dataset["test"].train_test_split(test_size=0.5, seed=SEED)

final_splits = {
    "train": split_dataset["train"],
    "validation": test_valid["train"],
    "test": test_valid["test"],
}

dataset_dict = final_splits

In [3]:
# Display the three splits with their columns and row counts as a sanity check.
dataset_dict

{'train': Dataset({
     features: ['text', 'generated'],
     num_rows: 80000
 }),
 'validation': Dataset({
     features: ['text', 'generated'],
     num_rows: 10000
 }),
 'test': Dataset({
     features: ['text', 'generated'],
     num_rows: 10000
 })}

In [4]:
from datasets import DatasetDict
# Wrap the plain dict of splits in a DatasetDict; the later cells call
# rename_column() and map() on it once rather than once per split.
dataset_dict = DatasetDict(dataset_dict)  # Convert it to DatasetDict

In [5]:
# Rename the target column "generated" to "label" in every split.
# NOTE: the label counts printed further down show the values as floats (0.0 / 1.0).
# NOTE(review): re-running this cell on the already-renamed data will presumably
# fail because "generated" no longer exists — confirm before relying on re-runs.
dataset_dict = dataset_dict.rename_column("generated", "label")


In [6]:
# Display the splits again to confirm the column is now called "label".
dataset_dict

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 80000
    })
    validation: Dataset({
        features: ['text', 'label'],
        num_rows: 10000
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 10000
    })
})

In [7]:
from collections import Counter

# Class balance of the training split: label value -> number of rows.
train_label_counts = Counter(dataset_dict["train"]["label"])
print(train_label_counts)

Counter({0.0: 50116, 1.0: 29884})


In [8]:
# Checkpoint used for both the tokenizer and the pretrained encoder weights.
model_path = "google-bert/bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Class index <-> readable name; label2id is the inverse mapping of id2label.
id2label = {0: "Human", 1: "AI"}
label2id = {label_name: class_id for class_id, label_name in id2label.items()}

# Two-class sequence classifier built on top of the checkpoint.
model = AutoModelForSequenceClassification.from_pretrained(
    model_path,
    num_labels=len(id2label),
    id2label=id2label,
    label2id=label2id,
)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [9]:
# List every model parameter together with its requires_grad flag.
for param_name, tensor in model.named_parameters():
    print(f"{param_name} {tensor.requires_grad}")

bert.embeddings.word_embeddings.weight True
bert.embeddings.position_embeddings.weight True
bert.embeddings.token_type_embeddings.weight True
bert.embeddings.LayerNorm.weight True
bert.embeddings.LayerNorm.bias True
bert.encoder.layer.0.attention.self.query.weight True
bert.encoder.layer.0.attention.self.query.bias True
bert.encoder.layer.0.attention.self.key.weight True
bert.encoder.layer.0.attention.self.key.bias True
bert.encoder.layer.0.attention.self.value.weight True
bert.encoder.layer.0.attention.self.value.bias True
bert.encoder.layer.0.attention.output.dense.weight True
bert.encoder.layer.0.attention.output.dense.bias True
bert.encoder.layer.0.attention.output.LayerNorm.weight True
bert.encoder.layer.0.attention.output.LayerNorm.bias True
bert.encoder.layer.0.intermediate.dense.weight True
bert.encoder.layer.0.intermediate.dense.bias True
bert.encoder.layer.0.output.dense.weight True
bert.encoder.layer.0.output.dense.bias True
bert.encoder.layer.0.output.LayerNorm.weight True


In [10]:
# Freeze the base model except its pooler, in a single pass: a base-model
# parameter stays trainable only if its name contains "pooler".
for param_name, weight in model.base_model.named_parameters():
    weight.requires_grad = "pooler" in param_name

In [11]:
# Re-list the requires_grad flags to verify the freezing step above.
for param_name, tensor in model.named_parameters():
    print(f"{param_name} {tensor.requires_grad}")

bert.embeddings.word_embeddings.weight False
bert.embeddings.position_embeddings.weight False
bert.embeddings.token_type_embeddings.weight False
bert.embeddings.LayerNorm.weight False
bert.embeddings.LayerNorm.bias False
bert.encoder.layer.0.attention.self.query.weight False
bert.encoder.layer.0.attention.self.query.bias False
bert.encoder.layer.0.attention.self.key.weight False
bert.encoder.layer.0.attention.self.key.bias False
bert.encoder.layer.0.attention.self.value.weight False
bert.encoder.layer.0.attention.self.value.bias False
bert.encoder.layer.0.attention.output.dense.weight False
bert.encoder.layer.0.attention.output.dense.bias False
bert.encoder.layer.0.attention.output.LayerNorm.weight False
bert.encoder.layer.0.attention.output.LayerNorm.bias False
bert.encoder.layer.0.intermediate.dense.weight False
bert.encoder.layer.0.intermediate.dense.bias False
bert.encoder.layer.0.output.dense.weight False
bert.encoder.layer.0.output.dense.bias False
bert.encoder.layer.0.output.Lay

In [12]:
import torch
def preprocess_function(examples):
    """Tokenize a batch of rows and attach integer class labels.

    Args:
        examples: Batch dict as passed by ``map(..., batched=True)``; the
            ``"text"`` and ``"label"`` columns are read.

    Returns:
        The tokenizer output for the batch, with an added ``"labels"`` entry
        holding one Python int per row.
    """
    # Truncate only. Padding is left to the DataCollatorWithPadding that is
    # handed to the trainer, so each batch is padded to its own longest
    # sequence instead of every row being padded to the model maximum.
    tokenized_inputs = tokenizer(examples["text"], truncation=True)
    # The label column holds floats (0.0 / 1.0); store them as plain ints.
    tokenized_inputs["labels"] = [int(label) for label in examples["label"]]
    return tokenized_inputs


# Run preprocess_function over every split in batches. dataset_dict is rebound
# to the result, so later cells see the tokenized data.
dataset_dict = dataset_dict.map(preprocess_function, batched=True)


Map: 100%|██████████| 80000/80000 [00:40<00:00, 1993.15 examples/s]
Map: 100%|██████████| 10000/10000 [00:05<00:00, 1875.55 examples/s]
Map: 100%|██████████| 10000/10000 [00:05<00:00, 1852.93 examples/s]


In [13]:
# Collator that pads each batch using the tokenizer; passed to the trainer below.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

In [14]:
# Load the metric objects by name from the `evaluate` library.
# compute_metrics() below calls their .compute() methods.
accuracy = evaluate.load("accuracy")
auc_score = evaluate.load("roc_auc")

def compute_metrics(eval_pred):
    """Compute accuracy and ROC AUC from raw logits.

    Args:
        eval_pred: Pair ``(predictions, labels)``. ``predictions`` holds the
            logits with the class axis last (column 1 is read as the positive
            class); ``labels`` holds the reference class ids.

    Returns:
        Dict with keys ``"Accuracy"`` and ``"AUC"``, each rounded to 3 decimals.
    """
    predictions, labels = eval_pred
    predictions = np.asarray(predictions)

    # Numerically stable softmax: subtracting the per-row maximum leaves the
    # result unchanged mathematically but keeps np.exp from overflowing to
    # inf (and the division from producing NaN) on large logits.
    shifted = predictions - predictions.max(axis=-1, keepdims=True)
    exp_scores = np.exp(shifted)
    probabilities = exp_scores / exp_scores.sum(axis=-1, keepdims=True)

    # ROC AUC is computed from the probability of the positive class.
    positive_class_probs = probabilities[:, 1]
    auc = np.round(
        auc_score.compute(prediction_scores=positive_class_probs, references=labels)["roc_auc"],
        3,
    )

    # Accuracy is computed from the most probable class.
    predicted_classes = np.argmax(predictions, axis=1)
    acc = np.round(
        accuracy.compute(predictions=predicted_classes, references=labels)["accuracy"],
        3,
    )

    return {"Accuracy": acc, "AUC": auc}

Training Using CUDA

In [15]:
import torch.nn.functional as F

class CustomTrainer(Trainer):
    """Trainer whose loss is cross-entropy computed directly on the model logits."""

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Return the cross-entropy loss for one batch.

        Args:
            model: Model called as ``model(**inputs)``.
            inputs: Batch dict; its ``"labels"`` entry is popped (the dict is
                mutated) and is therefore not forwarded to the model.
            return_outputs: When true, return ``(loss, outputs)`` instead of
                the loss alone.
            num_items_in_batch: Accepted for signature compatibility; unused.

        Returns:
            The loss, or ``(loss, outputs)`` when ``return_outputs`` is true.
        """
        # Cross-entropy needs integer class ids, so the labels are cast to long.
        targets = inputs.pop("labels").long()
        outputs = model(**inputs)
        loss = F.cross_entropy(outputs.logits, targets)
        if return_outputs:
            return loss, outputs
        return loss




In [16]:
import torch
from transformers import TrainingArguments, Trainer
tokenized_data = dataset_dict  # Alias for the tokenized splits

# Report which device is available and place the model on it.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

model.to(device)

# Hyperparameters
lr = 2e-4
batch_size = 50

num_epochs = 5

training_args = TrainingArguments(
    output_dir="bert-ai-classifier_teacher",
    learning_rate=lr,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=num_epochs,
    logging_strategy="epoch",
    # `eval_strategy` is the current name of the deprecated `evaluation_strategy`.
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    fp16=False,  # Mixed precision is disabled; training runs in full precision
    # The deprecated `no_cuda=False` was dropped: it only restated the default.
)


trainer = CustomTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_data["train"],
    # NOTE: the split named "test" is the one evaluated after every epoch, and
    # so the one best-checkpoint selection sees. The split named "validation"
    # is kept unseen for the final trainer.predict() cell.
    eval_dataset=tokenized_data["test"],
    # `processing_class` replaces the deprecated `tokenizer` argument.
    processing_class=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)


# Train the model
trainer.train()


Using device: cuda


  trainer = CustomTrainer(


Epoch,Training Loss,Validation Loss,Accuracy,Auc
1,0.1048,0.055352,0.98,0.998
2,0.0765,0.075997,0.971,0.998
3,0.0683,0.080866,0.969,0.998
4,0.0625,0.068467,0.975,0.998
5,0.0596,0.059185,0.979,0.999


TrainOutput(global_step=8000, training_loss=0.0743387975692749, metrics={'train_runtime': 13606.6607, 'train_samples_per_second': 29.397, 'train_steps_per_second': 0.588, 'total_flos': 1.05244422144e+17, 'train_loss': 0.0743387975692749, 'epoch': 5.0})

In [17]:
# Score the split named "validation", which the trainer was given neither for
# training nor for per-epoch evaluation.
predictions = trainer.predict(tokenized_data["validation"])
logits, labels = predictions.predictions, predictions.label_ids

metrics = compute_metrics((logits, labels))
print(metrics)

{'Accuracy': np.float64(0.976), 'AUC': np.float64(0.997)}


In [21]:
# Classify a single ad-hoc sentence with the fine-tuned model.
sentence = "oh my god why are u giving 1 for all the sentences man"

model.eval()  # Inference mode

# Reuse the tokenizer loaded together with the model rather than loading it
# again, and send the inputs to whatever device the model is on instead of
# hard-coding "cuda".
inputs = tokenizer(sentence, padding=True, truncation=True, return_tensors="pt").to(model.device)
with torch.no_grad():  # No gradients needed for inference
    outputs = model(**inputs)

logits = outputs.logits
probs = torch.nn.functional.softmax(logits, dim=-1)  # Convert to probabilities
predicted_label = torch.argmax(probs, dim=-1).item()  # Index of the most probable class

print(f"Predicted Label: {predicted_label}")  # 0 or 1

Predicted Label: 1


In [23]:
# Show the class probabilities followed by the raw logits, one tensor per line.
print(probs, logits, sep="\n")

tensor([[0.3799, 0.6201]], device='cuda:0')
tensor([[-0.1584,  0.3316]], device='cuda:0')
