# <center>LLM - Detect AI Generated Text</center>

This competition challenges participants to develop a machine learning model that can accurately detect **whether an essay was written by a student or an LLM**. The competition dataset comprises a mix of student-written essays and essays generated by a variety of LLMs.

Team Members: 毛柏毅, 朱誼學, 許木羽, 張立誠

## Configuration

In [1]:
import transformers as T
from datasets import Dataset
import torch
# from torch.utils.data import Dataset, DataLoader
from torch.optim import AdamW
import torch.nn as nn
import torch.nn.functional as F
import kagglehub
import numpy as np

import os
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
from IPython.display import display, HTML

In [2]:
from typing import Literal

HOST: Literal['Localhost', 'Interactive', 'Batch'] = os.environ.get('KAGGLE_KERNEL_RUN_TYPE', 'Localhost')
IS_RERUN: bool = os.getenv('KAGGLE_IS_COMPETITION_RERUN')

print(f'HOST: {HOST}, IS_RERUN: {IS_RERUN}')

HOST: Localhost, IS_RERUN: None


In [3]:
device = torch.device(
    ("cuda:0" if torch.cuda.is_available()
     else "mps" if torch.backends.mps.is_available()
     else "cpu"))
print(device)

cuda:0


## Data

### Load Data

In [4]:
def get_kaggle_csv(dataset: str, name: str, is_comp: bool = False) -> pd.DataFrame:
    assert name.endswith('.csv')
    if IS_RERUN:
        return pd.read_csv(f'/kaggle/input/{dataset}/{name}')
    if is_comp:
        path = kagglehub.competition_download(dataset)
    else:
        path = kagglehub.dataset_download(dataset)
    return pd.read_csv(Path(path) / name)

In [5]:
if IS_RERUN:
    df_train = get_kaggle_csv('daigt-datamix', 'train_essays.csv')
    df_test = get_kaggle_csv('llm-detect-ai-generated-text', 'test_essays.csv', is_comp=True)
else:
    df = get_kaggle_csv('dogeon188/daigt-datamix', 'train_essays.csv')
    # split df_train into train and test
    df = df.sample(frac=1).reset_index(drop=True)
    df_test = df.iloc[-1000:].copy()
    df_train = df.iloc[:5000].copy()
    # Up sampling -> used to balance the number of data (generated = 0 or 1)
    human = df[df['source']=='human'].copy().sample(frac=1).reset_index(drop=True)
    minority = df_train[df_train['source']=='human'].shape[0]
    majority = df_train[df_train['source']!='human'].shape[0]
    up_sampling = human[:majority-minority]
    df_train = pd.concat((df_train, up_sampling)).sample(frac=1).reset_index(drop=True)
    assert df_train[df_train['source']=='human'].shape[0] == df_train[df_train['source']!='human'].shape[0]

## Model

In [6]:
tokenizer = T.AutoTokenizer.from_pretrained("microsoft/deberta-v3-base", use_fast=False)
tokenizer.save_pretrained("./src/tokenizer")

# tokenizer = T.AutoTokenizer.from_pretrained("src/tokenizer")

# Define the hyperparameters
lr = 3e-5
epochs = 3
train_batch_size = 16
validation_batch_size = 16
test_batch_size = 16

In [7]:
# Encoded
source_classes = {'claude': 0, 'cohere': 1, 'falcon': 2, 'gpt': 3, 'llama': 4, 'mistral': 5, 'palm': 6, 'T5': 7, 'human': 8}
df_train.drop(df_train[df_train['source'] == 'unknown'].index, inplace=True)
df_test.drop(df_test[df_test['source'] == 'unknown'].index, inplace=True)

if df_train['source'].dtype != "int64":
    df_train['source'] = df_train['source'].apply(lambda x: source_classes[x])
    df_test['source'] = df_test['source'].apply(lambda x: source_classes[x])
print(df_train['source'])

0       3
1       8
2       5
3       5
4       7
       ..
8731    5
8732    6
8733    8
8734    5
8735    7
Name: source, Length: 8736, dtype: int64


### Preprocess Data

In [8]:
class CustomDataset(torch.utils.data.Dataset):
    def __init__(self, df, split="train") -> None:
        super().__init__()
        assert split in ["train", "validation", "test"]
        if split != 'test':
            self.data = df[split]
        else:
            self.data = df
    def __getitem__(self, index):
        d = self.data.iloc[index]
        return d

    def __len__(self):
        return len(self.data)
    
def collate_fn(batch):
    texts = [item['text'] for item in batch]
    source = [item['source'] for item in batch]

    encoded_inputs = tokenizer(
        texts,
        return_tensors='pt',
        padding=True,
        truncation=True,
        max_length=512
    )

    source_tensor = torch.tensor(source)

    return {
        'input_ids': encoded_inputs['input_ids'],
        'token_type_ids': encoded_inputs['token_type_ids'],
        'attention_mask': encoded_inputs['attention_mask'],
        'source': source_tensor
    }

split_ratio = 0.85
split_idx = int(len(df_train) * split_ratio)
df_split = {"train": df_train[:split_idx], "validation": df_train[split_idx:]}


ds_train = CustomDataset(df_split, "train")
ds_validation = CustomDataset(df_split, "validation")
dl_train = torch.utils.data.DataLoader(ds_train, batch_size=train_batch_size, collate_fn=collate_fn)
dl_validation = torch.utils.data.DataLoader(ds_validation, batch_size=validation_batch_size, collate_fn=collate_fn)

### Model

In [9]:
from sklearn.metrics import accuracy_score

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = logits.argmax(axis=-1).cpu().numpy()
    labels = labels.cpu().numpy()
    acc = accuracy_score(labels, predictions)
    return acc

class Deberta(torch.nn.Module):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.deberta = T.AutoModel.from_pretrained("microsoft/deberta-v3-base", num_labels=9)
        self.deberta.save_pretrained("./src/deberta")
        self.deberta.gradient_checkpointing_enable()

        self.linear = torch.nn.Sequential(
            torch.nn.Linear(768, 384),
            torch.nn.Linear(384, 9)
        )
        self.activation = torch.nn.Sigmoid()
    def forward(self, **kwargs):
        input_ids = kwargs['input_ids']
        attention_mask = kwargs['attention_mask']
        outputs = self.deberta(input_ids=input_ids, attention_mask=attention_mask)
        last_hidden_state = outputs.last_hidden_state

        # Mean pooling
        # reshape attention_mask, used to filter the valid tokens
        mask = attention_mask.unsqueeze(-1).expand(last_hidden_state.size()).float()  # [batch_size, seq_len, hidden_size]
        # summation for valid tokens
        sum_embeddings = torch.sum(last_hidden_state * mask, dim=1)  # [batch_size, hidden_size]
        # num of valid tokens
        sum_mask = torch.clamp(mask.sum(dim=1), min=1e-9)
        x = sum_embeddings / sum_mask
        
        x = self.linear(x)
        x = self.activation(x)
        
        return {
            'source': x
        }


In [10]:
model = Deberta().to(device)
optimizer = AdamW(model.parameters(), lr=lr)
loss_fn = torch.nn.CrossEntropyLoss()


import os

def save_checkpoint(epoch, model, optimizer, path="./checkpoints"):
    os.makedirs(path, exist_ok=True)
    torch.save({
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
    }, os.path.join(path, f"checkpoint_epoch_{epoch}.pt"))

def load_checkpoint(path, model, optimizer):
    checkpoint = torch.load(path)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    start_epoch = checkpoint['epoch']
    return model, optimizer, start_epoch


In [11]:
for ep in range(epochs):
    pbar = tqdm(dl_train)
    pbar.set_description(f"Training epoch [{ep+1}/{epochs}]")
    model.train()

    for batch in pbar:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        source = batch['source'].to(device)
        
        optimizer.zero_grad()
        pred = model(input_ids = input_ids, attention_mask = attention_mask)
        loss = loss_fn(pred['source'], source)
        loss.backward()
        optimizer.step()
        

    pbar = tqdm(dl_validation)
    pbar.set_description(f"Validation epoch [{ep+1}/{epochs}]")
    model.eval()

    with torch.no_grad():
        acc, cnt = 0, 0
        for batch in pbar:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            source = batch['source'].long().to(device)
            pred = model(input_ids=input_ids, attention_mask=attention_mask)

            # Scoring
            acc += compute_metrics((pred['source'], source))
            cnt += 1
        
        print(f"accuracy: {acc/cnt}")
    save_checkpoint(ep, model, optimizer)

Training epoch [1/3]: 100%|██████████| 465/465 [08:03<00:00,  1.04s/it]
Validation epoch [1/3]: 100%|██████████| 82/82 [00:28<00:00,  2.84it/s]


accuracy: 0.5597560975609756


Training epoch [2/3]: 100%|██████████| 465/465 [09:01<00:00,  1.17s/it]
Validation epoch [2/3]: 100%|██████████| 82/82 [00:24<00:00,  3.39it/s]


accuracy: 0.5520833333333334


Training epoch [3/3]: 100%|██████████| 465/465 [08:57<00:00,  1.16s/it]
Validation epoch [3/3]: 100%|██████████| 82/82 [00:24<00:00,  3.37it/s]


accuracy: 0.5765752032520326


## Save Model

In [12]:
torch.save(model.state_dict(), "./src/model")

## Prediction

In [13]:
# model = torch.load("./src/model")
model = Deberta().to(device)
model.load_state_dict(torch.load('./src/model', weights_only=True))

ds_test = CustomDataset(df_test.copy(), "test")
dl_test = torch.utils.data.DataLoader(ds_test, batch_size=test_batch_size, collate_fn=collate_fn)

pbar = tqdm(dl_test)
pbar.set_description(f"Test")
model.eval()

generated_preds = []
source_preds = []

with torch.no_grad():
        acc, cnt = 0, 0
        for batch in pbar:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            source = batch['source'].long().to(device)
            
            pred = model(input_ids=input_ids, attention_mask=attention_mask)
            tmp = torch.argmax(pred['source'], dim=-1).cpu().tolist()

            source_preds.extend(tmp)
            generated_preds.extend(list(map(lambda x: (sum(x[:8].cpu().tolist())) / (sum(x[:8].cpu().tolist()) + x[8].cpu().item()), pred['source'])))

            # Scoring
            acc += compute_metrics((pred['source'], source))
            cnt += 1
        
        print(f"accuracy: {acc/cnt}")

# final_preds = ...  # should be a 1D array of predictions, with the same length as df_test, and values in [0, 1]

Test: 100%|██████████| 63/63 [00:21<00:00,  2.89it/s]

accuracy: 0.3134920634920635





## Evaluation

In [14]:
# validation
if not IS_RERUN:
    from sklearn.metrics import roc_auc_score
    print(generated_preds)
    print(df_test['generated'])
    auc_score = roc_auc_score(df_test['generated'], generated_preds)
    
    print(f"ROC AUC: {auc_score:.4f}")

[0.5002552871719624, 0.9999687785492802, 0.9999356819238724, 0.5002404161722352, 0.5002024046643971, 0.532119064025266, 0.5002529569437665, 0.5006680054588466, 0.5002162544955341, 0.5001974236077423, 0.5003945570672582, 0.9998087658619547, 0.5024606952787399, 0.5002489422754923, 0.9997855663381677, 0.987430032852393, 0.9999388667296992, 0.9997967689079676, 0.5002429690187592, 0.5002364125836518, 0.5002001553755355, 0.9998656701856108, 0.9994926233565962, 0.9998964186078848, 0.5002617891567097, 0.9997507453871575, 0.9998082438545349, 0.9998860289122952, 0.6229459496040772, 0.5002773938800464, 0.5002834149780241, 0.9998731795855911, 0.500187308058815, 0.9947859040440964, 0.9998440989877337, 0.500268992453873, 0.5002537239771867, 0.5003350454930622, 0.500255309810736, 0.9999238464898779, 0.9851676209444827, 0.9817326159645572, 0.9998439893038725, 0.5002691977194956, 0.9994632241264262, 0.5002076283484678, 0.5002413947023836, 0.5002762100896461, 0.9869074522901395, 0.9999027946557588, 0.99

## Submission

In [15]:
df_test['generated'] = generated_preds
submission = df_test[['id' if IS_RERUN else 'prompt_id', 'generated']]
submission.to_csv('submission.csv', index=False)