In [None]:
#  STEP 1 — Install dependencies
!pip install transformers datasets accelerate evaluate -q


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/84.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
!pip install --upgrade transformers -q

In [None]:
# Show which transformers release is installed; the bare expression on the
# last line is rendered as the cell output.
import transformers
transformers.__version__


'4.57.1'

In [None]:
#  STEP 2 — Import libraries
import pandas as pd
from datasets import Dataset
from transformers import RobertaTokenizer, RobertaForSequenceClassification, TrainingArguments, Trainer
import evaluate
import torch


In [None]:
#  STEP 3 — Load dataset
# Read the Mercor train.csv and test.csv files into pandas DataFrames.
train_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in ("/content/train.csv", "/content/test.csv")
)


In [None]:
import pandas as pd


def _join_topic_and_answer(df):
    """Return one string per row: ``topic`` and ``answer`` joined by a space.

    Missing values are replaced with empty strings first, so a NaN in either
    column cannot turn the combined text into NaN.
    """
    topic = df["topic"].fillna("").astype(str)
    answer = df["answer"].fillna("").astype(str)
    return topic + " " + answer


# Correct column setup
# `text` is built from the original `topic` and `answer` columns. `answer` is
# deliberately not renamed: renaming it to `text` first would make the
# `df["answer"]` lookup raise KeyError. Only the target column is renamed, and
# renaming an already-renamed frame is a no-op, so this cell can be re-run.
train_df = train_df.assign(text=_join_topic_and_answer(train_df)).rename(
    columns={"is_cheating": "label"}
)
test_df = test_df.assign(text=_join_topic_and_answer(test_df))



In [None]:
# Wrap the pandas training frame in a Hugging Face `Dataset`.
train_ds = Dataset.from_pandas(df=train_df)


In [None]:
#  STEP 4 — Tokenize data
tokenizer = RobertaTokenizer.from_pretrained("roberta-base")


def preprocess(example):
    """Tokenize the ``text`` field of ``example`` with the RoBERTa tokenizer.

    Used through ``Dataset.map(..., batched=True)``, so ``example`` is passed
    in as a batch. Sequences are truncated and padded to a fixed length of 256.
    """
    tokenizer_options = {
        "truncation": True,
        "padding": "max_length",
        "max_length": 256,
    }
    return tokenizer(example["text"], **tokenizer_options)


train_ds = train_ds.map(preprocess, batched=True)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

Map:   0%|          | 0/269 [00:00<?, ? examples/s]

In [None]:
#  STEP 5 — Split into train/validation
# Hold out 20% of the tokenized rows for validation, using a fixed seed.
train_test = train_ds.train_test_split(test_size=0.2, seed=42)
train_split, val_split = train_test["train"], train_test["test"]


In [None]:
#  STEP 6 — Load model
# roberta-base checkpoint with a sequence-classification head sized for 2 labels.
model = RobertaForSequenceClassification.from_pretrained(
    "roberta-base",
    num_labels=2,
)


model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
#  STEP 7 — Define metrics
accuracy = evaluate.load("accuracy")
f1 = evaluate.load("f1")


def compute_metrics(eval_pred):
    """Return accuracy and weighted F1 for one evaluation pass.

    ``eval_pred`` unpacks into ``(logits, labels)``; the predicted class for
    each row is the argmax over the last axis of ``logits``.
    """
    logits, labels = eval_pred
    predicted_classes = logits.argmax(axis=-1)

    accuracy_result = accuracy.compute(
        predictions=predicted_classes, references=labels
    )
    f1_result = f1.compute(
        predictions=predicted_classes, references=labels, average="weighted"
    )
    return {"accuracy": accuracy_result["accuracy"], "f1": f1_result["f1"]}


Downloading builder script: 0.00B [00:00, ?B/s]

Downloading builder script: 0.00B [00:00, ?B/s]

In [None]:
#  STEP 8 — Training arguments
# Hyper-parameters and bookkeeping options handed to the Trainer.
training_args = TrainingArguments(
    output_dir="./results",
    do_train=True,
    do_eval=True,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    learning_rate=2e-5,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_steps=50,
    save_total_limit=1,
    # Switch off external logging integrations explicitly instead of relying
    # on the deprecated WANDB_DISABLED environment variable; this also makes
    # the setting independent of the order in which cells are executed.
    report_to="none",
)


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


In [None]:
import os

# Set the WANDB_DISABLED switch for this process. transformers reports this
# variable as deprecated in favour of the `report_to` option.
os.environ.update({"WANDB_DISABLED": "true"})


In [None]:
#  STEP 9 — Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_split,
    eval_dataset=val_split,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

trainer.train()


  trainer = Trainer(


Step,Training Loss
50,0.3812


TrainOutput(global_step=81, training_loss=0.31868583184701427, metrics={'train_runtime': 1706.8215, 'train_samples_per_second': 0.378, 'train_steps_per_second': 0.047, 'total_flos': 84853315353600.0, 'train_loss': 0.31868583184701427, 'epoch': 3.0})

In [None]:

#  STEP 10 — Evaluate
# Evaluate on the Trainer's eval_dataset (val_split); the returned metrics
# dict is the last expression, so it is displayed as the cell output.
trainer.evaluate()




{'eval_loss': 0.1469152569770813,
 'eval_accuracy': 0.9629629629629629,
 'eval_f1': 0.9629629629629629,
 'eval_runtime': 39.8456,
 'eval_samples_per_second': 1.355,
 'eval_steps_per_second': 0.176,
 'epoch': 3.0}

In [None]:
#  STEP 11 — Predict on test data
# Tokenize the test frame the same way as the training data.
test_dataset = Dataset.from_pandas(test_df).map(preprocess, batched=True)

# Run inference and take the highest-scoring class for every row.
predictions = trainer.predict(test_dataset)
test_logits = torch.tensor(predictions.predictions)
preds = test_logits.argmax(dim=1).numpy()

# Write the id / predicted-label pairs to the submission file.
submission = pd.DataFrame({"id": test_df["id"], "is_cheating": preds})
submission.to_csv("submission_roberta.csv", index=False)
print("✅ submission_roberta.csv saved!")

Map:   0%|          | 0/264 [00:00<?, ? examples/s]



✅ submission_roberta.csv saved!
