In [1]:
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    Trainer,
    TrainingArguments,
    DataCollatorWithPadding,
    pipeline
)
from datasets import Dataset

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f" Using device: {device}")

 Using device: cuda


In [2]:
import pandas as pd
from datasets import Dataset

train_df = pd.read_csv("/content/train_dataset.csv")
val_df   = pd.read_csv("/content/val_dataset.csv")
test_df  = pd.read_csv("/content/test_dataset.csv")

train_df = train_df.drop_duplicates(subset=['query']).reset_index(drop=True)
val_df   = val_df.drop_duplicates(subset=['query']).reset_index(drop=True)
test_df  = test_df.drop_duplicates(subset=['query']).reset_index(drop=True)

print(f" Train: {len(train_df)}, Val: {len(val_df)}, Test: {len(test_df)}")

train_dataset = Dataset.from_pandas(train_df)
val_dataset   = Dataset.from_pandas(val_df)
test_dataset  = Dataset.from_pandas(test_df)

label_mapping = {label: i for i, label in enumerate(train_df["label"].unique())}
reverse_mapping = {v: k for k, v in label_mapping.items()}

def encode_labels(example):
    example["label"] = label_mapping[example["label"]]
    return example

train_dataset = train_dataset.map(encode_labels)
val_dataset   = val_dataset.map(encode_labels)
test_dataset  = test_dataset.map(encode_labels)


 Train: 2090, Val: 470, Test: 469


Map:   0%|          | 0/2090 [00:00<?, ? examples/s]

Map:   0%|          | 0/470 [00:00<?, ? examples/s]

Map:   0%|          | 0/469 [00:00<?, ? examples/s]

In [3]:

MODEL_NAME = "distilbert-base-uncased"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

def tokenize_function(example):
    return tokenizer(example["query"], truncation=True, padding="max_length", max_length=128)

train_dataset = train_dataset.map(tokenize_function, batched=True)
val_dataset = val_dataset.map(tokenize_function, batched=True)
test_dataset = test_dataset.map(tokenize_function, batched=True)

train_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])
val_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])
test_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])

model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME, num_labels=len(label_mapping), id2label=reverse_mapping, label2id=label_mapping
).to(device)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Map:   0%|          | 0/2090 [00:00<?, ? examples/s]

Map:   0%|          | 0/470 [00:00<?, ? examples/s]

Map:   0%|          | 0/469 [00:00<?, ? examples/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [4]:
training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="steps",
    eval_steps=100,
    save_strategy="steps",
    save_steps=100,
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=5,
    weight_decay=0.01,
    load_best_model_at_end=True,
    logging_dir="./logs",
    logging_steps=50,
    metric_for_best_model="eval_loss",
    report_to="none"
)

data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator,
)

trainer.train()

  trainer = Trainer(


Step,Training Loss,Validation Loss
100,0.17,0.151381
200,0.0788,0.104993
300,0.0256,0.10611
400,0.0559,0.139682
500,0.0154,0.088038
600,0.01,0.091478


TrainOutput(global_step=655, training_loss=0.09770579055264478, metrics={'train_runtime': 233.3061, 'train_samples_per_second': 44.791, 'train_steps_per_second': 2.807, 'total_flos': 346077250675200.0, 'train_loss': 0.09770579055264478, 'epoch': 5.0})

In [5]:
predictions = trainer.predict(test_dataset)
y_true = predictions.label_ids
y_pred = predictions.predictions.argmax(-1)

print("Test Accuracy:", accuracy_score(y_true, y_pred))

target_names = [ {"S":"Simple","M":"Medium","A":"Advanced"}[reverse_mapping[i]] for i in range(len(label_mapping)) ]
print(classification_report(y_true, y_pred, target_names=target_names))



Test Accuracy: 0.9722814498933902
              precision    recall  f1-score   support

      Simple       0.95      0.97      0.96       159
      Medium       1.00      1.00      1.00       155
    Advanced       0.97      0.95      0.96       155

    accuracy                           0.97       469
   macro avg       0.97      0.97      0.97       469
weighted avg       0.97      0.97      0.97       469



In [6]:
model.save_pretrained("./best_model")
tokenizer.save_pretrained("./best_model")

!zip -r best_model.model ./best_model



  adding: best_model/ (stored 0%)
  adding: best_model/model.safetensors (deflated 8%)
  adding: best_model/vocab.txt (deflated 53%)
  adding: best_model/config.json (deflated 47%)
  adding: best_model/tokenizer_config.json (deflated 75%)
  adding: best_model/tokenizer.json (deflated 71%)
  adding: best_model/special_tokens_map.json (deflated 42%)


In [7]:
pipe = pipeline("text-classification", model="./best_model", tokenizer="./best_model", device=0 if torch.cuda.is_available() else -1)

queries = [
    "This is a very easy text.",
    "The model should understand moderately difficult content.",
    "Quantum physics requires advanced understanding."
]

for q in queries:
    pred = pipe(q)[0]
    print(f" Text: {q}")
    print(f"   → Predicted: {pred['label']} (Score: {pred['score']:.4f})\n")

Device set to use cuda:0


 Text: This is a very easy text.
   → Predicted: S (Score: 0.9933)

 Text: The model should understand moderately difficult content.
   → Predicted: S (Score: 0.9366)

 Text: Quantum physics requires advanced understanding.
   → Predicted: A (Score: 0.9191)



In [8]:
pipe = pipeline(
    "text-classification",
    model="./best_model",
    tokenizer="./best_model",
    device=0 if torch.cuda.is_available() else -1
)

queries = [
    ("What is the capital of France?", "S"),
    ("How many days are in a year?", "S"),
    ("What color is the sky?", "S"),
    ("Who wrote the play Romeo and Juliet?", "S"),
    ("What is 2 + 2?", "S"),

    ("Explain the process of photosynthesis.", "M"),
    ("What are the main causes of World War II?", "M"),
    ("How does the water cycle operate in nature?", "M"),

    ("Discuss the relationship between quantum mechanics and relativity.", "A"),
    ("Analyze the socioeconomic impacts of artificial intelligence on global labor markets.", "A"),
    ("Evaluate the ethical implications of gene editing technologies in humans.", "A"),
    ("Critically assess the role of postmodern philosophy in redefining truth.", "A"),


    ("When was the first iPhone released?", "S"),
    ("Summarize the key differences between classical and operant conditioning.", "M"),
    ("Examine the intersection of consciousness studies and neuroscience.", "A"),
    ("Explain how supply and demand affect market prices.", "M"),
]

correct, total = 0, len(queries)

for q, expected in queries:
    pred = pipe(q)[0]
    predicted = pred["label"]
    score = pred["score"]

    is_correct = (predicted.upper() == expected.upper())
    if is_correct:
        correct += 1

    print(f" Text: {q}")
    print(f"   → Expected: {expected}")
    print(f"   → Predicted: {predicted} (Score: {score:.4f}) {'✅' if is_correct else '❌'}\n")

print("===================================")
print(f" Final Accuracy: {correct}/{total} = {correct/total:.2%}")


Device set to use cuda:0


 Text: What is the capital of France?
   → Expected: S
   → Predicted: S (Score: 0.9858) ✅

 Text: How many days are in a year?
   → Expected: S
   → Predicted: S (Score: 0.9877) ✅

 Text: What color is the sky?
   → Expected: S
   → Predicted: S (Score: 0.9946) ✅

 Text: Who wrote the play Romeo and Juliet?
   → Expected: S
   → Predicted: S (Score: 0.9801) ✅

 Text: What is 2 + 2?
   → Expected: S
   → Predicted: S (Score: 0.9973) ✅

 Text: Explain the process of photosynthesis.
   → Expected: M
   → Predicted: M (Score: 0.9040) ✅

 Text: What are the main causes of World War II?
   → Expected: M
   → Predicted: M (Score: 0.9783) ✅



You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


 Text: How does the water cycle operate in nature?
   → Expected: M
   → Predicted: M (Score: 0.9670) ✅

 Text: Discuss the relationship between quantum mechanics and relativity.
   → Expected: A
   → Predicted: A (Score: 0.9816) ✅

 Text: Analyze the socioeconomic impacts of artificial intelligence on global labor markets.
   → Expected: A
   → Predicted: A (Score: 0.9924) ✅

 Text: Evaluate the ethical implications of gene editing technologies in humans.
   → Expected: A
   → Predicted: A (Score: 0.9876) ✅

 Text: Critically assess the role of postmodern philosophy in redefining truth.
   → Expected: A
   → Predicted: A (Score: 0.9943) ✅

 Text: When was the first iPhone released?
   → Expected: S
   → Predicted: S (Score: 0.9946) ✅

 Text: Summarize the key differences between classical and operant conditioning.
   → Expected: M
   → Predicted: M (Score: 0.9958) ✅

 Text: Examine the intersection of consciousness studies and neuroscience.
   → Expected: A
   → Predicted: A (Score: 0

In [9]:
from google.colab import files
files.download('/content/best_model.model')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>