In [None]:
"""
To run this notebook, upload the following file to the Colab environment:
- train_final.csv, from the Kaggle dataset at https://www.kaggle.com/datasets/xuanhuynh233/ielts-dataset/data

# IELTS Essay Score Prediction with DeBERTa
This notebook aims to predict IELTS essay scores using Microsoft's DeBERTa model. It includes the following steps:
1. Installation and import of necessary libraries
2. Loading and preprocessing the dataset
3. Training the DeBERTa model
4. Evaluation and visualization of model performance
5. Making single essay predictions and analyzing results

"""

In [None]:
import os
# Set both variables before transformers is imported further down; they are
# intended to keep Weights & Biases logging switched off during training.
os.environ["WANDB_DISABLED"] = "true"
os.environ["WANDB_MODE"] = "disabled"


In [None]:
# Quiet (-q) install of the Hugging Face stack plus scikit-learn; versions are not pinned.
!pip install -q transformers datasets accelerate evaluate scikit-learn


In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import TrainingArguments
from transformers import Trainer
from datasets import Dataset
import numpy as np
import pandas as pd


In [None]:
# Load the raw data, keep rows that have a prompt, an essay and a band score,
# then build the model input as "<prompt> [SEP] <essay>".
df = pd.read_csv("train_final.csv")
df_clean = (
    df[["prompt", "essay", "Overall_Band"]]
    .dropna()
    .assign(text=lambda frame: frame["prompt"] + " [SEP] " + frame["essay"])
    .loc[:, ["text", "Overall_Band"]]
)


In [None]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the rows for validation; the fixed random_state keeps the split repeatable.
train_df, val_df = train_test_split(df_clean, test_size=0.1, random_state=42)

# Wrap each pandas split as a Hugging Face Dataset.
train_ds, val_ds = (Dataset.from_pandas(frame) for frame in (train_df, val_df))


In [None]:
MODEL_NAME = "microsoft/deberta-v3-base"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)


def tokenize(batch):
    """Tokenize the "text" field of a batch, truncating and padding to 256 tokens."""
    encoded = tokenizer(
        batch["text"],
        truncation=True,
        padding="max_length",
        max_length=256,
    )
    return encoded


# Run the tokenizer over both splits in batched mode.
train_tok, val_tok = (split.map(tokenize, batched=True) for split in (train_ds, val_ds))


In [None]:
# One output unit with problem_type="regression": the head predicts a single
# continuous band score rather than class probabilities.
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME, num_labels=1, problem_type="regression"
)


In [None]:
# Fine-tuning configuration. Checkpoints are written under ./deberta_ielts,
# and both saving and evaluation happen once per epoch.
args = TrainingArguments(
    output_dir="./deberta_ielts",
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    num_train_epochs=3,
    learning_rate=2e-5,
    weight_decay=0.01,
    logging_steps=50,
    save_strategy="epoch",
    eval_strategy="epoch",  # argument name used by recent transformers releases
    # Turn external experiment trackers off explicitly instead of relying only
    # on the WANDB_* environment variables set at the top of the notebook.
    report_to="none",
)


In [None]:
# HF Trainer'ın beklediği isim "labels" olduğu için yeniden adlandırıyoruz
train_tok = train_tok.rename_column("Overall_Band", "labels")
val_tok   = val_tok.rename_column("Overall_Band", "labels")

# Modele girmeyecek olan text kolonunu silelim
train_tok = train_tok.remove_columns(["text"])
val_tok   = val_tok.remove_columns(["text"])


In [None]:
# Show the first prepared training example as a quick sanity check.
train_tok[0]


In [None]:
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error
from transformers import Trainer

def compute_metrics(eval_pred):
    """Compute regression metrics for the Trainer's evaluation loop.

    Args:
        eval_pred: A ``(predictions, labels)`` pair as passed by the Trainer.

    Returns:
        dict: ``{"mse": float, "mae": float}``.
    """
    logits, labels = eval_pred
    # Flatten with reshape(-1) rather than squeeze(): squeeze() collapses a
    # one-sample evaluation set to a 0-d array, which the sklearn metrics reject.
    preds = np.asarray(logits).reshape(-1)
    labels = np.asarray(labels).reshape(-1)
    mse = mean_squared_error(labels, preds)
    mae = mean_absolute_error(labels, preds)
    # Plain Python floats keep the logged metrics free of numpy scalar types.
    return {"mse": float(mse), "mae": float(mae)}

# Bundle the model, training arguments, tokenized splits and metric function.
# NOTE(review): newer transformers releases appear to rename the `tokenizer`
# argument to `processing_class` — confirm against the installed version.
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=train_tok,
    eval_dataset=val_tok,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

In [None]:
# Run fine-tuning with the configuration defined in `args`.
trainer.train()

In [None]:
# Disabled cell: the save calls below sit inside a string literal, so nothing
# is written to disk. Remove the surrounding triple quotes to save the
# fine-tuned model and tokenizer to ./deberta_ielts_model.
"""
trainer.save_model("./deberta_ielts_model")
tokenizer.save_pretrained("./deberta_ielts_model")

"""

In [None]:
import matplotlib.pyplot as plt


def _epoch_losses(log_history):
    """Pair each evaluation entry in the Trainer log with the latest training loss.

    Args:
        log_history: List of log dicts, e.g. ``trainer.state.log_history``.
            Entries with a "loss" key are training logs; entries with an
            "eval_loss" key are evaluation logs.

    Returns:
        tuple: ``(epochs, train_loss, val_loss)`` lists with one item per
        evaluation entry. ``train_loss`` holds NaN where no training loss had
        been logged yet, so the plot simply skips that point.
    """
    epochs, train_loss, val_loss = [], [], []
    latest_train_loss = float("nan")
    for record in log_history:
        if "loss" in record:
            latest_train_loss = record["loss"]
        if "eval_loss" in record:
            epochs.append(record["epoch"])
            train_loss.append(latest_train_loss)
            val_loss.append(record["eval_loss"])
    return epochs, train_loss, val_loss


# Read the curves from this run's log instead of numbers copied by hand from
# an earlier run, so the figure always matches the model trained above.
epochs, train_loss, val_loss = _epoch_losses(trainer.state.log_history)

plt.plot(epochs, train_loss, marker='o', label='Training Loss')
plt.plot(epochs, val_loss, marker='o', label='Validation Loss')

plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.title("Training vs Validation Loss")
plt.legend()
plt.grid(True)
plt.show()


In [None]:
import pandas as pd

# Re-read the raw CSV (this rebinds `df`) and count how many essays fall into
# each overall band, ordered by band value.
df = pd.read_csv("train_final.csv")

band_counts = df["Overall_Band"].value_counts().sort_index()

band_counts


In [None]:
import torch
import pandas as pd

def predict_single(text, max_length=256):
    """Predict the band score for a single essay text.

    Args:
        text: Input string to score.
        max_length: Token limit applied when truncating the input. Defaults to
            256, the same limit used when tokenizing the training data.

    Returns:
        float: The model's single regression output, rounded to 2 decimals.
    """
    # Switch to inference mode so layers such as dropout cannot make repeated
    # predictions for the same text differ.
    model.eval()
    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=max_length,
    ).to(model.device)
    with torch.no_grad():
        output = model(**inputs).logits.squeeze().item()
    return round(output, 2)

# True score vs. model prediction on 10 validation essays drawn at random
# (fixed random_state, so the same rows are picked on every run).
samples = val_df.sample(10, random_state=42)
texts = samples["text"].tolist()
true_scores = samples["Overall_Band"].tolist()

preds = [predict_single(essay_text) for essay_text in texts]

results = pd.DataFrame({"True Score": true_scores, "Predicted Score": preds})
results


In [None]:
# Disabled cell (string literal, not executed): histogram of model outputs over
# the whole validation set.
# NOTE(review): as written it tokenizes and runs every validation text in one
# batch and outside torch.no_grad() — check memory use / add batching before
# re-enabling.
"""
import matplotlib.pyplot as plt

pred_all = model(**tokenizer(val_df["text"].tolist(), return_tensors="pt", truncation=True, padding=True).to(model.device)).logits.squeeze().cpu().tolist()

plt.hist(pred_all, bins=20)
plt.xlabel("Predicted Score")
plt.ylabel("Frequency")
plt.title("Model Output Distribution (Validation Set)")
plt.show()

"""

In [None]:
# Disabled cell (string literal, not executed): histogram of true-minus-predicted errors.
# NOTE(review): val_df keeps its original row labels after train_test_split,
# while pd.Series(pred_all) gets a fresh 0..n-1 index. pandas aligns the
# subtraction on labels, so this probably needs positional values (e.g.
# .to_numpy()) — confirm before re-enabling.
"""
errors = val_df["Overall_Band"] - pd.Series(pred_all)

plt.hist(errors, bins=20)
plt.xlabel("Error (True − Predicted)")
plt.ylabel("Count")
plt.title("Prediction Error Distribution")
plt.show()

"""

In [None]:
import pandas as pd

# True score vs. prediction on 10 randomly drawn validation essays.
# NOTE: this uses the same sample size and random_state=42 as the earlier
# comparison cell, so it selects the same rows.
samples = val_df.sample(10, random_state=42)
texts, true_scores = samples["text"].tolist(), samples["Overall_Band"].tolist()

preds = list(map(predict_single, texts))

results = pd.DataFrame(
    {
        "True Score": true_scores,
        "Predicted Score": preds,
    }
)
results


In [None]:
# Larger spot check: 50 validation essays drawn with a different fixed seed.
samples = val_df.sample(50, random_state=123)
texts = samples["text"].tolist()
true_scores = samples["Overall_Band"].tolist()

preds = []
for essay_text in texts:
    preds.append(predict_single(essay_text))

df50 = pd.DataFrame({"True Score": true_scores, "Predicted Score": preds})

# Summary statistics of the true and predicted score columns.
df50.describe()


In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Error metrics for whatever `true_scores` / `preds` currently hold (as left
# by the preceding 50-essay cell when the notebook is run top to bottom).
mae = mean_absolute_error(true_scores, preds)
mse = mean_squared_error(true_scores, preds)

pd.DataFrame([("MAE", mae), ("MSE", mse)], columns=["Metric", "Value"])
