In [None]:
from torch.utils.data import DataLoader
from tqdm import tqdm
import torch
import Levenshtein
from torch.utils.data import DataLoader
from typing import List
import evaluate


In [108]:
device = 0 if torch.cuda.is_available() else -1  
print(f"Device set to use: {'cuda:0' if device==0 else 'cpu'}")

Device set to use: cuda:0


In [109]:
MODEL_NAME = "vennify/t5-base-grammar-correction"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)

In [110]:
df=pd.read_csv('/kaggle/input/grammar-correction/Grammar Correction.csv')
df.drop(columns={'Serial Number','Error Type'},inplace=True)
df.head()

Unnamed: 0,Ungrammatical Statement,Standard English
0,I goes to the store everyday.,I go to the store everyday.
1,They was playing soccer last night.,They were playing soccer last night.
2,She have completed her homework.,She has completed her homework.
3,He don't know the answer.,He doesn't know the answer.
4,The sun rise in the east.,The sun rises in the east.


In [None]:
train_df, val_df = train_test_split(train_df, test_size=0.2, random_state=42)

print(f"Train: {len(train_df)}, Val: {len(val_df)}")

Train: 1045, Val: 262


In [None]:
def tokenize_fn(batch):
    return tokenizer(batch['Ungrammatical Statement'], 
                     text_target=batch['Standard English'], 
                     padding='max_length', truncation=True, max_length=128)

train_dataset = train_dataset.map(tokenize_fn, batched=True)
val_dataset = val_dataset.map(tokenize_fn, batched=True)


Map:   0%|          | 0/1634 [00:00<?, ? examples/s]

Map:   0%|          | 0/182 [00:00<?, ? examples/s]

In [113]:
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader   = DataLoader(val_dataset, batch_size=16)


In [120]:
bleu = evaluate.load("bleu")

def precision_recall_fbeta(y_true: List[List[str]], y_pred: List[List[str]], beta: float = 0.5):
    assert len(y_true) == len(y_pred), "Predictions and references must have the same length"
    total_correct, total_pred, total_true = 0, 0, 0
    for ref, pred in zip(y_true, y_pred):
        ref_set, pred_set = set(ref), set(pred)
        correct = len(ref_set & pred_set)
        total_correct += correct
        total_pred += len(pred_set)
        total_true += len(ref_set)
    precision = total_correct / total_pred if total_pred > 0 else 0
    recall = total_correct / total_true if total_true > 0 else 0
    if precision + recall == 0:
        fbeta = 0
    else:
        fbeta = (1 + beta**2) * (precision * recall) / ((beta**2 * precision) + recall)
    return precision, recall, fbeta


def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    
    # BLEU
    references = [[ref] for ref in labels]
    bleu_score = bleu.compute(predictions=predictions, references=references)["bleu"]

    ned_values = [
        Levenshtein.distance(pred, ref) / max(len(ref), 1)
        for pred, ref in zip(predictions, labels)
    ]
    ned_score = sum(ned_values) / len(ned_values)

    # F0.5, Precision, Recall
    y_true = [ref.split() for ref in labels]
    y_pred = [pred.split() for pred in predictions]
    precision, recall, f0_5 = precision_recall_fbeta(y_true, y_pred, beta=0.5)

    return {
        "bleu": bleu_score,
        "ned": ned_score,
        "f0.5": f0_5,
        "precision": precision,
        "recall": recall
    }



In [121]:
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

T5ForConditionalGeneration(
  (shared): Embedding(32128, 768)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 768)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=768, out_features=768, bias=False)
              (k): Linear(in_features=768, out_features=768, bias=False)
              (v): Linear(in_features=768, out_features=768, bias=False)
              (o): Linear(in_features=768, out_features=768, bias=False)
              (relative_attention_bias): Embedding(32, 12)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseActDense(
              (wi): Linear(in_features=768, out_features=3072, bias=False)
              (wo): Linear(in_features=3072, out_features=768, bias=False)
              (dropout): Dro

In [122]:
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5)

In [None]:
patience = 4    
counter = 0   
best_f05 = 0

for epoch in range(20):  
    model.train()
    loop = tqdm(train_loader)
    for batch in loop:
        optimizer.zero_grad()
        outputs = model(
            input_ids=batch['input_ids'].to(device),
            attention_mask=batch['attention_mask'].to(device),
            labels=batch['labels'].to(device)
        )
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        loop.set_description(f"Epoch {epoch+1}")
        loop.set_postfix(loss=loss.item())

    
    model.eval()
    all_preds, all_labels = [], []
    with torch.no_grad():
        for batch in val_loader:
            outputs = model.generate(
                batch['input_ids'].to(device),
                attention_mask=batch['attention_mask'].to(device)
            )
            preds = tokenizer.batch_decode(outputs, skip_special_tokens=True)
            labels = [tokenizer.decode(l, skip_special_tokens=True) for l in batch['labels']]
            all_preds.extend(preds)
            all_labels.extend(labels)

    
    metrics = compute_metrics((all_preds, all_labels))
    print(f"Epoch {epoch+1} metrics:", metrics)

    
    if metrics['f0.5'] > best_f05:
        best_f05 = metrics['f0.5']
        counter = 0    
        model.save_pretrained("./best_model")
        tokenizer.save_pretrained("./best_model")
        print(f" Best model saved with F0.5={best_f05:.4f}")
    else:
        counter += 1
        print(f"No improvement for {counter} evaluations.")

    if counter >= patience:
        print(f" Early stopping triggered after {counter} evaluations without improvement.")
        break


Epoch 1: 100%|██████████| 103/103 [00:52<00:00,  1.96it/s, loss=0.00594]


Epoch 1 metrics: {'bleu': 0.8031517477125393, 'ned': 0.09526589104174304, 'f0.5': 0.9073637316561846, 'precision': 0.9040469973890339, 'recall': 0.9208776595744681}
 Best model saved with F0.5=0.9074


Epoch 2: 100%|██████████| 103/103 [00:52<00:00,  1.96it/s, loss=0.0166]


Epoch 2 metrics: {'bleu': 0.8389963609599745, 'ned': 0.06875569806186241, 'f0.5': 0.927055702917772, 'precision': 0.9264413518886679, 'recall': 0.9295212765957447}
 Best model saved with F0.5=0.9271


Epoch 3: 100%|██████████| 103/103 [00:52<00:00,  1.96it/s, loss=0.00465]


Epoch 3 metrics: {'bleu': 0.8454687243969327, 'ned': 0.06520899699093696, 'f0.5': 0.927055702917772, 'precision': 0.9264413518886679, 'recall': 0.9295212765957447}
No improvement for 1 evaluations.


Epoch 4: 100%|██████████| 103/103 [00:52<00:00,  1.96it/s, loss=0.0229] 


Epoch 4 metrics: {'bleu': 0.8545749279730742, 'ned': 0.05949677989210565, 'f0.5': 0.93367889420521, 'precision': 0.9335548172757475, 'recall': 0.9341755319148937}
 Best model saved with F0.5=0.9337


Epoch 5: 100%|██████████| 103/103 [00:52<00:00,  1.96it/s, loss=0.0137] 


Epoch 5 metrics: {'bleu': 0.8595363258032627, 'ned': 0.05718402015903243, 'f0.5': 0.9371686108165429, 'precision': 0.9364238410596026, 'recall': 0.9401595744680851}
 Best model saved with F0.5=0.9372


Epoch 6: 100%|██████████| 103/103 [00:52<00:00,  1.96it/s, loss=0.00263]


Epoch 6 metrics: {'bleu': 0.8639581227547694, 'ned': 0.05608681840992979, 'f0.5': 0.9373343932167462, 'precision': 0.9364659166115156, 'recall': 0.9408244680851063}
 Best model saved with F0.5=0.9373


Epoch 7: 100%|██████████| 103/103 [00:52<00:00,  1.96it/s, loss=0.00111]


Epoch 7 metrics: {'bleu': 0.8613403928172374, 'ned': 0.065139966589571, 'f0.5': 0.9346965699208444, 'precision': 0.9328505595786701, 'recall': 0.942154255319149}
No improvement for 1 evaluations.


Epoch 8: 100%|██████████| 103/103 [00:52<00:00,  1.96it/s, loss=0.00263]


Epoch 8 metrics: {'bleu': 0.8685632295249467, 'ned': 0.06286894749437304, 'f0.5': 0.9351900739176346, 'precision': 0.9334650856389987, 'recall': 0.942154255319149}
No improvement for 2 evaluations.


Epoch 9: 100%|██████████| 103/103 [00:52<00:00,  1.96it/s, loss=0.000501]


Epoch 9 metrics: {'bleu': 0.8720152978311988, 'ned': 0.06001097441747431, 'f0.5': 0.9371693121693123, 'precision': 0.9359313077939234, 'recall': 0.942154255319149}
No improvement for 3 evaluations.


Epoch 10: 100%|██████████| 103/103 [00:52<00:00,  1.96it/s, loss=0.000367]


Epoch 10 metrics: {'bleu': 0.8744012841658286, 'ned': 0.053976153074022565, 'f0.5': 0.9421443736730362, 'precision': 0.9416445623342176, 'recall': 0.9441489361702128}
 Best model saved with F0.5=0.9421


Epoch 11: 100%|██████████| 103/103 [00:52<00:00,  1.96it/s, loss=0.0104] 


Epoch 11 metrics: {'bleu': 0.8714412478544773, 'ned': 0.05253640035022266, 'f0.5': 0.94115302869288, 'precision': 0.9409030544488712, 'recall': 0.942154255319149}
No improvement for 1 evaluations.


Epoch 12: 100%|██████████| 103/103 [00:52<00:00,  1.96it/s, loss=0.000486]


Epoch 12 metrics: {'bleu': 0.8757467530825537, 'ned': 0.05006835095798728, 'f0.5': 0.9441489361702127, 'precision': 0.9441489361702128, 'recall': 0.9441489361702128}
 Best model saved with F0.5=0.9441


Epoch 13: 100%|██████████| 103/103 [00:52<00:00,  1.96it/s, loss=0.00578]


Epoch 13 metrics: {'bleu': 0.8804829458579425, 'ned': 0.04812170820793159, 'f0.5': 0.9466277217206586, 'precision': 0.9462508294625083, 'recall': 0.9481382978723404}
 Best model saved with F0.5=0.9466


Epoch 14: 100%|██████████| 103/103 [00:52<00:00,  1.96it/s, loss=0.00154] 


Epoch 14 metrics: {'bleu': 0.8735249641280345, 'ned': 0.05201391835490172, 'f0.5': 0.9438232161874334, 'precision': 0.9440745672436751, 'recall': 0.9428191489361702}
No improvement for 1 evaluations.


Epoch 15: 100%|██████████| 103/103 [00:52<00:00,  1.96it/s, loss=0.000444]


Epoch 15 metrics: {'bleu': 0.8724702110073912, 'ned': 0.0551999440171092, 'f0.5': 0.9414724576271187, 'precision': 0.9404761904761905, 'recall': 0.9454787234042553}
No improvement for 2 evaluations.


Epoch 16: 100%|██████████| 103/103 [00:52<00:00,  1.96it/s, loss=0.00029] 


Epoch 16 metrics: {'bleu': 0.8700852779546421, 'ned': 0.054260161135533974, 'f0.5': 0.9428191489361701, 'precision': 0.9428191489361702, 'recall': 0.9428191489361702}
No improvement for 3 evaluations.


Epoch 17: 100%|██████████| 103/103 [00:52<00:00,  1.96it/s, loss=3.77e-5] 


Epoch 17 metrics: {'bleu': 0.874689534907623, 'ned': 0.05967109833227765, 'f0.5': 0.942297511911064, 'precision': 0.9411764705882353, 'recall': 0.9468085106382979}
No improvement for 4 evaluations.
 Early stopping triggered after 4 evaluations without improvement.


In [None]:

model = AutoModelForSeq2SeqLM.from_pretrained("./best_model").to(device)
tokenizer = AutoTokenizer.from_pretrained("./best_model")

# جمل تجريبية
sentences = [
    "He go to school yesterday.",
    "I has a pen.",
    "He suggested me to go to the doctor because I am sick.",
    "Despite of being tired, but she continued working.",
    "I have visited Paris last year for the first time.",
    "Everyone should knows their responsibilities",
    "She don’t knows nothing about the project yet.",
    "Him and me was going to the market yesterday.",
    "If I would have seen her, I would tell her the truth.",
    "Running fastly, the race was won by him.",
    "The informations you gave me are very helpful.",
    "I am agree with you about this idea."
]

inputs = tokenizer(sentences, return_tensors="pt", padding=True, truncation=True).to(device)

with torch.no_grad():
    outputs = model.generate(**inputs)

preds = tokenizer.batch_decode(outputs, skip_special_tokens=True)

counter=1
for src, pred in zip(sentences, preds):
    print(f"Input {counter}: {src}")
    print(f"Prediction: {pred}")
    print("----------")
    counter+=1


Input 1: He go to school yesterday.
Prediction: He went to school yesterday.
----------
Input 2: I has a pen.
Prediction: I have a pen.
----------
Input 3: He suggested me to go to the doctor because I am sick.
Prediction: He suggested I go to the doctor because I am sick.
----------
Input 4: Despite of being tired, but she continued working.
Prediction: Despite being tired, she continued working.
----------
Input 5: I have visited Paris last year for the first time.
Prediction: I visited Paris last year for the first time.
----------
Input 6: Everyone should knows their responsibilities
Prediction: Everyone should know his or her responsibilities.
----------
Input 7: She don’t knows nothing about the project yet.
Prediction: She doesn’t know anything about the project yet.
----------
Input 8: Him and me was going to the market yesterday.
Prediction: He and I were going to the market yesterday.
----------
Input 9: If I would have seen her, I would tell her the truth.
Prediction: If I h

In [None]:
#he made only oen mistake in input 11