# Grammar Correction using T5
*By: Fady Motir*
![](https://miro.medium.com/v2/resize:fit:4006/1*D0J1gNQf8vrrUpKeyD8wPA.png)

In [23]:
# Install HappyTransformer
!pip install happytransformer 
from IPython.display import clear_output
clear_output()

In [24]:
# Importing Libraries
import csv
import os
from datasets import load_dataset
from happytransformer import TTSettings
from happytransformer import TTTrainArgs
from happytransformer import HappyTextToText
import warnings
warnings.filterwarnings('ignore')

In [25]:
# Load Model
# Build the HappyTransformer text-to-text wrapper of type "T5" from the
# "t5-base" model name. This single object is reused below for evaluation,
# fine-tuning and text generation.
happy_tt = HappyTextToText("T5", "t5-base")

In [26]:
# Loading Dataset
# The JFLEG "validation" split is used as the training data and the "test"
# split is held out for evaluation.
# NOTE(review): the unbounded "[:]" slice presumably selects the whole split,
# i.e. it is equivalent to passing the bare split name - confirm.
train_dataset = load_dataset("jfleg", split='validation[:]')
eval_dataset = load_dataset("jfleg", split='test[:]')

In [27]:
# Peek at the first two training examples: the full list of reference
# corrections for each sentence, then the first correction on its own.
for corrections in train_dataset["corrections"][:2]:
    print(
        corrections,
        corrections[0],
        "--------------------------------------------------------",
        sep="\n",
    )

['So I think we would not be alive if our ancestors did not develop sciences and technologies . ', 'So I think we could not live if older people did not develop science and technologies . ', 'So I think we can not live if old people could not find science and technologies and they did not develop . ', 'So I think we can not live if old people can not find the science and technology that has not been developed . ']
So I think we would not be alive if our ancestors did not develop sciences and technologies . 
--------------------------------------------------------
['Not for use with a car . ', 'Do not use in the car . ', 'Car not for use . ', 'Can not use the car . ']
Not for use with a car . 
--------------------------------------------------------


In [28]:
# Data Preprocessing
def generate_csv(csv_path, dataset):
    """Write a dataset to a two-column CSV of (input, target) training pairs.

    Each case's source sentence is prefixed with the task prefix "grammar: "
    and paired with every one of its corrections, producing one row per pair
    under an "input","target" header row. Cases with a blank sentence and
    individual blank corrections are skipped.

    Args:
        csv_path: Destination file path; an existing file is overwritten.
        dataset: Iterable of mappings, each with a "sentence" string and a
            "corrections" list of strings.
    """
    # Explicit UTF-8 so non-ASCII text is written the same way on every
    # platform instead of depending on the locale's default encoding.
    with open(csv_path, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["input", "target"])
        for case in dataset:
            sentence = case["sentence"]
            # Test the raw sentence: once the prefix is added the string is
            # never empty, so checking the prefixed text would filter nothing.
            if not sentence:
                continue
            # Adding the task's prefix to input
            input_text = "grammar: " + sentence
            for correction in case["corrections"]:
                # a few of the cases contain blank strings.
                if correction:
                    writer.writerow([input_text, correction])

In [29]:
# Materialise both splits as the CSV files that the evaluation and training
# cells read.
for csv_name, split in (("train.csv", train_dataset), ("eval.csv", eval_dataset)):
    generate_csv(csv_name, split)

In [30]:
# Baseline: evaluate the model on the held-out CSV before any fine-tuning,
# so the post-training loss has something to be compared against.
before_result = happy_tt.eval("eval.csv")

Generating eval split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/2988 [00:00<?, ? examples/s]

You're using a T5TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


In [31]:
# Report the evaluation loss measured before fine-tuning.
print("Before loss:", before_result.loss)

Before loss: 1.280392050743103


In [32]:
# Training

# Fine-tune on the generated training CSV with a batch size of 8; no other
# TTTrainArgs option is set here, so the rest stay at the library defaults.
args = TTTrainArgs(batch_size=8)
happy_tt.train("train.csv", args=args)

Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/2714 [00:00<?, ? examples/s]

Map:   0%|          | 0/302 [00:00<?, ? examples/s]

Step,Training Loss,Validation Loss
1,1.7996,1.130379
34,0.8352,0.721868
68,0.7302,0.594935
102,0.6886,0.554168
136,0.6083,0.53815
170,0.6431,0.519083
204,0.5778,0.51713
238,0.6054,0.50664
272,0.5812,0.502152
306,0.6583,0.497941


In [33]:
# Evaluation

# Re-run the same evaluation on the held-out CSV now that the model has been
# fine-tuned. The result gets its own name so it cannot be mistaken for the
# pre-training baseline.
after_result = happy_tt.eval("eval.csv")
print("After loss: ", after_result.loss)

Map:   0%|          | 0/2988 [00:00<?, ? examples/s]

After loss:  0.477690190076828


In [34]:
# Generation settings shared by the example cells: 5 beams, with output
# length bounded by min_length=1 and max_length=20.
# NOTE(review): max_length=20 presumably caps the generated length, so a
# correction of a longer sentence may be cut off - confirm against TTSettings.
beam_settings =  TTSettings(num_beams=5, min_length=1, max_length=20)

In [35]:
# Example 1

# The input carries the same "grammar: " prefix that generate_csv puts on
# every training input.
example_1 = "grammar: This sentences, has bads grammar and spelling!"
result_1 = happy_tt.generate_text(example_1, args=beam_settings)
# The result object exposes the generated sentence as `.text`.
print(result_1.text)

This sentences, has bad grammar and spelling!


In [36]:
# Example 2

# A longer input with several distinct errors (agreement, spelling, tense),
# again using the "grammar: " task prefix.
example_2 = "grammar: I am enjoys, writtings articles ons AI and I also enjoyed write articling on AI."
result_2 = happy_tt.generate_text(example_2, args=beam_settings)
print(result_2.text)

I enjoy writing articles on AI and I also enjoyed writing articles on AI.


In [37]:
# Example 3

# A single word-form error ("successful" where a noun is needed), again using
# the "grammar: " task prefix.
example_3 = "grammar: He is not very sanguine about his chances of successful."
result_3 = happy_tt.generate_text(example_3, args=beam_settings)
print(result_3.text)

He is not very sanguine about his chances of success.
