## 4 Model Training

In [None]:
import pandas as pd
from happytransformer import HappyTextToText, TTSettings, TTTrainArgs
from tqdm import tqdm

In [None]:
# Start from the public pretrained grammar-correction checkpoint.
happy_tt = HappyTextToText("text2text-generation", "pszemraj/grammar-synthesis-small")

# Fine-tune on the training CSV. The path points into a mounted Google Drive
# (Colab layout), so this cell only runs where that mount exists.
# Named `train_args` so it is not confused with the generation settings that
# a later cell stores under `args`.
train_args = TTTrainArgs(batch_size=8)
happy_tt.train("/content/drive/MyDrive/corrupt/train_3k.csv", args=train_args)

# Persist the model together with its tokenizer. Saving only
# `happy_tt.model` leaves the directory without tokenizer files, so it could
# not be reloaded by path on its own.
happy_tt.save("french_model")

### Load saved model

In [3]:
# Reload the fine-tuned checkpoint from the local "french_model" directory.
# NOTE(review): "French" is passed as the model-type label; the captured run
# below shows it is accepted, but confirm it is the intended value.
happy_tt = HappyTextToText("French", "./french_model")

# Generation settings (beam search, output length bounds). `args` is also
# read by the comparison cell further down, so the name must stay.
args = TTSettings(
    num_beams=5,
    min_length=1,
    max_length=200,
)

# Smoke test: correct one deliberately ungrammatical sentence. The "gec: "
# task prefix is prepended to the raw sentence before generation.
sentence = 'I are happy'
result = happy_tt.generate_text(f"gec: {sentence}", args=args)
result.text

11/08/2023 15:35:27 - INFO - happytransformer.happy_transformer -   Using device: mps
11/08/2023 15:35:27 - INFO - happytransformer.happy_transformer -   Moving model to mps
11/08/2023 15:35:27 - INFO - happytransformer.happy_transformer -   Initializing a pipeline


'I am happy.'

### Evaluation

In [None]:
# Evaluate `happy_tt` on the evaluation CSV and show the returned result
# as the cell output.
eval_result = happy_tt.eval("data/10k2.csv")
eval_result

### Comparison

In [None]:
# Baseline: the original pretrained checkpoint, compared against the model
# currently held in `happy_tt`.
happy_tt2 = HappyTextToText("T5", "pszemraj/grammar-synthesis-small")

# Restrict the comparison to the first 100 rows of the evaluation CSV.
data = pd.read_csv("data/10k2.csv").head(100)

# Each entry: [input text, our model's output, baseline model's output].
diffs = []

# NOTE(review): inputs are passed to both models as-is, without the "gec: "
# prefix used in the single-sentence check earlier in the notebook -- confirm
# the CSV's "input" column already carries whatever prefix the models expect.
# `total` is given explicitly because iterrows() is a generator with no
# length, which would otherwise leave the progress bar without a total.
for _, row in tqdm(data.iterrows(), total=len(data)):
    # Avoid naming this `input`: that would shadow the builtin for the rest
    # of the kernel session.
    source_text = row["input"]
    ours = happy_tt.generate_text(source_text, args=args)
    theirs = happy_tt2.generate_text(source_text, args=args)
    # Compare the generated strings explicitly rather than the result objects.
    if ours.text != theirs.text:
        diffs.append([source_text, ours.text, theirs.text])

# Write every disagreement between the two models to disk for inspection.
df = pd.DataFrame(diffs, columns=["original", "our", "old"])
df.to_csv("diffs.csv")