# Model Dependency

In [1]:
! pip install simpletransformers

Collecting simpletransformers
  Downloading simpletransformers-0.63.9-py3-none-any.whl (250 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m250.5/250.5 kB[0m [31m360.3 kB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting streamlit
  Downloading streamlit-1.14.0-py2.py3-none-any.whl (9.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.2/9.2 MB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting seqeval
  Downloading seqeval-1.2.2.tar.gz (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Collecting validators>=0.2
  Downloading validators-0.20.0.tar.gz (30 kB)
  Preparing metadata (setup.py) ... [?25ldone
Collecting watchdog
  Downloading watchdog-2.1.9-py3-none-manylinux2014_x86_64.whl (78 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.4/78.4 kB[0m [31m7.9 MB/s

# Model Training

In [2]:
import logging
import pandas as pd

from simpletransformers.seq2seq import (
  Seq2SeqModel,
  Seq2SeqArgs,
)

logging.basicConfig(level=logging.INFO)
transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.WARNING)

print("LOADING DATASETS")
train_df = pd.read_csv("../input/model-training/final2_train.tsv", sep='\t').astype(str)
test_df = pd.read_csv("../input/model-training/final2_test.tsv", sep='\t').astype(str)
print("LOADED DATASETS SUCCESSFULLY")


train_df["prefix"] = ""
test_df["prefix"] = ""

model_args = Seq2SeqArgs()
model_args.max_seq_length = 56
# model_args.max_length = 20
model_args.train_batch_size = 4
model_args.eval_batch_size = 4
# model_args.rag_embed_batch_size = 8
model_args.num_train_epochs = 5
model_args.no_save = True
model_args.evaluate_generated_text = True
model_args.evaluate_during_training = True
model_args.evaluate_during_training_verbose = True

# Initialize model
model = Seq2SeqModel(
    encoder_decoder_type="mbart",
    encoder_decoder_name="facebook/mbart-large-50-one-to-many-mmt",
    args=model_args,
    use_cuda=True,
)

def count_matches(labels, preds):
    print(labels)
    print(preds)
    return sum(
        [
            1 if label == pred else 0
            for label, pred in zip(labels, preds)
        ]
    )
print("------TRAINING------")
# Train the model
model.train_model(train_df, output_dir="./Outputs2/", eval_data=test_df)
results = model.eval_model(test_df)
print("-------------TRAINING DONE---------")

LOADING DATASETS
LOADED DATASETS SUCCESSFULLY


Downloading:   0%|          | 0.00/1.39k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/2.28G [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/4.83M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/717 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/528 [00:00<?, ?B/s]

The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'MBart50Tokenizer'. 
The class this function is called from is 'MBartTokenizer'.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


------TRAINING------


  0%|          | 0/10000 [00:00<?, ?it/s]

`prepare_seq2seq_batch` is deprecated and will be removed in version 5 of HuggingFace Transformers. Use the regular
`__call__` method to prepare your inputs and the tokenizer under the `as_target_tokenizer` context manager to prepare
your targets.

Here is a short example:

model_inputs = tokenizer(src_texts, ...)
with tokenizer.as_target_tokenizer():
    labels = tokenizer(tgt_texts, ...)
model_inputs["labels"] = labels["input_ids"]

See the documentation of your specific tokenizer for more details on the specific arguments to the tokenizer of choice.
For a more complete example, see the implementation of `prepare_seq2seq_batch`.



Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 0 of 5:   0%|          | 0/2500 [00:00<?, ?it/s]

  0%|          | 0/2000 [00:00<?, ?it/s]

`prepare_seq2seq_batch` is deprecated and will be removed in version 5 of HuggingFace Transformers. Use the regular
`__call__` method to prepare your inputs and the tokenizer under the `as_target_tokenizer` context manager to prepare
your targets.

Here is a short example:

model_inputs = tokenizer(src_texts, ...)
with tokenizer.as_target_tokenizer():
    labels = tokenizer(tgt_texts, ...)
model_inputs["labels"] = labels["input_ids"]

See the documentation of your specific tokenizer for more details on the specific arguments to the tokenizer of choice.
For a more complete example, see the implementation of `prepare_seq2seq_batch`.



Generating outputs:   0%|          | 0/500 [00:00<?, ?it/s]

`prepare_seq2seq_batch` is deprecated and will be removed in version 5 of HuggingFace Transformers. Use the regular
`__call__` method to prepare your inputs and the tokenizer under the `as_target_tokenizer` context manager to prepare
your targets.

Here is a short example:

model_inputs = tokenizer(src_texts, ...)
with tokenizer.as_target_tokenizer():
    labels = tokenizer(tgt_texts, ...)
model_inputs["labels"] = labels["input_ids"]

See the documentation of your specific tokenizer for more details on the specific arguments to the tokenizer of choice.
For a more complete example, see the implementation of `prepare_seq2seq_batch`.



  0%|          | 0/2000 [00:00<?, ?it/s]

Generating outputs:   0%|          | 0/500 [00:00<?, ?it/s]

Running Epoch 1 of 5:   0%|          | 0/2500 [00:00<?, ?it/s]

  0%|          | 0/2000 [00:00<?, ?it/s]

Generating outputs:   0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/2000 [00:00<?, ?it/s]

Generating outputs:   0%|          | 0/500 [00:00<?, ?it/s]

Running Epoch 2 of 5:   0%|          | 0/2500 [00:00<?, ?it/s]

  0%|          | 0/2000 [00:00<?, ?it/s]

Generating outputs:   0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/2000 [00:00<?, ?it/s]

Generating outputs:   0%|          | 0/500 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/2500 [00:00<?, ?it/s]

  0%|          | 0/2000 [00:00<?, ?it/s]

Generating outputs:   0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/2000 [00:00<?, ?it/s]

Generating outputs:   0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/2000 [00:00<?, ?it/s]

Generating outputs:   0%|          | 0/500 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/2500 [00:00<?, ?it/s]

  0%|          | 0/2000 [00:00<?, ?it/s]

Generating outputs:   0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/2000 [00:00<?, ?it/s]

Generating outputs:   0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/2000 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/500 [00:00<?, ?it/s]

Generating outputs:   0%|          | 0/500 [00:00<?, ?it/s]

-------------TRAINING DONE---------


# Dependencies for Model Evaluation

In [56]:
! pip install evaluate
! pip install rouge_score
! pip install jiwer

[0mCollecting jiwer
  Downloading jiwer-2.5.1-py3-none-any.whl (15 kB)
Collecting levenshtein==0.20.2
  Downloading Levenshtein-0.20.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m0m
Installing collected packages: levenshtein, jiwer
  Attempting uninstall: levenshtein
    Found existing installation: Levenshtein 0.20.7
    Uninstalling Levenshtein-0.20.7:
      Successfully uninstalled Levenshtein-0.20.7
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
python-levenshtein 0.20.7 requires Levenshtein==0.20.7, but you have levenshtein 0.20.2 which is incompatible.[0m[31m
[0mSuccessfully installed jiwer-2.5.1 levenshtein-0.20.2
[0m

# Model Evaluation

In [67]:
# Load inputs and targets
test_df = pd.read_csv("../input/model-training/final1_test.tsv", sep='\t').astype(str)
test_dash_df = pd.read_csv("../input/model-training/final2_test.tsv", sep='\t').astype(str)

hindi_truth = test_df.loc[test_df["prefix"] == "translate english to indic"]["target_text"].tolist()
to_hindi = test_df.loc[test_df["prefix"] == "translate english to indic"]["input_text"].tolist()
hindi_dash_truth = test_dash_df.loc[test_dash_df["prefix"] == "translate english to indic"]["target_text"].tolist()
to_hindi_dash = test_dash_df.loc[test_dash_df["prefix"] == "translate english to indic"]["input_text"].tolist()

english_truth = test_df.loc[test_df["prefix"] == "translate indic to english"]["target_text"].tolist()
to_english = test_df.loc[test_df["prefix"] == "translate indic to english"]["input_text"].tolist()
english_dash_truth = test_dash_df.loc[test_dash_df["prefix"] == "translate indic to english"]["target_text"].tolist()
to_english_dash = test_dash_df.loc[test_dash_df["prefix"] == "translate indic to english"]["input_text"].tolist()

In [78]:
# Expand string to list of strings to pass as references in evaluation metric function calls
def str_to_list_of_str(lang_truth):
    expanded_lang_truth = []
    for t in lang_truth:
        temp = []
        temp.append(t)
        expanded_lang_truth.append(temp)
    return expanded_lang_truth

expanded_hindi_truth = str_to_list_of_str(hindi_truth)
expanded_english_truth = str_to_list_of_str(english_truth)
expanded_hindi_dash_truth = str_to_list_of_str(hindi_dash_truth)
expanded_english_dash_truth = str_to_list_of_str(english_dash_truth)

In [69]:
# Load evaluation models
import evaluate
sacrebleu = evaluate.load("sacrebleu")
chrf = evaluate.load("chrf")
rouge = evaluate.load("rouge")
ter = evaluate.load("ter")
wer = evaluate.load("wer")

In [70]:
# Load model predictions
hindi_preds = model.predict(to_hindi)
english_preds = model.predict(to_english)

hindi_dash_preds = model.predict(to_hindi_dash)
english_dash_preds = model.predict(to_english_dash)

Generating outputs:   0%|          | 0/250 [00:00<?, ?it/s]

`prepare_seq2seq_batch` is deprecated and will be removed in version 5 of HuggingFace Transformers. Use the regular
`__call__` method to prepare your inputs and the tokenizer under the `as_target_tokenizer` context manager to prepare
your targets.

Here is a short example:

model_inputs = tokenizer(src_texts, ...)
with tokenizer.as_target_tokenizer():
    labels = tokenizer(tgt_texts, ...)
model_inputs["labels"] = labels["input_ids"]

See the documentation of your specific tokenizer for more details on the specific arguments to the tokenizer of choice.
For a more complete example, see the implementation of `prepare_seq2seq_batch`.



Generating outputs:   0%|          | 0/250 [00:00<?, ?it/s]

Generating outputs:   0%|          | 0/250 [00:00<?, ?it/s]

Generating outputs:   0%|          | 0/250 [00:00<?, ?it/s]

In [79]:
## BLEU
en_hi_results_bleu = sacrebleu.compute(predictions=hindi_preds, references=expanded_hindi_truth)
print("BLEU SCORE--------Orig Target")
print("English to Hindi")
print(en_hi_results_bleu['score'])

hi_en_results_bleu = sacrebleu.compute(predictions=english_preds, references=expanded_english_truth)
print("Hindi to English")
print(hi_en_results_bleu['score'])

en_hi_dash_results_bleu = sacrebleu.compute(predictions=hindi_dash_preds, references=expanded_hindi_dash_truth)
print("\n\nBLEU SCORE--------Dash Target")
print("English to Hindi")
print(en_hi_dash_results_bleu['score'])

hi_en_dash_results_bleu = sacrebleu.compute(predictions=english_dash_preds, references=expanded_english_dash_truth)
print("Hindi to English")
print(hi_en_dash_results_bleu['score'])

BLEU SCORE--------Orig Target
English to Hindi
0.042253558406162725
Hindi to English
0.06703456725096933


BLEU SCORE--------Dash Target
English to Hindi
0.4320747823601082
Hindi to English
3.1510107005032495


In [80]:
## CHRF++ 
en_hi_results_chrf = chrf.compute(predictions=hindi_preds, references=expanded_hindi_truth, word_order=2)
print("CHRF++ SCORE--------Orig Target")
print("English to Hindi")
print(en_hi_results_chrf['score'])

hi_en_results_chrf = chrf.compute(predictions=english_preds, references=expanded_english_truth, word_order=2)
print("Hindi to English")
print(hi_en_results_chrf['score'])

en_hi_dash_results_chrf = chrf.compute(predictions=hindi_dash_preds, references=expanded_hindi_dash_truth, word_order=2)
print("\n\nCHRF++ SCORE--------Dash Target")
print("English to Hindi")
print(en_hi_dash_results_chrf['score'])

hi_en_dash_results_chrf = chrf.compute(predictions=english_dash_preds, references=expanded_english_dash_truth, word_order=2)
print("Hindi to English")
print(hi_en_dash_results_chrf['score'])

CHRF++ SCORE--------Orig Target
English to Hindi
8.026919749790096
Hindi to English
1.0897263699460413


CHRF++ SCORE--------Dash Target
English to Hindi
12.051476020501001
Hindi to English
19.43295882652144


In [82]:
# ROUGE
en_hi_results_rouge = rouge.compute(predictions=hindi_preds, references=expanded_hindi_truth)
print("Rouge SCORE--------Orig target")
print("English to Hindi")
print(en_hi_results_rouge)

hi_en_results_rouge = rouge.compute(predictions=english_preds, references=expanded_english_truth)
print("Hindi to English")
print(hi_en_results_rouge)

en_hi_dash_results_rouge = rouge.compute(predictions=hindi_dash_preds, references=expanded_hindi_dash_truth)
print("\n\nRouge SCORE--------Dash Target")
print("English to Hindi")
print(en_hi_dash_results_rouge)

hi_en_dash_results_rouge = rouge.compute(predictions=english_dash_preds, references=expanded_english_dash_truth)
print("Hindi to English")
print(hi_en_dash_results_rouge)

Rouge SCORE--------Orig target
English to Hindi
{'rouge1': 0.0604984472049689, 'rouge2': 0.01231904761904762, 'rougeL': 0.060148775017253256, 'rougeLsum': 0.06045811812139039}
Hindi to English
{'rouge1': 0.019633602875762254, 'rouge2': 0.00605129426150121, 'rougeL': 0.01765161677237704, 'rougeLsum': 0.017604626061317966}


Rouge SCORE--------Dash Target
English to Hindi
{'rouge1': 0.0604984472049689, 'rouge2': 0.01231904761904762, 'rougeL': 0.060148775017253256, 'rougeLsum': 0.06045811812139039}
Hindi to English
{'rouge1': 0.30308837779358044, 'rouge2': 0.10832852650845823, 'rougeL': 0.24988553266914884, 'rougeLsum': 0.2495896278125303}


In [84]:
# TER
en_hi_results_ter = ter.compute(predictions=hindi_preds, references=expanded_hindi_truth)
print("TER SCORE--------Orig Target")
print("English to Hindi")
print(en_hi_results_ter['score'])

hi_en_results_ter = ter.compute(predictions=english_preds, references=expanded_english_truth)
print("Hindi to English")
print(hi_en_results_ter['score'])

en_hi_dash_results_ter = ter.compute(predictions=hindi_dash_preds, references=expanded_hindi_dash_truth)
print("\n\nTER SCORE--------Dash target")
print("English to Hindi")
print(en_hi_dash_results_ter['score'])

hi_en_dash_results_ter = ter.compute(predictions=english_dash_preds, references=expanded_english_dash_truth)
print("Hindi to English")
print(hi_en_dash_results_ter['score'])

TER SCORE--------Orig Target
English to Hindi
98.34702877629367
Hindi to English
99.21793709931863


TER SCORE--------Dash target
English to Hindi
91.83491780865474
Hindi to English
85.90608532205552


In [87]:
# WER
en_hi_results_wer = wer.compute(predictions=hindi_preds, references=hindi_truth)
print("WER SCORE--------Orig Target")
print("English to Hindi")
print(en_hi_results_wer)

hi_en_results_wer = wer.compute(predictions=english_preds, references=english_truth)
print("Hindi to English")
print(hi_en_results_wer)

en_hi_dash_results_wer = wer.compute(predictions=hindi_dash_preds, references=hindi_dash_truth)
print("\n\nWER SCORE--------Dash Target")
print("English to Hindi")
print(en_hi_dash_results_wer)

hi_en_dash_results_wer = wer.compute(predictions=english_dash_preds, references=english_dash_truth)
print("Hindi to English")
print(hi_en_dash_results_wer)

WER SCORE--------Orig Target
English to Hindi
0.9821020563594821
Hindi to English
0.992817104689021


WER SCORE--------Dash Target
English to Hindi
0.916836280371454
Hindi to English
0.8795690262813413


# Sample Sentences

In [88]:
print(model.predict(["I am Ganesh"]))

Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

['मैंऐअं गणतंत्रअंए']


In [89]:
print(model.predict(["I am working as a doctor in AIMS hospital, Delhi"]))

Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

['मैंऐअं दिल्लीइई केए एम्स अस्पतालआ मेंएअं डॉक्टर']


In [90]:
print(model.predict(["Outputs for MT5 model"]))

Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

['एमटी5ई मॉडल केए लिएइ आउटपुट']


In [91]:
print(model.predict(["मुझे अच्छी तरह पीने की आशा करता हूं।"]))

Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

['मुझेउए अच्छाआ आशाआ हैऐ']


In [92]:
print(model.predict(["मैं गणेश सिंह हूं"]))

Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

['मैंगन् नाआ सिंहइअं हूंऊअं']


In [93]:
print(model.predict(["मुझे अपने दोस्तों के साथ क्रिकेट खेलना पसंद है"]))

Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

['मुझेउए अपनेए दोस्तोंओओअं केए साथआ क्रिकेटइए बहुतउ']
