# Model Dependency

In [1]:
! pip install simpletransformers

Collecting simpletransformers
  Downloading simpletransformers-0.63.9-py3-none-any.whl (250 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m250.5/250.5 kB[0m [31m486.2 kB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting seqeval
  Downloading seqeval-1.2.2.tar.gz (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Collecting streamlit
  Downloading streamlit-1.14.0-py2.py3-none-any.whl (9.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.2/9.2 MB[0m [31m8.7 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting validators>=0.2
  Downloading validators-0.20.0.tar.gz (30 kB)
  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting pydeck>=0.1.dev5
  Downloading pydeck-0.8.0-py2.py3-none-any.whl (4.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.7/4.7 MB[0m [31m19.3 MB/s[0m et

# Model Training

In [2]:
import logging
import pandas as pd

from simpletransformers.seq2seq import (
  Seq2SeqModel,
  Seq2SeqArgs,
)

logging.basicConfig(level=logging.INFO)
transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.WARNING)

print("LOADING DATASETS")
train_df = pd.read_csv("../input/modeldatastedash/final2_train.tsv", sep='\t').astype(str)
test_df = pd.read_csv("../input/modeldatastedash/final2_test.tsv", sep='\t').astype(str)
print("LOADED DATASETS SUCCESSFULLY")


train_df["prefix"] = ""
test_df["prefix"] = ""

model_args = Seq2SeqArgs()
model_args.max_seq_length = 56
# model_args.max_length = 20
model_args.train_batch_size = 4
model_args.eval_batch_size = 4
# model_args.rag_embed_batch_size = 8
model_args.num_train_epochs = 5
model_args.no_save = True
model_args.evaluate_generated_text = True
model_args.evaluate_during_training = True
model_args.evaluate_during_training_verbose = True

# Initialize model
model = Seq2SeqModel(
    encoder_decoder_type="mbart",
    encoder_decoder_name="facebook/mbart-large-50-one-to-many-mmt",
    args=model_args,
    use_cuda=True,
)

def count_matches(labels, preds):
    print(labels)
    print(preds)
    return sum(
        [
            1 if label == pred else 0
            for label, pred in zip(labels, preds)
        ]
    )
print("------TRAINING------")
# Train the model
model.train_model(train_df, output_dir="./Outputs2/", eval_data=test_df)
results = model.eval_model(test_df)
print("-------------TRAINING DONE---------")

LOADING DATASETS
LOADED DATASETS SUCCESSFULLY


Downloading:   0%|          | 0.00/1.39k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/2.28G [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/4.83M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/717 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/528 [00:00<?, ?B/s]

The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'MBart50Tokenizer'. 
The class this function is called from is 'MBartTokenizer'.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


------TRAINING------


  0%|          | 0/10000 [00:00<?, ?it/s]

`prepare_seq2seq_batch` is deprecated and will be removed in version 5 of HuggingFace Transformers. Use the regular
`__call__` method to prepare your inputs and the tokenizer under the `as_target_tokenizer` context manager to prepare
your targets.

Here is a short example:

model_inputs = tokenizer(src_texts, ...)
with tokenizer.as_target_tokenizer():
    labels = tokenizer(tgt_texts, ...)
model_inputs["labels"] = labels["input_ids"]

See the documentation of your specific tokenizer for more details on the specific arguments to the tokenizer of choice.
For a more complete example, see the implementation of `prepare_seq2seq_batch`.



Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 0 of 5:   0%|          | 0/2500 [00:00<?, ?it/s]

  0%|          | 0/2000 [00:00<?, ?it/s]

`prepare_seq2seq_batch` is deprecated and will be removed in version 5 of HuggingFace Transformers. Use the regular
`__call__` method to prepare your inputs and the tokenizer under the `as_target_tokenizer` context manager to prepare
your targets.

Here is a short example:

model_inputs = tokenizer(src_texts, ...)
with tokenizer.as_target_tokenizer():
    labels = tokenizer(tgt_texts, ...)
model_inputs["labels"] = labels["input_ids"]

See the documentation of your specific tokenizer for more details on the specific arguments to the tokenizer of choice.
For a more complete example, see the implementation of `prepare_seq2seq_batch`.



Generating outputs:   0%|          | 0/500 [00:00<?, ?it/s]

`prepare_seq2seq_batch` is deprecated and will be removed in version 5 of HuggingFace Transformers. Use the regular
`__call__` method to prepare your inputs and the tokenizer under the `as_target_tokenizer` context manager to prepare
your targets.

Here is a short example:

model_inputs = tokenizer(src_texts, ...)
with tokenizer.as_target_tokenizer():
    labels = tokenizer(tgt_texts, ...)
model_inputs["labels"] = labels["input_ids"]

See the documentation of your specific tokenizer for more details on the specific arguments to the tokenizer of choice.
For a more complete example, see the implementation of `prepare_seq2seq_batch`.



  0%|          | 0/2000 [00:00<?, ?it/s]

Generating outputs:   0%|          | 0/500 [00:00<?, ?it/s]

Running Epoch 1 of 5:   0%|          | 0/2500 [00:00<?, ?it/s]

  0%|          | 0/2000 [00:00<?, ?it/s]

Generating outputs:   0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/2000 [00:00<?, ?it/s]

Generating outputs:   0%|          | 0/500 [00:00<?, ?it/s]

Running Epoch 2 of 5:   0%|          | 0/2500 [00:00<?, ?it/s]

  0%|          | 0/2000 [00:00<?, ?it/s]

Generating outputs:   0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/2000 [00:00<?, ?it/s]

Generating outputs:   0%|          | 0/500 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/2500 [00:00<?, ?it/s]

  0%|          | 0/2000 [00:00<?, ?it/s]

Generating outputs:   0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/2000 [00:00<?, ?it/s]

Generating outputs:   0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/2000 [00:00<?, ?it/s]

Generating outputs:   0%|          | 0/500 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/2500 [00:00<?, ?it/s]

  0%|          | 0/2000 [00:00<?, ?it/s]

Generating outputs:   0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/2000 [00:00<?, ?it/s]

Generating outputs:   0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/2000 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/500 [00:00<?, ?it/s]

Generating outputs:   0%|          | 0/500 [00:00<?, ?it/s]

-------------TRAINING DONE---------


# Dependencies for Model Evaluation

In [3]:
! pip install evaluate
! pip install rouge_score
! pip install jiwer
! pip install sacrebleu

Collecting evaluate
  Downloading evaluate-0.3.0-py3-none-any.whl (72 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.9/72.9 kB[0m [31m260.1 kB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Installing collected packages: evaluate
Successfully installed evaluate-0.3.0
[0mCollecting rouge_score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: rouge_score
  Building wheel for rouge_score (setup.py) ... [?25ldone
[?25h  Created wheel for rouge_score: filename=rouge_score-0.1.2-py3-none-any.whl size=24955 sha256=6c9d5aa5dbf65518edeb6f5369dd2af0ebd331ac4101649eb26677bfc523961b
  Stored in directory: /root/.cache/pip/wheels/84/ac/6b/38096e3c5bf1dc87911e3585875e21a3ac610348e740409c76
Successfully built rouge_score
Installing collected packages: rouge_score
Successfully installed rouge_score-0.1.2
[0mCollecting jiwer
  Downloading jiwer-2.5.1-py3-none-any.whl (15 kB)
Collecting levens

# Model Evaluation

In [4]:
# Load inputs and targets
# Treat hindi==telugu==indic_language
test_df = pd.read_csv("../input/modeldatastedash/final1_test.tsv", sep='\t').astype(str)
test_dash_df = pd.read_csv("../input/modeldatastedash/final2_test.tsv", sep='\t').astype(str)

hindi_truth = test_df.loc[test_df["prefix"] == "translate english to indic"]["target_text"].tolist()
to_hindi = test_df.loc[test_df["prefix"] == "translate english to indic"]["input_text"].tolist()
hindi_dash_truth = test_dash_df.loc[test_dash_df["prefix"] == "translate english to indic"]["target_text"].tolist()
to_hindi_dash = test_dash_df.loc[test_dash_df["prefix"] == "translate english to indic"]["input_text"].tolist()

english_truth = test_df.loc[test_df["prefix"] == "translate indic to english"]["target_text"].tolist()
to_english = test_df.loc[test_df["prefix"] == "translate indic to english"]["input_text"].tolist()
english_dash_truth = test_dash_df.loc[test_dash_df["prefix"] == "translate indic to english"]["target_text"].tolist()
to_english_dash = test_dash_df.loc[test_dash_df["prefix"] == "translate indic to english"]["input_text"].tolist()

In [5]:
# Expand string to list of strings to pass as references in evaluation metric function calls
def str_to_list_of_str(lang_truth):
    expanded_lang_truth = []
    for t in lang_truth:
        temp = []
        temp.append(t)
        expanded_lang_truth.append(temp)
    return expanded_lang_truth

expanded_hindi_truth = str_to_list_of_str(hindi_truth)
expanded_english_truth = str_to_list_of_str(english_truth)
expanded_hindi_dash_truth = str_to_list_of_str(hindi_dash_truth)
expanded_english_dash_truth = str_to_list_of_str(english_dash_truth)

In [14]:
# Load evaluation models
import evaluate
sacrebleu = evaluate.load("sacrebleu")
chrf = evaluate.load("chrf")
rouge = evaluate.load("rouge")
ter = evaluate.load("ter")
wer = evaluate.load("wer")

In [9]:
# Load model predictions
hindi_preds = model.predict(to_hindi)
english_preds = model.predict(to_english)

hindi_dash_preds = model.predict(to_hindi_dash)
english_dash_preds = model.predict(to_english_dash)

Generating outputs:   0%|          | 0/250 [00:00<?, ?it/s]

`prepare_seq2seq_batch` is deprecated and will be removed in version 5 of HuggingFace Transformers. Use the regular
`__call__` method to prepare your inputs and the tokenizer under the `as_target_tokenizer` context manager to prepare
your targets.

Here is a short example:

model_inputs = tokenizer(src_texts, ...)
with tokenizer.as_target_tokenizer():
    labels = tokenizer(tgt_texts, ...)
model_inputs["labels"] = labels["input_ids"]

See the documentation of your specific tokenizer for more details on the specific arguments to the tokenizer of choice.
For a more complete example, see the implementation of `prepare_seq2seq_batch`.



Generating outputs:   0%|          | 0/250 [00:00<?, ?it/s]

Generating outputs:   0%|          | 0/250 [00:00<?, ?it/s]

Generating outputs:   0%|          | 0/250 [00:00<?, ?it/s]

In [10]:
## BLEU
en_hi_results_bleu = sacrebleu.compute(predictions=hindi_preds, references=expanded_hindi_truth)
print("BLEU SCORE--------Orig Target")
print("English to Telugu")
print(en_hi_results_bleu['score'])

hi_en_results_bleu = sacrebleu.compute(predictions=english_preds, references=expanded_english_truth)
print("Telugu to English")
print(hi_en_results_bleu['score'])

en_hi_dash_results_bleu = sacrebleu.compute(predictions=hindi_dash_preds, references=expanded_hindi_dash_truth)
print("\n\nBLEU SCORE--------Dash Target")
print("English to Telugu")
print(en_hi_dash_results_bleu['score'])

hi_en_dash_results_bleu = sacrebleu.compute(predictions=english_dash_preds, references=expanded_english_dash_truth)
print("Telugu to English")
print(hi_en_dash_results_bleu['score'])

BLEU SCORE--------Orig Target
English to Telugu
0.020366805473825585
Telugu to English
0.2140898757918018


BLEU SCORE--------Dash Target
English to Telugu
0.05972403482419173
Telugu to English
2.077209276107318


In [11]:
## CHRF++ 
en_hi_results_chrf = chrf.compute(predictions=hindi_preds, references=expanded_hindi_truth, word_order=2)
print("CHRF++ SCORE--------Orig Target")
print("English to Telugu")
print(en_hi_results_chrf['score'])

hi_en_results_chrf = chrf.compute(predictions=english_preds, references=expanded_english_truth, word_order=2)
print("Telugu to English")
print(hi_en_results_chrf['score'])

en_hi_dash_results_chrf = chrf.compute(predictions=hindi_dash_preds, references=expanded_hindi_dash_truth, word_order=2)
print("\n\nCHRF++ SCORE--------Dash Target")
print("English to Telugu")
print(en_hi_dash_results_chrf['score'])

hi_en_dash_results_chrf = chrf.compute(predictions=english_dash_preds, references=expanded_english_dash_truth, word_order=2)
print("Telugu to English")
print(hi_en_dash_results_chrf['score'])

CHRF++ SCORE--------Orig Target
English to Telugu
6.130223827713543
Telugu to English
3.1059042830316663


CHRF++ SCORE--------Dash Target
English to Telugu
7.104610772485463
Telugu to English
14.956300159325439


In [12]:
# ROUGE
en_hi_results_rouge = rouge.compute(predictions=hindi_preds, references=expanded_hindi_truth)
print("Rouge SCORE--------Orig target")
print("English to Telugu")
print(en_hi_results_rouge)

hi_en_results_rouge = rouge.compute(predictions=english_preds, references=expanded_english_truth)
print("Telugu to English")
print(hi_en_results_rouge)

en_hi_dash_results_rouge = rouge.compute(predictions=hindi_dash_preds, references=expanded_hindi_dash_truth)
print("\n\nRouge SCORE--------Dash Target")
print("English to Telugu")
print(en_hi_dash_results_rouge)

hi_en_dash_results_rouge = rouge.compute(predictions=english_dash_preds, references=expanded_english_dash_truth)
print("Telugu to English")
print(hi_en_dash_results_rouge)

Rouge SCORE--------Orig target
English to Telugu
{'rouge1': 0.06559503089503085, 'rouge2': 0.015340909090909089, 'rougeL': 0.06523596403596402, 'rougeLsum': 0.06460598938098935}
Telugu to English
{'rouge1': 0.0470899847931012, 'rouge2': 0.011316366289856508, 'rougeL': 0.040890922976200686, 'rougeLsum': 0.0406426857846212}


Rouge SCORE--------Dash Target
English to Telugu
{'rouge1': 0.06559503089503085, 'rouge2': 0.015340909090909089, 'rougeL': 0.06523596403596402, 'rougeLsum': 0.06460598938098935}
Telugu to English
{'rouge1': 0.22473698971175698, 'rouge2': 0.061839233927657, 'rougeL': 0.1825948050142109, 'rougeLsum': 0.18255907397105398}


In [13]:
# TER
en_hi_results_ter = ter.compute(predictions=hindi_preds, references=expanded_hindi_truth)
print("TER SCORE--------Orig Target")
print("English to Telugu")
print(en_hi_results_ter['score'])

hi_en_results_ter = ter.compute(predictions=english_preds, references=expanded_english_truth)
print("Telugu to English")
print(hi_en_results_ter['score'])

en_hi_dash_results_ter = ter.compute(predictions=hindi_dash_preds, references=expanded_hindi_dash_truth)
print("\n\nTER SCORE--------Dash target")
print("English to Telugu")
print(en_hi_dash_results_ter['score'])

hi_en_dash_results_ter = ter.compute(predictions=english_dash_preds, references=expanded_english_dash_truth)
print("Telugu to English")
print(hi_en_dash_results_ter['score'])

TER SCORE--------Orig Target
English to Telugu
99.08840010035962
Telugu to English
98.07615364097092


TER SCORE--------Dash target
English to Telugu
96.25402399765876
Telugu to English
90.58748999733263


In [15]:
# WER
en_hi_results_wer = wer.compute(predictions=hindi_preds, references=hindi_truth)
print("WER SCORE--------Orig Target")
print("English to Telugu")
print(en_hi_results_wer)

hi_en_results_wer = wer.compute(predictions=english_preds, references=english_truth)
print("Telugu to English")
print(hi_en_results_wer)

en_hi_dash_results_wer = wer.compute(predictions=hindi_dash_preds, references=hindi_dash_truth)
print("\n\nWER SCORE--------Dash Target")
print("English to Telugu")
print(en_hi_dash_results_wer)

hi_en_dash_results_wer = wer.compute(predictions=english_dash_preds, references=english_dash_truth)
print("Telugu to English")
print(hi_en_dash_results_wer)

WER SCORE--------Orig Target
English to Telugu
0.9907251972469364
Telugu to English
0.9821619098426247


WER SCORE--------Dash Target
English to Telugu
0.9618935705892228
Telugu to English
0.9211789810616164


# Sample Sentences

In [16]:
print(model.predict(["I am King"]))

Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

['నేనుఏఉ కింగ్ఇఅంఇఅం యుద్దెనుఉఎ']


In [17]:
print(model.predict(["I am working as a doctor in AIMS hospital, Delhi"]))

Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

['ఢిల్లీలోనిఇఈఓఇ ఎయిమ్స్ఇ ఆస్పత్రిలోఇఓ డాక్టర్']


In [18]:
print(model.predict(["Outputs for MT5 model"]))

Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

['ఎంటి5అంఇ మోడల్ఓ కోసంఓఅం అటవీలు']


In [19]:
print(model.predict(["నాకు సర్ఫింగ్ అంటే ఇష్టం"]))

Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

['నాకుఆఉ సర్ఫింగ్ఆఇఅం ఇష్టం.ఏఇ']


In [20]:
print(model.predict(["ఎంటి5అంఇ మోడల్ఓ కోసంఓఅం అటవీలు"]))

Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

['The engines are powered by two-channel turbo-defenced gearbox']


In [21]:
print(model.predict(["నేనుఏఉ కింగ్ఇఅంఇఅం యుద్దెనుఉఎ"]))

Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

['I am the king of the king of the kingdom of Judah, and the king']
