### Notes 

T5 Paper: https://arxiv.org/pdf/1910.10683.pdf

T5 Tokenizer: https://github.com/huggingface/transformers/blob/master/src/transformers/tokenization_t5.py

Important Tasks: https://docs.google.com/document/d/1weIZM6QTlnitpPQmpg-WeV2RW70TnYmDuogBQPr5mB0/edit

In [1]:
#installation step
!pip install transformers
!pip install t5
!pip install sentencepiece
#creating the folders 
!mkdir data/
!mkdir data/AD_NMT-master
!mkdir data/train/
!mkdir data/test/
!mkdir data/val/
!mkdir data/model/
!mkdir data/config/
#fetching the pkl files
!wget --no-check-certificate 'https://docs.google.com/uc?export=download&id=1V9crCmqvgQcv0Sx2MCNWB9AET2j6M6FW' -O data/AD_NMT-master/english-Arabic-both.pkl
!wget --no-check-certificate 'https://docs.google.com/uc?export=download&id=1UzL4cOWTMCee83KBUh2QO_H62AFVpDQV' -O data/AD_NMT-master/LAV-MSA-2-both.pkl
!wget --no-check-certificate 'https://docs.google.com/uc?export=download&id=1UjDX7cCG2S23SPfSHxSPdVayMTxB5Y16' -O data/AD_NMT-master/Magribi_MSA-both.pkl
!wget --no-check-certificate 'https://docs.google.com/uc?export=download&id=1fEVj9jCxvcKn9zg8lO43i2sWZquegg5H' -O data/operative_config.gin

Collecting t5
[?25l  Downloading https://files.pythonhosted.org/packages/4e/55/cf4b9ad68873d28224ac9ca78bd30332b53b1f9f6c564ce8d3cc5358a0a8/t5-0.6.0-py3-none-any.whl (149kB)
[K     |████████████████████████████████| 153kB 2.7MB/s 
Collecting rouge-score
  Downloading https://files.pythonhosted.org/packages/1f/56/a81022436c08b9405a5247b71635394d44fe7e1dbedc4b28c740e09c2840/rouge_score-0.0.4-py2.py3-none-any.whl
Collecting tensorflow-text
[?25l  Downloading https://files.pythonhosted.org/packages/75/f4/3e3968e8a19e85bea8a0fdc9bd1f6a963b29cdecb1be984af0b70fbc0690/tensorflow_text-2.2.1-cp36-cp36m-manylinux1_x86_64.whl (3.0MB)
[K     |████████████████████████████████| 3.0MB 8.3MB/s 
[?25hCollecting tfds-nightly
[?25l  Downloading https://files.pythonhosted.org/packages/c8/87/7f0ff3aa7b3a11d0aed0fda2d24a742cc75528fe0e02ab90360a0fee9857/tfds_nightly-3.1.0.dev202006160105-py3-none-any.whl (3.3MB)
[K     |████████████████████████████████| 3.3MB 30.9MB/s 
Collecting sacrebleu
[?25l  Down

--2020-06-17 06:17:18--  https://docs.google.com/uc?export=download&id=1V9crCmqvgQcv0Sx2MCNWB9AET2j6M6FW
Resolving docs.google.com (docs.google.com)... 74.125.31.139, 74.125.31.138, 74.125.31.102, ...
Connecting to docs.google.com (docs.google.com)|74.125.31.139|:443... connected.
HTTP request sent, awaiting response... 302 Moved Temporarily
Location: https://doc-10-2s-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/1jtaaeat2l3v6a62bmgtbujec52mr2tk/1592374575000/16970776037313924126/*/1V9crCmqvgQcv0Sx2MCNWB9AET2j6M6FW?e=download [following]
--2020-06-17 06:17:19--  https://doc-10-2s-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/1jtaaeat2l3v6a62bmgtbujec52mr2tk/1592374575000/16970776037313924126/*/1V9crCmqvgQcv0Sx2MCNWB9AET2j6M6FW?e=download
Resolving doc-10-2s-docs.googleusercontent.com (doc-10-2s-docs.googleusercontent.com)... 172.217.193.132, 2607:f8b0:400c:c03::84
Connecting to doc-10-2s-docs.googleusercontent.com (doc-10-2s-docs

In [2]:
#James Chartouni
#Joey Park
#Raef Khan

import torch
from torch.optim import SGD
import pandas as pd
import numpy as np
import pickle
import os, io, glob
import functools

import sentencepiece as spm

import transformers
import t5
import tensorflow as tf
import tensorflow_datasets as tfds
from sklearn.model_selection import train_test_split

## Prepare Datasets

We need to take our training and test sets from the pkl files and create new .txt files that are formatted so that the standard torchtext Dataset class can read them

##Initial Loading from Pickle

In [3]:
ls data/AD_NMT-master

english-Arabic-both.pkl  LAV-MSA-2-both.pkl  Magribi_MSA-both.pkl


In [4]:
file_path = 'data/AD_NMT-master/'

with open(file_path + "english-Arabic-both.pkl", 'rb') as handle:
    data_English_MSA_both = pickle.load(handle) 

with open(file_path + "LAV-MSA-2-both.pkl", 'rb') as handle:
    data_LAV_MSA_both = pickle.load(handle) 

with open(file_path + "Magribi_MSA-both.pkl", 'rb') as handle:
    data_Magribi_MSA_both = pickle.load(handle) 
    

In [5]:
#few dataset examples
print(data_English_MSA_both[0:5])
print(data_LAV_MSA_both[0:5])
print(data_Magribi_MSA_both[0:5])

[['Tom was also there', 'كان توم هنا ايضا'], ['That old woman lives by herself', 'تلك المراة العجوز تسكن بمفردها'], ['He went abroad for the purpose of studying English', 'سافر خارج البلد ليتعلم الانجليزية'], ['There is a fork missing', 'هناك شوكة ناقصة'], ["I don't know this game", 'لا اعرف هذه اللعبة']]
[['لا انا بعرف وحدة راحت ع فرنسا و معا شنتا حطت فيها الفرش', 'لا اعرف واحدة ذهبت الى فرنسا و لها غرفة و ضعت فيها الافرشة'], ['روح بوشك و فتول عاليسار', 'اذهب تقدم و استدر يسارا'], ['لا لا لازم انه يكون عندك موضوع ما في اشي', ' لا لا يجب ان يكون لديك موضوع هذا ضروري'], ['اوعي تبعدي من هون بلاش تضيعي ', 'لا تبتعد عن هنا حتى لا تفقد الطريق '], ['قصدي صراحة يما انا كمان كرهته من يوم ما عملتيه زي ما بتعمله خالتي كرهته و صرت ما باطيقه بالمرة', 'اقصد صراحة يا امي انا ايضا كرهته من يوم حضرته مثلما تحضره خالتي كرهته و اصبحت لا اطيقه ابدا']]
[['يا ربي متخليش حتى لبيوتا ديالهم يوصلو ل البارة', 'يارب لا تدع اهدافهم تصيب حتى العارضة'], ['يعطيك الصحة كريمة', 'يعطيك العافية كريمة'], [' لوكان جوزوزه 

In [6]:
#splits the train dataset into train and validation sets, define test set as datafile
en_msa_train, en_msa_val = train_test_split(data_English_MSA_both, test_size=.2)

lav_msa_train, lav_msa_val = train_test_split(data_LAV_MSA_both, test_size=.2)

mag_msa_train, mag_msa_val = train_test_split(data_Magribi_MSA_both, test_size=.2)

In [7]:
print(len(en_msa_train))
print(len(en_msa_val))

print(len(lav_msa_train))
print(len(lav_msa_val))

print(len(mag_msa_train))
print(len(mag_msa_val))

8000
2001
12644
3161
15788
3948


In [8]:
file_path = 'data/'

def list_to_csv(ds, src='en', trg='msa', datatype=''):
    src_formatted = datatype + '_' + src + '_' + trg + '.' + 'csv'
    
    with open(file_path + datatype + "/" + src_formatted, 'wt') as csv:
        for i, arr in enumerate(ds):
            csv.write(arr[1] + ',' + arr[0] + '\n')

In [9]:
#combine all the training lines of all three languages
spm_input_ds = en_msa_train + mag_msa_train + lav_msa_train

In [10]:
def list_to_input(ds):
    src_formatted = 'spm_input' + '.' + 'txt'

    with open(file_path + "/" + src_formatted, 'wt') as sentencelinefile:
        for i, arr in enumerate(ds):
            sentencelinefile.write(arr[0] + '\n' + arr[1] + '\n')

In [11]:
list_to_input(spm_input_ds)

In [19]:
list_to_csv(en_msa_train, 'en', 'msa', 'train')
list_to_csv(en_msa_val, 'en', 'msa', 'val')

list_to_csv(lav_msa_train, 'lav', 'msa', 'train')
list_to_csv(lav_msa_val, 'lav', 'msa', 'val')

list_to_csv(mag_msa_train, 'mag', 'msa', 'train')
list_to_csv(mag_msa_val, 'mag', 'msa', 'val')

In [20]:
VOCAB_SIZE = 32128
spm.SentencePieceTrainer.train('--input=data/spm_input.txt --model_prefix=data/model/spm --vocab_size=' + str(VOCAB_SIZE) + ' --unk_id=2 --bos_id=-1 --eos_id=1 --pad_id=0 --hard_vocab_limit=False')

In [21]:
filepath = 'data/model/spm.model'

##Tensor Processing + Add to TaskRegistry

### English to Arabic Task

In [22]:
en_msa_split_csv_path = {
    "train": "data/train/train_en_msa.csv",
    "validation": "data/val/val_en_msa.csv"
}
en_msa_example_count = {
    "train": len(en_msa_train),
    "validation": len(en_msa_val)
}

In [23]:
def en_msa_translation_dataset_fn(split, shuffle_files=False):
  ds = tf.data.TextLineDataset(en_msa_split_csv_path[split])
  ds = ds.map(
      functools.partial(tf.io.decode_csv, record_defaults=["",""],
                        field_delim=",", use_quote_delim=False),
      num_parallel_calls=tf.data.experimental.AUTOTUNE
  )
  ds = ds.map(lambda *example: dict(zip(["source", "target"], example)) )
  return ds

for example in tfds.as_numpy(en_msa_translation_dataset_fn("train").take(5)):
    print(example)

{'source': b'\xd8\xad\xd9\x85\xd8\xa7\xd9\x85\xd9\x83 \xd8\xac\xd8\xa7\xd9\x87\xd8\xb2', 'target': b'Your bath is ready'}
{'source': b'\xd9\x87\xd9\x84 \xd9\x87\xd8\xb0\xd9\x87 \xd8\xb5\xd9\x88\xd8\xb1\xd8\xa9 \xd8\xa7\xd9\x86\xd8\xaa \xd8\xb1\xd8\xb3\xd9\x85\xd8\xaa\xd9\x87\xd8\xa7 \xd8\xa8\xd9\x86\xd9\x81\xd8\xb3\xd9\x83\xd8\x9f', 'target': b'Is this a picture that you yourself drew?'}
{'source': b'\xd8\xb7\xd9\x88\xd9\x83\xd9\x8a\xd9\x88 \xd9\x87\xd9\x8a \xd8\xa7\xd9\x83\xd8\xa8\xd8\xb1 \xd9\x85\xd8\xaf\xd9\x8a\xd9\x86\xd8\xa9 \xd9\x81\xd9\x8a \xd8\xa7\xd9\x84\xd9\x8a\xd8\xa7\xd8\xa8\xd8\xa7\xd9\x86', 'target': b'Tokyo is the largest city in Japan'}
{'source': b'\xd8\xaa\xd9\x88\xd8\xac\xd8\xaf \xd8\xa8\xd8\xb9\xd8\xb6 \xd8\xa7\xd9\x84\xd9\x85\xd8\xad\xd9\x84\xd8\xa7\xd8\xaa \xd9\x82\xd8\xb1\xd8\xa8 \xd9\x85\xd9\x86\xd8\xb2\xd9\x84\xd9\x8a', 'target': b'There are a few shops next to my house'}
{'source': b'\xd8\xb9\xd9\x84\xd9\x8a\xd9\x83 \xd8\xa7\xd9\x86 \xd8\xaa\xd8\xb3\xd8\xa7\xd

In [24]:
#turn the ds of dictionaries and change the keys to inputs and targets that the model
def en_msa_translation_preprocessor(ds):
  def to_inputs_and_targets(ex):
    return{
        "inputs": tf.strings.join(["translate English to MSA: ",ex["source"]]),
        "targets": ex["target"]
    }
  return ds.map(to_inputs_and_targets, num_parallel_calls=tf.data.experimental.AUTOTUNE)

In [25]:
t5.data.TaskRegistry.remove("translation_en_msa")
t5.data.TaskRegistry.add(
    #name of the Task
    "translation_en_msa",
    #Supply a function which returns a tf.data.Dataset
    dataset_fn=en_msa_translation_dataset_fn,
    splits=["train", "validation"],
    # Supply a function which preprocesses text from the tf.data.Dataset.
    text_preprocessor=[en_msa_translation_preprocessor],
    # Lowercase targets before computing metrics.
    postprocess_fn=t5.data.postprocessors.lower_text, 
    # We'll use accuracy as our evaluation metric.
    metric_fns=[t5.evaluation.metrics.bleu],
    # Not required, but helps for mixing and auto-caching.
    num_input_examples=en_msa_example_count,
    # output_features
    output_features=t5.data.Feature(vocabulary=t5.data.SentencePieceVocabulary(filepath))
)

###Levantine to MSA Task

In [26]:
lav_msa_split_csv_path = {
    "train": "data/train/train_lav_msa.csv",
    "validation": "data/val/val_lav_msa.csv"
}
lav_msa_example_count = {
    "train": len(lav_msa_train),
    "validation": len(lav_msa_val)
}

In [27]:
def lav_msa_translation_dataset_fn(split, shuffle_files=False):
  ds = tf.data.TextLineDataset(lav_msa_split_csv_path[split])
  ds = ds.map(
      functools.partial(tf.io.decode_csv, record_defaults=["",""],
                        field_delim=",", use_quote_delim=False),
      num_parallel_calls=tf.data.experimental.AUTOTUNE
  )
  ds = ds.map(lambda *example: dict(zip(["source", "target"], example)) )
  return ds

for example in tfds.as_numpy(lav_msa_translation_dataset_fn("train").take(5)):
    print(example)

{'source': b'\xd8\xa7\xd9\x86\xd8\xa7 \xd9\x84\xd8\xa7 \xd8\xa7\xd8\xb9\xd8\xb1\xd9\x81 \xd9\x87\xd8\xb0\xd9\x87 \xd8\xb9\xd9\x86\xd8\xaf\xd9\x85\xd8\xa7 \xd9\x8a\xd9\x82\xd9\x88\xd9\x84\xd9\x88\xd9\x86 \xd9\x82\xd8\xb1\xd9\x88\xd8\xa7\xd8\xa8\xd9\x8a \xd8\xa7\xd9\x86\xd8\xa7 \xd8\xa7\xd9\x82\xd9\x88\xd9\x84 \xd8\xa7\xd9\x84\xd8\xa8\xd8\xa7 \xd8\xb1\xd8\xad \xd9\x83\xd9\x86\xd8\xaa \xd9\x81\xd9\x8a \xd8\xb9\xd9\x85\xd8\xb1\xd9\x8a \xd8\xb9\xd8\xb4\xd8\xb1\xd9\x8a\xd9\x86', 'target': b'\xd9\x85\xd8\xa7 \xd8\xa8\xd8\xb9\xd8\xb1\xd9\x81\xd8\xa7 \xd9\x87\xd9\x8a \xd9\x84\xd9\x85\xd8\xa7 \xd8\xa8\xd9\x8a\xd9\x82\xd9\x88\xd9\x84\xd9\x88\xd8\xa7 \xd9\x82\xd8\xb1\xd9\x88\xd8\xa7\xd8\xa8\xd9\x8a \xd8\xa7\xd9\x86\xd8\xa7 \xd8\xa8\xd9\x82\xd9\x88\xd9\x84 \xd9\x85\xd8\xa8\xd8\xa7\xd8\xb1\xd8\xad\xd8\xa9 \xd9\x83\xd8\xa7\xd9\x86 \xd8\xb9\xd9\x85\xd8\xb1\xd9\x8a \xd8\xb9\xd8\xb4\xd8\xb1\xd9\x8a\xd9\x86'}
{'source': b'\xd8\xb3\xd8\xa7\xd8\xaa\xd9\x8a \xd8\xa7\xd9\x84\xd8\xa7\xd9\x86 \xd9\x84\xd8\xa7\

In [28]:
#turn the ds of dictionaries and change the keys to inputs and targets that the model
def lav_msa_translation_preprocessor(ds):
  def to_inputs_and_targets(ex):
    return{
        "inputs": tf.strings.join(["translate Levantine to MSA: ",ex["source"]]),
        "targets": ex["target"]
    }
  return ds.map(to_inputs_and_targets, num_parallel_calls=tf.data.experimental.AUTOTUNE)

In [29]:
t5.data.TaskRegistry.remove("translation_lav_msa")
t5.data.TaskRegistry.add(
    #name of the Task
    "translation_lav_msa",
    #Supply a function which returns a tf.data.Dataset
    dataset_fn=lav_msa_translation_dataset_fn,
    splits=["train", "validation"],
    # Supply a function which preprocesses text from the tf.data.Dataset.
    text_preprocessor=[lav_msa_translation_preprocessor],
    # Lowercase targets before computing metrics.
    postprocess_fn = t5.data.postprocessors.lower_text, 
    # We'll use accuracy as our evaluation metric.
    metric_fns=[t5.evaluation.metrics.bleu],
    # Not required, but helps for mixing and auto-caching.
    num_input_examples=lav_msa_example_count,
    # output_features
    output_features=t5.data.Feature(vocabulary=t5.data.SentencePieceVocabulary(filepath))
)

###Maghrib to MSA Task

In [30]:
mag_msa_split_csv_path = {
    "train": "data/train/train_mag_msa.csv",
    "validation": "data/val/val_mag_msa.csv"
}
mag_msa_example_count = {
    "train": len(mag_msa_train),
    "validation": len(mag_msa_val)
}

In [31]:
def mag_msa_translation_dataset_fn(split, shuffle_files=False):
  ds = tf.data.TextLineDataset(mag_msa_split_csv_path[split])
  ds = ds.map(
      functools.partial(tf.io.decode_csv, record_defaults=["",""],
                        field_delim=",", use_quote_delim=False),
      num_parallel_calls=tf.data.experimental.AUTOTUNE
  )
  ds = ds.map(lambda *example: dict(zip(["source", "target"], example)) )
  return ds

for example in tfds.as_numpy(mag_msa_translation_dataset_fn("train").take(5)):
    print(example)
    print(example['source'].decode())
    print(example['target'].decode())
    print(len(example['source']))
    print(len(example['target']))

{'source': b'\xd9\x84\xd8\xa7\xd9\x86 \xd8\xa7\xd9\x84\xd9\x85\xd8\xb7\xd8\xa8\xd9\x88\xd8\xb9\xd8\xa7\xd8\xaa \xd9\x8a\xd9\x83\xd9\x88\xd9\x86 \xd9\x84\xd9\x87\xd8\xa7 \xd8\xaa\xd8\xa7\xd8\xab\xd9\x8a\xd8\xb1 \xd8\xa7\xd9\x82\xd9\x88\xd9\x89 \xd9\x85\xd9\x86 \xd8\xa7\xd9\x84\xd8\xaa\xd9\x88\xd8\xb2\xd9\x8a\xd8\xb9 \xd8\xa7\xd9\x84\xd8\xa7\xd9\x84\xd9\x83\xd8\xaa\xd8\xb1\xd9\x88\xd9\x86\xd9\x89 ', 'target': b'\xd8\xb9\xd9\x84\xd9\x89 \xd8\xae\xd8\xa7\xd8\xb7\xd8\xb1 \xd8\xa7\xd9\x84\xd9\x85\xd8\xb7\xd8\xa8\xd9\x88\xd8\xb9\xd8\xa7\xd8\xaa \xd8\xb9\xd9\x86\xd8\xaf\xd9\x87\xd8\xa7 \xd8\xaa\xd8\xa7\xd8\xab\xd9\x8a\xd8\xb1 \xd8\xa7\xd9\x82\xd9\x88\xd9\x89 \xd9\x85\xd9\x86 \xd8\xa7\xd9\x84\xd8\xaa\xd9\x88\xd8\xb2\xd9\x8a\xd8\xb9 \xd8\xa7\xd9\x84\xd8\xa7\xd9\x84\xd9\x83\xd8\xaa\xd8\xb1\xd9\x88\xd9\x86\xd9\x8a'}
لان المطبوعات يكون لها تاثير اقوى من التوزيع الالكترونى 
على خاطر المطبوعات عندها تاثير اقوى من التوزيع الالكتروني
103
106
{'source': b' \xd9\x85\xd9\x86\xd8\xb0 \xd9\x85\xd8\xaf\xd8\x

In [32]:
  #turn the ds of dictionaries and change the keys to inputs and targets that the model
def mag_msa_translation_preprocessor(ds):
    def to_inputs_and_targets(ex):
      return{
          "inputs": tf.strings.join(["translate Maghrib to MSA: ",ex["source"]]),
          "targets": ex["target"]
      }
    return ds.map(to_inputs_and_targets, num_parallel_calls=tf.data.experimental.AUTOTUNE)

In [33]:
t5.data.TaskRegistry.remove("translation_mag_msa")
t5.data.TaskRegistry.add(
    #name of the Task
    "translation_mag_msa",
    #Supply a function which returns a tf.data.Dataset
    dataset_fn=mag_msa_translation_dataset_fn,
    splits=["train", "validation"],
    # Supply a function which preprocesses text from the tf.data.Dataset.
    text_preprocessor=[mag_msa_translation_preprocessor],
    # Lowercase targets before computing metrics.
    postprocess_fn = t5.data.postprocessors.lower_text, 
    # We'll use accuracy as our evaluation metric.
    metric_fns=[t5.evaluation.metrics.bleu],
    # Not required, but helps for mixing and auto-caching.
    num_input_examples=mag_msa_example_count,
    # output_features
    output_features=t5.data.Feature(vocabulary=t5.data.SentencePieceVocabulary(filepath))
)

##Dataset Mixture

In [34]:
t5.data.MixtureRegistry.remove("translation_msa")
t5.data.MixtureRegistry.add(
    "translation_msa",
    ["translation_en_msa", "translation_lav_msa", "translation_mag_msa"],
     default_rate=1.0
)

##Pre-Training

In [35]:
#gotta get the base config and add the new tasks' task params
!wget "https://s3.amazonaws.com/models.huggingface.co/bert/t5-base-config.json" -O data/config/t5-base-config.json

--2020-06-17 06:19:07--  https://s3.amazonaws.com/models.huggingface.co/bert/t5-base-config.json
Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.216.88.197
Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.216.88.197|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1199 (1.2K) [application/json]
Saving to: ‘data/config/t5-base-config.json’


2020-06-17 06:19:07 (147 MB/s) - ‘data/config/t5-base-config.json’ saved [1199/1199]



In [36]:
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

#Using the base config from Huggingface T5 Model
config = transformers.T5Config.from_json_file(json_file="data/config/t5-base-config.json")
model = t5.models.HfPyTorchModel(config, "/tmp/hft5/", device)

In [37]:
ls /tmp/hft5

events.out.tfevents.1592374755.1c7b755b33b3.177.0


In [38]:
import gin
with gin.unlock_config():
  gin.parse_config_file("data/operative_config.gin")

In [None]:
STEPS = 10000 #@param {type: "integer"}
model.train(
    mixture_or_task_name="translation_msa",
    steps=STEPS,
    save_steps=STEPS/5,                                                   
    sequence_length={"inputs": 64, "targets": 64},
    split="train",
    batch_size=32,
    optimizer=functools.partial(transformers.AdamW, lr=1e-4),
)

INFO:absl:Saving checkpoint for step 0


##Evaluation

In [None]:
# Evaluate after fine-tuning
model.eval(
    "translation_msa",
    checkpoint_steps=STEPS,
    sequence_length={"inputs": 64, "targets": 64},
    batch_size=32,
)

##Predictions

In [None]:
inputs = [
    "translate English to MSA: Tom was also there.",
    "translate English to MSA: A doggy detail was walking famously.",
]
model.predict(
    inputs,
    sequence_length={"inputs": 2},
    batch_size=2,
    output_file="/tmp/hft5/example_predictions.txt",
)