In [None]:
!pip install transformers



In [None]:
from google.colab import userdata

In [None]:
from transformers import Seq2SeqTrainer, AutoModelForSeq2SeqLM, AutoConfig, AutoTokenizer

In [None]:
# Registry meant to hold loaded (config, tokenizer, model) tuples keyed by
# model name. get_model_from_dict does not populate it by default (its note
# cites a memory issue), so it normally stays empty.
model_dict = {}
# Hugging Face Hub namespace that is prefixed to every checkpoint name when
# the translate_* helpers build a repo id.
HF_USER = 'daynauth'

In [None]:
def load_model_params(model_name_or_path):
  """Fetch the config, tokenizer and seq2seq model for one checkpoint.

  Args:
    model_name_or_path: Identifier handed to every ``from_pretrained`` call.

  Returns:
    A ``(config, tokenizer, model)`` tuple.
  """
  # Keyword arguments that are identical across all three from_pretrained calls.
  hub_kwargs = dict(
      cache_dir=None,
      revision="main",
      token=None,
      trust_remote_code=False,
  )

  config = AutoConfig.from_pretrained(model_name_or_path, **hub_kwargs)

  tokenizer = AutoTokenizer.from_pretrained(
      model_name_or_path,
      use_fast=True,
      **hub_kwargs,
  )

  # The model is built from the config object loaded above rather than
  # letting from_pretrained resolve its own.
  model = AutoModelForSeq2SeqLM.from_pretrained(
      model_name_or_path,
      from_tf=False,
      config=config,
      **hub_kwargs,
  )

  return config, tokenizer, model


def get_model_from_dict(model_name, use_cache=False):
  """Return the ``(config, tokenizer, model)`` tuple for ``model_name``.

  Args:
    model_name: Identifier forwarded to ``load_model_params``.
    use_cache: When True, store the loaded tuple in the module-level
      ``model_dict`` and reuse it on later calls with the same name.
      Disabled by default: caching was previously switched off here with
      the note "memory issue", so the default keeps the load-every-call
      behaviour and nothing is retained between calls.

  Returns:
    The tuple produced by ``load_model_params``.
  """
  if not use_cache:
    return load_model_params(model_name)

  # Opt-in memoisation: load once, then serve the stored tuple.
  if model_name not in model_dict:
    model_dict[model_name] = load_model_params(model_name)
  return model_dict[model_name]

def load_model(model_name):
  """Load a checkpoint by name.

  Delegates to ``get_model_from_dict`` and hands back its result unchanged.
  """
  loaded = get_model_from_dict(model_name)
  return loaded

In [None]:
def translate(input_text, model, tokenizer, max_new_tokens=256):
  """Run one text through a seq2seq model and return the decoded string.

  Args:
    input_text: Prompt text to tokenize (truncated to 1024 tokens).
    model: Object exposing ``generate(**inputs)``.
    tokenizer: Callable tokenizer that also exposes ``decode``.
    max_new_tokens: Upper bound on generated tokens. Without an explicit
      bound, ``generate`` uses the length limit from the checkpoint's
      generation config, which can cut translations off mid-sentence.
      Pass ``None`` to leave the checkpoint's own limit in effect.

  Returns:
    The first generated sequence, decoded with special tokens removed.
  """
  model_input = tokenizer(
      input_text,
      max_length=1024,
      padding=False,
      truncation=True,
      return_tensors="pt",
  )

  # Only override the length limit when the caller asked for one.
  generate_kwargs = {}
  if max_new_tokens is not None:
    generate_kwargs["max_new_tokens"] = max_new_tokens

  output = model.generate(**model_input, **generate_kwargs)
  # A single input was passed, so only the first sequence is decoded.
  return tokenizer.decode(output[0], skip_special_tokens=True)

def translate_english_to_creole(input_text, model_str = 't5-large'):
  """Translate English text to Creole with the ``<model_str>-en-creole`` checkpoint.

  Args:
    input_text: English text to translate.
    model_str: Base model name; the repo id is built as
      ``HF_USER/<model_str>-en-creole``.

  Returns:
    The string returned by ``translate``.
  """
  repo_id = '/'.join([HF_USER, model_str + '-en-creole'])
  # The config element of the loaded tuple is not needed here.
  _, tokenizer, model = load_model(repo_id)

  prompt = f"Translate English to Creole: {input_text}"
  return translate(prompt, model, tokenizer)

def translate_creole_to_english(input_text, model_str = 't5-large'):
  """Translate Creole text to English with the ``<model_str>-creole-en`` checkpoint.

  Args:
    input_text: Creole text to translate.
    model_str: Base model name; the repo id is built as
      ``HF_USER/<model_str>-creole-en``.

  Returns:
    The string returned by ``translate``.
  """
  repo_id = '/'.join([HF_USER, model_str + '-creole-en'])
  # The config element of the loaded tuple is not needed here.
  _, tokenizer, model = load_model(repo_id)

  prompt = f"Translate Creole to English: {input_text}"
  return translate(prompt, model, tokenizer)

# Translate English to Creole

In [None]:
# Send one sample English sentence through the t5-large English-to-Creole
# checkpoint and print the resulting translation.
input_text = "I started talking (but may not finish)"
translation = translate_english_to_creole(input_text, model_str = 't5-large')
print(translation)



config.json:   0%|          | 0.00/1.51k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/20.7k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.95G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]



mi stap taak (but na a stap fin


# Translate Creole to English

## Bart Large

In [None]:
# Translate the sample Creole phrase with the bart-large Creole-to-English
# checkpoint and print the result.
input_text = "mi staat-taak"
translation = translate_creole_to_english(input_text, model_str = 'bart-large')
print(translation)

config.json:   0%|          | 0.00/1.72k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/279 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/297 [00:00<?, ?B/s]

I doubt whether I will speak.


## Bart Base

In [None]:
# Translate the sample Creole phrase with the bart-base Creole-to-English
# checkpoint and print the result.
input_text = "mi staat-taak"
translation = translate_creole_to_english(input_text, model_str = 'bart-base')
print(translation)

config.json:   0%|          | 0.00/1.74k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/279 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/558M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/297 [00:00<?, ?B/s]

My mouth is aching. OR: My mouth is hurting me.


## T5 Large

In [None]:
# Translate the sample Creole phrase with the t5-large Creole-to-English
# checkpoint and print the result.
input_text = "mi staat-taak"
translation = translate_creole_to_english(input_text, model_str = 't5-large')
print(translation)

config.json:   0%|          | 0.00/1.51k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/20.7k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.95G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

I am a state-tacker


## Pegasus Large

In [None]:
# Translate the sample Creole phrase with the pegasus-large Creole-to-English
# checkpoint and print the result.
input_text = "mi staat-taak"
translation = translate_creole_to_english(input_text, model_str = 'pegasus-large')
print(translation)

config.json:   0%|          | 0.00/3.12k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/20.1k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/1.91M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/6.60M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/1.77k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.28G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/260 [00:00<?, ?B/s]

I’m scared of snakes. OR: I’m scared of snakes.
