# T5 (Translator)

In [79]:
from transformers import T5Tokenizer, T5ForConditionalGeneration

In [80]:
# Load the tokenizer published alongside the 't5-small' checkpoint.
tokenizer = T5Tokenizer.from_pretrained(
    pretrained_model_name_or_path='t5-small',
)

In [81]:
# Load the 't5-small' weights into a conditional-generation (seq2seq) model.
# `return_dict=True` is passed explicitly so forward passes return structured
# output objects instead of plain tuples.
model = T5ForConditionalGeneration.from_pretrained(
    pretrained_model_name_or_path='t5-small',
    return_dict=True,
)

In [82]:
input = "My name is Ilya and I live in Dnipro"

In [83]:
input_ids = tokenizer(input, return_tensors="pt").input_ids  # Batch size 1

In [85]:
# Run generation on the tokenized sentence. No length or search options are
# passed, so the checkpoint's default generation settings apply.
outputs = model.generate(inputs=input_ids)

In [86]:
# Turn the first (and only) generated sequence back into text, dropping
# special tokens from the decoded string.
decoded = tokenizer.decode(
    token_ids=outputs[0],
    skip_special_tokens=True,
)

In [87]:
# Show the decoded model output (the recorded run printed a German sentence).
print(decoded)

Mein Name ist Ilya und ich wohne in Dnipro.


# Flan-T5 (Generator)

In [88]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

In [89]:
# Rebind `model` to the instruction-tuned 'google/flan-t5-small' checkpoint;
# the t5-small model loaded earlier is no longer reachable via this name.
model = AutoModelForSeq2SeqLM.from_pretrained(
    pretrained_model_name_or_path='google/flan-t5-small',
)

In [90]:
# Rebind `tokenizer` to the one matching 'google/flan-t5-small'.
tokenizer = AutoTokenizer.from_pretrained(
    pretrained_model_name_or_path='google/flan-t5-small',
)

In [91]:
# Free-form instruction prompt for Flan-T5 (small).
# NOTE(review): "recype" is a typo for "recipe"; it is kept verbatim because
# the recorded output below was generated from this exact prompt.
inputs = tokenizer(
    text="A step by step recype to make bolognese pasta:",
    return_tensors="pt",
)

# Generate with the checkpoint's default generation settings.
outputs = model.generate(**inputs)

# Decode every sequence in the batch and print the resulting list of strings.
print(
    tokenizer.batch_decode(
        sequences=outputs,
        skip_special_tokens=True,
    )
)

['Pour the pasta into a large pot and cover with a lid.']


In [92]:
# Same pipeline as the previous cell, this time with an explicit
# English-to-German translation instruction in the prompt.
inputs = tokenizer(
    text="translate English to German: How old are you?",
    return_tensors="pt",
)

# Generate with the checkpoint's default generation settings.
outputs = model.generate(**inputs)

# Decode every sequence in the batch and print the resulting list of strings.
print(
    tokenizer.batch_decode(
        sequences=outputs,
        skip_special_tokens=True,
    )
)

['Wie ich er bitten?']


In [93]:
# Switch both `model` and `tokenizer` to the larger 'google/flan-t5-large'
# checkpoint; every later cell uses these rebinding results.
model = AutoModelForSeq2SeqLM.from_pretrained(
    pretrained_model_name_or_path='google/flan-t5-large',
)
tokenizer = AutoTokenizer.from_pretrained(
    pretrained_model_name_or_path='google/flan-t5-large',
)

In [94]:
# Print the loaded checkpoint's configuration (layer counts, hidden sizes,
# special-token ids, vocabulary size) to confirm which model is in memory.
print(model.config)

T5Config {
  "_name_or_path": "google/flan-t5-large",
  "architectures": [
    "T5ForConditionalGeneration"
  ],
  "d_ff": 2816,
  "d_kv": 64,
  "d_model": 1024,
  "decoder_start_token_id": 0,
  "dense_act_fn": "gelu_new",
  "dropout_rate": 0.1,
  "eos_token_id": 1,
  "feed_forward_proj": "gated-gelu",
  "initializer_factor": 1.0,
  "is_encoder_decoder": true,
  "is_gated_act": true,
  "layer_norm_epsilon": 1e-06,
  "model_type": "t5",
  "n_positions": 512,
  "num_decoder_layers": 24,
  "num_heads": 16,
  "num_layers": 24,
  "output_past": true,
  "pad_token_id": 0,
  "relative_attention_max_distance": 128,
  "relative_attention_num_buckets": 32,
  "tie_word_embeddings": false,
  "transformers_version": "4.30.0.dev0",
  "use_cache": true,
  "vocab_size": 32128
}



In [95]:
# Bare topic (no instruction prefix) used as the prompt for the long-form
# generation in the following cell.
my_text = 'A short introduction to machine learning'

In [96]:
# Tokenize the prompt as PyTorch tensors.
inputs = tokenizer(text=my_text, return_tensors='pt')

# Beam-search generation tuned for a long answer:
#   num_beams=16            - number of beams explored in parallel
#   early_stopping=True     - stop once enough complete candidates exist
#   no_repeat_ngram_size=2  - no 2-token sequence may occur twice
#   min_length=200          - force at least 200 generated tokens
#   max_new_tokens=512      - hard cap on newly generated tokens
outputs = model.generate(
    **inputs,
    num_beams=16,
    early_stopping=True,
    no_repeat_ngram_size=2,
    min_length=200,
    max_new_tokens=512,
)

# Decode the whole batch into a list of strings without special tokens.
output_text_Flan_t5 = tokenizer.batch_decode(
    sequences=outputs,
    skip_special_tokens=True,
)

In [97]:
# Print the list of decoded strings produced by the generation cell above.
print(output_text_Flan_t5)

['Machine learning (ML) is a branch of computer science that uses artificial intelligence (AI) to make predictions about the world around us. ML can be applied to many areas of science, including medicine, physics, economics, and astronomy. It can also be used to develop new technologies, such as self-driving cars and robots. A short introduction to machine learning The following are some of the main topics covered in the course "Machine Learning and Artificial Intelligence" at the University of California, Berkeley. This course is designed for students who are interested in learning how to use computers to solve real-world problems. The course covers the following topics: Introduction to Machine Learning Using Machines to Solve Real-World Problems In this course, you will learn the fundamentals of how computers work and how they are used in real world situations. You will also learn how humans and machines interact with each other and the environment in which they live and work.']


In [98]:
# Reuse the earlier topic, now wrapped in an explicit translation instruction.
# This rebinds `my_text`, `inputs`, `outputs` and `output_text_Flan_t5`.
my_text = 'Translate to German: A short introduction to machine learning'

# Tokenize the prompt as PyTorch tensors.
inputs = tokenizer(text=my_text, return_tensors='pt')

# Same beam-search settings as the long-form cell, but without `min_length`,
# so the model may stop as soon as the translation is complete.
outputs = model.generate(
    **inputs,
    num_beams=16,
    early_stopping=True,
    no_repeat_ngram_size=2,
    max_new_tokens=512,
)

# Decode the whole batch into a list of strings without special tokens.
output_text_Flan_t5 = tokenizer.batch_decode(
    sequences=outputs,
    skip_special_tokens=True,
)

print(output_text_Flan_t5)

['Eine kurze Einführung in Machine Learning']
