# English-to-German Translation Audio Experiment
## Experiment 2
### Text Translation 2



# Import Libraries

In [1]:
from transformers import *



# Setup Model Function

In [2]:
def get_translation_model_and_tokenizer(src_lang: str, dst_lang: str):
  """
  Load the Helsinki-NLP OPUS-MT translation model and tokenizer for a language pair.

  The checkpoint name is built as "Helsinki-NLP/opus-mt-{src_lang}-{dst_lang}"
  and handed to the `from_pretrained` loaders.

  Args:
    src_lang: code of the language to translate from, e.g. "en".
    dst_lang: code of the language to translate to, e.g. "de".

  Returns:
    A `(model, tokenizer)` tuple, in that order.

  See the language codes here: https://developers.google.com/admin-sdk/directory/v1/languages
  Some languages use 3-character codes; look the code up if the 2-character one is not found.
  """
  # Build the checkpoint name from the function's own arguments rather than from
  # notebook-level variables, so the function works for any language pair and
  # does not depend on which cells have already been executed.
  model_name = f"Helsinki-NLP/opus-mt-{src_lang}-{dst_lang}"
  # initialize the tokenizer & model from the same checkpoint so they match
  tokenizer = AutoTokenizer.from_pretrained(model_name)
  model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
  # return them for use (model first, tokenizer second)
  return model, tokenizer

# Create Model Using Function

In [3]:
# language pair for this experiment: English (source) -> German (destination)
src, dst = "en", "de"

# load the pretrained translation model and its tokenizer for that pair
model, tokenizer = get_translation_model_and_tokenizer(src, dst)

loading configuration file config.json from cache at C:\Users\Connor/.cache\huggingface\hub\models--Helsinki-NLP--opus-mt-en-de\snapshots\6183067f769a302e3861815543b9f312c71b0ca4\config.json
Model config MarianConfig {
  "_name_or_path": "Helsinki-NLP/opus-mt-en-de",
  "_num_labels": 3,
  "activation_dropout": 0.0,
  "activation_function": "swish",
  "add_bias_logits": false,
  "add_final_layer_norm": false,
  "architectures": [
    "MarianMTModel"
  ],
  "attention_dropout": 0.0,
  "bad_words_ids": [
    [
      58100
    ]
  ],
  "bos_token_id": 0,
  "classif_dropout": 0.0,
  "classifier_dropout": 0.0,
  "d_model": 512,
  "decoder_attention_heads": 8,
  "decoder_ffn_dim": 2048,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 6,
  "decoder_start_token_id": 58100,
  "decoder_vocab_size": 58101,
  "dropout": 0.1,
  "encoder_attention_heads": 8,
  "encoder_ffn_dim": 2048,
  "encoder_layerdrop": 0.0,
  "encoder_layers": 6,
  "eos_token_id": 0,
  "forced_eos_token_id": 0,
  "gradient_check

In [4]:
# English sample passage (a short biography of Albert Einstein) used as the
# text to translate in the cells below.
article = """
Albert Einstein ( 14 March 1879 – 18 April 1955) was a German-born theoretical physicist, widely acknowledged to be one of the greatest physicists of all time. 
Einstein is best known for developing the theory of relativity, but he also made important contributions to the development of the theory of quantum mechanics. 
Relativity and quantum mechanics are together the two pillars of modern physics. 
His mass–energy equivalence formula E = mc2, which arises from relativity theory, has been dubbed "the world's most famous equation". 
His work is also known for its influence on the philosophy of science.
He received the 1921 Nobel Prize in Physics "for his services to theoretical physics, and especially for his discovery of the law of the photoelectric effect", a pivotal step in the development of quantum theory. 
His intellectual achievements and originality resulted in "Einstein" becoming synonymous with "genius"
"""

# tokenize the article into a tensor of token ids with the model's own tokenizer;
# anything beyond 512 tokens is truncated
inputs = tokenizer.encode(
    article,
    truncation=True,
    max_length=512,
    return_tensors="pt",
)
print(inputs)

tensor([[ 7799, 39858,    20,   536,  1290,   268,  3977,   112,   268,   757,
         18170,    27,    58,    14,   586,    13,  4904, 15823, 38818,     2,
         10884, 20420,    12,    43,   128,     7,     4,  7833, 38818,     6,
             7,    92,   160,     3, 39858,    19,   517,  1369,    23,  3121,
             4,  8807,     7,  5049,   658,     2,   144,   137,   115,   319,
           501,  6820,    12,     4,   478,     7,     4,  8807,     7, 35266,
         35330,     3,   465,  1270, 24370,     8, 35266, 35330,    48,   848,
             4,   254, 26364,     7,  1457, 19419,     3,  1704,  5131,  1211,
         28077, 44216,  8418,   155,  2095,   285,   265,  2877,    86, 24430,
            59,  5049,   658,  8807,     2,    99,   152,   143,  6165,   108,
            47,   500,   360,    22,     6,   306,  3420, 28408,   400,  1704,
           269,    19,   115,  1369,    23,   136,  4633,    32,     4, 12997,
             7,  4802,     3,   231,  1795,     4, 2

# Beam Outputs vs Greedy Outputs

In [5]:
# generate the translation output using beam search (3 beams kept per step)
beam_outputs = model.generate(inputs, num_beams=3)
# decode the output and ignore special tokens
print("Beam Outputs")
print(tokenizer.decode(beam_outputs[0], skip_special_tokens=True))

# generate the translation output using greedy search
# NOTE: when decoding arguments are omitted, generate() falls back to the
# checkpoint's own generation defaults, and OPUS-MT checkpoints commonly ship
# with num_beams > 1 (TODO confirm for this checkpoint via model.generation_config).
# Pin num_beams=1 and do_sample=False so this run is genuinely greedy and the
# comparison against the beam-search output above is meaningful.
greedy_outputs = model.generate(inputs, num_beams=1, do_sample=False)
# decode the output and ignore special tokens
print("Greedy Outputs")
print(tokenizer.decode(greedy_outputs[0], skip_special_tokens=True))

Beam Outputs
Albert Einstein (* 14. März 1879 – 18. April 1955) war ein deutscher theoretischer Physiker, der als einer der größten Physiker aller Zeiten anerkannt wurde. Einstein ist am besten für die Entwicklung der Relativitätstheorie bekannt, aber er leistete auch wichtige Beiträge zur Entwicklung der Quantenmechaniktheorie. Relativität und Quantenmechanik sind zusammen die beiden Säulen der modernen Physik. Seine Massenenergieäquivalenzformel E = mc2, die aus der Relativitätstheorie hervorgeht, wurde als „die berühmteste Gleichung der Welt" bezeichnet. Seine Arbeit ist auch für ihren Einfluss auf die Philosophie der Wissenschaft bekannt. Er erhielt 1921 den Nobelpreis für Physik „für seine Verdienste um die theoretische Physik und vor allem für seine Entdeckung des Gesetzes über den photoelektrischen Effekt", einen entscheidenden Schritt in der Entwicklung der Quantentheorie. Seine intellektuellen Leistungen und Originalität führten dazu, dass „Einstein" zum Synonym für „Genius" w