# Few-Shot Learning with Hugging Face

In [8]:
!pip install transformers torch

from transformers import MarianMTModel, MarianTokenizer
import torch

# English -> French MarianMT checkpoint on the Hugging Face Hub.
model_name = "Helsinki-NLP/opus-mt-en-fr"

# Use the GPU when PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Load the tokenizer first, then the weights, and place the model on `device`.
tokenizer = MarianTokenizer.from_pretrained(model_name)
model = MarianMTModel.from_pretrained(model_name).to(device)

def translate_sentence(sentence, max_length=200, num_beams=5):
    """Translate text with the MarianMT model currently bound to ``model``.

    Uses the module-level ``tokenizer``, ``model`` and ``device`` defined
    earlier in this cell.

    Args:
        sentence: A single string, or a list/tuple of strings that are
            translated together as one padded batch.
        max_length: Upper bound on the generated sequence length, forwarded
            to ``model.generate``.
        num_beams: Beam width forwarded to ``model.generate``.

    Returns:
        The translated string when ``sentence`` is a string; otherwise a list
        of translated strings, one per input, in input order (an empty list
        for an empty batch).
    """
    is_single = isinstance(sentence, str)
    batch = [sentence] if is_single else list(sentence)
    if not batch:
        # Nothing to tokenize; avoid calling the tokenizer with an empty batch.
        return []

    # padding=True aligns sentences of different lengths within the batch;
    # truncation=True clips inputs to the tokenizer's configured maximum
    # length instead of handing over-long sequences to the model.
    inputs = tokenizer(
        batch,
        return_tensors="pt",
        padding=True,
        truncation=True,
    ).to(device)

    # Beam-search generation.
    translated_tokens = model.generate(
        **inputs,
        max_length=max_length,
        num_beams=num_beams,
        early_stopping=True,
    )

    # Decode every generated row, not just the first, so batch inputs are
    # not silently dropped.
    translations = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)
    return translations[0] if is_single else translations

# Sample English sentence and its translation via the model loaded above.
sentence = "I enjoy reading books."
translation = translate_sentence(sentence)

# Report source and result, one per line.
print(f"Input: {sentence}", f"Translation: {translation.strip()}", sep="\n")

# On CUDA machines, ask PyTorch to release its cached GPU memory.
if torch.cuda.is_available():
    torch.cuda.empty_cache()




tokenizer_config.json:   0%|          | 0.00/42.0 [00:00<?, ?B/s]

source.spm:   0%|          | 0.00/778k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/802k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.34M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.42k [00:00<?, ?B/s]



pytorch_model.bin:   0%|          | 0.00/301M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

Input: I enjoy reading books.
Translation: J'aime lire des livres.


In [9]:

# English -> Tagalog MarianMT checkpoint ("tl" is the Tagalog language code).
model_name = "Helsinki-NLP/opus-mt-en-tl"

# Pick CUDA when available, CPU otherwise.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the tokenizer, then the weights, and place the model on `device`.
tokenizer = MarianTokenizer.from_pretrained(model_name)
model = MarianMTModel.from_pretrained(model_name).to(device)

def translate_sentence(sentence, max_length=200, num_beams=5):
    """Translate text with the MarianMT model currently bound to ``model``.

    Uses the module-level ``tokenizer``, ``model`` and ``device`` defined
    earlier in this cell.

    Args:
        sentence: A single string, or a list/tuple of strings that are
            translated together as one padded batch.
        max_length: Upper bound on the generated sequence length, forwarded
            to ``model.generate``.
        num_beams: Beam width forwarded to ``model.generate``.

    Returns:
        The translated string when ``sentence`` is a string; otherwise a list
        of translated strings, one per input, in input order (an empty list
        for an empty batch).
    """
    is_single = isinstance(sentence, str)
    batch = [sentence] if is_single else list(sentence)
    if not batch:
        # Nothing to tokenize; avoid calling the tokenizer with an empty batch.
        return []

    # padding=True aligns sentences of different lengths within the batch;
    # truncation=True clips inputs to the tokenizer's configured maximum
    # length instead of handing over-long sequences to the model.
    inputs = tokenizer(
        batch,
        return_tensors="pt",
        padding=True,
        truncation=True,
    ).to(device)

    # Beam-search generation using MarianMT.
    translated_tokens = model.generate(
        **inputs,
        max_length=max_length,
        num_beams=num_beams,
        early_stopping=True,
    )

    # Decode every generated row, not just the first, so batch inputs are
    # not silently dropped.
    translations = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)
    return translations[0] if is_single else translations

# English source text, translated to Tagalog by the model loaded above.
sentence = "I enjoy reading books."
translation = translate_sentence(sentence)

# Print the source and its translation on consecutive lines.
print(f"Input: {sentence}", f"Translation: {translation.strip()}", sep="\n")

# Release PyTorch's cached GPU memory when running on CUDA.
if torch.cuda.is_available():
    torch.cuda.empty_cache()


tokenizer_config.json:   0%|          | 0.00/42.0 [00:00<?, ?B/s]

source.spm:   0%|          | 0.00/827k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/835k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.32M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.36k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/296M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

Input: I enjoy reading books.
Translation: Nasisiyahan akong magbasa ng mga aklat.
