In [1]:
from transformers import pipeline

def build_qa_model(model_name='distilbert-base-cased-distilled-squad'):
    """Create a Hugging Face question-answering pipeline.

    Args:
        model_name: Hub identifier (or local path) of the checkpoint to load.
            The same identifier is used for both the model weights and the
            tokenizer so the two always match. Defaults to the DistilBERT
            checkpoint this notebook originally hard-coded.

    Returns:
        The object returned by ``pipeline('question-answering', ...)``.
    """
    # Model and tokenizer are deliberately tied to one name: mixing a
    # tokenizer from a different checkpoint would feed the model wrong ids.
    return pipeline('question-answering', model=model_name, tokenizer=model_name)

def ask_question(qa_model, context, question):
    """Run ``qa_model`` on one context/question pair and return the answer text.

    Args:
        qa_model: Callable accepting ``context`` and ``question`` keyword
            arguments and returning a mapping that has an ``'answer'`` key.
        context: Passage the answer should be extracted from.
        question: Question to ask about ``context``.

    Returns:
        The value stored under ``'answer'`` in the model's result.
    """
    query = {"context": context, "question": question}
    # Only the answer is surfaced; any other fields in the result are dropped.
    return qa_model(**query)['answer']

# Build the pipeline up front; the first run downloads the checkpoint files.
qa_model = build_qa_model()

# Demo input: a one-sentence passage and a question it can answer.
context = "Hugging Face is a company that provides state-of-the-art natural language processing models."
question = "What does Hugging Face do?"
answer = ask_question(qa_model, context, question)

# Show the question next to the extracted answer.
print(f"Question: {question}")
print(f"Answer: {answer}")


Downloading config.json:   0%|          | 0.00/473 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


Downloading model.safetensors:   0%|          | 0.00/261M [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/261M [00:00<?, ?B/s]

All PyTorch model weights were used when initializing TFDistilBertForQuestionAnswering.

All the weights of TFDistilBertForQuestionAnswering were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFDistilBertForQuestionAnswering for predictions without further training.


Downloading tokenizer_config.json:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Downloading vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

Downloading tokenizer.json:   0%|          | 0.00/436k [00:00<?, ?B/s]

Question: What does Hugging Face do?
Answer: provides state-of-the-art natural language processing models


In [2]:
from transformers import MarianMTModel, MarianTokenizer

def build_translation_model(model_name):
    """Load a MarianMT checkpoint together with its tokenizer.

    Args:
        model_name: Identifier passed unchanged to both ``from_pretrained``
            calls.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    # Tuple elements evaluate left to right, so the model is loaded before
    # the tokenizer.
    return (
        MarianMTModel.from_pretrained(model_name),
        MarianTokenizer.from_pretrained(model_name),
    )

def translate_text(model, tokenizer, text, source_lang, target_lang,
                   max_length=50, num_beams=5):
    """Translate ``text`` with a MarianMT model.

    Args:
        model: Object exposing ``generate(**inputs, ...)``, e.g. the model
            returned by ``build_translation_model``.
        tokenizer: Matching tokenizer; must be callable on a string and
            provide ``decode``.
        text: Text to translate.
        source_lang: Source language code. Kept for interface compatibility;
            the source language is fixed by the chosen checkpoint and is not
            passed to the model.
        target_lang: Target language code. Only used when the tokenizer
            advertises a ``>>target_lang<<`` token (multilingual checkpoints).
        max_length: Upper bound on the generated sequence length.
        num_beams: Beam width used during generation.

    Returns:
        The decoded translation, or ``""`` when ``text`` is empty or
        whitespace-only.
    """
    # Nothing to translate: skip the model call instead of decoding whatever
    # the model would produce for an empty prompt.
    if not text or not text.strip():
        return ""

    # Marian language tokens pick the *target* language, and only
    # multilingual checkpoints define them. Prefixing the source language
    # (as this function previously did) only injects noise into the input.
    target_token = f">>{target_lang}<<"
    supported_codes = getattr(tokenizer, "supported_language_codes", None) or []
    if target_token in supported_codes:
        text = f"{target_token} {text}"

    # Calling the tokenizer (rather than .encode) also yields the attention
    # mask, which is forwarded to generate() along with the input ids.
    inputs = tokenizer(text, return_tensors="pt")
    output_ids = model.generate(
        **inputs,
        max_length=max_length,
        num_beams=num_beams,
        early_stopping=True,
    )

    # skip_special_tokens removes padding/EOS markers from the decoded text.
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)

# Demo: translate a single sentence between the two languages chosen below.
source_lang = "en"  # language code of the input text (English)
target_lang = "fr"  # language code of the desired output (French)
text_to_translate = "Hello, how are you?"

# The checkpoint name is derived from the source/target language pair.
model_name = f'Helsinki-NLP/opus-mt-{source_lang}-{target_lang}'
translation_model, translation_tokenizer = build_translation_model(model_name)

translated_text = translate_text(
    translation_model,
    translation_tokenizer,
    text_to_translate,
    source_lang,
    target_lang,
)

# Show the input next to its translation.
print(f"Source Text: {text_to_translate}")
print(f"Translated Text: {translated_text}")


Source Text: Hello, how are you?
Translated Text: Bonjour, comment allez-vous?
