## Tim's Pipeline

In [1]:
#imports
from transformers import AutoTokenizer, pipeline, AutoModelForQuestionAnswering, T5Tokenizer, T5ForConditionalGeneration

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Text generation -> Text Input: Fragen werden daraus generiert

textgen_model_name = "google/flan-t5-large"
textgen_model = T5ForConditionalGeneration.from_pretrained(textgen_model_name)
textgen_tokenizer = T5Tokenizer.from_pretrained(textgen_model_name)

#context = "There are two tenses in English – past and present. The past tense in English is used to talk about the past, about hypotheses and for politeness."
context = "Fußball-Club Bayern München e. V. (FCB, pronounced [ˈfuːsbalˌklʊp ˈbaɪɐn ˈmʏnçn̩] ⓘ), also known as FC Bayern (pronounced [ˌɛft͡seː ˈbaɪɐn] ⓘ), Bayern Munich, or simply Bayern, is a German professional sports club based in Munich, Bavaria. It is best known for its professional men's association football team, which plays in the Bundesliga, the top tier of the German football league system. Bayern is the most successful club in German football history, having won a record 33 national titles, including 11 consecutively since 2013, and 20 national cups, along with numerous European honours.  "

input_text = "Generate a question about the following text:" + context
input_ids = textgen_tokenizer(input_text, return_tensors="pt").input_ids

outputs = textgen_model.generate(
    input_ids,
    max_length=250,
    num_beams=5,
    early_stopping=True,
    no_repeat_ngram_size=5,
)
#print(tokenizer.decode(outputs[0]))

generated_question = textgen_tokenizer.decode(outputs[0])

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thouroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [3]:
qa_model_name = 'deepset/roberta-base-squad2'

qa_model = AutoModelForQuestionAnswering.from_pretrained(qa_model_name)
qa_tokenizer = AutoTokenizer.from_pretrained(qa_model_name)

qa_pipe = pipeline(
  'question-answering',
  qa_model=qa_model,
  qa_tokenizer=qa_tokenizer
)

qa_input = {
  'question': generated_question,
  'context': context
}

qa_res = qa_pipe(qa_input)

score = qa_res['score']
start = qa_res['start']
end = qa_res['end']
qa_true_answer = qa_res['answer']

#display(f'Score : {score}')
#display(f'Start : {start}')
#display(f'End   : {end}')
#display(f'Answer: {qa_true_answer}')

No model was supplied, defaulted to distilbert-base-cased-distilled-squad and revision 626af31 (https://huggingface.co/distilbert-base-cased-distilled-squad).
Using a pipeline without specifying a model name and revision in production is not recommended.


In [4]:
input_true_answer1 = "Generate a wrong answer for the question" + generated_question
input_ids = textgen_tokenizer(input_true_answer1, return_tensors="pt").input_ids

outputs2 = textgen_model.generate(
    input_ids,
    max_length=250,
    num_beams=4,
    early_stopping=True,
    no_repeat_ngram_size=5,
)

generated_false_answer1 = textgen_tokenizer.decode(outputs2[0])

# 2. Falsche Antwort

input_true_answer2 = "Generate a false answer for the following question:" + generated_question
input_ids = textgen_tokenizer(input_true_answer2, return_tensors="pt").input_ids

outputs3 = textgen_model.generate(
    input_ids,
    max_length=250,
    num_beams=4,
    early_stopping=True,
    no_repeat_ngram_size=5,
)

generated_false_answer2 = textgen_tokenizer.decode(outputs3[0])

# Eine Falsche Antwort wird nur sinnvoll generiert, wenn im Prompt als Kontext die Frage genommen wird. Allein passende Falsche Antworten zu der richtigen Antwort findet das Modell nicht.



In [5]:
# Clean und Output

clean_generated_question = generated_question.replace("<pad>", "").replace("</s>", "").strip()
clean_generated_false_answer1 = generated_false_answer1.replace("<pad>", "").replace("</s>", "").strip()
clean_generated_false_answer2 = generated_false_answer2.replace("<pad>", "").replace("</s>", "").strip()

print('Question 1:')
print(clean_generated_question)
print('Answers:')
print('- ' + qa_true_answer)
print('- ' + clean_generated_false_answer1)
print('- ' + clean_generated_false_answer2)


Quastion 1:
How many national titles has Bayern won?
Answers:
- 33
- ten
- three
