In [4]:
from transformers import pipeline
pipe = pipeline("text2text-generation", "lmqg/mbart-large-cc25-frquad-qg")
output = pipe("Créateur » (Maker), lui aussi au singulier, « <hl> le Suprême Berger <hl> » (The Great Shepherd) ; de l'autre, des réminiscences de la théologie de l'Antiquité : le tonnerre, voix de Jupiter, « Et souvent ta voix gronde en un tonnerre terrifiant », etc.")


In [3]:
import os
import nltk
import gradio as gr
import torch

from pytorch_lightning import LightningModule
from transformers import MT5ForConditionalGeneration, AutoTokenizer, pipeline


In [7]:
class MT5(LightningModule):
    """
    Google MT5 transformer class.
    """

    def __init__(self, model_name_or_path: str = None):
        """
        Initialize module.

        :param model_name_or_path: model name
        """

        super().__init__()

        # Load model and tokenizer
        self.save_hyperparameters()
        self.model = pipeline("text2text-generation", "lmqg/mbart-large-cc25-frquad-qg")
        # self.tokenizer = AutoTokenizer.from_pretrained(model_name_or_path,
        #                                                use_fast=True) if model_name_or_path is not None else None

    def forward(self, **inputs):
        """
        Forward inputs.

        :param inputs: dictionary of inputs (input_ids, attention_mask, labels)
        """

        return self.model(**inputs)


    def ae(self, batch: list[str], max_length: int = 512, **kwargs):
        """
        Answer extraction prediction.

        :param batch: list of context
        :param max_length: max length of output
        """

        # Transform inputs
        sentences = [f"extract: {context}" for context in batch]

        # Predict
        answers =[self.model(sentence)[0]['generated_text'] for sentence in sentences]

        return answers


    def qg(self, batch: list[str], max_length: int = 512, **kwargs):
        """
        Question generation prediction.

        :param batch: batch of context with highlighted elements
        :param max_length: max length of output
        """

        # Transform inputs
        sentences = [f"generate: {context}" for context in batch]

        # Predict

        questions = [self.model(sentence)[0]['generated_text'] for sentence in sentences]
        
        return questions


    def qa(self, batch: list[dict], max_length: int = 512, **kwargs):
        """
        Question answering prediction.

        :param batch: batch of dict {question: q, context: c}
        :param max_length: max length of output
        """

        # Transform inputs
        sentences = [f"question: {context['question']}  context: {context['context']}" for context in batch]

        # Predict
        predictions = [self.model(sentence)[0]['generated_text'] for sentence in sentences]

        return predictions


    def multitask(self, batch: list[str], max_length: int = 512, **kwargs):
        """
        Answer extraction + question generation + question answering.

        :param batch: list of context
        :param max_length: max length of outputs
        """

        # Build output dict
        dict_batch = {'context': [context for context in batch], 'answers': [], 'questions': [], 'answers_bis': []}

        # Iterate over context
        for context in batch:
            answers = self.ae(batch=[context], max_length=max_length, **kwargs)[0]
            answers = answers.split('<sep>')
            answers = [ans.strip() for ans in answers if ans != ' ']
            dict_batch['answers'].append(answers)
            for_qg = [f"{context.replace(ans, f'<hl> {ans} <hl> ')}" for ans in answers]
            questions = self.qg(batch=for_qg, max_length=max_length, **kwargs)
            dict_batch['questions'].append(questions)
            new_answers = self.qa([{'context': context, 'question': question} for question in questions],
                                  max_length=max_length, **kwargs)
            dict_batch['answers_bis'].append(new_answers)
        return dict_batch



In [3]:
%load_ext gradio

In [29]:
%%blocks
# coding:utf-8
"""
Filename: mt5.py
Author: @DvdNss

Created on 12/30/2021
"""

import os
import nltk
import gradio as gr
import torch

from typing import list

from pytorch_lightning import LightningModule
from transformers import MT5ForConditionalGeneration, AutoTokenizer, pipeline

# Load nltk punkt tokenizer
nltk.download('punkt')

with gr.Row():
    with gr.Column():
    # Define input
        context = gr.Textbox(lines=5, label="Context")

    with gr.Column():
        qa = gr.Textbox(lines=5, label="QA pair")

gen_btn = gr.Button(label="Generate", variant="primary")

def generate(context):
    model = MT5(model_name_or_path="google/mt5-small")
    qa = model.multitask([context])
    return context

gen_btn.click(
    fn=generate,
    inputs=context,
    outputs=qa)

NameError: name 'List' is not defined

In [8]:
model = MT5(model_name_or_path=None)

ctxt = str("Trainor wrote the song with Justin Weaver and Caitlyn Smith, and produced it with Chris Gelbuda. Epic Records released it as the album's fourth single on June 23, 2015.")

sentences = nltk.sent_tokenize(ctxt)

out = model.ae([sentences[0]], max_length=32)

out

['Qui est le père de Fielding?']

In [27]:
from transformers import pipeline

pipe = pipeline("text2text-generation", "lmqg/mbart-large-cc25-frquad-qg")
output = pipe("What if I change the context?")

In [33]:
sent_tokenized

['Trainor wrote the song with Justin Weaver and Caitlyn Smith, and produced it with Chris Gelbuda.',
 "Epic Records released it as the album's fourth single on June 23, 2015."]

In [12]:
from transformers import pipeline

pipe = pipeline("text2text-generation", "lmqg/mt5-base-frquad-qg-ae")

# answer extraction
answer = pipe("extract answers: Créateur » (Maker), lui aussi au singulier, « <hl> le Suprême Berger <hl> » (The Great Shepherd) ; de l'autre, des réminiscences de la théologie de l'Antiquité : le tonnerre, voix de Jupiter, « Et souvent ta voix gronde en un tonnerre terrifiant », etc.")

# question generation
question = pipe("generate question: Créateur » (Maker), lui aussi au singulier, « <hl> le Suprême Berger <hl> » (The Great Shepherd) ; de l'autre, des réminiscences de la théologie de l'Antiquité : le tonnerre, voix de Jupiter, « Et souvent ta voix gronde en un tonnerre terrifiant », etc.")



In [14]:
# Try the OpenAI way, it's 2 cents for 750 words...

[{'generated_text': 'Quel est le nom du personnage qui a écrit The Great Scottish ?'}]