In [1]:
#!/usr/bin/env python
# coding: utf-8


from transformers import pipeline, Conversation
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from transformers import AutoModelForQuestionAnswering
import mtranslate
import nltk.tokenize as nt
import nltk
import config
    


def initiate_conversational_model():
    """Load the BlenderBot-3B tokenizer and seq2seq model, preferring a local copy.

    The checkpoint is first looked up in ``./1-blenderbot-3B``. If that load
    fails, it is loaded from the ``facebook/blenderbot-3B`` identifier and
    saved to the local directory so the next run can load from disk.

    Returns:
        tuple: ``(tokenizer, model)``.
    """
    local_dir = "./1-blenderbot-3B"
    hub_id = "facebook/blenderbot-3B"

    try:
        tokenizer = AutoTokenizer.from_pretrained(local_dir)
        # Same Auto class as the hub path below; the previous `AutoModel` was
        # never imported, so the local copy could never be used.
        model = AutoModelForSeq2SeqLM.from_pretrained(local_dir)
    except (OSError, ValueError):
        # Presumably how transformers reports a missing/unloadable local
        # checkpoint (OSError, or a ValueError subclass for an invalid
        # identifier) -- TODO confirm against the installed version.
        # Unrelated errors (NameError, KeyboardInterrupt, ...) now propagate.
        tokenizer = AutoTokenizer.from_pretrained(hub_id)
        model = AutoModelForSeq2SeqLM.from_pretrained(hub_id)

        tokenizer.save_pretrained(local_dir)
        model.save_pretrained(local_dir)
    return tokenizer, model


#Initiate question answering model

def initiate_answering_model():
    """Load the RoBERTa SQuAD2 tokenizer and question-answering model.

    The checkpoint is first looked up in ``./2-roberta-base-squad2``. If that
    load fails, it is loaded from the ``deepset/roberta-base-squad2``
    identifier and saved to the local directory for later runs.

    Returns:
        tuple: ``(tokenizer1, model1)``.
    """
    local_dir = "./2-roberta-base-squad2"
    hub_id = "deepset/roberta-base-squad2"

    try:
        tokenizer1 = AutoTokenizer.from_pretrained(local_dir)
        # Same Auto class as the hub path below; the previous `AutoModel` was
        # never imported, so the local copy could never be used.
        model1 = AutoModelForQuestionAnswering.from_pretrained(local_dir)
    except (OSError, ValueError):
        # Presumably how transformers reports a missing/unloadable local
        # checkpoint -- TODO confirm against the installed version.
        # Unrelated errors now propagate instead of triggering a download.
        tokenizer1 = AutoTokenizer.from_pretrained(hub_id)
        model1 = AutoModelForQuestionAnswering.from_pretrained(hub_id)

        tokenizer1.save_pretrained(local_dir)
        model1.save_pretrained(local_dir)

    return tokenizer1, model1


# Initiate paraphrasing model

def initiate_pharaphrasing_model():
    """Load the Pegasus paraphrase tokenizer and seq2seq model.

    The checkpoint is first looked up in ``./3-pegasus_paraphrase``. If that
    load fails, it is loaded from the ``tuner007/pegasus_paraphrase``
    identifier and saved to the local directory for later runs.

    Returns:
        tuple: ``(tokenizer2, model2)``.
    """
    local_dir = "./3-pegasus_paraphrase"
    hub_id = "tuner007/pegasus_paraphrase"

    try:
        # use_fast=False on both paths so the local load requests the same
        # tokenizer flavour as the hub load that produced the saved files.
        tokenizer2 = AutoTokenizer.from_pretrained(local_dir, use_fast=False)
        # Same Auto class as the hub path below; the previous `AutoModel` was
        # never imported, so the local copy could never be used.
        model2 = AutoModelForSeq2SeqLM.from_pretrained(local_dir)
    except (OSError, ValueError):
        # Presumably how transformers reports a missing/unloadable local
        # checkpoint -- TODO confirm against the installed version.
        # Unrelated errors now propagate instead of triggering a download.
        tokenizer2 = AutoTokenizer.from_pretrained(hub_id, use_fast=False)
        model2 = AutoModelForSeq2SeqLM.from_pretrained(hub_id)

        tokenizer2.save_pretrained(local_dir)
        model2.save_pretrained(local_dir)

    return tokenizer2, model2

def initiate_translation_es_en_model():
    """Load the Helsinki-NLP ``opus-mt-es-en`` tokenizer and seq2seq model.

    The checkpoint is first looked up in ``./4-helsinki-NLP-es-en``. If that
    load fails, it is loaded from the ``Helsinki-NLP/opus-mt-es-en``
    identifier and saved to the local directory for later runs.

    Returns:
        tuple: ``(es_en_tokenizer, es_en_model)``.
    """
    local_dir = "./4-helsinki-NLP-es-en"
    hub_id = "Helsinki-NLP/opus-mt-es-en"

    try:
        es_en_tokenizer = AutoTokenizer.from_pretrained(local_dir)
        # Same Auto class as the hub path below; the previous `AutoModel` was
        # never imported, so the local copy could never be used.
        es_en_model = AutoModelForSeq2SeqLM.from_pretrained(local_dir)
    except (OSError, ValueError):
        # Presumably how transformers reports a missing/unloadable local
        # checkpoint -- TODO confirm against the installed version.
        # Unrelated errors now propagate instead of triggering a download.
        es_en_tokenizer = AutoTokenizer.from_pretrained(hub_id)
        es_en_model = AutoModelForSeq2SeqLM.from_pretrained(hub_id)

        es_en_tokenizer.save_pretrained(local_dir)
        es_en_model.save_pretrained(local_dir)

    return es_en_tokenizer, es_en_model

def initiate_translation_en_es_model():
    """Load the Helsinki-NLP ``opus-mt-en-es`` tokenizer and seq2seq model.

    The checkpoint is first looked up in ``./5-helsinki-NLP-en-es``. If that
    load fails, it is loaded from the ``Helsinki-NLP/opus-mt-en-es``
    identifier and saved to the local directory for later runs.

    Returns:
        tuple: ``(en_es_tokenizer, en_es_model)``.
    """
    local_dir = "./5-helsinki-NLP-en-es"
    hub_id = "Helsinki-NLP/opus-mt-en-es"

    try:
        en_es_tokenizer = AutoTokenizer.from_pretrained(local_dir)
        # Same Auto class as the hub path below; the previous `AutoModel` was
        # never imported, so the local copy could never be used.
        en_es_model = AutoModelForSeq2SeqLM.from_pretrained(local_dir)
    except (OSError, ValueError):
        # Presumably how transformers reports a missing/unloadable local
        # checkpoint -- TODO confirm against the installed version.
        # Unrelated errors now propagate instead of triggering a download.
        en_es_tokenizer = AutoTokenizer.from_pretrained(hub_id)
        en_es_model = AutoModelForSeq2SeqLM.from_pretrained(hub_id)

        en_es_tokenizer.save_pretrained(local_dir)
        en_es_model.save_pretrained(local_dir)

    return en_es_tokenizer, en_es_model

def translate_es_en(text):
    """Run ``text`` through the module-level ``es_en_tokenizer`` / ``es_en_model`` pair.

    Args:
        text: Input handed to the tokenizer as-is.

    Returns:
        Whatever ``es_en_tokenizer.batch_decode`` returns for the generated
        ids (presumably a list of decoded strings, not a single string).
    """
    model_inputs = es_en_tokenizer(text, return_tensors="pt")
    generated_ids = es_en_model.generate(**model_inputs)
    return es_en_tokenizer.batch_decode(generated_ids, skip_special_tokens=True)

def translate_es_en_gt(text):
    """Translate Spanish ``text`` to English via ``mtranslate``.

    Returns:
        The value returned by ``mtranslate.translate``.
    """
    # mtranslate.translate takes (text, to_language, from_language): the
    # target language comes first. The previous ("es", "en") order translated
    # English to Spanish, the opposite of what this function's name promises.
    return mtranslate.translate(text, "en", "es")
    
def translate_en_es_gt(text):
    """Translate English ``text`` to Spanish via ``mtranslate``.

    Returns:
        The value returned by ``mtranslate.translate``.
    """
    # mtranslate.translate takes (text, to_language, from_language): the
    # target language comes first. The previous ("en", "es") order translated
    # Spanish to English, the opposite of what this function's name promises.
    return mtranslate.translate(text, "es", "en")


def translate_en_es(text):
    """Run ``text`` through the module-level ``en_es_tokenizer`` / ``en_es_model`` pair.

    Args:
        text: Input handed to the tokenizer as-is.

    Returns:
        Whatever ``en_es_tokenizer.batch_decode`` returns for the generated
        ids (presumably a list of decoded strings, not a single string).
    """
    model_inputs = en_es_tokenizer(text, return_tensors="pt")
    generated_ids = en_es_model.generate(**model_inputs)
    return en_es_tokenizer.batch_decode(generated_ids, skip_special_tokens=True)

def create_conversational_pipeline(model, tokenizer):
    """Build a transformers ``"conversational"`` pipeline from ``model`` and ``tokenizer``."""
    return pipeline("conversational", model=model, tokenizer=tokenizer)


def create_answering_pipeline(model1, tokenizer1):
    """Build a transformers ``"question-answering"`` pipeline from ``model1`` and ``tokenizer1``."""
    return pipeline("question-answering", model=model1, tokenizer=tokenizer1)


# Module-level placeholder; nothing in the visible code reads it.
response = ""


def get_contextual(question, question_answering):
    """Ask the question-answering callable about ``config.context``.

    Args:
        question: Question text forwarded as the ``question`` keyword.
        question_answering: Callable invoked with ``question=`` and
            ``context=``; its result is indexed by ``'answer'`` and ``'score'``.

    Returns:
        tuple: ``(answer, score)`` taken from the callable's result. Both
        values are also printed.
    """
    qa_output = question_answering(question=question, context=config.context)
    best_answer = qa_output['answer']
    confidence = qa_output['score']

    print("Answer:", best_answer)
    print("Score:", confidence)

    return best_answer, confidence



# Module-level placeholder; nothing in the visible code reads it.
sentence=""
def tokenize_question(question):
    """Collect the verb and noun tokens of the first sentence of ``question``.

    The text is split into sentences and POS-tagged with NLTK. Only the first
    sentence is scanned, and tokens whose tag contains ``VB`` or ``NN`` are
    kept. When the text contains a ``?``, the first kept token is dropped so
    the paraphraser is not steered into producing another question.

    Args:
        question: Text to analyse.

    Returns:
        str: The kept tokens, each followed by a single space (so the result
        has a trailing space), or ``""`` when nothing is kept or the text
        yields no sentences.
    """
    sentences = nt.sent_tokenize(question)
    if not sentences:
        # Empty input yields no sentences; indexing the first one below would
        # raise IndexError.
        return ""

    pos_sentences = [nltk.pos_tag(nt.word_tokenize(sent)) for sent in sentences]
    print(pos_sentences)

    # Keep verbs and nouns of the first sentence only.
    useful_tokens = []
    for item in pos_sentences[0]:
        print(item)
        word, tag = item
        if 'VB' in tag or 'NN' in tag:
            useful_tokens.append(word)

    # The leading kept token must be ignored for questions (typically the
    # auxiliary verb); keeping it would make the paraphraser build a question.
    if "?" in question and useful_tokens:
        del useful_tokens[0]

    return "".join(word + " " for word in useful_tokens)


def get_conversation(question, tokenizer, model):
    """Generate one reply to ``question`` with the given tokenizer/model pair.

    Args:
        question: Text wrapped in a one-element list before tokenizing.
        tokenizer: Callable tokenizer that also provides ``batch_decode``.
        model: Object whose ``generate`` accepts the tokenizer output as
            keyword arguments.

    Returns:
        The first element of the decoded generation.
    """
    encoded = tokenizer([question], return_tensors="pt")
    generated_ids = model.generate(**encoded)
    replies = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
    return replies[0]


def get_text_to_pharaphrase(init, sentence, response):
    """Assemble the sentence handed to the paraphrasing model.

    Args:
        init: Leading word(s), e.g. ``"I "`` or ``"My "``.
        sentence: Tokens extracted from the question.
        response: Answer text to append.

    Returns:
        str: ``init``, ``sentence`` and ``response`` separated by single
        spaces and terminated with a period. Whitespace already present in the
        parts is kept as-is.
    """
    # A stray "¡" before `return` used to make this whole cell a SyntaxError.
    return " ".join((init, sentence, response)) + "."

# Module-level placeholder; the functions here use their own local/parameter
# of the same name instead.
text_to_pharaphrase = ""


def get_pharaphrased(text_to_pharaphrase, tokenizer2, model2):
    """Paraphrase ``text_to_pharaphrase`` and return the first candidate.

    Args:
        text_to_pharaphrase: Text to rewrite.
        tokenizer2: Callable tokenizer that also provides ``batch_decode``.
        model2: Object whose ``generate`` accepts the tokenizer output as
            keyword arguments.

    Returns:
        The first element of the decoded generation.
    """
    encoded = tokenizer2(
        text_to_pharaphrase,
        truncation=True,
        padding="longest",
        return_tensors="pt",
    )
    generated_ids = model2.generate(**encoded)
    candidates = tokenizer2.batch_decode(generated_ids, skip_special_tokens=True)
    return candidates[0]

def _extract_question(text):
    """Return the sentence of ``text`` ending at its last '?', or the stripped text if it has none."""
    end = text.rfind("?")
    if end == -1:
        # No question mark: keep the text intact. The old slice silently
        # dropped the final character in this case.
        return text.strip()
    # Start after the last period that precedes the question mark (0 if none),
    # so earlier sentences and anything after the '?' are left out.
    start = text.rfind(".", 0, end) + 1
    return text[start:end].strip() + "?"


def generate_response(question, pipeline, min_score=0.15):
    """Answer ``question`` from the configured context, else fall back to chat.

    The question sentence is extracted from the received text and sent to the
    module-level ``question_answering`` pipeline via ``get_contextual``. If
    the score is below ``min_score``, the conversational pipeline produces the
    reply. Otherwise the contextual answer is combined with the tokens of the
    question and rewritten by the paraphrasing model (module-level
    ``tokenizer2`` / ``model2``).

    Args:
        question: Full text received from the user.
        pipeline: Conversation object passed to
            ``get_conversational_pipeline``. NOTE: inside this function the
            name shadows the imported transformers ``pipeline`` factory.
        min_score: Minimum contextual score required to use the contextual
            answer. Defaults to the previously hard-coded 0.15.

    Returns:
        The reply: either the last generated conversational response or the
        paraphrased contextual answer.
    """
    question_only = _extract_question(question)
    print(question_only)

    # If the contextual answer is not good enough, use the conversational model.
    answer, score = get_contextual(question_only, question_answering)
    if score < min_score:
        conversation = get_conversational_pipeline(pipeline, question)
        return conversation.generated_responses[-1]

    # NOTE(review): tokens and the "your" check still use the full received
    # text, as before; tokenize_question only scans its first sentence, so a
    # preamble sentence will hide the question's tokens -- confirm intent.
    tokens = tokenize_question(question)
    init = "My " if "your" in question else "I "

    text_to_pharaphrase = get_text_to_pharaphrase(init, tokens, answer)
    answer = get_pharaphrased(text_to_pharaphrase, tokenizer2, model2)
    print(answer)

    return answer


In [2]:
def initiate_conversational_pipeline(pipeline_name):
    """Create a new ``Conversation`` seeded with ``"."``.

    Args:
        pipeline_name: Ignored; whatever is passed in is discarded.

    Returns:
        The newly created ``Conversation``.
    """
    return Conversation(".")

def get_conversational_pipeline(pipeline_name, text):
    """Add ``text`` as user input to the conversation and run the chat pipeline.

    Args:
        pipeline_name: Conversation object; it is mutated through
            ``add_user_input(text, overwrite=True)``.
        text: The user's message.

    Returns:
        The result of calling the module-level ``conversational_pipeline`` on
        the conversation.
    """
    pipeline_name.add_user_input(text, overwrite=True)
    return conversational_pipeline(pipeline_name)

In [None]:
# Initiate conversational model: binds the module-level `tokenizer` and `model`
# that are passed to create_conversational_pipeline() in a later cell.
tokenizer, model = initiate_conversational_model()

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [None]:
# Build the chat pipeline; get_conversational_pipeline() reads this global by name.
conversational_pipeline= create_conversational_pipeline(model, tokenizer)

In [3]:
# Initiate answering model: binds `tokenizer1` and `model1`, which are passed
# to create_answering_pipeline() in a later cell.
tokenizer1, model1 = initiate_answering_model()

In [7]:
# Build the question-answering pipeline; generate_response() reads this global by name.
question_answering= create_answering_pipeline(model1, tokenizer1)

In [4]:
# Initiate paraphrasing model: generate_response() reads `tokenizer2` and
# `model2` as globals when it calls get_pharaphrased().
tokenizer2, model2 = initiate_pharaphrasing_model()

In [5]:
# Initiating translation models: translate_es_en() reads `es_en_tokenizer` and
# `es_en_model` as globals.
es_en_tokenizer, es_en_model= initiate_translation_es_en_model()

In [6]:
# translate_en_es() reads `en_es_tokenizer` and `en_es_model` as globals.
en_es_tokenizer, en_es_model= initiate_translation_en_es_model()