In [1]:
from transformers import GPT2Tokenizer, GPT2LMHeadModel, pipeline
import re
from googletrans import Translator
from google_drive_downloader import GoogleDriveDownloader as gdd
import zipfile
import os
from ipywidgets import widgets, VBox, Layout

replay_point = True

alphabets= "([A-Za-z])"
prefixes = "(Mr|St|Mrs|Ms|Dr)[.]"
suffixes = "(Inc|Ltd|Jr|Sr|Co)"
starters = "(Mr|Mrs|Ms|Dr|He\s|She\s|It\s|They\s|Their\s|Our\s|We\s|But\s|However\s|That\s|This\s|Wherever)"
acronyms = "([A-Z][.][A-Z][.](?:[A-Z][.])?)"
websites = "[.](com|net|org|io|gov)"


def split_into_sentences(text):
    text = " " + text + "  "
    text = text.replace("\n"," ")
    text = re.sub(prefixes,"\\1<prd>",text)
    text = re.sub(websites,"<prd>\\1",text)
    if "Ph.D" in text: text = text.replace("Ph.D.","Ph<prd>D<prd>")
    text = re.sub("\s" + alphabets + "[.] "," \\1<prd> ",text)
    text = re.sub(acronyms+" "+starters,"\\1<stop> \\2",text)
    text = re.sub(alphabets + "[.]" + alphabets + "[.]" + alphabets + "[.]","\\1<prd>\\2<prd>\\3<prd>",text)
    text = re.sub(alphabets + "[.]" + alphabets + "[.]","\\1<prd>\\2<prd>",text)
    text = re.sub(" "+suffixes+"[.] "+starters," \\1<stop> \\2",text)
    text = re.sub(" "+suffixes+"[.]"," \\1<prd>",text)
    text = re.sub(" " + alphabets + "[.]"," \\1<prd>",text)
    if "”" in text: text = text.replace(".”","”.")
    if "\"" in text: text = text.replace(".\"","\".")
    if "!" in text: text = text.replace("!\"","\"!")
    if "?" in text: text = text.replace("?\"","\"?")
    text = text.replace(".",".<stop>")
    text = text.replace("?","?<stop>")
    text = text.replace("!","!<stop>")
    text = text.replace("<prd>",".")
    sentences = text.split("<stop>")
    sentences = sentences[:-1]
    sentences = [s.strip() for s in sentences]
    return sentences

if not os.path.exists('./model'):
    gdd.download_file_from_google_drive(file_id='1BUPm3HPbfceqtME2Ojs0CIaI3wMhbucI',
                                        dest_path='./model/gpt.zip')
    with zipfile.ZipFile('./model/gpt.zip', 'r') as zip_ref:
        zip_ref.extractall('./model')

model = GPT2LMHeadModel.from_pretrained('./model/gpt2-tuned_with_wishes')
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

output_text = widgets.Textarea(layout=Layout(width='600px', height='200px'))
output_text.value = 'С Новым Годом от ИИ!'

button = widgets.Button(
                        description='Поздравляшки',
                        disabled=False,
                        button_style='success', # 'success', 'info', 'warning', 'danger' or ''
                        tooltip='Получить поздравление',
                        icon='check' # (FontAwesome names without the `fa-` prefix)
)
def prediction(change):
    start = r'Happy New Year!'
    text = pipeline('text-generation', model=model, tokenizer=tokenizer)(start,
                                                                         max_length=150)[0]
    congratulation = ' '.join(split_into_sentences(text['generated_text'])[1:-1])
    
    translator = Translator()
    ru_congrats = translator.translate(congratulation, dest='ru').text
    output_text.value = ru_congrats
    
button.on_click(prediction)
VBox([widgets.Label('Поздравления для всех!'), button, output_text])

VBox(children=(Label(value='Поздравления для всех!'), Button(button_style='success', description='Поздравляшки…

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
