In [None]:
#|default_exp app

In [None]:
#|export
from fastai.text.all import *
import gradio as gr
from transformers import GPT2Tokenizer

In [None]:
#|export
import pathlib
# Alias the pathlib class that does not belong to the current OS to the one
# that does, so code referring to the "foreign" path class resolves to a usable one.
# NOTE(review): `platform` is not imported explicitly in this file; presumably it
# is provided by the star import above — confirm. The name `plt` may also shadow
# another `plt` coming from that star import — verify.
plt = platform.system()
if plt == 'Linux':
    # Running on Linux: treat WindowsPath as PosixPath.
    pathlib.WindowsPath = pathlib.PosixPath
elif plt == 'Windows':
    # Running on Windows: treat PosixPath as WindowsPath.
    pathlib.PosixPath = pathlib.WindowsPath

In [None]:
#|export
# Name of the pretrained checkpoint whose tokenizer is loaded below.
pretrained_weights = 'gpt2'
# Module-level GPT-2 tokenizer; `write_text` uses it to encode the prompt and
# decode the generated ids.
tokenizer = GPT2Tokenizer.from_pretrained(pretrained_weights)

In [None]:
# Exploration cell (not exported): a Path built with no arguments,
# used by the listing in the next cell.
path = Path()

In [None]:
# Exploration cell (not exported): list entries under `path` filtered by the
# '.pkl' extension — presumably to check that the exported learner files are present.
path.ls(file_exts='.pkl')

In [None]:
#|export
class SpacyTokenizerBig():
    """Modified tokenizer class to support a large vocabulary.

    Builds a blank spaCy pipeline for `lang`, sets its `max_length` to 10**10
    and registers every special token as a tokenizer special case.

    Parameters:
        lang: language code passed to `spacy.blank`.
        special_toks: special tokens to register; when None,
            `defaults.text_spec_tok` is used.
        buf_sz: batch size handed to the spaCy pipe when tokenizing.
    """
    def __init__(self, lang='en', special_toks=None, buf_sz=5000):
        import spacy
        from spacy.symbols import ORTH
        self.special_toks = ifnone(special_toks, defaults.text_spec_tok)
        blank_nlp = spacy.blank(lang)
        blank_nlp.max_length = 10**10
        # Each special token maps to itself as a single ORTH entry.
        for special in self.special_toks:
            blank_nlp.tokenizer.add_special_case(special, [{ORTH: special}])
        self.pipe = blank_nlp.pipe
        self.buf_sz = buf_sz

    def __call__(self, items):
        """Return a generator yielding, per item, the `text` of each token in its doc."""
        # Every item is converted with `str` before being piped in batches of `buf_sz`.
        docs = self.pipe(map(str, items), batch_size=self.buf_sz)
        return (L(doc).attrgot('text') for doc in docs)

In [None]:
#|export
class TransformersTokenizer(Transform):
    """Tokenizer class for working with GPT2.

    Wraps the tokenizer object passed to the constructor: `encodes` turns text
    into a tensor of token ids, `decodes` turns ids back into a `TitledStr`.
    """
    def __init__(self, tokenizer):
        self.tokenizer = tokenizer

    def encodes(self, x):
        # Tokenize first, then map the tokens to their ids.
        tokens = self.tokenizer.tokenize(x)
        token_ids = self.tokenizer.convert_tokens_to_ids(tokens)
        return tensor(token_ids)

    def decodes(self, x):
        # Move to CPU and convert to numpy before decoding.
        id_array = x.cpu().numpy()
        return TitledStr(self.tokenizer.decode(id_array))

In [None]:
#|export
class DropOutput(Callback):
    """Support class for GPT2 training.

    Replaces the learner's prediction with its first element.
    """
    def after_pred(self):
        first_output = self.pred[0]
        self.learn.pred = first_output

In [None]:
#|export
# Learners used by `write_text` for the 'AWD_LSTM' option
# (HP = 'Harry Potter Style', StK = 'Stephen King Style').
# NOTE(review): `load_learner` presumably unpickles these files, which would
# require the custom classes defined in this file to be available and the files
# to come from a trusted source — confirm.
learnHP = load_learner('WriterHP_model_10ep.pkl')
learnStK = load_learner('WriterStKng_model_10ep.pkl')

# Learners used by `write_text` for the 'GPT2' option; only their `.model` is used there.
learnHP_tr = load_learner('WriterHP_transf_model.pkl')
learnStK_tr = load_learner('WriterStKng_transf_model.pkl')

In [None]:
#|export
def write_text(text, neuronet, author, words_cnt):
    """Generate text according to the given parameters and return it.

    Parameters:
        text: prompt the generated text continues.
        neuronet: 'GPT2' or 'AWD_LSTM' — which family of model to use.
        author: 'Harry Potter Style' or 'Stephen King Style' — which
            fine-tuned model of that family to use.
        words_cnt: number of new tokens/words to generate; converted with `int`.

    Returns:
        The generated text as a string.

    Raises:
        ValueError: if `neuronet` or `author` is not one of the supported values.
    """
    words_cnt = int(words_cnt)

    # Validate up front: previously an unknown option fell through every branch
    # and surfaced as an UnboundLocalError.
    if neuronet not in ('GPT2', 'AWD_LSTM'):
        raise ValueError(f"Unknown neuronet: {neuronet!r}")
    if author not in ('Harry Potter Style', 'Stephen King Style'):
        raise ValueError(f"Unknown author: {author!r}")
    is_hp = author == 'Harry Potter Style'

    if neuronet == 'GPT2':
        learner = learnHP_tr if is_hp else learnStK_tr
        prompt_ids = tokenizer.encode(text)
        inp = tensor(prompt_ids)[None]  # add a leading batch dimension
        # `max_length` counts the prompt tokens too, so add the prompt length to
        # make `words_cnt` mean *new* tokens (as the UI labels it and as the
        # AWD_LSTM branch already behaves).
        preds = learner.model.generate(inp, max_length=inp.shape[1] + words_cnt,
                                       repetition_penalty=6.0, temperature=1.5,
                                       no_repeat_ngram_size=2, do_sample=True,
                                       top_k=5, top_p=0.95)
        # `.cpu()` keeps decoding working if the output is not on the CPU,
        # matching TransformersTokenizer.decodes.
        return tokenizer.decode(preds[0].cpu().numpy())

    learner = learnHP if is_hp else learnStK
    return learner.predict(text, words_cnt, temperature=1)

In [None]:
#|export
# Gradio components; the input order below must match the parameter order of
# `write_text` (text, neuronet, author, words_cnt).
input_text = gr.components.Textbox(value='It was a bright day.', label='Input text')
# Output textbox that receives the string returned by `write_text`.
label = gr.components.Textbox()
wordcount_num = gr.components.Number(value=40, label='New tokens count')
# Choice strings must match the values `write_text` compares against.
neuro_radio = gr.components.Radio(choices=["AWD_LSTM", "GPT2"], value="GPT2", label='NeuroNet Type')

author_radio = gr.components.Radio(choices=["Harry Potter Style", "Stephen King Style"],
                                   value="Harry Potter Style", label='Author')

intface = gr.Interface(fn=write_text, inputs=[input_text, neuro_radio, author_radio, wordcount_num], outputs=label)
# NOTE(review): server_name='0.0.0.0' presumably exposes the app on all network
# interfaces, and port 12250 is hard-coded — confirm both suit the deployment target.
intface.launch(inline=False, server_name='0.0.0.0', server_port=12250)

In [None]:
# Not exported: shuts down the Gradio app(s) started from this notebook session.
gr.close_all()

In [None]:
# Export the _APP.ipynb notebook for upload to Hugging Face
from nbdev.export import nb_export
nb_export('Writer_APP.ipynb','./')