In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

In [2]:
# Prefer the first CUDA GPU when one is available; otherwise run on the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda', index=0)

In [3]:
# Load the tokenizer that belongs to the microsoft/DialoGPT-large checkpoint.
tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-large")

In [4]:
# Load the DialoGPT-large causal language model and move it to `device`.
# The trailing semicolon keeps the notebook from echoing the module returned by `.to()`.
model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-large")
model.to(device);

In [5]:
# Total number of elements across all parameter tensors of the model.
sum(p.numel() for p in model.parameters())

774030080

### Генерация с дефолтными параметрами
Официальный скрипт (или набор параметров) для декодирования отсутствует по [следующей причине](https://github.com/microsoft/DialoGPT#model-decoding):  
>We note that even with properly filtered Reddit dataset, sometimes our model can still generate moderately toxic/inappropriate responses. Due to this reason, we are unable to provide the decoding script at this time (The live demo and decoding script access is upon invitation only now ). We are currently still working on a controlled decoding method to prevent this system from toxic generation. Please stay tuned.  

Я взял параметры из [демки](https://huggingface.co/microsoft/DialoGPT-large)

In [6]:
def decode_answer(model_output, input_text_len):
    """Decode the part of a generated sequence that follows the prompt.

    Args:
        model_output: One generated sequence of token ids (prompt followed by
            the continuation); it is sliced along its first axis.
        input_text_len: Number of leading token ids to drop before decoding.

    Returns:
        The decoded text of the remaining ids, with special tokens removed.
    """
    generated_ids = model_output[input_text_len:]
    return tokenizer.decode(generated_ids, skip_special_tokens=True)


def print_samples(question, top_p=None, top_k=None, temperature=None, num_beams=1,
            repetition_penalty=None, length_penalty=None, no_repeat_ngram_size=None, num_return_sequences=1, repeat=1,
            do_sample=False):
    """Generate replies to ``question`` and print them, longest first.

    The question is terminated with the tokenizer's EOS token, encoded, and
    passed to ``model.generate``. Each returned sequence is stripped of the
    prompt, decoded, and printed followed by a ``**********`` separator line.

    Args:
        question: The user utterance to answer.
        top_p, top_k, temperature: Sampling controls forwarded unchanged to
            ``model.generate``. They are sampling options, so they are only
            expected to matter when ``do_sample`` is true.
        num_beams: Beam count forwarded to ``model.generate``.
        repetition_penalty, length_penalty, no_repeat_ngram_size: Forwarded
            unchanged to ``model.generate``.
        num_return_sequences: Number of sequences requested per generate call.
        repeat: How many times to run generation and print the results.
        do_sample: Whether to sample instead of decoding deterministically.
            Defaults to ``False``, which matches the previously hard-coded
            behaviour, so existing calls are unaffected.

    Returns:
        None. The answers are written to stdout.
    """
    input_ids = tokenizer.encode(question + tokenizer.eos_token, return_tensors='pt').to(device)
    # Full prompt length, *including* its trailing EOS, so the decoded answer
    # starts at the first generated token instead of relying on
    # skip_special_tokens to hide a leftover prompt EOS.
    prompt_len = input_ids.shape[-1]
    for _ in range(repeat):
        sample_outputs = model.generate(
            input_ids,
            do_sample=do_sample,
            early_stopping=False,
            max_length=1000,
            temperature=temperature,
            top_k=top_k,
            top_p=top_p,
            num_beams=num_beams,
            repetition_penalty=repetition_penalty,
            length_penalty=length_penalty,
            no_repeat_ngram_size=no_repeat_ngram_size,
            num_return_sequences=num_return_sequences,
            # The tokenizer's EOS id doubles as the padding id here.
            pad_token_id=tokenizer.eos_token_id
        )
        # Longest answers first.
        answers = sorted(
            (decode_answer(sequence, prompt_len) for sequence in sample_outputs),
            key=len,
            reverse=True,
        )
        for answer in answers:
            print(answer)
            print('**********\n')

### Примеры из репозитория
https://github.com/microsoft/DialoGPT  
С дефолтными параметрами ответы почему-то отличаются от приведённых в репозитории. Скорее всего, модель с тех пор дообучили.

In [7]:
print_samples('Does money buy happiness?')

It does if you're a billionaire.
**********



In [8]:
# Leaving out the question mark sends the model badly off track
print_samples('who is the first president of the United States')

I think it's a reference to the movie The Office.
**********



In [9]:
print_samples('who is the first president of the United States?')

George Washington
**********



In [10]:
print_samples('what is the boiling point of water?')

It's a unit of measurement.
**********



In [11]:
print_samples('which one is bigger, sun or moon?')

Sun, but it's not even close.
**********



In [12]:
print_samples('which animal has black and white stripes?')

The one with the black and white stripes.
**********



In [13]:
print_samples('what is the meaning of life ?')

To be a good boy.
**********



In [14]:
print_samples('who won the world cup in 2018 ?')

England, but they were the only team to win it in the last 20 years.
**********



In [15]:
print_samples("""Nvidia's Titan RTX is really good .""")

I'm not a fan of the way the game looks, but the gameplay is great.
**********



In [16]:
print_samples("""Nvidia's Titan RTX is really good.""")

I'm not sure if I should be excited or scared.
**********



In [17]:
print_samples("""Can Porsche beat Tesla with its new Taycan EV ?""")

I think it's possible, but I don't think it's likely.
**********



In [18]:
print_samples("""Can Porsche beat Tesla with its new Taycan EV?""")

I think it's a pretty good car.
**********



In [19]:
print_samples("""What do you think of the DialoGPT repo ?""")

I like it. It's a good one.
**********



In [20]:
print_samples("""What do you think of the DialoGPT repo?""")

I like it. It's a good one.
**********



### Примеры из AskReddit

In [21]:
print_samples("""What's the grossest thing you've ever tasted?""")

I don't know, but I know I've had it.
**********



In [22]:
print_samples("""If you could have 1 superpower what would it be and why?""")

I would have to say I would be a ninja. I would be a ninja.
**********



In [23]:
print_samples("""Is there anything you should be doing right now, but you're wasting your time on reddit instead? If so, what is it?""")

I'm not wasting my time on this sub. I'm just bored.
**********



In [24]:
print_samples("""What is the scariest/creepiest theory you know about?""")

I don't know, but I know it's a theory.
**********



In [25]:
print_samples("""In the John Wick universe, assassins are shown to be everyone and everywhere, thus indicating a buyer's market. What limited time deals and offers do assassins provide to out price the competition? How does the economics of the world operate with so many trained killers?""")

I think the point is that the assassins are not the only ones who are trained to murder.
**********



In [26]:
print_samples("""What is you favorite sex scene in the Bible?""")

I don't know, but I know it's the one where Jesus is crucified.
**********



In [27]:
print_samples("""What are underrated websites and what do you use them for?""")

I use them for the same thing as above.
**********



In [28]:
print_samples("""Why do you hate the people that you hate?""")

Because they're not the same people.
**********



In [29]:
print_samples("""People who used to work in photo labs before digital cameras, what weird stuff did you develop?""")

I used to work in a photo lab. I used to work in a photo lab.
**********



In [30]:
print_samples("""To the people who check behind the shower curtain before using the washroom; what’s the next plan if you see someone?""")

I don't know what you mean by that.
**********



In [31]:
print_samples("""If you came with a warning label, what would it be?""")

**********



In [32]:
print_samples("""What is the best decision you've made while drunk?""")

I don't know, but I'm sure it's not the best decision I've made while sober.
**********



In [33]:
print_samples("""What’s one rule you live by?""")

I don't know. I don't know. I don't know. I don't know.
**********

