In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

In [2]:
# Use the first CUDA GPU when torch reports one as available; otherwise run on the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda', index=0)

In [3]:
# Load the tokenizer for the "microsoft/DialoGPT-large" checkpoint.
# Later cells rely on it for encoding prompts, for its EOS token (appended to
# every prompt and reused as the padding id) and for decoding generated ids.
tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-large")

In [4]:
# Load the "microsoft/DialoGPT-large" causal language model and place it on
# `device` in one step; binding the result to `model` also keeps the cell
# from echoing the module repr.
model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-large").to(device)

In [5]:
# Total number of elements across every parameter tensor of the model.
sum(map(lambda tensor: tensor.numel(), model.parameters()))

774030080

### Подбираем параметры генерации

In [146]:
def decode_answer(model_output, input_text_len):
    """Decode the part of a generated sequence that follows the prompt.

    Args:
        model_output: One generated sequence of token ids; it is sliced, so it
            must support ``[start:]`` indexing.
        input_text_len: Number of leading positions to drop before decoding.

    Returns:
        The result of the module-level ``tokenizer.decode`` for the remaining
        ids, called with ``skip_special_tokens=True``.
    """
    generated_ids = model_output[input_text_len:]
    return tokenizer.decode(generated_ids, skip_special_tokens=True)


def print_samples(question, top_p=0.75, top_k=None, temperature=1.0, num_beams=5,
            repetition_penalty=1.0, length_penalty=None, no_repeat_ngram_size=3, num_return_sequences=5, repeat=1,
            max_length=128, seed=None):
    """Generate replies to ``question`` and print them, longest first.

    The prompt is ``question`` followed by the tokenizer's EOS token. Replies
    are produced by ``model.generate`` with sampling always enabled
    (``do_sample=True``) and ``early_stopping=False``; the EOS id doubles as
    the padding id. Uses the module-level ``tokenizer``, ``model`` and
    ``device``.

    Args:
        question: Prompt text.
        top_p, top_k, temperature, num_beams, repetition_penalty,
        length_penalty, no_repeat_ngram_size, num_return_sequences:
            Forwarded unchanged to ``model.generate``.
        repeat: How many times to call ``model.generate`` for the same prompt.
        max_length: Forwarded to ``model.generate``. Per the Hugging Face
            documentation this limit counts the prompt tokens as well, so a
            long prompt leaves less room for the reply.
        seed: When not ``None``, ``torch.manual_seed(seed)`` is called once
            before generating so that the sampled replies can be reproduced.

    Returns:
        None. Each reply is printed, followed by a ``**********`` separator
        line and a blank line.
    """
    if seed is not None:
        # Seed once, before the repeat loop, so repeats still differ from each other.
        torch.manual_seed(seed)

    input_ids = tokenizer.encode(question + tokenizer.eos_token, return_tensors='pt').to(device)
    # Every generated sequence begins with the full prompt (including its EOS),
    # so the reply starts exactly at this offset.
    prompt_len = input_ids.shape[-1]

    for _ in range(repeat):
        sample_outputs = model.generate(
            input_ids,
            do_sample=True,
            early_stopping=False,
            max_length=max_length,
            temperature=temperature,
            top_k=top_k,
            top_p=top_p,
            num_beams=num_beams,
            repetition_penalty=repetition_penalty,
            length_penalty=length_penalty,
            no_repeat_ngram_size=no_repeat_ngram_size,
            num_return_sequences=num_return_sequences,
            pad_token_id=tokenizer.eos_token_id
        )
        # Longest reply first.
        answers = sorted(
            (decode_answer(sequence, prompt_len) for sequence in sample_outputs),
            key=len, reverse=True,
        )
        for answer in answers:
            print(answer)
            print('**********\n')

### Примеры из репозитория
https://github.com/microsoft/DialoGPT  

In [147]:
print_samples('Does money buy happiness?')

I don't know, but it sure does buy happiness.
**********

It does if you're rich.
**********

It does if you're rich.
**********

No, but it helps.
**********

It sure does.
**********



In [148]:
print_samples('who is the first president of the United States')

George Washington
**********

George Washington
**********

Bill Clinton
**********

Bill Clinton
**********

Bill Clinton
**********



In [149]:
print_samples('who is the first president of the United States?')

George Washington
**********

George Washington
**********

George Washington
**********

George Washington
**********

George Washington
**********



In [150]:
print_samples('what is the boiling point of water?')

The boiling point is the point at which the water boils.
**********

The boiling point is the temperature of the water.
**********

It depends on the temperature of the water.
**********

I don't know, but it's over 9000.
**********

I don't know, but it's over 9000.
**********



In [151]:
print_samples('which one is bigger, sun or moon?')

Sun is bigger.
**********

Sun is bigger.
**********

Sun is bigger.
**********

Sun is bigger.
**********

Sun is bigger.
**********



In [152]:
print_samples('which animal has black and white stripes?')

I think it's a tortoise.
**********

A dog.
**********

A dog.
**********

A cat.
**********

A cat.
**********



In [153]:
print_samples('what is the meaning of life ?')

I don't know.
**********

I don't know
**********

To be alive.
**********

I don't know
**********

I don't know
**********



In [154]:
print_samples('who won the world cup in 2018 ?')

England. They won the World Cup in 2018.
**********

England, if I remember correctly.
**********

England, I believe.
**********

England, I believe.
**********

The Netherlands
**********



In [155]:
print_samples("""Nvidia's Titan RTX is really good .""")

I second this. I've been using it for a few months now and it's great.
**********

I agree, but I don't think it's available for PC yet.
**********

I second this
**********

It really is.
**********

I concur.
**********



In [156]:
print_samples("""Nvidia's Titan RTX is really good.""")

Yeah, I'm really looking forward to it.
**********

I really want to see that one.
**********

I agree, it's a great card.
**********

I agree, it's a great card.
**********

I really want to see it.
**********



In [157]:
print_samples("""Can Porsche beat Tesla with its new Taycan EV ?""")

I don't know, but I do know that Tesla can beat Porsche.
**********

I don't think so, but it would be interesting to see.
**********

Yes, but it won't be as fast.
**********

No, but they can beat Tesla.
**********

That's a good question.
**********



In [158]:
print_samples("""Can Porsche beat Tesla with its new Taycan EV?""")

I don't know, but I don't think so.
**********

No, but they can beat Tesla.
**********

No, but they can beat Tesla.
**********

No, but it can beat Tesla.
**********

I don't think it can.
**********



In [159]:
print_samples("""What do you think of the DialoGPT repo ?""")

I like it, but I don't use it.
**********

I haven't tried it yet.
**********

I haven't tried it yet.
**********

I haven't tried it yet.
**********

I haven't used it yet.
**********



In [160]:
print_samples("""What do you think of the DialoGPT repo?""")

I like it. It's a lot of work, but I think it's worth it.
**********

I haven't tried it yet, but I've heard good things.
**********

I think it's awesome.
**********

I think it's great.
**********

I think it's great.
**********



### Примеры из AskReddit

In [161]:
print_samples("""What's the grossest thing you've ever tasted?""")

I don't know. I don't think I've ever eaten anything gross.
**********

I don't think I have ever tasted anything gross.
**********

I don't know what you mean by grossest.
**********

I don't know what you mean by grossest.
**********

Cream cheese.
**********



In [162]:
print_samples("""If you could have 1 superpower what would it be and why?""")

Teleportation. I'd like to be able to travel around the universe at will.
**********

I would have to say that I would be able to fly.
**********

Teleportation. I'd like to be able to fly.
**********

Teleportation. I don't know why.
**********

I would love to be able to fly.
**********



In [163]:
print_samples("""Is there anything you should be doing right now, but you're wasting your time on reddit instead? If so, what is it?""")

I don't know what you mean by wasting my time.
**********

I don't know, I don't have anything to do.
**********

I don't know what you're asking.
**********

I'm not wasting my time.
**********

I'm at work.
**********



In [164]:
print_samples("""What is the scariest/creepiest theory you know about?""")

I don't know about creepiest, but I do know that I am the creepiest.
**********

I don't know about creepiest, but I know a lot of creepiest.
**********

I don't know what that means.
**********

I have no idea.
**********

I don't know.
**********



In [165]:
print_samples("""In the John Wick universe, assassins are shown to be everyone and everywhere, thus indicating a buyer's market. What limited time deals and offers do assassins provide to out price the competition? How does the economics of the world operate with so many trained killers?""")

I think the point is that the assassins are not all the same person.
**********

I don't know about you, but I'd love to be an assassin.
**********

I'm not sure if you're being serious or not.
**********

That's a good question.
**********

They don't.
**********



In [166]:
print_samples("""What is you favorite sex scene in the Bible?""")

I'm not sure if it's biblical, but I think it's the one where Jesus is crucified.
**********

The one where Jesus takes off his robe and wizard hat.
**********

I don't really have one.
**********

The one with the fish.
**********

I don't have one.
**********



In [167]:
print_samples("""What are underrated websites and what do you use them for?""")

I use them to keep track of how many times I've visited a website.
**********

I use them to help me find new websites to subscribe to.
**********

I use them to find out what websites I should avoid.
**********

I don't know. I've never used them.
**********

I don't know what you mean.
**********



In [168]:
print_samples("""Why do you hate the people that you hate?""")

I hate people that I hate.
**********

Because they're people.
**********

I don't.
**********

I don't.
**********

I don't.
**********



In [169]:
print_samples("""People who used to work in photo labs before digital cameras, what weird stuff did you develop?""")

I used to be a photographer. I still do, but I used to, too.
**********

I used to be a photographer, but now I work in a photo lab.
**********

I used to be a photographer.
**********

I used to be a photographer.
**********

I used to be a photographer.
**********



In [170]:
print_samples("""To the people who check behind the shower curtain before using the washroom; what’s the next plan if you see someone?""")

I don't know why you're being downvoted.
**********

I don't know why you're being downvoted.
**********

I don't know what you're talking about.
**********

That's a good question.
**********

Shower curtain?
**********



In [171]:
print_samples("""If you came with a warning label, what would it be?""")

I don't know. I'm not a lawyer.
**********

**********

**********

**********

**********



In [172]:
print_samples("""What is the best decision you've made while drunk?""")

I don't think I've ever made a decision while intoxicated.
**********

I don't remember.
**********

I don't drink.
**********

I don't drink.
**********

Drink more.
**********



In [173]:
print_samples("""What’s one rule you live by?""")

I don't know what you mean.
**********

I don't know what you mean.
**********

I don't know what you mean.
**********

I'm not sure what you mean.
**********

I don't know.
**********

