In [13]:
from transformers import GPT2Tokenizer, GPT2LMHeadModel

# Name of the pretrained checkpoint passed to from_pretrained() below
# ('gpt2-medium' is kept in the trailing comment as an alternative).
MODEL_NAME = 'distilgpt2' #'gpt2-medium'
# Directory that the later cells read the fine-tuned '*.pth' weight files from.
MODEL_PATH = "./models"
# Length of the segment-id list built for each prompt in the generation cells.
SEQ_LEN = 50

# Instantiate the tokenizer and the LM-head model for MODEL_NAME.
tokenizer = GPT2Tokenizer.from_pretrained(MODEL_NAME)
model = GPT2LMHeadModel.from_pretrained(MODEL_NAME)

In [14]:
# Declare special tokens for padding and separating the context from the slogan:
SPECIAL_TOKENS_DICT = {
    'pad_token': '<pad>',
    'additional_special_tokens': ['<context>', '<slogan>'],
}

# Add these special tokens to the vocabulary and resize the model's embeddings to match:
tokenizer.add_special_tokens(SPECIAL_TOKENS_DICT)
model.resize_token_embeddings(len(tokenizer))

# Print the resulting special-token mapping as a sanity check.
print(tokenizer.special_tokens_map)

{'bos_token': '<|endoftext|>', 'eos_token': '<|endoftext|>', 'unk_token': '<|endoftext|>', 'pad_token': '<pad>', 'additional_special_tokens': "['<context>', '<slogan>']"}


In [15]:
import torch
import torch.nn.functional as F
from tqdm import trange


def top_k_top_p_filtering(logits, top_k=0, top_p=0.0, filter_value=-float('Inf')):
    """Mask unlikely tokens using top-k and/or nucleus (top-p) filtering.

    All operations run along the last dimension, so ``logits`` may be a single
    vocabulary-sized vector or carry any number of leading batch dimensions.

    Args:
        logits: Float tensor of unnormalised scores whose last dimension
            indexes the vocabulary. NOTE: it is modified in place.
        top_k: If > 0, keep only the ``top_k`` highest scores in each row
            (scores tied with the k-th best are kept as well). 0 disables
            top-k filtering. Values larger than the vocabulary are clamped.
        top_p: If > 0, walk the tokens from most to least likely and keep
            them up to and including the first one whose cumulative softmax
            probability exceeds ``top_p``; the rest are removed. The most
            likely token is always kept. 0 disables nucleus filtering.
        filter_value: Score written over every removed position.

    Returns:
        The same ``logits`` tensor, after masking.
    """
    # Never ask topk() for more entries than the vocabulary holds.
    top_k = min(top_k, logits.size(-1))
    if top_k > 0:
        # Smallest score that still belongs to the top-k, kept broadcastable.
        kth_best = torch.topk(logits, top_k)[0][..., -1, None]
        logits[logits < kth_best] = filter_value

    if top_p > 0.0:
        sorted_logits, sorted_indices = torch.sort(logits, descending=True)
        cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)

        sorted_indices_to_remove = cumulative_probs > top_p

        # Shift the mask one step right so the first token that crosses the
        # threshold is kept too, and make sure the best token always survives.
        sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
        sorted_indices_to_remove[..., 0] = False

        # Map the mask from sorted order back to vocabulary order. Scattering
        # along the last dimension (rather than a hard-coded dim=1) keeps this
        # working for 1-D input as well as batched input.
        indices_to_remove = sorted_indices_to_remove.scatter(
            dim=-1, index=sorted_indices, src=sorted_indices_to_remove)
        logits[indices_to_remove] = filter_value
    return logits


def sample_sequence(model, length, context, segments_tokens=None, num_samples=1, temperature=1, top_k=0, top_p=0.0, repetition_penalty=1.0,
                    device='cpu'):
    """Autoregressively extend ``context`` by ``length`` tokens.

    Args:
        model: Callable invoked as ``model(input_ids=..., token_type_ids=...)``
            (``token_type_ids`` only when ``segments_tokens`` is given) whose
            first output is a logits tensor indexed ``[sample, position, vocab]``.
        length: Number of tokens to append to every sample.
        context: Sequence of prompt token ids shared by all samples.
        segments_tokens: Optional sequence of segment ids, one per position.
            If it is shorter than ``len(context) + length`` it is padded with
            its last value so ``token_type_ids`` always matches ``input_ids``.
        num_samples: Number of independent continuations to generate.
        temperature: Logits are divided by this value when it is > 0;
            ``temperature == 0`` selects greedy (argmax) decoding.
        top_k: Forwarded to ``top_k_top_p_filtering``.
        top_p: Forwarded to ``top_k_top_p_filtering``.
        repetition_penalty: Penalty applied once to every token id already
            present in a sample. Positive scores are divided by it and
            negative scores multiplied by it, so values > 1 always make a
            repeated token less likely. 1.0 disables the penalty.
        device: Device on which the id tensors are created.

    Returns:
        Long tensor of shape ``(num_samples, len(context) + length)`` holding
        the prompt followed by the generated token ids.

    Raises:
        ValueError: If ``segments_tokens`` is provided but empty.
    """
    context = torch.tensor(context, dtype=torch.long, device=device)
    generated = context.unsqueeze(0).repeat(num_samples, 1)

    # Build the segment ids once, on the same device as the input ids, and make
    # sure they cover every position the loop below will ever feed to the model.
    segments = None
    if segments_tokens is not None:
        segments = torch.as_tensor(segments_tokens, dtype=torch.long, device=device)
        if segments.numel() == 0:
            raise ValueError("segments_tokens must not be empty when provided")
        shortfall = generated.shape[1] + length - segments.shape[0]
        if shortfall > 0:
            segments = torch.cat((segments, segments[-1:].repeat(shortfall)))

    with torch.no_grad():
        for _ in trange(length):
            inputs = {'input_ids': generated}
            if segments is not None:
                current_len = generated.shape[1]
                inputs['token_type_ids'] = segments[:current_len].unsqueeze(0).repeat(num_samples, 1)

            outputs = model(**inputs)
            # Only the scores for the position after the last token matter.
            next_token_logits = outputs[0][:, -1, :] / (temperature if temperature > 0 else 1.)

            if repetition_penalty != 1.0:
                # Dividing a negative score would move it towards zero and make
                # the token MORE likely, so negative scores are multiplied instead.
                seen_scores = next_token_logits.gather(1, generated)
                seen_scores = torch.where(seen_scores < 0,
                                          seen_scores * repetition_penalty,
                                          seen_scores / repetition_penalty)
                # Duplicate ids write the same value, so each token is penalised once.
                next_token_logits = next_token_logits.scatter(1, generated, seen_scores)

            filtered_logits = top_k_top_p_filtering(next_token_logits, top_k=top_k, top_p=top_p)
            if temperature == 0:
                next_token = torch.argmax(filtered_logits, dim=-1).unsqueeze(-1)
            else:
                next_token = torch.multinomial(F.softmax(filtered_logits, dim=-1), num_samples=1)
            generated = torch.cat((generated, next_token), dim=1)
    return generated


In [1]:
import os
import sys
import json
import urllib.request

def translate(user_text, client_id=None, client_secret=None):
    """Translate Korean text to English through the Naver Papago NMT endpoint.

    Args:
        user_text: Korean text to translate.
        client_id: Client ID issued by the Naver developer center. When
            omitted, the ``NAVER_CLIENT_ID`` environment variable is used.
        client_secret: Client Secret issued by the Naver developer center.
            When omitted, the ``NAVER_CLIENT_SECRET`` environment variable is
            used.

    Returns:
        The translated text. Empty or whitespace-only input returns ``""``
        without contacting the service.

    Raises:
        urllib.error.HTTPError: If the service answers with an HTTP error
            status (raised by ``urlopen``).
        RuntimeError: If the service answers with a non-200 success status.
    """
    # urllib.request happens to import urllib.parse, but do not rely on that.
    from urllib.parse import quote

    # Nothing to translate: skip the network round trip.
    if not user_text or not user_text.strip():
        return ""

    # Keep credentials out of the source; fall back to the environment.
    if client_id is None:
        client_id = os.environ.get("NAVER_CLIENT_ID", "")
    if client_secret is None:
        client_secret = os.environ.get("NAVER_CLIENT_SECRET", "")

    encText = quote(user_text)
    data = "source=ko&target=en&text=" + encText
    url = "https://openapi.naver.com/v1/papago/n2mt"
    request = urllib.request.Request(url)
    request.add_header("X-Naver-Client-Id", client_id)
    request.add_header("X-Naver-Client-Secret", client_secret)

    # The context manager closes the connection even if reading fails.
    with urllib.request.urlopen(request, data=data.encode("utf-8")) as response:
        rescode = response.getcode()
        if rescode != 200:
            # Fail loudly instead of falling through to an unbound result.
            raise RuntimeError(f"Error Code:{rescode}")
        response_body = response.read()

    res = json.loads(response_body.decode('utf-8'))
    return res['message']['result']['translatedText']

In [22]:
# Korean description of the business to write slogans for; fill in before running.
user_input = ''
result = translate(user_input)

# Ids of the two additional special tokens; index 0 / 1 follow the order they
# were registered in ('<context>', '<slogan>').
context_tkn = tokenizer.additional_special_tokens_ids[0]
slogan_tkn = tokenizer.additional_special_tokens_ids[1]

input_ids = [context_tkn] + tokenizer.encode(result)

# Number of tokens to generate after the prompt.
max_new_tokens = 30

# One segment id per position: '<context>' over the prompt, '<slogan>' after it.
# The list must cover the prompt, the '<slogan>' marker and every generated
# token, otherwise token_type_ids ends up shorter than input_ids for long prompts.
segments = [slogan_tkn] * max(SEQ_LEN, len(input_ids) + 1 + max_new_tokens)
segments[:len(input_ids)] = [context_tkn] * len(input_ids)

input_ids += [slogan_tkn]

# map_location lets a checkpoint saved from a GPU load on a CPU-only machine;
# sample_sequence below runs on its default device ('cpu').
state_dict = torch.load(MODEL_PATH + '/' + 'en slogan_2epoch_model.pth', map_location='cpu')
model.load_state_dict(state_dict)
model.eval()


generated = sample_sequence(model, length=max_new_tokens, context=input_ids, segments_tokens=segments, temperature=0.9, top_k=50, top_p=0.95, num_samples=20)

print('\n\n--- Generated Slogans ---\n')

for g in generated:
  slogan = tokenizer.decode(g.squeeze().tolist())
  # Keep only the text between the '<slogan>' marker and the first end-of-text token.
  slogan = slogan.split('<|endoftext|>')[0].split('<slogan>')[1]
  print(slogan)   

100%|██████████| 30/30 [00:17<00:00,  1.74it/s]




--- Generated Slogans ---

 A coffee company. A great place to have a great coffee.
 A cup of coffee.
 A coffee made better.
 A little cup of a tea.
 The one who loves coffee.
 A cup of coffee.
 A coffee company with an atmosphere.
 A coffee company. A place for coffee.
 Coffee as your cup.
 It won't happen to the coffee.
 A cup of great coffee.
 The best coffee company.
 A fresh cup of coffee.
 Delivering the best in your coffee.
 A great way to get your coffee.
 A cup of coffee.
 A cup of coffee, but not a cup.
 A better coffee for the everyday everyday.
 Have coffee and some coffee.
 A great place to live!


100%|██████████| 30/30 [00:16<00:00,  1.77it/s]




--- Generated Slogans ---

 We taste the best in the world.
 Make a cup of coffee for your life.
 Sake time for coffee.
 For coffee lovers who want to be Coffee lovers.
 Making coffee good.
 All cup. All cup. All flavor.
 For people who make coffee.
 The coffee people.
 Pure coffee. Pure taste.
 A taste of life.
 B cup. It's all in good taste.
 Great coffee can't help you.
 A good coffee can do!
 It's what you love.
 Be proud.
 All the good things brewing.
 A little taste of good coffee.
 For people who know coffee.
 For coffee lovers. For those who know coffee, they know coffee.
 It's all in the coffee.


100%|██████████| 30/30 [00:16<00:00,  1.79it/s]



--- Generated Slogans ---

 A cup of good coffee.
 A cup of milk in a good book.
 Make a difference.
 A good cup of coffee.
 A cup of good coffee.
 A good coffee break should be a good time.
 A coffee drink should be a matter of coffee.
 A cup of good coffee.
 The coffee drinker's choice.
 A cup of the good life.
 A cup of good and well done.
 Be yourself. Be yourself.
 A coffee experience.
 A cup to yourself.
 A coffee in the neighborhood with a view.
 Coffee that's worth talking.
 A coffee break away.
 A brand new you.
 A cup of freshness in the middle.
 A coffee cup of character.





In [24]:
    
# Ids of the two additional special tokens; index 0 / 1 follow the order they
# were registered in ('<context>', '<slogan>').
context_tkn = tokenizer.additional_special_tokens_ids[0]
slogan_tkn = tokenizer.additional_special_tokens_ids[1]

input_ids = [context_tkn] + tokenizer.encode(result)

# Number of tokens to generate after the prompt.
max_new_tokens = 20

# One segment id per position: '<context>' over the prompt, '<slogan>' after it.
# The list must cover the prompt, the '<slogan>' marker and every generated
# token, otherwise token_type_ids ends up shorter than input_ids for long prompts.
segments = [slogan_tkn] * max(SEQ_LEN, len(input_ids) + 1 + max_new_tokens)
segments[:len(input_ids)] = [context_tkn] * len(input_ids)

input_ids += [slogan_tkn]


# Compare the checkpoints saved after epochs 1, 2 and 3 on the same prompt.
for epoch in range(1, 4):
  # map_location lets a checkpoint saved from a GPU load on a CPU-only machine;
  # sample_sequence below runs on its default device ('cpu').
  state_dict = torch.load(MODEL_PATH + '/' + f'en slogan_{epoch}epoch_model.pth', map_location='cpu')
  model.load_state_dict(state_dict)
  model.eval()

  # Sample 20 slogans with a maximum length of 20 tokens.
  # NOTE(review): the original notes here described sharpening the distribution
  # slightly (temperature=0.9) and top-k sampling to drop low-probability
  # tokens (top_k=5), but the call passes neither, so sample_sequence's
  # defaults apply (temperature=1, no top-k / top-p filtering). Confirm which
  # behaviour is intended.
  generated = sample_sequence(model, length=max_new_tokens, context=input_ids, segments_tokens=segments, num_samples=20)

  print('\n\n--- Generated Slogans ---\n')

  for g in generated:
    slogan = tokenizer.decode(g.squeeze().tolist())
    # Keep only the text between the '<slogan>' marker and the first end-of-text token.
    slogan = slogan.split('<|endoftext|>')[0].split('<slogan>')[1]
    print(slogan)   

100%|██████████| 30/30 [00:16<00:00,  1.82it/s]




--- Generated Slogans ---

 Your coffee tray. Your friendly coffee.
  Cold coffee has a good stay.
 A coffee. A great time.
 An Bean bean. <context> A better coffee than ever. One drop of beans.
 Bare to seat young? A way to seat young?
 The cup of coffee.
 A planted place for new experiences for some of your coffee and whisky.
  averring laughter.
 A small mug ride away from right Coffee.
 Because coffee is an everyday way to pampered bean.
 A smart place to go, a small dark much less accounting.
 Healthful coffee. Own coffee. Food. Love. Your Cuppa.
 A better fork fullMoon less longer (too) or a <context> No drinks ( professionalism makes life harder.
 A coffee company. The coffee store. Every Day.
 A great coffee and a great coffee.
 Not too flassiful. A Degree to Open Yours():10 electrodes for you.
 The first coffee. It goes back to coffee.
 A look, a taste, a beer, worth.
 IJeff i Jeffi. Let the experience your cup.
 A small comfort adventure. features little green tea.


100%|██████████| 30/30 [00:16<00:00,  1.84it/s]




--- Generated Slogans ---

 Get moved. Because I love coffee.
 Where good things come true.
 You won't want to.
 Real coffee. Real coffee.
 A coffee that never clamsites.
 Every drop.
 A good way to go with Begaaa.
 Keeping coffee counts.
 Soup for the whole kid.
 It's just a mug in the mug.
 Cornish taste classics.
 A well-crafted coffee experience delivered.
 Passion for coffee.
 Is the coffee for you?
 Have only a word.
 Take us to the top!
 It is possible.
 Be a cup of pure freshness, pure taste life.
 Share. Share. Share. Share on all.
 It's very refreshing.


100%|██████████| 30/30 [00:16<00:00,  1.81it/s]



--- Generated Slogans ---

 A coffee can't do this without Italia.
 When someone's cup, you trust a cup.
 A small coffee break.
 A place for tea and people.
 The bean specialist.
 Emotional coffee everyday. Daily develop.
 A cup of Coffee best.
 A leading coffee company in the business!
 A good long time, a great coffee break.
 Old in the future.
 Style your coffee, meaning your time.
 A great coffee people.
 Indulge your cuppa. You get your Bays.
 Have some good coffee.
 Trust your coffee.
 A cup of confection.
 A cup above the rest.
 A small company with a big reputation.
 A coffee in the morning. If only late or early.
 A cool cup of good health.



