In [1]:
import flair
import transformers
from flair.data import Sentence
from flair.models import SequenceTagger
import openai
import pickle
import pprint
pp = pprint.PrettyPrinter(indent=4)
from IPython.display import clear_output
import time
from gpt3_sandbox.api import *
import os.path
from os import path
from IPython.display import Image, display
import random
import warnings
import numpy as np

# utility functions
def clean_and_unify_caption(caption):
    """Merge the first two parts of a caption into a single string.

    Args:
        caption: an indexable object whose items 0 and 1 are strings.

    Returns:
        Both parts with surrounding whitespace removed, joined by ', '.
    """
    first_part = caption[0].strip()
    second_part = caption[1].strip()
    return ', '.join((first_part, second_part))

# load data
# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only open this file if it comes from a trusted source.
with open('meme_900k_cleaned_data_v2.pkl', 'rb') as f:
    data = pickle.load(f)

    # Replace every raw caption with its cleaned single-string form, in place.
    cased_captions = data['uuid_caption_cased_dic']
    for uuid in cased_captions:
        raw_captions = cased_captions[uuid]
        cased_captions[uuid] = [clean_and_unify_caption(raw) for raw in raw_captions]

In [2]:
# Load the pretrained flair sequence tagger identified by "flair/chunk-english".
# The spans it predicts are read back under the 'np' label type in the cells below.
tagger = SequenceTagger.load("flair/chunk-english")



2022-11-06 19:10:48,573 loading file /home/s1hegde/.flair/models/chunk-english/5b53097d6763734ee8ace8de92db67a1ee2528d5df9c6d20ec8e3e6f6470b423.d81b7fd7a38422f2dbf40f6449b1c63d5ae5b959863aa0c2c1ce9116902e8b22
2022-11-06 19:10:49,051 SequenceTagger predicts: Dictionary with 45 tags: <unk>, O, B-NP, E-NP, I-NP, S-PP, S-VP, S-SBAR, S-ADVP, S-NP, S-ADJP, B-VP, E-VP, B-PP, E-PP, I-VP, S-PRT, B-ADVP, E-ADVP, B-ADJP, E-ADJP, B-CONJP, I-CONJP, E-CONJP, I-ADJP, B-SBAR, E-SBAR, S-INTJ, I-ADVP, I-PP, B-UCP, I-UCP, E-UCP, S-LST, B-PRT, I-PRT, E-PRT, S-CONJP, B-INTJ, E-INTJ, I-INTJ, B-LST, E-LST, <START>, <STOP>


In [3]:
def print_entities(tagger, sentence, span_str='np'):
    """Tag `sentence` with `tagger` and print every predicted span.

    Args:
        tagger: object whose `predict` method annotates a flair `Sentence`.
        sentence: raw text to analyse.
        span_str: label type passed to `Sentence.get_spans`.
    """
    tagged = Sentence(sentence)
    tagger.predict(tagged)
    spans = tagged.get_spans(span_str)
    for span in spans:
        print(span)

In [4]:
caption = "Forever Alone! y u no find other Forever Alone?"

In [5]:
print_entities(tagger, caption)

Span[0:2]: "Forever Alone" → ADVP (0.8809)
Span[4:5]: "u" → NP (0.6443)
Span[6:7]: "find" → VP (0.9078)
Span[7:10]: "other Forever Alone" → NP (0.7332)


In [6]:
import spacy

# Cross-check with spaCy: tag the same caption and print only the tokens whose
# coarse part-of-speech is one of the categories listed below.
nlp = spacy.load("en_core_web_sm")
doc = nlp(caption)

pos_of_interest = ('ADV', 'ADJ', 'PROPN', 'VERB', 'NOUN')
for token in doc:
    if token.pos_ not in pos_of_interest:
        continue
    print(token.text, token.pos_, token.tag_)

Forever ADV RB
Alone ADJ JJ
y PROPN NNP
no ADV RB
find VERB VB
other ADJ JJ
Forever ADV RB
Alone ADJ JJ


In [7]:
# Choose a meme template by its position in the uuid list (typed in
# interactively), then look up its label and its cleaned captions.
uuid_list = list(data['uuid_caption_cased_dic'])
uuid_index = int(input('Input uuid index number:'))
uuid = uuid_list[uuid_index]

label = data['uuid_label_dic'][uuid]
captions = data['uuid_caption_cased_dic'][uuid]

Input uuid index number:0


In [8]:
print(captions[0])

commercial, y u no same volume as show!?


In [9]:
print_entities(tagger, captions[0])

Span[0:1]: "commercial" → ADJP (0.5615)
Span[4:7]: "no same volume" → NP (0.9489)
Span[7:8]: "as" → PP (0.9998)
Span[8:9]: "show" → VP (0.9084)


In [10]:
print_entities(tagger, "Why is this commercial not the same volume as the show ughh")

Span[0:1]: "Why" → ADVP (1.0)
Span[2:4]: "this commercial" → NP (0.8854)
Span[5:8]: "the same volume" → NP (0.9778)
Span[8:9]: "as" → PP (0.9997)
Span[9:11]: "the show" → NP (0.9994)


In [11]:
def transform_caption_to_phrases(caption, keep_labels=('NP', 'VP'), separator=';'):
    """Reduce a caption to its chunk phrases, joined into one string.

    The caption is chunked with the notebook-level `tagger`; the text of every
    predicted 'np'-type span whose label value is in `keep_labels` is kept, in
    the order `get_spans` returns them, and the pieces are joined with
    `separator`.

    Args:
        caption: raw caption text.
        keep_labels: chunk label values to keep (default: noun and verb phrases).
        separator: string placed between the kept phrases.

    Returns:
        The kept phrases joined by `separator`; an empty string when nothing
        is kept or when the caption is blank.
    """
    # A blank caption has no spans to keep; skip the model call entirely.
    if isinstance(caption, str) and not caption.strip():
        return ''

    sentence = Sentence(caption)
    tagger.predict(sentence)
    return separator.join(
        span.text
        for span in sentence.get_spans('np')
        if span.get_label('np').value in keep_labels
    )

In [12]:
prompt = transform_caption_to_phrases(captions[0])

In [13]:
captions[0]

'commercial, y u no same volume as show!?'

In [14]:
prompt

'no same volume;show'

In [15]:
# Use the first five captions of the selected template as few-shot examples
# and compute the phrase-list form of each one.
ex_captions = captions[:5]

transformed_captions = [transform_caption_to_phrases(ex_caption) for ex_caption in ex_captions]

In [16]:
ex_captions

['commercial, y u no same volume as show!?',
 'Victoria, y u no tell us your secret?!',
 'KONY, Y u no take justin bieber',
 'TED, y u no tell us how you met their mother',
 'Google, Y U NO LET ME FINISH TYPING?']

In [17]:
transformed_captions

['no same volume;show',
 'Victoria;tell;us;your secret',
 'KONY;take;justin bieber',
 'TED;tell;us;you;met;their mother',
 'Google;U NO;LET;ME;FINISH TYPING']

# GPT-3 prompt

In [18]:
# The OpenAI API key is read from the environment instead of being embedded in
# the notebook, so it never ends up in version control or shared copies.
# NOTE(review): the key that used to be hardcoded in this cell has been exposed
# and should be revoked and replaced.
your_personal_api_key = os.environ.get("OPENAI_API_KEY")
if not your_personal_api_key:
    raise RuntimeError("Set the OPENAI_API_KEY environment variable before running this cell.")

# constants
set_openai_key(your_personal_api_key)
gpt3_engine = 'text-davinci-002'
temperature = 0.7
max_tokens = 100

# Prime the model with few-shot examples: phrase list as input, original
# caption as the expected output.
gpt = GPT(engine=gpt3_engine, temperature=temperature, max_tokens=max_tokens)
for transformed_caption, caption in zip(transformed_captions, ex_captions):
    gpt.add_example(Example(transformed_caption, caption))

In [19]:
# Re-assign the test caption: earlier cells reuse the name `caption` as a loop
# variable, so it no longer holds this sentence at this point.
caption = "Forever Alone! y u no find other Forever Alone?"

In [20]:
# Turn the test caption into its ';'-joined phrase list ...
prompt = transform_caption_to_phrases(caption)

# ... and ask the primed GPT object to complete it.
response = gpt.submit_request(prompt)

In [21]:
prompt

'u;find;other Forever Alone'

In [22]:
print_entities(tagger, caption)

Span[0:2]: "Forever Alone" → ADVP (0.8809)
Span[4:5]: "u" → NP (0.6443)
Span[6:7]: "find" → VP (0.9078)
Span[7:10]: "other Forever Alone" → NP (0.7332)


In [23]:
# Extract the first completion and drop a leading "output:" marker if present.
# str.strip(chars) treats its argument as a set of characters, not a
# prefix/suffix, so stripping " |output:|\n" could also eat legitimate leading
# or trailing letters (o, u, t, p) of the reply. Remove the literal prefix instead.
reply = response['choices'][0]['text'].strip()
if reply.startswith('output:'):
    reply = reply[len('output:'):].strip()
reply

'Y U NO FIND OTHER FOREVER ALONE'

In [24]:
caption

'Forever Alone! y u no find other Forever Alone?'

# Let's unify this

In [25]:
# Build a dataset which maps each uuid to a list of up to N_EXAMPLE_CAPTIONS
# randomly chosen captions for that meme, plus their phrase-list form.
N_EXAMPLE_CAPTIONS = 5
EXAMPLE_SEED = 0  # fixed seed so a fresh run picks the same examples

# A dedicated generator keeps the sampling reproducible without touching the
# global random state.
example_rng = random.Random(EXAMPLE_SEED)

uuid_to_ex_captions = {}
uuid_to_trans_ex_captions = {}

for uuid, captions in data['uuid_caption_cased_dic'].items():
    # Clamp the sample size so templates with fewer captions do not raise.
    sample_size = min(N_EXAMPLE_CAPTIONS, len(captions))
    # random.sample returns the original str objects (no numpy string scalars
    # end up in the pickled dataset).
    ex_captions = example_rng.sample(captions, sample_size)
    uuid_to_ex_captions[uuid] = ex_captions
    uuid_to_trans_ex_captions[uuid] = [transform_caption_to_phrases(caption) for caption in ex_captions]

In [26]:
# Bundle both per-uuid mappings and persist them as a single pickle.
gpt3_data = {
    'uuid_caption_dic': uuid_to_ex_captions,
    'uuid_trans_caption_dic': uuid_to_trans_ex_captions,
}

with open("data/gpt3_data.pkl", "wb") as pkl_file:
    pickle.dump(gpt3_data, pkl_file)

In [78]:
class PrimeGPT(object):
    """Wraps a `GPT` object and re-primes it with per-template examples.

    Before every request the wrapped object's examples are replaced by the
    (phrase list -> caption) pairs stored for the requested uuid.
    """

    def __init__(self, api_key, gpt3_engine, temperature, max_tokens,
                 captions_by_uuid=None, transformed_captions_by_uuid=None):
        """Set the API key and create the wrapped `GPT` object.

        Args:
            api_key: key passed to `set_openai_key`.
            gpt3_engine: engine name forwarded to `GPT`.
            temperature: sampling temperature forwarded to `GPT`.
            max_tokens: completion length limit forwarded to `GPT`.
            captions_by_uuid: optional mapping uuid -> example captions.
                When omitted, the notebook-level `uuid_to_ex_captions` is
                looked up at priming time.
            transformed_captions_by_uuid: optional mapping uuid -> phrase-list
                form of those captions. When omitted, the notebook-level
                `uuid_to_trans_ex_captions` is looked up at priming time.
        """
        set_openai_key(api_key)
        self.gpt = GPT(engine=gpt3_engine, temperature=temperature, max_tokens=max_tokens)
        self.captions_by_uuid = captions_by_uuid
        self.transformed_captions_by_uuid = transformed_captions_by_uuid

    def clear_gpt_examples(self):
        """Reset the wrapped object's `examples` attribute to an empty dict."""
        self.gpt.examples = {}

    def prime_gpt_from_uuid(self, uuid):
        """Replace the current examples with the ones stored for `uuid`."""
        self.clear_gpt_examples()
        # Fall back to the notebook globals lazily, so instances created
        # without explicit mappings behave exactly as before.
        if self.transformed_captions_by_uuid is None:
            transformed_by_uuid = uuid_to_trans_ex_captions
        else:
            transformed_by_uuid = self.transformed_captions_by_uuid
        if self.captions_by_uuid is None:
            captions_by_uuid = uuid_to_ex_captions
        else:
            captions_by_uuid = self.captions_by_uuid

        transformed_captions = transformed_by_uuid[uuid]
        captions = captions_by_uuid[uuid]
        for (transformed_caption, caption) in zip(transformed_captions, captions):
            self.gpt.add_example(Example(transformed_caption, caption))

    def get_response(self, uuid, caption):
        """Prime with the examples for `uuid` and submit `caption`.

        The caption is first converted with `transform_caption_to_phrases`;
        the value returned by `GPT.submit_request` is passed through unchanged.
        """
        self.prime_gpt_from_uuid(uuid)
        prompt = transform_caption_to_phrases(caption)
        return self.gpt.submit_request(prompt)

In [88]:
# Reuses the API key and generation settings defined in the GPT-3 prompt section.
prime_gpt = PrimeGPT(your_personal_api_key, gpt3_engine, temperature, max_tokens)

In [101]:
# Choose a meme template by its position in the uuid list (typed in
# interactively) and show which template was selected.
uuid_list = list(data['uuid_caption_cased_dic'])
uuid_index = int(input('Input uuid index number:'))
uuid = uuid_list[uuid_index]

label = data['uuid_label_dic'][uuid]
print(label)

Input uuid index number:299
bane-permission-to-die


In [100]:
# Index -> label lookup table for choosing a template by number.
list(enumerate(data['uuid_label_dic'][template_uuid] for template_uuid in uuid_list))

[(0, 'y-u-no'),
 (1, 'bad-luck-brian'),
 (2, 'willy-wonka'),
 (3, 'the-most-interesting-man-in-the-world'),
 (4, 'futurama-fry'),
 (5, 'success-kid'),
 (6, 'one-does-not-simply'),
 (7, 'first-world-problems'),
 (8, 'philosoraptor'),
 (9, 'grumpy-cat'),
 (10, 'winter-is-coming'),
 (11, 'forever-alone'),
 (12, 'good-guy-greg'),
 (13, 'scumbag-steve'),
 (14, 'what-if-i-told-you'),
 (15, 'kermit-the-frog-drinking-tea'),
 (16, 'conspiracy-keanu'),
 (17, 'yo-dawg'),
 (18, 'all-the-things'),
 (19, 'insanity-wolf'),
 (20, 'joseph-ducreux'),
 (21, 'trollface'),
 (22, 'pedobear'),
 (23, 'skeptical-3rd-world-kid'),
 (24, 'annoying-facebook-girl'),
 (25, 'disaster-girl'),
 (26, 'socially-awkward-penguin'),
 (27, 'prepare-yourself'),
 (28, 'slowpoke'),
 (29, 'dr-evil-meme'),
 (30, 'advice-yoda-gives'),
 (31, 'joker-mind-loss'),
 (32, 'stoner-stanley'),
 (33, 'foul-bachelor-frog'),
 (34, 'pleaseguy'),
 (35, 'batman-slap-robin'),
 (36, 'high-expectations-asian-father'),
 (37, 'koala-cant-believe-it')

In [126]:
uuid_to_ex_captions[uuid]

['robert kunda, you have my permission to die',
 'WHEN YOU HAVE SUBMITTED ALL YOUR RECORDS, YOU HAVE MY PERMISSION TO DIE',
 "Once you Have sent me my stuff, You'll have my permission to die",
 'when you have read the house of mirth, then you have my permission to die',
 'When nph is present, YOU HAVE MY PERMISSION TO HIGH FIVE']

In [127]:
uuid_to_trans_ex_captions[uuid]

['robert kunda;you;have;my permission;to die',
 'YOU;HAVE SUBMITTED;ALL YOUR RECORDS;YOU;HAVE;MY PERMISSION;TO DIE',
 "you;Have sent;me;my stuff;You;'ll have;my permission;to die",
 'you;have read;the house;mirth;you;have;my permission;to die',
 'nph;is;YOU;HAVE;MY PERMISSION;HIGH FIVE']

In [128]:
# Prime GPT with the example captions stored for the selected `uuid`, then
# submit the phrase-list form of this free-form sentence.
response = prime_gpt.get_response(uuid, "You can die when you've submitted the assignment!")

In [129]:
# Extract the first completion and drop a leading "output:" marker if present.
# str.strip(chars) treats its argument as a set of characters, not a
# prefix/suffix, so stripping " |output:|\n" could also eat legitimate leading
# or trailing letters (o, u, t, p) of the reply. Remove the literal prefix instead.
reply = response['choices'][0]['text'].strip()
if reply.startswith('output:'):
    reply = reply[len('output:'):].strip()
reply

"Once you've submitted the assignment, you can die."

# Minimal Demo