# Caption augmenter 
Adapted from https://github.com/makcedward/nlpaug

In [1]:
from nlpaug.util.file.download import DownloadUtil
# Fetch the three pretrained word-embedding models (word2vec, GloVe, fastText).
# Every downloader is called with dest_dir='.'; the ones that offer several
# variants additionally receive the model_name to fetch.
for download, extra_kwargs in (
    (DownloadUtil.download_word2vec, {}),                                   # word2vec model
    (DownloadUtil.download_glove, {'model_name': 'glove.6B'}),              # GloVe model
    (DownloadUtil.download_fasttext, {'model_name': 'wiki-news-300d-1M'}),  # fasttext model
):
    download(dest_dir='.', **extra_kwargs)

# Config

In [None]:
import nlpaug.augmenter.word as naw
import nlpaug.augmenter.sentence as nas
import nlpaug.flow as nafc

from nlpaug.util import Action

Importing the captions from the CelebA-HQ dataset and putting them in a dictionary, so that every key is a file name from the dataset and every value is the list of corresponding celebrity descriptions.

In [3]:
import os
from collections import OrderedDict

# Directory that holds the caption files (one text file per image).
CAPTIONS_DIR = '/home/gusbuzvi@GU.GU.SE/aics/captions'


def load_captions(captions_dir):
    """Read every caption file found directly inside ``captions_dir``.

    Parameters
    ----------
    captions_dir : str
        Path of the directory to scan.

    Returns
    -------
    OrderedDict
        Maps each file name to the list of lines of that file (surrounding
        whitespace of the whole file is stripped before splitting on newlines).

    Raises
    ------
    OSError
        If the directory or one of its files cannot be read.
    """
    captions_by_file = OrderedDict()
    # os.listdir() returns entries in arbitrary order; sort them so that the
    # dictionary order is the same on every run.
    for filename in sorted(os.listdir(captions_dir)):
        path = os.path.join(captions_dir, filename)
        if not os.path.isfile(path):
            # Skip sub-directories, which open() cannot read.
            continue
        with open(path, 'r', encoding='utf-8') as f:
            captions_by_file[filename] = f.read().strip().split('\n')
    return captions_by_file


# The directory is passed explicitly instead of calling os.chdir(), so the
# process working directory is left alone and relative paths used elsewhere
# do not end up inside the captions directory.
text = load_captions(CAPTIONS_DIR)

# Word Augmenter<a class="anchor" id="word_aug"></a>

Here, word-level augmentation is performed in order to increase the vocabulary of the celebrity descriptions. We make use of word2vec (Mikolov et al., 2013), GloVe (Pennington et al., 2014), fastText (Joulin et al., 2016), BERT (Devlin et al., 2018) and WordNet to insert and substitute similar words. `Word2vecAug`, `GloVeAug` and `FasttextAug` use word embeddings to find the group of words most similar to the original word and use it as a replacement. `BertAug`, on the other hand, uses a language model to predict possible target words. `WordNetAug` uses a statistical approach to find a group of similar words.

### Word Embeddings Augmenter<a class="anchor" id="word_embs_aug"></a>

##### Insert word randomly by word embeddings similarity

In [14]:
# Insert words chosen by static word-embedding similarity.
# model_type: word2vec, glove or fasttext
# The trailing slash matters: model_path below is built by plain string
# concatenation, so without it the file name would be glued onto "aics".
model_dir = '/home/gusbuzvi@GU.GU.SE/aics/'
# NOTE(review): the output recorded for this cell is a list of five captions,
# so it appears to have been produced with the line below enabled. Enabling it
# rebinds `text` and discards the full filename -> captions mapping.
# text = text['3792.txt'][0:5] # first 5 image captions of the demo image
aug = naw.WordEmbsAug(
#     model_type='word2vec', model_path=model_dir+'GoogleNews-vectors-negative300.bin',
#     model_type='glove', model_path=model_dir+'glove.6B.300d.txt',
    model_type='fasttext', model_path=model_dir+'wiki-news-300d-1M.vec',
    action="insert")
augmented_text = aug.augment(text)
print("Original:")
print(text)
print("Augmented Text:")
print(augmented_text)

Original:
['This person has bags under eyes and wears lipstick, necklace, and earrings. She is smiling.', 'The woman has bags under eyes, mouth slightly open, arched eyebrows, blond hair, and high cheekbones. She wears earrings.', 'This woman wears heavy makeup. She has arched eyebrows. She is smiling, and young.', 'This person is attractive, and young and has bags under eyes, wavy hair, arched eyebrows, and mouth slightly open. ', 'She is wearing lipstick. She has blond hair, wavy hair, bags under eyes, and mouth slightly open. She is smiling, and attractive.']
Augmented Text:
['LaPlata This person has FIFI bags FAQS under Masquerade eyes and wears lipstick, WNBR necklace, and earrings. She is KwaNdebele smiling.', 'The woman has Azurite bags under Funassyi eyes, Menschel mouth slightly open, arched eyebrows, coalmining blond Jiuling hair, and dance- high cheekbones. Kambo She wears Sagoths earrings.', 'Airwaves This Kryvyi woman موږ wears heavy makeup. She has arched eyebrows. She is

##### Substitute word by word embeddings similarity

In [13]:
# Substitute words with neighbours picked by static word-embedding similarity.
# Supported model_type values: word2vec, glove or fasttext.
model_dir = '/home/gusbuzvi@GU.GU.SE/aics/'
aug = naw.WordEmbsAug(
    # Alternative embeddings that can be swapped in:
    #     model_type='word2vec', model_path=model_dir+'GoogleNews-vectors-negative300.bin',
    #     model_type='glove', model_path=model_dir+'glove.6B.300d.txt',
    model_type='fasttext',
    model_path=model_dir + 'wiki-news-300d-1M.vec',  # a bit better
    action="substitute",
)
augmented_text = aug.augment(text)

# Print the input and the augmented result, each under its own heading.
print("Original:", text, "Augmented Text:", augmented_text, sep="\n")

Original:
['This person has bags under eyes and wears lipstick, necklace, and earrings. She is smiling.', 'The woman has bags under eyes, mouth slightly open, arched eyebrows, blond hair, and high cheekbones. She wears earrings.', 'This woman wears heavy makeup. She has arched eyebrows. She is smiling, and young.', 'This person is attractive, and young and has bags under eyes, wavy hair, arched eyebrows, and mouth slightly open. ', 'She is wearing lipstick. She has blond hair, wavy hair, bags under eyes, and mouth slightly open. She is smiling, and attractive.']
Augmented Text:
['This person has bags under ever-watchful and assumes mini-skirt, neckpiece, finally earrings. She is kissing.', 'The woman has cartons under skin, mouth slightly build, latticed eyebrows, gray-eyed blow-dry, and high cheekbones. So competes earrings.', 'This husband- wears huge makeups. She has arched eyebrows. She constitutes smiling, making whippersnappers.', 'Moreover person is attractive, and young and has

### Contextual Word Embeddings Augmenter<a class="anchor" id="context_word_embs_aug"></a>

##### Insert word by contextual word embeddings (BERT, DistilBERT, RoBERTa or XLNet)

In [16]:
# Contextual augmentation with the 'bert-base-uncased' model, action "insert".
aug = naw.ContextualWordEmbsAug(model_path='bert-base-uncased', action="insert")
augmented_text = aug.augment(text)

# Print the input and the augmented result, each under its own heading.
print("Original:", text, "Augmented Text:", augmented_text, sep="\n")

Original:
['This person has bags under eyes and wears lipstick, necklace, and earrings. She is smiling.', 'The woman has bags under eyes, mouth slightly open, arched eyebrows, blond hair, and high cheekbones. She wears earrings.', 'This woman wears heavy makeup. She has arched eyebrows. She is smiling, and young.', 'This person is attractive, and young and has bags under eyes, wavy hair, arched eyebrows, and mouth slightly open. ', 'She is wearing lipstick. She has blond hair, wavy hair, bags under eyes, and mouth slightly open. She is smiling, and attractive.']
Augmented Text:
['then this person only has bags showing under eyes and always wears lipstick, beautiful necklace, piercing and earrings. she is smiling.', 'overall the woman has bags under eyes, her mouth open slightly open, dark arched eyebrows, black blond hair, and with high side cheekbones. now she wears earrings.', 'female this woman generally wears heavy black makeup. she has two arched eyebrows. she although is generall

##### Substitute word by contextual word embeddings (BERT, DistilBERT, RoBERTa or XLNet)

In [15]:
# Contextual augmentation with the 'bert-base-uncased' model, action "substitute".
aug = naw.ContextualWordEmbsAug(model_path='bert-base-uncased', action="substitute")
augmented_text = aug.augment(text)

# Print the input and the augmented result, each under its own heading.
print("Original:", text, "Augmented Text:", augmented_text, sep="\n")

Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/226k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/455k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/420M [00:00<?, ?B/s]

Original:
['This person has bags under eyes and wears lipstick, necklace, and earrings. She is smiling.', 'The woman has bags under eyes, mouth slightly open, arched eyebrows, blond hair, and high cheekbones. She wears earrings.', 'This woman wears heavy makeup. She has arched eyebrows. She is smiling, and young.', 'This person is attractive, and young and has bags under eyes, wavy hair, arched eyebrows, and mouth slightly open. ', 'She is wearing lipstick. She has blond hair, wavy hair, bags under eyes, and mouth slightly open. She is smiling, and attractive.']
Augmented Text:
['this person has bags under eyes yet lacks lipstick, lipstick, ridiculous hat. he is smiling.', 'young woman has pink about eyes, mouth a pink, arched eyebrows, blond hair, features high cheekbones. she produces robes.', 'this woman with hair embroidered. it has arched cheeks. she gazes smiling, and young.', 'the man is attractive, and young and dark amber under eyes, wavy hair, arched eyebrows, but face tinted

In [18]:
# Contextual augmentation with the 'distilbert-base-uncased' model, action "insert".
aug = naw.ContextualWordEmbsAug(model_path='distilbert-base-uncased', action="insert")
augmented_text = aug.augment(text)

# Print the input and the augmented result, each under its own heading.
print("Original:", text, "Augmented Text:", augmented_text, sep="\n")

Original:
['This person has bags under eyes and wears lipstick, necklace, and earrings. She is smiling.', 'The woman has bags under eyes, mouth slightly open, arched eyebrows, blond hair, and high cheekbones. She wears earrings.', 'This woman wears heavy makeup. She has arched eyebrows. She is smiling, and young.', 'This person is attractive, and young and has bags under eyes, wavy hair, arched eyebrows, and mouth slightly open. ', 'She is wearing lipstick. She has blond hair, wavy hair, bags under eyes, and mouth slightly open. She is smiling, and attractive.']
Augmented Text:
['this person has bags under snake eyes sunglasses and wears fake lipstick, turtle necklace, and pearl earrings. then she is smiling.', '... the woman has bags under eyes, her mouth slightly folded open, beautifully arched wavy eyebrows, blond auburn hair, breasts and very high cheekbones. she wears earrings.', '... this woman wears heavy hair makeup. she merely has slightly arched eyebrows. she therefore is not

In [17]:
# Contextual augmentation with the 'distilbert-base-uncased' model, action "substitute".
aug = naw.ContextualWordEmbsAug(model_path='distilbert-base-uncased', action="substitute")
augmented_text = aug.augment(text)

# Print the input and the augmented result, each under its own heading.
print("Original:", text, "Augmented Text:", augmented_text, sep="\n")

Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/483 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/226k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/455k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/256M [00:00<?, ?B/s]

Original:
['This person has bags under eyes and wears lipstick, necklace, and earrings. She is smiling.', 'The woman has bags under eyes, mouth slightly open, arched eyebrows, blond hair, and high cheekbones. She wears earrings.', 'This woman wears heavy makeup. She has arched eyebrows. She is smiling, and young.', 'This person is attractive, and young and has bags under eyes, wavy hair, arched eyebrows, and mouth slightly open. ', 'She is wearing lipstick. She has blond hair, wavy hair, bags under eyes, and mouth slightly open. She is smiling, and attractive.']
Augmented Text:
['this person wears eyelashes under eyes who wear lipstick, necklace, floral earrings. she is beautiful.', 'the woman displays straw shaped eyes, mouth slightly open, protruding eyebrows, blond hair, and pale forehead. she uses glasses.', 'this creature smells heavy makeup. silvia gets arched eyebrows. she looks smiling, forever young.', 'prehistoric lizard appeared attractive, appears young and has orange under

In [21]:
# Contextual augmentation with the 'roberta-base' model, action "insert".
aug = naw.ContextualWordEmbsAug(model_path='roberta-base', action="insert")
augmented_text = aug.augment(text)

# Print the input and the augmented result, each under its own heading.
print("Original:", text, "Augmented Text:", augmented_text, sep="\n")

Original:
This person has bags under eyes and wears lipstick, necklace, and earrings. She is smiling. The woman has bags under eyes, mouth slightly open, arched eyebrows, blond hair, and high cheekbones. She wears earrings. This woman wears heavy makeup. She has arched eyebrows. She is smiling, and young. This person is attractive, and young and has bags under eyes, wavy hair, arched eyebrows, and mouth slightly open. She is wearing lipstick. She has blond hair, wavy hair, bags under eyes, and mouth slightly open. She is smiling, and attractive.
Augmented Text:
This person has bags under eyes and wears lipstick, necklace, beads and earrings. She is smiling. The woman has small bags under eyes, mouth slightly open, arched eyebrows, blond hair hair, and high cheekbones. She wears earrings. This woman Also wears heavy makeup. She has arched eyebrows. She is smiling, and looking young. This person is attractive, fresh and also young and has bags under eyes, wavy hair, arched eyebrows, and 

In [22]:
# Contextual augmentation with the 'roberta-base' model, action "substitute".
aug = naw.ContextualWordEmbsAug(model_path='roberta-base', action="substitute")
augmented_text = aug.augment(text)

# Print the input and the augmented result, each under its own heading.
print("Original:", text, "Augmented Text:", augmented_text, sep="\n")

Original:
This person has bags under eyes and wears lipstick, necklace, and earrings. She is smiling. The woman has bags under eyes, mouth slightly open, arched eyebrows, blond hair, and high cheekbones. She wears earrings. This woman wears heavy makeup. She has arched eyebrows. She is smiling, and young. This person is attractive, and young and has bags under eyes, wavy hair, arched eyebrows, and mouth slightly open. She is wearing lipstick. She has blond hair, wavy hair, bags under eyes, and mouth slightly open. She is smiling, and attractive.
Augmented Text:
This person has bags under eyes and wears lipstick, necklace, and earrings. She is smiling. This woman has bags under eyes, mouth slightly open, arched lip, blond hair, and pierced cheekbones. She wears earrings. This women wears heavy makeup. She has arched eyebrows. She s attractive, and young. This female is attractive, and young and has bags under eyes, wavy hair, arched eyebrows, y mouth slightly open. She is wearing lipsti

We evaluated the captions vocabulary-wise and decided to abstain from augmenting our captions with either substituted or inserted synonyms. The above models did not show significantly better performance in comparison to the original captions.

### Synonym Augmenter<a class="anchor" id="synonym_aug"></a>

##### Substitute word by WordNet's synonym

In [23]:
import nltk

# Synonym augmentation with WordNet selected as the synonym source.
aug = naw.SynonymAug(aug_src='wordnet')
augmented_text = aug.augment(text)

# Print the input and the augmented result, each under its own heading.
print("Original:", text, "Augmented Text:", augmented_text, sep="\n")

Original:
This person has bags under eyes and wears lipstick, necklace, and earrings. She is smiling. The woman has bags under eyes, mouth slightly open, arched eyebrows, blond hair, and high cheekbones. She wears earrings. This woman wears heavy makeup. She has arched eyebrows. She is smiling, and young. This person is attractive, and young and has bags under eyes, wavy hair, arched eyebrows, and mouth slightly open. She is wearing lipstick. She has blond hair, wavy hair, bags under eyes, and mouth slightly open. She is smiling, and attractive.
Augmented Text:
This person has bags under eyes and wears lipstick, necklace, and earrings. She is smiling. The woman has bags under eyes, mouth slightly open, arched brow, blond hair, and high cheekbones. She wears earrings. This woman wears enceinte makeup. She have arched eyebrows. She is smiling, and young. This person comprise attractive, and young and has bags under eyes, wavy hair, arched eyebrows, and mouth slightly open. She is tire li

### Antonym Augmenter<a class="anchor" id="antonym_aug"></a>

##### Substitute word by antonym

In [None]:
# Antonym-augment the captions of every file and dump the whole mapping into
# one text file for a quick manual check.
aug = naw.AntonymAug(aug_min=1, aug_p=1)
output_dict = dict()
for key, value in text.items():
    # `value` is the list of captions belonging to the file named `key`.
    augmented_text = aug.augment(value)
    output_dict[key] = augmented_text

# Write once, after the loop. Re-opening the file in 'w' mode on every
# iteration rewrote the growing dictionary each time and never closed the
# handles; the context manager guarantees the file is flushed and closed.
# The path is relative, so the file is created in the current working directory.
with open('antonym_caption.txt', 'w') as output:  # quickly check all the augmented captions
    print(output_dict, file=output)

In [87]:
# Antonym-augment every caption and store the result under the same file name
# in the augmented_captions directory, one augmented caption per line.
aug = naw.AntonymAug(aug_min=1, aug_p=1)

for key, value in text.items():
    # Augment caption by caption; each result is terminated by a newline.
    augmented_lines = [f'{aug.augment(line)}\n' for line in value]
    # save all the augmented captions
    with open('/home/gusbuzvi@GU.GU.SE/aics/augmented_captions/' + key, 'w') as out:
        out.write(''.join(augmented_lines))

An example with captions augmented with antonyms. We changed the augmentation probability to 1, so that all verbs, adverbs and adjectives would be replaced by a word of opposite meaning. The antonyms are taken from WordNet. We assumed that the additional information about which features are absent would make the data more varied, because it is very rare for a dataset to contain labels for what is not in the given picture.

In [25]:
# Antonym augmentation demo with the augmentation probability set to 1.
aug = naw.AntonymAug(aug_p=1)
augmented_text = aug.augment(text)

# Print the input and the augmented result, each under its own heading.
print("Original:", text, "Augmented Text:", augmented_text, sep="\n")

Original:
This person has bags under eyes and wears lipstick, necklace, and earrings. She is smiling. The woman has bags under eyes, mouth slightly open, arched eyebrows, blond hair, and high cheekbones. She wears earrings. This woman wears heavy makeup. She has arched eyebrows. She is smiling, and young. This person is attractive, and young and has bags under eyes, wavy hair, arched eyebrows, and mouth slightly open. She is wearing lipstick. She has blond hair, wavy hair, bags under eyes, and mouth slightly open. She is smiling, and attractive.
Augmented Text:
This person has bags under eyes and wears lipstick, necklace, and earrings. She is smiling. The woman has bags under eyes, mouth slightly open, arched eyebrows, blond hair, and high cheekbones. She wears earrings. This woman refresh heavy makeup. She has arched eyebrows. She is smiling, and old. This person differ attractive, and old and has bags under eyes, wavy hair, arched eyebrows, and mouth slightly closed. She differ weari