In [1]:
import pandas as pd
import gensim
import tensorflow as tf
from keras_preprocessing import image as im
from transformers import GPT2Tokenizer, GPT2LMHeadModel, BertModel, BertTokenizer
import torch
import gc
import numpy as np
import sklearn
import matplotlib.pyplot as plt
import seaborn as sns
import os
try:
    import tensorflow_text as text
except:
    pass
from tqdm import tqdm
import tensorflow_hub as hub
import tensorflow.keras.applications as apps
import transformers
sns.set()
from PIL import Image



# Loading data and processing

In [2]:
def load_data(image_path, annotations_path):
    """Read the annotation CSV and load the images it references.

    Rows whose ``human_sentiment`` is missing are dropped. File names that
    occur in ``image_path`` are prefixed with that directory; every remaining
    ``image_name`` value is then loaded at 224x224 and divided by 255.

    Parameters
    ----------
    image_path : str
        Directory that is listed to find the available image files.
    annotations_path : str
        CSV file with ``image_name``, ``human_sentiment`` and ``annotation``
        columns.

    Returns
    -------
    tuple
        ``(images, annotations)``: the stacked, rescaled image array and the
        lower-cased ``annotation`` column as a numpy array.
    """
    table = pd.read_csv(annotations_path)
    table = table[table['human_sentiment'].notna()]

    available = os.listdir(image_path)
    present = table['image_name'].isin(available)
    table.loc[present, 'image_name'] = table.loc[present, 'image_name'].apply(
        lambda name: os.path.join(image_path, name))

    loaded = [np.array(im.load_img(name, target_size=(224, 224)))
              for name in table['image_name'].values]
    images = np.array(loaded) / 255
    annotations = table['annotation'].str.lower().values
    return images, annotations

In [3]:
# Load the images (scaled to [0, 1] by load_data) and the lower-cased
# annotations; both globals are reused by every feature-extraction cell below.
images, annotations = load_data("../../data/emo-at-cap/images/", '../../data/emo-at-cap/emo-at-cap.csv')

In [4]:
# Sanity check: number of annotated samples that survived the filter in load_data.
len(annotations)

3840

# Saving features

In [5]:
def check_folder(path):
    """Make sure the directory ``path`` exists, creating it when necessary.

    Missing parent directories are created as well, and a directory that
    appears between the existence check and the creation (for example from a
    parallel run) does not raise. If ``path`` already exists nothing is done.

    Parameters
    ----------
    path : str
        Directory to create.
    """
    if not os.path.exists(path):
        # makedirs (unlike mkdir) also creates intermediate directories;
        # exist_ok avoids a race with a concurrent creator.
        os.makedirs(path, exist_ok=True)
    

In [6]:
# Output directories that the feature-extraction cells below write their .tsv files into.
check_folder('../image_features')
check_folder('../text_features')

In [7]:
from sklearn.decomposition import IncrementalPCA
import csv

In [8]:
def create_representation_tensorflow(data, model, path, n_components=128, batch_size=16):
    """Run ``model.predict`` on every sample and save the features as TSV.

    Each sample is given a leading batch axis, passed to ``model.predict``
    on its own, flattened with ``np.hstack`` and written as one
    tab-separated row of ``path``.

    Parameters
    ----------
    data : iterable
        Samples to process one at a time.
    model : object
        Anything exposing ``predict(batch)``.
    path : str
        Destination TSV file (overwritten).
    n_components, batch_size : int
        Currently unused; kept so existing callers keep working.
    """
    # newline='' is what the csv module expects; without it platforms that
    # translate '\n' would emit '\r\r\n' row terminators.
    with open(path, 'w', newline='') as fw:
        csv_writer = csv.writer(fw, delimiter='\t')
        for sample in tqdm(data):
            batch = np.expand_dims(sample, axis=0)
            feature = model.predict(batch)
            try:
                # Tensor-like results expose .numpy(); plain arrays do not.
                feature = feature.numpy()
            except AttributeError:
                pass
            csv_writer.writerow(np.hstack(feature))
    print('Saved representations to : {}'.format(path))

In [9]:
def write(data, path):
    """Write ``data`` (an iterable of rows) to ``path`` as tab-separated values.

    Parameters
    ----------
    data : iterable of iterables
        Rows to write; each inner iterable becomes one line.
    path : str
        Destination file (overwritten).
    """
    # newline='' lets the csv writer control line endings itself, as the csv
    # module documentation requires.
    with open(path, 'w', newline='') as fw:
        csv_writer = csv.writer(fw, delimiter='\t')
        csv_writer.writerows(data)
    print('Saved representations to : {}'.format(path))

In [10]:
def load_resnet(block='conv4_block5_out', pooling=True):
    """Build a feature extractor from an ImageNet-pretrained ResNet152V2.

    Parameters
    ----------
    block : str
        Name of the layer whose output is used as the feature map.
    pooling : bool
        If True the feature map is reduced with global average pooling,
        otherwise it is flattened.

    Returns
    -------
    tf.keras.Model
        Model mapping the backbone's input to the pooled/flattened output of
        ``block``.
    """
    backbone = apps.ResNet152V2(include_top=False, weights='imagenet')
    # Indexing [0] raises IndexError when no layer carries the requested name.
    tap = [layer for layer in backbone.layers if layer.name == block][0]
    entry = backbone.layers[0]
    reducer = (tf.keras.layers.GlobalAveragePooling2D() if pooling
               else tf.keras.layers.Flatten())
    return tf.keras.Model(entry.input, reducer(tap.output))

In [11]:
def elmo_representation(data, path):
    """Embed each sentence with the TF-Hub ELMo module and save the result as TSV.

    The module's ``"elmo"`` output is averaged over axis 1 and the resulting
    vector is written as one tab-separated row per sample. The function uses
    the TF1 graph/session API (``hub.Module``, ``tf.placeholder``,
    ``tf.Session``).

    Parameters
    ----------
    data : iterable of str
        Sentences, fed to the module one at a time.
    path : str
        Destination TSV file (overwritten).
    """
    model = hub.Module("https://tfhub.dev/google/elmo/3", trainable=False)
    placeholder = tf.placeholder(tf.string, shape=[None])
    representation = model(placeholder, signature="default", as_dict=True)["elmo"]
    representation = tf.reduce_mean(representation, axis=1)
    # newline='' is what the csv module expects for files handed to csv.writer.
    with open(path, 'w', newline='') as fw:
        csv_writer = csv.writer(fw, delimiter='\t')
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            for sample in tqdm(data):
                # The placeholder has shape [None], so wrap the single sentence.
                sample = np.expand_dims(sample, axis=0)
                feature = sess.run(representation, feed_dict={placeholder: sample})
                feature = np.hstack(feature)
                csv_writer.writerow(feature)
    print('Saved representations to : {}'.format(path))

In [12]:
def load_emb_from_disk(path):
    """Load binary word2vec-format vectors from ``path`` as gensim KeyedVectors."""
    return gensim.models.KeyedVectors.load_word2vec_format(path, binary=True)

In [13]:
def gpt2_representation(data, model_name, path, gpu=False,
                        batch_size=16,
                        layer='emb', layer_num=11, num_last_layers_to_use=4):
    """Encode every sample with (part of) a GPT-2 model and save the features as TSV.

    Each sample is tokenised with ``GPT2Tokenizer.encode``, processed on its
    own, averaged over the token axis and written as one tab-separated row.

    Parameters
    ----------
    data : iterable
        Samples passed to ``tokenizer.encode``.
    model_name : str
        Name or path accepted by ``from_pretrained``.
    path : str
        Destination TSV file (overwritten).
    gpu : bool
        Run on ``'cuda'`` when True, otherwise on ``'cpu'``.
    batch_size : int
        Currently unused; kept so existing callers keep working.
    layer : {'emb', 'intermidiate', 'sum'}
        ``'emb'`` uses only the token-embedding table (``transformer.wte``);
        ``'intermidiate'`` feeds the token embeddings through the single
        transformer block ``transformer.h[layer_num]``;
        ``'sum'`` averages the last ``num_last_layers_to_use`` hidden states
        of the full model.
    layer_num : int
        Block index used when ``layer='intermidiate'``.
    num_last_layers_to_use : int
        Number of trailing hidden states averaged when ``layer='sum'``.
    """
    layer_choices = ['emb', 'intermidiate', 'sum']
    # Join the options explicitly: '{}'.format(*choices) would print only the first one.
    assert layer in layer_choices, \
        'Please choose name of layer from the following list: ({})'.format(', '.join(layer_choices))

    device = 'cuda' if gpu else 'cpu'

    if layer != 'sum':
        model = GPT2LMHeadModel.from_pretrained(model_name)
    else:
        model = GPT2LMHeadModel.from_pretrained(model_name, output_hidden_states=True)

    model_embs = None
    if layer == 'emb':
        model = model.transformer.wte
    elif layer == 'intermidiate':
        model_embs = model.get_input_embeddings()
        model = model.transformer.h[layer_num]

    model.eval()
    tokenizer = GPT2Tokenizer.from_pretrained(model_name)

    model.to(device)
    if model_embs is not None:
        # The embedding table is a separate module from the selected block and
        # must live on the same device as the token ids it receives.
        model_embs.to(device)

    tokenized_data = [tokenizer.encode(sample) for sample in data]

    # newline='' is what the csv module expects for files handed to csv.writer.
    with open(path, 'w', newline='') as fw:
        csv_writer = csv.writer(fw, delimiter='\t')
        for token_ids in tqdm(tokenized_data):
            # Leading axis of size 1: every sample is processed as its own batch.
            input_ids = torch.tensor([token_ids], dtype=torch.long).to(device)

            with torch.no_grad():
                if layer == 'intermidiate':
                    outputs = model(model_embs(input_ids))[0]
                elif layer == 'sum':
                    hidden_states = model(input_ids).hidden_states[-num_last_layers_to_use:]
                    summed = torch.zeros_like(hidden_states[0])
                    for hidden in hidden_states:
                        summed += hidden
                    outputs = summed / num_last_layers_to_use
                else:
                    outputs = model(input_ids)
                # Average over the token axis to get one vector per sample.
                features = torch.mean(outputs, dim=1)
            features = np.ravel(features.cpu().detach().numpy())
            csv_writer.writerow(features)
    torch.cuda.empty_cache()
    print('Saved representations to : {}'.format(path))

In [14]:
def bert_representation(data, model_name, path, gpu=False,
                        batch_size=16,
                        layer='emb', layer_num=11, num_last_layers_to_use=4):
    """Encode every sample with (part of) a BERT model and save the features as TSV.

    Each sample is tokenised with ``BertTokenizer.encode``, processed on its
    own, averaged over the token axis and written as one tab-separated row.

    Parameters
    ----------
    data : iterable
        Samples passed to ``tokenizer.encode``.
    model_name : str
        Name or path accepted by ``from_pretrained``.
    path : str
        Destination TSV file (overwritten).
    gpu : bool
        Run on ``'cuda'`` when True, otherwise on ``'cpu'``.
    batch_size : int
        Currently unused; kept so existing callers keep working.
    layer : {'emb', 'intermidiate', 'sum'}
        ``'emb'`` uses only the model's input-embedding module;
        ``'intermidiate'`` takes entry ``layer_num`` of element 2 of the model
        output (presumably the hidden-state tuple enabled by
        ``output_hidden_states=True``);
        ``'sum'`` averages the last ``num_last_layers_to_use`` entries of that
        same element.
    layer_num : int
        Index used when ``layer='intermidiate'``.
    num_last_layers_to_use : int
        Number of trailing hidden states averaged when ``layer='sum'``.
    """
    layer_choices = ['emb', 'intermidiate', 'sum']
    # Join the options explicitly: '{}'.format(*choices) would print only the first one.
    assert layer in layer_choices, \
        'Please choose name of layer from the following list: ({})'.format(', '.join(layer_choices))

    device = 'cuda' if gpu else 'cpu'

    model = BertModel.from_pretrained(model_name, output_hidden_states=True)
    if layer == 'emb':
        model = model.get_input_embeddings()

    model.eval()
    tokenizer = BertTokenizer.from_pretrained(model_name)
    model.to(device)

    tokenized_data = [tokenizer.encode(sample) for sample in data]

    # newline='' is what the csv module expects for files handed to csv.writer.
    with open(path, 'w', newline='') as fw:
        csv_writer = csv.writer(fw, delimiter='\t')
        for token_ids in tqdm(tokenized_data):
            # Leading axis of size 1: every sample is processed as its own batch.
            input_ids = torch.tensor([token_ids], dtype=torch.long).to(device)

            with torch.no_grad():
                if layer == 'intermidiate':
                    outputs = model(input_ids)[2][layer_num]
                elif layer == 'emb':
                    outputs = model(input_ids)
                else:  # layer == 'sum'
                    hidden_states = model(input_ids)[2][-num_last_layers_to_use:]
                    summed = torch.zeros_like(hidden_states[0])
                    for hidden in hidden_states:
                        summed += hidden
                    outputs = summed / num_last_layers_to_use
                # Average over the token axis to get one vector per sample.
                features = torch.mean(outputs, dim=1)
            features = np.ravel(features.cpu().detach().numpy())
            csv_writer.writerow(features)
    torch.cuda.empty_cache()
    print('Saved representations to : {}'.format(path))

In [15]:
from nltk.corpus import stopwords 
def prepare_annotations_embeddings(annotations):
    """Strip punctuation and English stop words from every annotation.

    The characters ``, . ! ?`` are removed, the sentence is split on single
    spaces and words found in ``stopwords.words('english')`` are dropped.

    Parameters
    ----------
    annotations : iterable of str
        Sentences to clean.

    Returns
    -------
    tuple
        ``(prepared_annotations, unique_words_annotations)``: the list of
        token lists (one per sentence) and the sorted numpy array of distinct
        tokens across all sentences.
    """
    punctuation = {',', '.', '!', '?'}
    # Fetch the stop-word list once; calling stopwords.words() for every word
    # re-reads the list each time and dominates the runtime.
    english_stopwords = set(stopwords.words('english'))

    prepared_annotations = []
    for sentence in annotations:
        cleaned = ''.join(ch for ch in sentence if ch not in punctuation)
        prepared_annotations.append(
            [word for word in cleaned.split(' ') if word not in english_stopwords])
    unique_words_annotations = np.unique(np.hstack(prepared_annotations))
    return prepared_annotations, unique_words_annotations

In [16]:
def create_representation_embeddings(embeddings, sentences, path, dim=300):
    """Average the word vectors of every sentence and save them as TSV.

    Parameters
    ----------
    embeddings : mapping
        Word -> vector lookup; only ``embeddings.get(word)`` is used.
    sentences : iterable of iterables of str
        Tokenised sentences.
    path : str
        Destination TSV file (overwritten).
    dim : int
        Dimensionality of the vectors. A sentence with no known word is
        written as ``dim`` zeros.
    """
    # newline='' is what the csv module expects for files handed to csv.writer.
    with open(path, "w", newline='') as fw:
        csv_writer = csv.writer(fw, delimiter='\t')
        for sentence in sentences:
            vector = np.zeros(shape=(dim,))
            counter = 0
            for word in sentence:
                representation = embeddings.get(word)
                if representation is not None:
                    vector += representation
                    counter += 1
            if counter != 0:
                vector /= counter
            csv_writer.writerow(vector)
    print('Saved representation to : {}'.format(path))

In [17]:
def get_emb_rep(tokens, embeddings):
    """Look up a vector for every token, trying lower-case then capitalised form.

    Parameters
    ----------
    tokens : iterable of str
        Words to look up.
    embeddings : object
        Anything exposing ``word_vec(word)`` that raises ``KeyError`` for
        unknown words (e.g. gensim KeyedVectors).

    Returns
    -------
    dict
        Maps each original token that was found to its vector. The number of
        tokens found under neither spelling is printed.
    """
    dict_tokens = {}
    missing = []
    for w in tqdm(tokens):
        # The original fallback called the misspelled `captialize()`, which
        # always raised and was silently swallowed, so the capitalised form
        # was never actually tried.
        for candidate in (w.lower(), w.capitalize()):
            try:
                dict_tokens[w] = embeddings.word_vec(candidate)
                break
            except KeyError:
                continue
        else:
            missing.append(w)

    print('{} words where absent in embedding'.format(len(missing)))
    return dict_tokens


In [18]:
def get_emb_rep_glove(tokens, embeddings):
    """Look up every token in a dict of embeddings.

    The spellings are tried in the order: as given, lower-cased, capitalised.
    The first one present in ``embeddings`` supplies the vector, which is
    stored under the original token. The count of tokens found under no
    spelling is printed.

    Returns
    -------
    dict
        Original token -> vector for every token that was found.
    """
    found = {}
    absent = []
    known = embeddings.keys()
    for token in tqdm(tokens):
        for spelling in (token, token.lower(), token.capitalize()):
            if spelling in known:
                found[token] = embeddings[spelling]
                break
        else:
            absent.append(token)

    print('{} words where absent in embedding'.format(len(absent)))
    return found

In [19]:
def load_glove(file):
    """Parse a GloVe text file into a ``{word: numpy vector}`` dictionary.

    Every non-empty line is split on whitespace; the first field is the word
    and the remaining fields are converted to floats.

    Parameters
    ----------
    file : str
        Path to the GloVe ``.txt`` file.

    Returns
    -------
    dict
        Word -> 1-D numpy array.
    """
    print("Loading Glove Model")
    glove_embeddings = {}
    # Explicit UTF-8 keeps parsing independent of the platform default
    # encoding; the context manager closes the handle, which the original
    # bare open() never did.
    with open(file, 'r', encoding='utf-8') as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Blank line (e.g. trailing newline at end of file).
                continue
            glove_embeddings[fields[0]] = np.array([float(value) for value in fields[1:]])
    print(len(glove_embeddings), " words loaded!")
    return glove_embeddings


In [20]:
# Tokenised, stop-word-free annotations plus the vocabulary of distinct words;
# both are reused by the text-embedding cells below.
prepared_annotations, unique_words_annotations = prepare_annotations_embeddings(annotations)

# resnet

In [21]:
# Feature extractor with the default tap (conv4_block5_out, global average pooling).
resnet = load_resnet()

Instructions for updating:
If using Keras pass *_constraint arguments to layers.


In [22]:
# NOTE(review): `images` is scaled to [0, 1] by load_data; confirm this matches
# the preprocessing the ImageNet weights expect.
create_representation_tensorflow(images, resnet, '../image_features/resnet_conv4_block5.tsv')

100%|██████████| 1880/1880 [00:38<00:00, 48.48it/s]

Saved representations to : ../image_features/resnet_conv4_block5.tsv





In [23]:
# Drop the reference to the model and force a collection before the next model is loaded.
import gc
del resnet
gc.collect();

# resnet conv5_block3_out

In [24]:
# Same backbone, tapped at conv5_block3_out instead of the default block.
resnet = load_resnet(block='conv5_block3_out')

In [25]:
# Save the pooled conv5_block3_out features, one row per image.
create_representation_tensorflow(images, resnet, '../image_features/resnet_conv5_block3.tsv')

100%|██████████| 1880/1880 [02:05<00:00, 14.94it/s]


Saved representations to : ../image_features/resnet_conv5_block3.tsv


# GPT-2 word embeddings

In [26]:
# NOTE(review): `prepared_annotations` holds token lists (see
# prepare_annotations_embeddings), not raw strings — confirm the tokenizer's
# `encode` treats pre-split input as intended.
gpt2_representation(data=prepared_annotations, model_name='gpt2-medium', path='../text_features/gpt2_wte.tsv',
                   gpu=False, layer='emb')

100%|██████████| 1880/1880 [00:01<00:00, 1408.80it/s]

Saved representations to : ../text_features/gpt2_wte.tsv





# gpt2 11th layer

In [27]:
# layer='intermidiate' feeds the token embeddings through the single block
# transformer.h[11] only (see gpt2_representation), not through the blocks before it.
gpt2_representation(data=prepared_annotations, model_name='gpt2-medium', path='../text_features/gpt2_11th_layer.tsv',
                   gpu=False, layer='intermidiate', layer_num=11)

100%|██████████| 1880/1880 [00:08<00:00, 231.44it/s]


Saved representations to : ../text_features/gpt2_11th_layer.tsv


# gpt2 last layer

In [28]:
# layer_num=-1 selects the last block, transformer.h[-1]; as above, only that
# single block is applied to the token embeddings.
gpt2_representation(data=prepared_annotations, model_name='gpt2-medium', path='../text_features/gpt2_last_layer.tsv',
                   gpu=False, layer='intermidiate', layer_num=-1)

100%|██████████| 1880/1880 [00:08<00:00, 231.99it/s]

Saved representations to : ../text_features/gpt2_last_layer.tsv





# gpt2 4 last layers

In [29]:
# Average of the last four hidden states of the full model. Note the output
# file is named `gpt_...`, unlike the `gpt2_...` files written above.
gpt2_representation(data=prepared_annotations, model_name='gpt2-medium', path='../text_features/gpt_4_last_layers.tsv',
                   gpu=False, layer='sum', num_last_layers_to_use=4)

100%|██████████| 1880/1880 [03:23<00:00,  9.25it/s]

Saved representations to : ../text_features/gpt_4_last_layers.tsv





# STSb DistilBERT (sentence-transformers)

In [21]:
from sentence_transformers import SentenceTransformer

In [22]:
def stbs_representation(annotations, path, gpu=False):
    """Encode every annotation with a SentenceTransformer model and save as TSV.

    The model ``'distilbert-base-nli-stsb-mean-tokens'`` encodes one
    annotation at a time; the flattened result is written as one
    tab-separated row.

    Parameters
    ----------
    annotations : iterable of str
        Sentences to encode.
    path : str
        Destination TSV file (overwritten).
    gpu : bool
        Run on ``'cuda'`` when True, otherwise on ``'cpu'``.
    """
    stbs_bert = SentenceTransformer('distilbert-base-nli-stsb-mean-tokens')
    # 'gpu' is not a valid torch device string; CUDA devices are named 'cuda'.
    stbs_bert.to('cuda' if gpu else 'cpu')
    # newline='' is what the csv module expects for files handed to csv.writer.
    with open(path, 'w', newline='') as fw:
        csv_writer = csv.writer(fw, delimiter='\t')
        for annotation in tqdm(annotations):
            feature = np.ravel(stbs_bert.encode(annotation))
            csv_writer.writerow(feature)
    print('Saved representation to : {}'.format(path))

In [23]:
# Uses the raw lower-cased sentences (`annotations`), not the tokenised `prepared_annotations`.
stbs_representation(annotations,'../text_features/stbs_bert.tsv')

100%|██████████| 3840/3840 [00:23<00:00, 164.73it/s]

Saved representation to : ../text_features/stbs_bert.tsv





# w2v embeddings

In [24]:
# Load the binary word2vec vectors and keep only those for words that occur in the annotations.
w2v_embeddings = get_emb_rep(unique_words_annotations, load_emb_from_disk('../embeddings/GoogleNews-vectors-negative300.bin'))

  import sys
100%|██████████| 2062/2062 [00:00<00:00, 175006.67it/s]

22 words where absent in embedding





In [25]:
# Mean word2vec vector per annotation (default dim=300).
create_representation_embeddings(w2v_embeddings, prepared_annotations, '../text_features/w2v.tsv')

Saved representation to : ../text_features/w2v.tsv


In [26]:
# Release the word2vec lookup before the next embedding set is loaded.
del w2v_embeddings
gc.collect();

# glove embeddings

In [27]:
# Full GloVe table as a {word: vector} dict.
glove = load_glove('../embeddings/glove.6B.300d.txt')

Loading Glove Model
400000  words loaded!


In [28]:
# Rebinds `glove` to the reduced lookup (annotation words only); the full
# table from the previous cell is no longer referenced after this line.
glove = get_emb_rep_glove(unique_words_annotations, glove)

100%|██████████| 2062/2062 [00:00<00:00, 796377.06it/s]

34 words where absent in embedding





In [29]:
# Mean GloVe vector per annotation (default dim=300).
create_representation_embeddings(glove,prepared_annotations, '../text_features/glove.tsv')

Saved representation to : ../text_features/glove.tsv


In [30]:
# Release the GloVe lookup before the next embedding set is loaded.
del glove
gc.collect();

In [31]:
import gensim.downloader as api

# fastText

In [32]:
# Fetch the fastText vectors through gensim's downloader. The global name
# `model` is rebound several times by later cells.
model = api.load('fasttext-wiki-news-subwords-300')

In [33]:
# Keep only the fastText vectors for words that occur in the annotations.
fasttext_embs = get_emb_rep(unique_words_annotations,model)

  import sys
100%|██████████| 2062/2062 [00:00<00:00, 208150.54it/s]

23 words where absent in embedding





In [34]:
# Mean fastText vector per annotation (default dim=300).
create_representation_embeddings(fasttext_embs,prepared_annotations, '../text_features/fasttext.tsv')

Saved representation to : ../text_features/fasttext.tsv


# ResNet-50 trained on FER

In [21]:
# Load the saved Keras model from the local weights directory.
model = tf.keras.models.load_model('../weights/ResNet-50_faces.h5')

In [22]:
# Cut the network at its second-to-last layer. This rebinds `model` from
# itself, so the cell is not idempotent: re-running it without reloading
# truncates one layer further.
model = tf.keras.models.Model(model.layers[0].input, model.layers[-2].output)

In [23]:
def prepare_images_fer(images):
    """Convert [0, 1]-scaled images back to uint8 RGB and resize them to 197x197.

    Parameters
    ----------
    images : numpy.ndarray
        Stack of images whose values are multiplied by 255 before conversion
        (assumes they were scaled to [0, 1] — TODO confirm against caller).

    Returns
    -------
    numpy.ndarray
        Stack of resized ``uint8`` images.
    """
    # Round before casting: value / 255 * 255 can land just below the integer
    # in floating point, and a bare astype(uint8) would truncate it to the
    # next lower intensity. Clipping keeps out-of-range values from wrapping.
    images = np.clip(np.rint(images * 255), 0, 255).astype(np.uint8)
    fer_images = []
    for image in images:
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
        resized = Image.fromarray(image).convert('RGB').resize((197, 197), Image.LANCZOS)
        fer_images.append(np.array(resized))
    return np.array(fer_images)

In [24]:
# uint8 copies of the images, resized to 197x197 by prepare_images_fer.
images_fer = prepare_images_fer(images)

In [25]:
# NOTE(review): `images_fer` holds raw 0-255 uint8 values, unlike the
# [0, 1]-scaled `images` used elsewhere — confirm this is what the FER model expects.
create_representation_tensorflow(images_fer,model,'../image_features/resnet_fer_last.tsv')

100%|██████████| 3840/3840 [03:55<00:00, 16.34it/s]

Saved representations to : ../image_features/resnet_fer_last.tsv





# bert  last 4 layers

In [26]:
# NOTE(review): `prepared_annotations` holds token lists, not raw strings —
# confirm the tokenizer's `encode` treats pre-split input as intended.
bert_representation(data=prepared_annotations, model_name='bert-base-uncased', path='../text_features/bert_4_last_layers.tsv',
                   gpu=False, layer='sum', num_last_layers_to_use=4)

100%|██████████| 3840/3840 [01:54<00:00, 33.49it/s]

Saved representations to : ../text_features/bert_4_last_layers.tsv





# bert word embeddings

In [27]:
# Mean of the input-embedding vectors only (layer='emb'); no encoder layer is applied.
bert_representation(data=prepared_annotations, model_name='bert-base-uncased', path='../text_features/bert_emb.tsv',
                   gpu=False, layer='emb')

100%|██████████| 3840/3840 [00:02<00:00, 1809.80it/s]


Saved representations to : ../text_features/bert_emb.tsv


# bert last layer

In [28]:
# layer_num=-1 picks the last entry of the hidden-state output (see bert_representation).
bert_representation(data=prepared_annotations, model_name='bert-base-uncased', path='../text_features/bert_last_layer.tsv',
                   gpu=False, layer='intermidiate', layer_num=-1)

100%|██████████| 3840/3840 [01:56<00:00, 33.04it/s]


Saved representations to : ../text_features/bert_last_layer.tsv


# elmo

In [26]:
# Uses the raw lower-cased sentences; elmo_representation relies on the TF1
# session API (tf.placeholder / tf.Session).
elmo_representation(data=annotations, path='../text_features/elmo.tsv')

INFO:absl:Using /tmp/tfhub_modules to cache modules.
INFO:absl:Downloading TF-Hub Module 'https://tfhub.dev/google/elmo/3'.
INFO:absl:Downloading https://tfhub.dev/google/elmo/3: 180.34MB
INFO:absl:Downloading https://tfhub.dev/google/elmo/3: 310.34MB
INFO:absl:Downloaded https://tfhub.dev/google/elmo/3, Total size: 357.40MB
INFO:absl:Downloaded TF-Hub Module 'https://tfhub.dev/google/elmo/3'.


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore
100%|██████████| 3840/3840 [03:03<00:00, 20.92it/s]


Saved representations to : ../text_features/elmo.tsv


# EfficientNet trained on Age/Gender data: block3c_expand_conv

In [30]:
from tensorflow.keras.models import load_model
from tensorflow.keras.layers import GlobalAveragePooling2D
import tensorflow as tf

In [31]:
def load_efficient_net(path, layer='block3c_expand_conv'):
    """Load a saved Keras model and expose one layer's pooled output.

    Parameters
    ----------
    path : str
        File passed to ``load_model``.
    layer : str
        Name of the layer whose output is globally average-pooled.

    Returns
    -------
    tf.keras.Model
        Model mapping the loaded network's first-layer input to the pooled
        output of ``layer``.
    """
    network = load_model(path)
    entry = network.layers[0]
    # Indexing [0] raises IndexError when no layer carries the requested name.
    tap = [candidate for candidate in network.layers if candidate.name == layer][0]
    pooled = GlobalAveragePooling2D()(tap.output)
    return tf.keras.Model(entry.input, pooled)

In [32]:
# Extractor tapped at the default layer, block3c_expand_conv.
model = load_efficient_net('../weights/EfficientNetB3_224_weights.11-3.44.hdf5')

In [33]:
# Save the pooled block3c_expand_conv features, one row per image.
create_representation_tensorflow(images, model, '../image_features/efnet_bloc3c_age_gender.tsv')

100%|██████████| 3840/3840 [02:46<00:00, 23.04it/s]

Saved representations to : ../image_features/efnet_bloc3c_age_gender.tsv





# EfficientNet trained on Age/Gender data: top_activation

In [34]:
# Same weights, tapped at the layer named 'top_activation'.
model = load_efficient_net('../weights/EfficientNetB3_224_weights.11-3.44.hdf5', 'top_activation')

In [35]:
# Save the pooled top_activation features, one row per image.
create_representation_tensorflow(images, model, '../image_features/efnet_top_conv_age_gender.tsv')

100%|██████████| 3840/3840 [04:41<00:00, 13.63it/s]

Saved representations to : ../image_features/efnet_top_conv_age_gender.tsv





# Hierarchy-based Image Embeddings for Semantic Image Retrieval

In [21]:
from keras.models import load_model

Using TensorFlow backend.


In [22]:
import keras

In [23]:
# NOTE: `load_model` here is the `keras.models` import from the cell above,
# which shadows the `tensorflow.keras.models.load_model` imported earlier.
model = load_model('../weights/imagenet_unitsphere-embed+cls_rn50.model.h5')

















In [24]:
# Keep only the path from the input to the layer named 'embedding'.
inputs = model.layers[0].input
# Indexing [0] raises IndexError if no layer has that name.
last_layer = [i for i in model.layers if i.name=='embedding'][0]
# Rebinds `model`; re-running this cell then operates on the truncated model.
model = keras.Model(inputs, last_layer.output)

In [25]:
# Save the 'embedding' layer output for every image, one row per image.
create_representation_tensorflow(images, model, '../image_features/hierarchy_based_sim.tsv')

100%|██████████| 3840/3840 [01:40<00:00, 38.11it/s]


Saved representations to : ../image_features/hierarchy_based_sim.tsv
