In [None]:
import copy
import os
import pdb
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from keras.models import load_model
from keras.preprocessing.sequence import pad_sequences
from PIL import Image
from PIL import ImageDraw
from PIL import ImageFont

In [None]:
ls history/

In [None]:
# Load the pickled training history that is plotted below as the
# "Merge-Concat" model loss.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open("history/history-merge-date_5-18-14-40.pkl", "rb") as handle:
    history = pickle.load(handle)

In [None]:
def plot_loss(history, title="Model Loss"):
    """Plot training loss (and validation loss, when present) per epoch.

    Parameters
    ----------
    history : mapping
        Must provide a ``'loss'`` sequence. A ``'val_loss'`` sequence is
        plotted as a second line when that key is present.
    title : str
        Title placed above the axes.

    Raises
    ------
    KeyError
        If ``history`` has no ``'loss'`` entry.
    """
    # A dedicated Axes keeps repeated calls from drawing onto whatever figure
    # happens to be current in the pyplot state machine.
    _, ax = plt.subplots()
    # Each line carries its own label so the legend cannot drift out of sync
    # with what was actually plotted.
    ax.plot(history['loss'], label='train')
    if 'val_loss' in history:
        ax.plot(history['val_loss'], label='validation')
    ax.set(title=title, xlabel='epoch', ylabel='loss')
    ax.legend(loc='upper left')
    plt.show()

In [None]:
plot_loss(history, "Merge-Concat Model Loss")

In [None]:
# Load the pickled training history that is plotted below as the
# "Inject" model loss.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open("history/history-inject-date_5-16-15-45.pkl", "rb") as handle:
    history2 = pickle.load(handle)

In [None]:
plot_loss(history2, "Inject Model Loss")

In [None]:
# Load the pickled training history that is plotted below as the
# "Merge-Add" model loss. The `with` block closes the file on exit, so no
# explicit close() is needed afterwards.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open("history/history-merge_add-date_6-4-14-11.pkl", "rb") as handle:
    history3 = pickle.load(handle)

In [None]:
plot_loss(history3, "Merge-Add Model Loss")

In [None]:
# Load the pickled tokenizer; its `word_index` mapping is used below to
# encode the start/end tokens and to decode predictions. The `with` block
# closes the file on exit, so no explicit close() is needed afterwards.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open("../data/tokenizer/tokenizer.pkl","rb") as handle:
    tokenizer = pickle.load(handle)

In [None]:

1 + len(tokenizer.word_index)

In [None]:
# Invert the tokenizer vocabulary (word -> index) into index -> word so that
# predicted indices can be turned back into words.
reverse_tokenizer = dict(
    zip(tokenizer.word_index.values(), tokenizer.word_index.keys())
)

In [None]:
# Load the pickled validation features; entries are looked up by photo id in
# `get_photo_features`, and the keys become `valid_ids`. The `with` block
# closes the file on exit, so no explicit close() is needed afterwards.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open("../data/features/valid_features.pkl", "rb") as handle:
    valid_features = pickle.load(handle)

In [None]:
# All keys of `valid_features`, in iteration order, as a list that can be sampled from.
valid_ids = list(valid_features)

In [None]:
def get_photo_features(photo_id):
    """Return the entry stored under ``photo_id`` in the global ``valid_features``.

    Any lookup error raised by ``valid_features`` for an unknown id propagates
    to the caller.
    """
    features = valid_features[photo_id]
    return features

In [None]:

model1 = load_model("models/model_merge-date_5-18-14-40-ep016-loss4.704_lr-0.010000_patience-3.h5")

In [None]:
model1.summary()

In [None]:
model2 = load_model("models/model_inject-date_5-16-15-45-ep030-loss5.009_lr-0.010000_patience-3.h5")

In [None]:
model2.summary()

In [None]:
def generate_predictions_id(photo_id, model, tokenizer, reverse_tokenizer, max_length = 15):
    """Greedily decode a caption for ``photo_id``.

    Starting from the ``"startseq"`` token, the model is queried with the
    photo features and the zero-padded sequence built so far; the argmax of
    each prediction is written into the next slot. Decoding stops at the
    ``"endseq"`` token or once ``max_length`` slots are filled.

    Parameters
    ----------
    photo_id : key understood by ``get_photo_features``.
    model : object exposing ``predict([features, sequence], verbose=0)``.
    tokenizer : object whose ``word_index`` maps ``"startseq"`` / ``"endseq"``
        to their integer indices.
    reverse_tokenizer : mapping from integer index to word.
    max_length : int
        Length of the sequence buffer, start token included.

    Returns
    -------
    str
        The decoded words joined by single spaces (start/end tokens excluded).
    """
    features = get_photo_features(photo_id)

    # Zero-filled buffer; 0 doubles as the "empty slot" marker when decoding.
    tokens = np.zeros(max_length, dtype=int)
    tokens[0] = tokenizer.word_index["startseq"]
    end_token = tokenizer.word_index["endseq"]

    for position in range(1, max_length):
        scores = model.predict([features, tokens.reshape(1, -1)], verbose=0)
        # argmax without an axis works on the flattened prediction.
        best = np.argmax(scores)
        tokens[position] = best
        if best == end_token:
            break

    # Skip the start token and collect words until padding or the end token.
    words = []
    for token in tokens[1:]:
        if token in (0, end_token):
            break
        words.append(reverse_tokenizer[token])
    return " ".join(words)

In [None]:
[.9**i for i in range(15)]

In [None]:
class SequenceCandidate(object):
    """A partially decoded token sequence together with per-token probabilities.

    The sequence lives in a fixed-length buffer of ``max_length`` slots in
    which the value 0 marks an unused slot. Candidates are immutable from the
    caller's point of view: ``add_token`` returns an extended deep copy.
    Candidates order by ``probsum()`` (see ``__lt__``), so they can be sorted.
    """

    @staticmethod
    def template_seq(start_idx = 1, max_length = 15, ignore_idx = None, alpha = .9):
        """Create a candidate that contains only the start token.

        The start token is given probability 1; all remaining slots are 0.
        """
        # Size the buffers from max_length. They used to be hard-coded to 15,
        # which tripped the length assertion in __init__ for any other value.
        seq = np.repeat(0, max_length)
        seq[0] = start_idx

        probs = np.repeat(0.0, max_length)
        probs[0] = 1
        return SequenceCandidate(seq, probs, max_length, ignore_idx, alpha)

    def __init__(self, seq, probs, max_length = 15, ignore_idx = None, alpha = .9):
        """Wrap existing ``seq`` / ``probs`` buffers.

        Parameters
        ----------
        seq : array of token indices, exactly ``max_length`` long, 0 = empty.
        probs : array of probabilities aligned with ``seq``.
        max_length : int, buffer length.
        ignore_idx : optional collection of token indices that are skipped by
            ``probsum`` and ``to_words``; ``None`` means "ignore nothing".
        alpha : float, base of the positional weights ``alpha**k``.
        """
        assert len(seq) == max_length
        self._max_length = max_length
        self._seq = seq
        self._probs = probs

        # Number of populated slots = position of the first 0, or the whole
        # buffer when no slot is empty.
        self._num_elem = next(
            (pos for pos, token in enumerate(seq) if token == 0), max_length
        )

        # (previous, next) token pairs added through add_token.
        self._bigrams = set()
        self._ignore_idx = [] if ignore_idx is None else ignore_idx
        self._prob_weights = [alpha**i for i in range(max_length)]

    def add_token(self, token, prob):
        """Return a deep copy of this candidate extended by ``token``.

        Raises
        ------
        IndexError
            If every slot of the buffer is already populated.
        """
        if self._num_elem >= self._max_length:
            raise IndexError("Sequence is already populated.\nCan't add any more tokens to it.")

        slot = self._num_elem
        newcandidate = copy.deepcopy(self)
        newcandidate._seq[slot] = token
        newcandidate._probs[slot] = prob
        # Remember the (previous token, new token) pair of the extended copy.
        newcandidate._bigrams.add(tuple(newcandidate._seq[slot - 1 : slot + 1]))
        newcandidate._num_elem += 1
        return newcandidate

    def probsum(self):
        """Return the positionally weighted sum of the token probabilities.

        Positions whose token is in ``ignore_idx`` are dropped first; the k-th
        remaining probability is then multiplied by ``alpha**k``, so dropping
        a token shifts the weights of everything after it.
        """
        # np.isin replaces the deprecated np.in1d; for 1-D input they agree.
        valid_probs = self._probs[~np.isin(self._seq, self._ignore_idx)]
        return np.sum(np.multiply(valid_probs, self._prob_weights[:len(valid_probs)]))

    def final_token(self):
        """Return the last populated token of the sequence."""
        return self._seq[self._num_elem - 1]

    def to_words(self,reverse_tokenizer, end_idx):
        """Decode the sequence (without its first slot) into a string.

        Decoding stops at the first empty slot (0) or at ``end_idx``. Tokens
        in ``ignore_idx`` are skipped, and a token equal to the one in the
        slot immediately before it is emitted only once.
        """
        out_words = []
        for i in range(1,len(self._seq)):
            idx = self._seq[i]
            if idx == 0 or idx == end_idx:
                break
            if idx in self._ignore_idx:
                continue
            # Collapse immediate repetitions of the same token.
            if self._seq[i - 1] != idx:
                out_words.append(reverse_tokenizer[idx])
        return " ".join(out_words)

    def __lt__(self, other):
        """Order candidates by ``probsum()``; defer to ``other`` if it has none."""
        try:
            return self.probsum() < other.probsum()
        except AttributeError:
            return NotImplemented

In [None]:
def generate_predictions_beam_id(photo_id, model,reverse_tokenizer, width, num_neighbors,
                                 top_n = 3, end_idx = 2, max_length = 15, ignore_idx = [4], alpha = .9):
    """Beam-search captions for ``photo_id`` and return the best distinct ones.

    Parameters
    ----------
    photo_id : key understood by ``get_photo_features``.
    model : object exposing ``predict([features, sequence], verbose=0)``.
    reverse_tokenizer : mapping from integer index to word.
    width : int
        Number of candidates kept after each expansion step.
    num_neighbors : int
        Number of highest-scoring next tokens tried per candidate.
    top_n : int
        Stop collecting once this many distinct captions were gathered.
    end_idx : int
        Token index that terminates a sequence.
    max_length : int
        The search performs ``max_length - 1`` expansion steps.
        NOTE(review): the start candidate is built with ``template_seq``'s own
        default length, not with this value -- confirm that is intended.
    ignore_idx : list of int
        Token indices excluded from scoring and from the decoded text. The
        default list is only read here, never mutated.
    alpha : float
        Base of the positional weights used by ``SequenceCandidate.probsum``.

    Returns
    -------
    list of (str, score)
        Distinct decoded captions with their ``probsum()``, best first.
    """
    photo_features = get_photo_features(photo_id)

    accepted_sequences = []
    population = [SequenceCandidate.template_seq(ignore_idx = ignore_idx, alpha = alpha)]

    for _ in range(max_length - 1):
        expanded = []
        for cand_seq in population:
            pred = model.predict([photo_features, cand_seq._seq.reshape(1,-1)], verbose=0)[0]

            # argsort is ascending, so the tail holds the most probable tokens.
            for next_idx in pred.argsort()[-num_neighbors:]:
                # Extending would repeat a token pair this candidate already
                # contains: accept the candidate as it is instead.
                if (cand_seq.final_token(), next_idx) in cand_seq._bigrams:
                    accepted_sequences.append(cand_seq)
                    continue

                new_candidate = cand_seq.add_token(next_idx, pred[next_idx])

                if next_idx == end_idx:
                    accepted_sequences.append(new_candidate)
                else:
                    expanded.append(new_candidate)

        # Nothing left to extend: further steps would be no-ops.
        if not expanded:
            population = []
            break

        try:
            population = sorted(expanded)[-width:]
        except Exception:
            # Best effort: if the candidates cannot be ranked, keep them all
            # and stop searching. Narrowed from a bare `except:` so that
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            population = expanded
            break

    # Rank finished and still-open candidates together, best first.
    accepted_sequences = sorted(accepted_sequences + population, reverse = True)

    accepted = 0
    outputwords, outputprobs = [], []
    for acc_seq in accepted_sequences:
        seq_string = acc_seq.to_words(reverse_tokenizer,end_idx)
        # Different token sequences can decode to the same text; keep the first.
        if seq_string not in outputwords:
            outputwords.append(seq_string)
            outputprobs.append(acc_seq.probsum())
            accepted += 1
            if accepted >= top_n:
                break
    return list(zip(outputwords, outputprobs))

In [None]:
im = Image.open("../data/yelp_photos/photos/q82QFxvqPecnz8FWv_TxCw.jpg")

In [None]:
im

In [None]:
generate_predictions_beam_id("q82QFxvqPecnz8FWv_TxCw", model2, reverse_tokenizer, width = 5,
                             num_neighbors=3, top_n = 3, ignore_idx = [4,61,345], alpha = .5)

In [None]:
generate_predictions_beam_id("q82QFxvqPecnz8FWv_TxCw", model2, reverse_tokenizer, width = 5,
                             num_neighbors=3, top_n = 3, ignore_idx = [4,61,345], alpha = .8)

In [None]:
generate_predictions_beam_id("q82QFxvqPecnz8FWv_TxCw", model2, reverse_tokenizer, width = 5,
                             num_neighbors=3, top_n = 3, ignore_idx = [4,61,345], alpha = .9)

In [None]:
im = Image.open("../data/yelp_photos/photos/L5E4Qa0N5tPslTbxgVA7Gg.jpg")

In [None]:
im

In [None]:
generate_predictions_beam_id("L5E4Qa0N5tPslTbxgVA7Gg", model2, reverse_tokenizer, width = 5,
                             num_neighbors=3, top_n = 3, ignore_idx = [4,61,345], alpha = .7)

In [None]:
generate_predictions_beam_id("L5E4Qa0N5tPslTbxgVA7Gg", model2, reverse_tokenizer, width = 5,
                             num_neighbors=3, top_n = 3, ignore_idx = [4,61,345], alpha = .8)

In [None]:
generate_predictions_beam_id("L5E4Qa0N5tPslTbxgVA7Gg", model2, reverse_tokenizer, width = 5,
                             num_neighbors=3, top_n = 3, ignore_idx = [4,61,345], alpha = .9)

In [None]:
list(zip([1,2],[2,4]))

In [None]:
generate_predictions_beam_id("xd3WPCnSnaF4WvOK5X5kdQ", model2, reverse_tokenizer, width = 5, num_neighbors=3, alpha =1.5, top_n = 3)

In [None]:
def write_photos(ids, model1, alpha1, alpha2, folder = "examples/"):
    """Save side-by-side caption comparisons for each photo in ``ids``.

    For every id the photo is opened twice: the left copy is annotated with
    the beam-search captions obtained with ``alpha1``, the right copy with
    those obtained with ``alpha2`` (both using ``model1``). The two copies
    are pasted next to each other and written to ``<folder><photo_id>.jpg``.
    ``folder`` is prepended verbatim, so it must end with a path separator.
    """
    def captioned_copy(photo_id, alpha):
        # Open a fresh copy of the photo and draw one caption per 10-pixel row.
        photo = Image.open("../data/yelp_photos/photos/%s.jpg"%photo_id)
        captions = generate_predictions_beam_id(photo_id, model1,
                                                reverse_tokenizer, width = 5, num_neighbors=3, top_n = 3,
                                                ignore_idx = [4,61,345], alpha = alpha)
        pen = ImageDraw.Draw(photo)
        for row, caption in enumerate(captions):
            # str(tuple)[1:-1] strips the surrounding parentheses.
            pen.text((0,row*10),str(caption)[1:-1],(255,0,0))
        return photo

    for photo_id in ids:
        panels = [captioned_copy(photo_id, alpha1), captioned_copy(photo_id, alpha2)]

        canvas_width = sum(panel.size[0] for panel in panels)
        canvas_height = max(panel.size[1] for panel in panels)
        canvas = Image.new('RGB', (canvas_width, canvas_height))

        x_offset = 0
        for panel in panels:
            canvas.paste(panel, (x_offset,0))
            x_offset += panel.size[0]

        canvas.save("%s%s.jpg"%(folder,photo_id))

In [None]:
mkdir examples


In [None]:
photo_dir = "../data/yelp_photos/photos/"

In [None]:
im = Image.open("../data/yelp_photos/photos/To6WAH5Rtok5ORBLyopnAw.jpg")



In [None]:
descs = generate_predictions_beam_id("To6WAH5Rtok5ORBLyopnAw",
                                     model1, reverse_tokenizer, width = 5, num_neighbors=3, alpha =1.5, top_n = 3)

In [None]:
draw = ImageDraw.Draw(im)

# One caption per 10-pixel row. Iterating over `descs` (instead of indexing
# 0..2) avoids an IndexError when fewer than three captions are returned.
for i, desc in enumerate(descs):
    # str(tuple)[1:-1] strips the surrounding parentheses.
    draw.text((0, i * 10),str(desc)[1:-1],(255,0,0))


In [None]:
im

In [None]:
# Seed NumPy's global RNG so the same photo ids are drawn on every run.
np.random.seed(80)
# NOTE(review): np.random.choice samples with replacement by default, so
# `choices` may contain repeated ids -- confirm that is intended.
choices = np.random.choice(valid_ids, 100)
choices

In [None]:
mkdir alpha7vs8

In [None]:
mkdir alpha6vs7

In [None]:
write_photos(choices,model2,alpha1 = .6, alpha2 = .7, folder = "alpha6vs7/" )

In [None]:
# NOTE(review): this cell contained a pasted copy of the `write_photos`
# signature with a trailing colon, which is a SyntaxError and stops
# "Run All". Kept here for reference only:
#     write_photos(ids, model1, alpha1, alpha2, folder = "examples/")
# TODO: replace with a real call (concrete ids, model and alpha values) or
# delete the cell.

In [None]:
all_images = [Image.open(photo_dir + c + ".jpg") for c in choices]

In [None]:
# Write a greedy caption onto every sampled photo and save it as a PNG.
# NOTE(review): the original referenced an undefined name `model`; `model1`
# is assumed here -- switch to `model2` if the inject model was intended.
# Output goes to a relative directory instead of a user-specific absolute path.
output_dir = "funny_images/"
os.makedirs(output_dir, exist_ok=True)

for photo_id, im in zip(choices, all_images):
    desc = generate_predictions_id(photo_id, model = model1,
                                   tokenizer = tokenizer, reverse_tokenizer = reverse_tokenizer)
    draw = ImageDraw.Draw(im)

    draw.text((0, 0),desc,(255,255,255))
    im.save("%s%s.png"%(output_dir, photo_id), "PNG")

---

In [None]:
im = Image.open("../data/yelp_photos/photos/J4kY0vanHHvkjpim7vr8nA.jpg")
im

In [None]:
im = Image.open("../data/yelp_photos/photos/J4kY0vanHHvkjpim7vr8nA.jpg")
im

In [None]:
im = Image.open('../data/yelp_photos/photos/J4kY0vanHHvkjpim7vr8nA.jpg')
im

In [None]:
im = Image.open("../data/yelp_photos/photos/J4kY0vanHHvkjpim7vr8nA.jpg")
im

In [None]:
im = Image.open('../data/yelp_photos/photos/795n_ZFZFum0R6IMXv4iow.jpg')
im

In [None]:
im = Image.open('../data/yelp_photos/photos/YSixHEXmKUt47ws43y1ckA.jpg')
im

In [None]:
im = Image.open('../data/yelp_photos/photos/HZ9M0E7e3MkGVqsWay9MJQ.jpg')
im

In [None]:
im = Image.open('../data/yelp_photos/photos/N8cpFCz6XnkcWZ_vvuzHUg.jpg')
im

In [None]:
im = Image.open('../data/yelp_photos/photos/GIj4ab48r_rHqdMHtPfEwQ.jpg')
im

In [None]:
im = Image.open('../data/yelp_photos/photos/f6PktcNbaBU5XJdDmwsuBA.jpg')
im

In [None]:
im = Image.open('../data/yelp_photos/photos/FmCbppb6mM73ZkTtVYQAZA.jpg')
im

In [None]:
im = Image.open('../data/yelp_photos/photos/c5svSfbZTSEi5F8fv4n3og.jpg')
im

In [None]:
im = Image.open('../data/yelp_photos/photos/DVU9SwtVRfpDGvTwJiKo-Q.jpg')
im

In [None]:
im = Image.open('../data/yelp_photos/photos/f_tVArgZQ7941ggF3Rvt6A.jpg')
im

In [None]:
im = Image.open('../data/yelp_photos/photos/FCkWALul8LBRMQppYq8tvg.jpg')
im

In [None]:
im = Image.open('../data/yelp_photos/photos/q1jtcgo0j-ukiO1ANHkgGg.jpg')
im

In [None]:
im = Image.open('../data/yelp_photos/photos/bkbfhk_tn4NtiiCDbgLWqg.jpg')
im

In [None]:
im = Image.open('../data/yelp_photos/photos/tcpjFKWdLrHzzUujORqsEQ.jpg')
im

In [None]:
im = Image.open('../data/yelp_photos/photos/iGefVUYU9dq5WC7yGjanJg.jpg')
im

In [None]:
im = Image.open('../data/yelp_photos/photos/4ug7V8ep_SsNfFzuZi4pGw.jpg')
im

In [None]:
im = Image.open('../data/yelp_photos/photos/SXVZ2eNglIexXlETW3_WNw.jpg')
im

In [None]:
im = Image.open('../data/yelp_photos/photos/l3X2RU4K9So7zpRPyE47Og.jpg')
im

In [None]:
im = Image.open('../data/yelp_photos/photos/0WEM3KzBtwBq21232FDDwQ.jpg')
im

In [None]:
im = Image.open('../data/yelp_photos/photos/fXfqzKdLWyLTFWk4sJ7HkQ.jpg')
im

In [None]:
im = Image.open('../data/yelp_photos/photos/AWHas5jH6AlHLZ78LoLL-w.jpg')
im

In [None]:
im = Image.open('../data/yelp_photos/photos/Y8P-q4UUfzJaoEDo6_Zerg.jpg')
im

In [None]:
im = Image.open('../data/yelp_photos/photos/bb2poXMhUXW2bRK0jjosHw.jpg')
im

In [None]:
im = Image.open('../data/yelp_photos/photos/O4C7riqw83InALQEF50GoQ.jpg')
im