In [1047]:
!export CUDA_VISIBLE_DEVICES='2'

In [1048]:
import pickle
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '2'
import torch
import json
import pandas as pd
import numpy as np
from tqdm import tqdm

In [1049]:
## using the train embeddings given
# Later cells read d_train[0], d_train[1] and d_train[2] as image features,
# text features and recipe ids respectively.
# NOTE(review): pickle.load can execute arbitrary code -- only load trusted files.
with open('full/embeddings_train1.pkl', 'rb') as f:  # `with` closes the handle after loading
    d_train = pickle.load(f)

In [1050]:
### Train recipe ids, each paired with its row position in the embedding arrays.
# reset_index() exposes that position as an 'index' column; the id becomes the
# frame index so later joins can key on it.
df_train = (
    pd.DataFrame({'id': d_train[2]})
    .reset_index()
    .set_index('id')
)
df_train.shape

(281598, 1)

In [1051]:
## using the test embeddings given
# Later cells read d_test[0], d_test[1] and d_test[2] as image features,
# text features and recipe ids respectively.
# NOTE(review): pickle.load can execute arbitrary code -- only load trusted files.
with open('full/embeddings_test1.pkl', 'rb') as f:  # `with` closes the handle after loading
    d_test = pickle.load(f)

In [1042]:
### Test recipe ids, each paired with its row position in the embedding arrays.
# reset_index() exposes that position as an 'index' column; the id becomes the
# frame index so later joins can key on it.
df_test = (
    pd.DataFrame({'id': d_test[2]})
    .reset_index()
    .set_index('id')
)
df_test.shape

(60740, 1)

In [14]:
# Data maps to map text and images to each other
TEXT_LAYERS = 'recipe1M_layers/layer2.json'
with open(TEXT_LAYERS, 'rb') as f:  # `with` closes the handle once the JSON is parsed
    data_map = json.load(f)

In [17]:
# Recipe id -> id of the recipe's first listed image (recipes without images are skipped).
data_map_list = [
    [recipe['id'], recipe['images'][0]['id']]
    for recipe in data_map
    if recipe['images']
]
# A freshly built frame already has a 0..n-1 index, so no index reset is needed
# before keying the frame by recipe id.
data_map_df = (
    pd.DataFrame(data_map_list, columns=['id', 'image_id'])
    .astype({'id': str})
    .set_index('id')
)
data_map_df.shape

(402760, 1)

In [19]:
# extracting text information for title
TEXT_LAYERS_1 = 'recipe1M_layers/layer1.json'
with open(TEXT_LAYERS_1, 'rb') as f:  # `with` closes the handle once the JSON is parsed
    text_map = json.load(f)

# One [recipe id, title] pair per entry of the layer-1 file.
text_data_list = [[recipe['id'], recipe['title']] for recipe in text_map]

# reset_index() keeps each recipe's position in the file as an 'index' column
# (it shows up as 'index_right' after the later join); the id becomes the frame index.
text_map_df = (
    pd.DataFrame(text_data_list, columns=['id', 'title'])
    .astype({'id': str})
    .reset_index()
    .set_index('id')
)
text_map_df.head()

Unnamed: 0_level_0,index,title
id,Unnamed: 1_level_1,Unnamed: 2_level_1
000018c8a5,0,Worlds Best Mac and Cheese
000033e39b,1,Dilly Macaroni Salad Recipe
000035f7ed,2,Gazpacho
00003a70b1,3,Crunchy Onion Potato Bake
00004320bb,4,Cool 'n Easy Creamy Watermelon Pie


In [41]:
# Attach image id and lower-cased title to every train recipe id.
# Both df_train and text_map_df carry an 'index' column, hence the suffixes:
# index_left = row in the embedding arrays, index_right = row in the title file.
train_data_map_df = (
    df_train
    .join(data_map_df)
    .join(text_map_df, lsuffix='_left', rsuffix='_right')
    .assign(title=lambda frame: frame['title'].str.lower())
)
train_data_map_df.head()

Unnamed: 0_level_0,index_left,image_id,index_right,title
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
000075604a,0,6bdca6e490.jpg,6,kombu tea grilled chicken thigh
00007bfd16,1,6409eab844.jpg,7,strawberry rhubarb dump cake
000095fc1d,2,a1374cdd98.jpg,8,yogurt parfaits
0000b1e2b5,3,cb1a684683.jpg,11,fennel-rubbed pork tenderloin with roasted fen...
0000c79afb,4,2f4b4c4452.jpg,12,pink sangria


In [42]:
# Attach image id and lower-cased title to every test recipe id.
# Both df_test and text_map_df carry an 'index' column, hence the suffixes:
# index_left = row in the embedding arrays, index_right = row in the title file.
test_data_map_df = (
    df_test
    .join(data_map_df)
    .join(text_map_df, lsuffix='_left', rsuffix='_right')
    .assign(title=lambda frame: frame['title'].str.lower())
)
test_data_map_df.head()

Unnamed: 0_level_0,index_left,image_id,index_right,title
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
00003a70b1,0,3e233001e2.jpg,3,crunchy onion potato bake
00047059be,1,1657f23729.jpg,61,butternut squash soup or bisque (roasting method)
00059477e2,2,3020f58577.jpg,83,gyro meat loaf w/ tzatziki sauce
000731b459,3,1b970a4a0d.jpg,105,"cauliflower ""mac & cheese"" bake"
0007a28fe7,4,6187a834d7.jpg,112,calico beans


In [36]:
# Wrap the pickled feature arrays as tensors: slot 0 feeds the image features,
# slot 1 the text features.
train_img_features, train_text_features = (torch.tensor(d_train[slot]) for slot in (0, 1))
test_img_features, test_text_features = (torch.tensor(d_test[slot]) for slot in (0, 1))

train_text_features.shape, test_text_features.shape

(torch.Size([281598, 1024]), torch.Size([60740, 1024]))

In [1018]:
from cca_zoo.models.rcca import CCA

# presumably the number of latent CCA dimensions -- TODO confirm against the cca_zoo docs
N_LATENT_DIMS = 50
model = CCA(N_LATENT_DIMS)

In [1019]:
# Fit CCA on the paired training views, text first and image second.
train_views = (train_text_features, train_img_features)
model = model.fit(train_views)

In [1020]:
# Project both training views with the fitted model (same view order as in fit).
(
    trans_train_text_features,
    trans_train_img_features,
) = model.transform((train_text_features, train_img_features))

In [1021]:
# Project both test views with the fitted model (same view order as in fit).
(
    trans_test_text_features,
    trans_test_img_features,
) = model.transform((test_text_features, test_img_features))

In [1060]:
# Accumulator for the per-query result dicts appended by the retrieval cell.
full_data = list()

In [1069]:
# Mean CCA-projected text embedding for two title concepts.
# NOTE: the names just_cake / cup_cake / cake_mean / cup_cake_mean come from a
# cake -> cup-cake experiment and are kept because the retrieval cell reads them,
# but the keywords selected here are 'lasagna' (the concept that cell subtracts)
# and 'salad' (the concept it adds).
# na=False: a recipe whose title is missing after the left join counts as "no match"
# instead of putting NaN into the mask, which boolean indexing rejects.
train_titles = train_data_map_df['title']
all_cake = train_data_map_df[train_titles.str.contains('cake', na=False) & train_titles.str.contains('chocolate', na=False)]
just_cake = train_data_map_df[train_titles.str.contains('lasagna', na=False)]
cup_cake = train_data_map_df[train_titles.str.contains('salad', na=False)]

# index_left is the row of each recipe inside the train embedding arrays.
cake_mean = np.mean(trans_train_text_features[just_cake['index_left']], axis=0)
cup_cake_mean = np.mean(trans_train_text_features[cup_cake['index_left']], axis=0)
just_cake.shape, cup_cake.shape

((1277, 4), (17210, 4))

In [1070]:
# Browse test recipes whose title mentions 'chicken lasagna' to pick a query id.
test_data_map_df.loc[test_data_map_df['title'].str.contains('chicken lasagna')]

Unnamed: 0_level_0,index_left,image_id,index_right,title
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
26545a1c8f,9053,b4b09acf9a.jpg,154311,mexican-style chicken lasagna
26a43f0aac,9115,6ebdb2e7a3.jpg,155591,white chicken lasagna roll ups
26d336442c,9153,560a449b4a.jpg,156327,cajun chicken lasagna
2f433dfa9e,11161,13726cedaf.jpg,190144,buffalo chicken lasagna
50a268be5a,19159,19bb22e585.jpg,324485,spicy chicken lasagna roll-ups
731ff54f43,27292,5ead00c4f2.jpg,463094,taco fiesta chicken lasagna
7a1c278a06,28958,c6e18a2892.jpg,490975,chicken lasagna florentine
7b821a3049,29249,8dffd86a1a.jpg,496543,skillet chicken lasagna roll-ups
b530e1a7a3,43001,1d5578f785.jpg,728729,incredible chicken lasagna
b8f73297e3,43944,a3b448c4f7.jpg,744008,"buffalo chicken lasagna (aka buffalo ""dew-sagna"")"


In [1071]:
# doing the arithmetic operation for chocolate cake - cake + cupcake = chocolate cupcake
# (variable names follow that example; the concepts actually subtracted and added are
# whatever cake_mean and cup_cake_mean were built from.)
TOP_K = 4  # number of nearest neighbours reported for each query


def _top_k_indices(query, gallery, k=TOP_K):
    """Return the positions of the k gallery rows with the highest dot-product score.

    query is a single vector (1-D, or a one-row matrix) and gallery holds one
    candidate per row. Positions come back in ascending score order, so the
    best match is the last element.
    """
    return np.dot(query, gallery.T).argsort().reshape(-1, )[-k:]


data = {}

_id = "d314d0f594"
# Filter the test frame once and reuse the match for title, image id and feature row.
test_matches = test_data_map_df[test_data_map_df.index == _id]
if test_matches.empty:
    raise KeyError(f"recipe id {_id!r} not found in test_data_map_df")
test_row = test_matches.iloc[0]
data['test_image'] = {'id' : _id, 'title': test_row['title'], 'image_id': test_row['image_id']}

# Query image feature, then its TOP_K nearest train images in the raw feature space.
# The ranking is computed once and reused for both the mean and the neighbour table.
chocolate_cake = test_img_features[test_matches['index_left']]
nn4_positions = _top_k_indices(chocolate_cake, train_img_features)
chocolate_cake_mean = np.mean(trans_train_img_features[nn4_positions], axis=0)
nn4_df = train_data_map_df.iloc[nn4_positions]

# Train images closest (in CCA space) to each of the two concept vectors.
cake_mean_nn4_df = train_data_map_df.iloc[_top_k_indices(cake_mean, trans_train_img_features)]
cup_cake_mean_nn4_df = train_data_map_df.iloc[_top_k_indices(cup_cake_mean, trans_train_img_features)]

data['closest'] = [[title, image_id] for title, image_id in zip(nn4_df['title'], nn4_df['image_id'])]

# NOTE(review): 'closet_*' looks like a typo for 'closest_*'; the keys are left unchanged
# because they are part of the emitted record format.
data['closet_minus'] = [
    [title, image_id]
    for title, image_id in zip(cake_mean_nn4_df['title'], cake_mean_nn4_df['image_id'])
]

data['closet_plus'] = [
    [title, image_id]
    for title, image_id in zip(cup_cake_mean_nn4_df['title'], cup_cake_mean_nn4_df['image_id'])
]

# query - concept_to_remove + concept_to_add, then retrieve the single best train image.
chocolate_cup_cake = chocolate_cake_mean.reshape(1, -1) - 1 * cake_mean.reshape(1, -1) + 1 * cup_cake_mean.reshape(1, -1)
retrieved_df = train_data_map_df.iloc[np.argmax(np.dot(chocolate_cup_cake, trans_train_img_features.T))]
data['retr'] = {'title': retrieved_df['title'], 'image_id': retrieved_df['image_id']}
full_data.append(data)

In [1072]:
# Display every retrieval record collected in full_data so far.
full_data

[{'test_image': {'id': '032752603d',
   'title': 'guinness chocolate cake',
   'image_id': '5b55b733a6.jpg'},
  'closest': [['chocolate-guinness cheesecake', 'ed01c82813.jpg'],
   ['sweet or sour cream chocolate cake', '1171377fd6.jpg'],
   ['straight to your hips chocolate cake', '0e9a3741ae.jpg'],
   ['vegan chocolate cake', 'fcca9a1d51.jpg']],
  'closet_minus': [["buttermilk devil's food cake with freshly squeezed orange cream cheese frosting",
    'f0c3e43775.jpg'],
   ['orange chocolate marble cake with orange buttercream frosting',
    'a549018052.jpg'],
   ['rum caramel cake (cake mix)', 'e327c921ce.jpg'],
   ['chocolate buttermilk cake', '2c5255ee09.jpg']],
  'closet_plus': [['fresh orange cupcakes with double chocolate frosting',
    'def4897df0.jpg'],
   ['hi-hat cupcake', 'b97c7b9132.jpg'],
   ['peppermint mocha cupcakes', '0aef51313a.jpg'],
   ['devils food cupcakes with vanilla buttercream', 'cfe534dad6.jpg']],
  'retr': {'title': 'chocolate cupcakes with chocolate cream c