# Multi-Input Models Learning
## A multi-model deep learning approach for image recognition

#### This Jupyter notebook is for the article [Deep Multi-Input Models Transfer Learning for Image and Word Tag Recognition](https://towardsdatascience.com/deep-multi-input-models-transfer-learning-for-image-and-word-tag-recognition-7ae0462253dc) in Towards Data Science.


In [0]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os, shutil
import cv2
from os import listdir
from os.path import isfile, join
import string
import re
import nltk
from keras import layers
from keras import Input
from keras.models import Model
from keras import models

%matplotlib inline

In [0]:
#original_dataset_dir = '/Users/yuhuang/yuefeng/machine-learning/deep_learning/multi_input_models_learning/data/ESPGame100k'
#base_dataset_dir = '/Users/yuhuang/yuefeng/machine-learning/deep_learning/multi_input_models_learning/data/ESPGame100k_small'

In [0]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
!pwd
%cd drive
!ls

/content
/content/drive
'My Drive'


In [0]:
%cd 'My Drive'

/content/drive/My Drive


In [0]:
%cd images/
%cd train/

/content/drive/My Drive/images
/content/drive/My Drive/images/train


In [0]:
import cv2
import os
images_list = listdir()
train = []
for i in images_list:
  train.append(cv2.imread(i))


In [0]:
len(train)

716

In [0]:
%cd ../test/

/content/drive/My Drive/images/test


In [0]:
images_list = listdir()
test = []
for i in images_list:
  test.append(cv2.imread(i))

In [0]:
len(test)

213

In [0]:
%cd ../val/
images_list = listdir()
val = []
for i in images_list:
  val.append(cv2.imread(i))

/content/drive/My Drive/images/val


In [0]:
len(val)

131

In [0]:
%cd ../../
import pickle
file = open('ingr.pkl','rb')
ingredients = pickle.load(file)
file.close()

/content/drive/My Drive


In [0]:
len(ingredients)

1060

### setup a small dataset

In [0]:
# if not os.path.isdir(base_dataset_dir):
#     os.mkdir(base_dataset_dir)
    
# small_label_path = os.path.join(base_dataset_dir, 'labels')
# small_image_path = os.path.join(base_dataset_dir, 'thumbnails')
# if not os.path.isdir(small_label_path):
#     os.mkdir(small_label_path)
# else:
#     shutil.rmtree(small_label_path)
#     os.mkdir(small_label_path)
    
# if not os.path.isdir(small_image_path):
#     os.mkdir(small_image_path)
# else:
#     shutil.rmtree(small_image_path)
#     os.mkdir(small_image_path)

In [0]:
# copy label files
# for fname in original_label_files[:2000]:
#     src = os.path.join(original_label_path, fname)
#     dst = os.path.join(small_label_path, fname)
#     shutil.copyfile(src, dst)

In [0]:
# copy image files
# for fname in original_label_files[:2000]:
#     img_fname = fname[:-5]
#     src = os.path.join(original_image_path, img_fname)
#     dst = os.path.join(small_image_path, img_fname)
#     shutil.copyfile(src, dst)

In [0]:
import json
det_ingrs = json.load(open('det_ingrs.json', 'r'))

In [0]:
len(det_ingrs)

1060

In [0]:
label_map = {'id' : [], 'word_tags' : [], 'word_tags_nostop' : [], 'word_tags_clean' : []}
for i in range(len(det_ingrs)): 
  label_map['id'].append(det_ingrs[i]['id'])
  lis = []
  for j in range(len(det_ingrs[i]['ingredients'])):
    lis.append(det_ingrs[i]['ingredients'][j]['text'])
  label_map['word_tags'].append(lis)
  label_map['word_tags_nostop'].append(lis)
  label_map['word_tags_clean'].append(lis)
label_df = pd.DataFrame(label_map)
label_df.head()

Unnamed: 0,id,word_tags,word_tags_nostop,word_tags_clean
0,213449,"[ olive oil, green bell pepper,, sweet onion,...","[ olive oil, green bell pepper,, sweet onion,...","[ olive oil, green bell pepper,, sweet onion,..."
1,61270,"[ refrigerated gnocchi, unsalted butter, shal...","[ refrigerated gnocchi, unsalted butter, shal...","[ refrigerated gnocchi, unsalted butter, shal..."
2,216629,"[ ground turkey, can tomatoes, can tomato sa...","[ ground turkey, can tomatoes, can tomato sa...","[ ground turkey, can tomatoes, can tomato sa..."
3,216909,"[ jalapeno peppers, lengthwise, package cream...","[ jalapeno peppers, lengthwise, package cream...","[ jalapeno peppers, lengthwise, package cream..."
4,112239,"[ graham cracker crumbs, butter, melted, whit...","[ graham cracker crumbs, butter, melted, whit...","[ graham cracker crumbs, butter, melted, whit..."


In [0]:
# label_map = {'label_file' : [], 'word_tags' : []}
# for fname in listdir(): 
#     # f = join(small_label_path, fname)
#     f = fname
#     if isfile(f):
#         f = open(f)
#         label_map['label_file'].append(fname)
#         line = f.read().splitlines()
#         label_map['word_tags'].append(line)
# label_df = pd.DataFrame(label_map)
# label_df.head()

In [0]:
label_df.shape

(1060, 4)

In [0]:
# class Preprocessing(object):
#     def __init__(self, data, source_column_name='word_tags', target_column_name='word_tags_clean'):
#         self.data = data
#         self.source_column_name = source_column_name
#         self.feature_name = target_column_name
        
#     def remove_punctuation(self, text):
#         text_nopunct = "".join([char for char in text if char not in string.punctuation])# It will discard all punctuations
#         return text_nopunct
    
#     def tokenize(self, text):
#         # Match one or more characters which are not word character
#         tokens = re.split('\W+', text) 
#         return tokens
    
#     def remove_stopwords(self, tokenized_list):
#         # Remove all English Stopwords
#         stopword = nltk.corpus.stopwords.words('english')
#         text = [word for word in tokenized_list if word not in stopword]
#         return text
    
#     def stemming(self, tokenized_text):
#         ps = nltk.PorterStemmer()
#         text = [ps.stem(word) for word in tokenized_text]
#         return text
    
#     def lemmatizing(self, tokenized_text):
#         wn = nltk.WordNetLemmatizer()
#         text = [wn.lemmatize(word) for word in tokenized_text]
#         return text
    
#     def alnum(self, text):
#         def hasNumbers(inputString):
#             return any(char.isdigit() for char in inputString)
        
#         text = [word for word in text if len(word) > 0 and word.isalnum() and not word.isdigit() and not hasNumbers(word)]
#         return text
    
#     def tokens_to_string(self, tokens_string):
#         try:
#             list_obj = ast.literal_eval(tokens_string)
#             text = " ".join(list_obj)
#         except:
#             text = None
#         return text
    
#     def dropna(self):
#         feature_name = self.feature_name
#         if self.data[feature_name].isnull().sum() > 0:
#             column_list=[feature_name]
#             self.data = self.data.dropna(subset=column_list)
#             return self.data
        
#     def preprocessing(self):
#         nopunc = self.source_column_name + '_nopunc'
#         tokenized = self.source_column_name + '_tokenized'
#         nostop = self.source_column_name + '_nostop'
#         stemmed = self.source_column_name + '_stemmed'
#         lemmatized = self.source_column_name + '_lemmatized'
#         self.data[nostop] = self.data[self.source_column_name].apply(lambda x: self.remove_stopwords(x))
#         self.data[self.feature_name] = self.data[nostop].apply(lambda x: self.alnum(x))  
#         return self.data
    
#     def save(self, filepath="./data/tags_cleaned.csv"):
#         self.data.to_csv(filepath, index=False, sep=',')  
        
#     def load(self, filepath="./data/tags_cleaned.csv"):
#         self.data = pd.read_csv(filepath)
#         return self.data

In [0]:
# import nltk
# nltk.download('stopwords')

In [0]:
# pp = Preprocessing(label_df.copy(), source_column_name='word_tags', target_column_name='word_tags_clean')
# label_df_clean = pp.preprocessing()

In [0]:
# label_df_clean.head()

In [0]:
# non_empty_tags = label_df_clean['word_tags_clean'].apply(lambda x: len(x) > 0)
# label_df_clean = label_df_clean[non_empty_tags]

In [0]:
# label_df_clean.head()

In [0]:
# label_df_clean.shape

In [0]:
num_of_samples = label_df.shape[0]
num_of_samples

1060

## GloVe
* glove.6B.zip: https://nlp.stanford.edu/projects/glove/

In [0]:
!pip install glove_python
import itertools
from gensim.models.word2vec import Text8Corpus
from glove import Corpus, Glove
# sentences and corpus from standard library
#sentences = list(itertools.islice(Text8Corpus('text8'),None))
corpus = Corpus()
# fitting the corpus with sentences and creating Glove object
corpus.fit(label_df['word_tags_clean'], window=10)
glove = Glove(no_components=100, learning_rate=0.05)
# fitting to the corpus and adding standard dictionary to the object
glove.fit(corpus.matrix, epochs=30, no_threads=4, verbose=True)
glove.add_dictionary(corpus.dictionary)

Collecting glove_python
[?25l  Downloading https://files.pythonhosted.org/packages/3e/79/7e7e548dd9dcb741935d031117f4bed133276c2a047aadad42f1552d1771/glove_python-0.1.0.tar.gz (263kB)
[K     |█▎                              | 10kB 19.3MB/s eta 0:00:01[K     |██▌                             | 20kB 2.2MB/s eta 0:00:01[K     |███▊                            | 30kB 3.2MB/s eta 0:00:01[K     |█████                           | 40kB 2.1MB/s eta 0:00:01[K     |██████▎                         | 51kB 2.6MB/s eta 0:00:01[K     |███████▌                        | 61kB 3.1MB/s eta 0:00:01[K     |████████▊                       | 71kB 3.6MB/s eta 0:00:01[K     |██████████                      | 81kB 4.1MB/s eta 0:00:01[K     |███████████▏                    | 92kB 4.6MB/s eta 0:00:01[K     |████████████▌                   | 102kB 3.5MB/s eta 0:00:01[K     |█████████████▊                  | 112kB 3.5MB/s eta 0:00:01[K     |███████████████                 | 122kB 3.5MB/s eta 0:

In [0]:
glove.word_vectors[glove.dictionary['triple sec']]

array([ 5.71214887e-03, -5.89350519e-03, -1.60091226e-03,  8.28913193e-04,
       -1.00296716e-03,  1.96757490e-03,  5.09616339e-03,  1.96024513e-03,
       -6.32402205e-04,  8.53898246e-04, -4.13814571e-03,  6.13668728e-03,
       -1.00000454e-02, -9.27100371e-03,  1.59657527e-03,  4.26766266e-04,
       -1.30010706e-03,  4.03243917e-03, -7.79037616e-03, -1.76995262e-03,
       -4.21931615e-04,  6.07639316e-03,  1.11135988e-03, -5.40745776e-03,
       -5.15257178e-03, -7.56238519e-03, -1.28157651e-03, -2.77036419e-03,
       -4.70744496e-03, -3.32821978e-03, -1.24731020e-02, -4.29327186e-03,
       -1.09202058e-03,  3.03794577e-03, -4.51211296e-04,  4.46345799e-03,
        5.29838514e-04,  4.98488422e-03, -8.15194692e-03,  1.61505853e-03,
        7.05844851e-03, -5.15629398e-03, -7.98486510e-03, -3.01534647e-03,
       -4.79282533e-03,  6.56547143e-03,  3.05603648e-03, -1.41411484e-03,
       -4.28856147e-03,  4.11186131e-04, -3.30255892e-03, -5.53285997e-03,
       -8.43137765e-03, -

In [0]:
for i in glove.dictionary:
  print (glove.word_vectors[glove.dictionary[i]].shape)

In [0]:
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

maxlen = 100
training_samples = 742
tag_vocabulary_size = 10000
max_words = tag_vocabulary_size

In [0]:
# #glove_dir = '/Users/yuhuang/yuefeng/machine-learning/deep_learning/multi_input_models_learning/data/'

embeddings_index = {}
f = open('glove.6B.100d.txt')
for line in f:
    values = line.split()
    word = values[0]
    coefs = np.asarray(values[1:], dtype='float32')
    embeddings_index[word] = coefs
f.close()

print('Found %s word vectors.' % len(embeddings_index))

Found 400000 word vectors.


In [0]:
tokenizer = Tokenizer(num_words=max_words)
texts = []
for tag_list in label_df['word_tags']:
    texts.append(' '.join(tag_list))
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)
word_index = tokenizer.word_index
print('Found {} unique tokens'.format(len(word_index)))
tag_data = pad_sequences(sequences, maxlen=maxlen)

Found 1386 unique tokens


In [0]:
word_index.items()

dict_items([('salt', 1), ('pepper', 2), ('ground', 3), ('sugar', 4), ('white', 5), ('butter', 6), ('oil', 7), ('cheese', 8), ('black', 9), ('flour', 10), ('all', 11), ('purpose', 12), ('onion', 13), ('water', 14), ('or', 15), ('milk', 16), ('sauce', 17), ('powder', 18), ('can', 19), ('eggs', 20), ('cream', 21), ('olive', 22), ('dried', 23), ('vanilla', 24), ('extract', 25), ('baking', 26), ('package', 27), ('egg', 28), ('vegetable', 29), ('red', 30), ('clovegarlic', 31), ('garlic', 32), ('shredded', 33), ('vinegar', 34), ('brown', 35), ('green', 36), ('grated', 37), ('beef', 38), ('dry', 39), ('large', 40), ('cut', 41), ('lemon', 42), ('cayenne', 43), ('juice', 44), ('cinnamon', 45), ('soda', 46), ('softened', 47), ('for', 48), ('bread', 49), ('chocolate', 50), ('melted', 51), ('fluid', 52), ('broth', 53), ('parsley', 54), ('chicken', 55), ('mustard', 56), ('heavy', 57), ('mix', 58), ('hot', 59), ('cheddar', 60), ('divided', 61), ('tomatoes', 62), ('wine', 63), ('potatoes', 64), ('toma

In [0]:
tag_data.shape

(1060, 100)

In [0]:
embedding_dim = 100

embedding_matrix = np.zeros((max_words, embedding_dim))
for word, i in word_index.items():
    embedding_vector = embeddings_index.get(word)
    if i < max_words:
        if embedding_vector is not None:
            # Words not found in embedding index will be all-zeros.
            embedding_matrix[i] = embedding_vector

In [0]:
tag_data.shape

(1060, 100)

In [0]:
tag_input = Input(shape=(None,), dtype='int32', name='tag')
embedded_tag = layers.Embedding(max_words, embedding_dim)(tag_input)
encoded_tag = layers.LSTM(256)(embedded_tag)

NameError: ignored

## Conv2D

In [0]:
from keras.applications import VGG16

image_input = Input(shape=(150, 150, 3), name='image')
vgg16 = VGG16(weights='imagenet',
                  include_top=False,
                  input_shape=(150, 150, 3))(image_input)
x = layers.Flatten()(vgg16) 
x = layers.Dense(64, activation='relu')(x)

In [0]:
concatenated = layers.concatenate([x, encoded_tag], axis=-1)
output = layers.Dense(1, activation='relu')(concatenated)

model = Model([image_input, tag_input], output)

model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])

In [0]:
model.summary()

Model: "model_8"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
image (InputLayer)              (None, 150, 150, 3)  0                                            
__________________________________________________________________________________________________
vgg16 (Model)                   (None, 4, 4, 512)    14714688    image[0][0]                      
__________________________________________________________________________________________________
tag (InputLayer)                (None, None)         0                                            
__________________________________________________________________________________________________
flatten_5 (Flatten)             (None, 8192)         0           vgg16[1][0]                      
____________________________________________________________________________________________

In [0]:
# model.layers[1].trainable = False # freeze VGG16 convolutional base
model.layers[4].set_weights([embedding_matrix])
model.layers[4].trainable = False # freeze GloVe word embedding

In [0]:
%cd ../

/content/drive/My Drive


In [0]:
!pwd

/content/drive/My Drive


In [0]:
layer1 = json.load(open('layer1.json', 'r'))

In [0]:
len(layer1)

1060

In [0]:
dim = (150, 150)
X_image_train = []
X_tag_train = tag_data
y_train = []
%cd allphotos/
for fname in listdir():
    # fpath = os.path.join(small_image_path, fname)
    im = cv2.imread(fname)
    im_resized = cv2.resize(im, dim, interpolation = cv2.INTER_AREA)
    #im_resized = im
    X_image_train.append(im_resized)
    y_train.append(1)

[Errno 2] No such file or directory: 'allphotos/'
/content/drive/My Drive/allphotos


In [0]:
len(X_image_train)

1060

In [0]:
len(y_train)

1060

In [0]:
X_image_train[0].shape

(150, 150, 3)

In [0]:
# add wrong tag samples
num_negative_samples = len(y_train)
for i in range(num_negative_samples):
    image = X_image_train[i]
    X_image_train.append(image)
    j = (i + 1) % num_negative_samples # get a different tag
    tag = X_tag_train[j]
    X_tag_train = np.append(X_tag_train, tag) 
    y_train.append(0)

In [0]:
len(X_image_train)

2120

In [0]:
X_image_train = np.array(X_image_train)
X_tag_train   = np.array(X_tag_train)
y_train       = np.array(y_train)

In [0]:
X_image_train_n = (X_image_train - np.mean(X_image_train, axis = 0) )/np.ndarray.max(X_image_train, axis = 0)
X_tag_train_n = (X_tag_train - np.mean(X_tag_train, axis = 0))/np.ndarray.max(X_tag_train, axis = 0)

In [0]:
np.shape(y_train)

(2120,)

In [0]:
X_image_train[0].shape

(150, 150, 3)

In [0]:
perm = np.arange(y_train.shape[0])
np.random.shuffle(perm)
X_image_train = X_image_train[perm]
X_tag_train   = X_tag_train[perm]
y_train       = y_train[perm]

In [0]:
X_image_train.shape, X_tag_train.shape, y_train.shape

((2120, 150, 150, 3), (2120,), (2120,))

In [0]:
X_image_tr = X_image_train[:1700]
X_image_te = X_image_train[1700:1900]
X_image_val = X_image_train[1900:]
X_tag_tr = X_tag_train[:1700]
X_tag_te = X_tag_train[1700:1900]
X_tag_val = X_tag_train[1900:]
y_tr = y_train[:1700]
y_te = y_train[1700:1900]
y_val = y_train[1900:]

In [0]:
model.fit([X_image_tr, X_tag_tr], y_tr, epochs=10, batch_size=64)

Epoch 1/10
 384/1700 [=====>........................] - ETA: 18:24 - loss: 8.0171 - acc: 0.5026

KeyboardInterrupt: ignored

In [0]:
from keras.models import model_from_json

# serialize model to JSON
model_json = model.to_json()
with open("multi-input-models-epoch-30.json", "w") as json_file:
    json_file.write(model_json)
# serialize weights to HDF5
model.save_weights("multi-input-models-epoch-30.h5")
print("Saved model to disk")
 
# load json and create model
json_file = open('multi-input-models-epoch-30.json', 'r')
loaded_model_json = json_file.read()
json_file.close()

loaded_model = model_from_json(loaded_model_json)
# load weights into new model
loaded_model.load_weights("multi-input-models-epoch-30.h5")
print("Loaded model from disk")