**Goal**: 

Create a multimodal model that concatenates the image and text embeddings and adds a classification layer on top.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from glob import glob
from tqdm import tqdm
import data_science_functions as ds
import project_tools as pt

import tensorflow as tf
import tensorflow_hub as hub

# os.environ["TF_KERAS"]='1'
# Report how many GPUs TensorFlow can see (0 means everything below runs on CPU).
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

2023-06-20 13:22:28.995153: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-06-20 13:22:29.380509: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-06-20 13:22:29.382903: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Num GPUs Available:  0


2023-06-20 13:22:32.489069: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


# Build the multimodal model

In [2]:
# MobileNet pre-trained on ImageNet, used here as a frozen image feature extractor.
pre_trained_model = tf.keras.applications.MobileNet(
    input_shape=(224, 224, 3),
    weights="imagenet",
)

# Re-wire the network from its input layer up to its second-to-last layer,
# i.e. drop only the very last layer of the pre-trained model.
# NOTE(review): with the default classification top, layers[-2] is presumably
# the 1000-way pre-softmax output rather than the pooled embedding — confirm
# this is the intended feature vector.
image_model = tf.keras.Model(
    inputs=pre_trained_model.layers[0].output,
    outputs=pre_trained_model.layers[-2].output,
    name='Image_feature_extractor_mobilenet',
)

# Freeze every layer so the extractor's weights are not updated during training.
for layer in image_model.layers:
    layer.trainable = False

image_model.summary()


Model: "Image_feature_extractor_mobilenet"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 conv1 (Conv2D)              (None, 112, 112, 32)      864       
                                                                 
 conv1_bn (BatchNormalizatio  (None, 112, 112, 32)     128       
 n)                                                              
                                                                 
 conv1_relu (ReLU)           (None, 112, 112, 32)      0         
                                                                 
 conv_dw_1 (DepthwiseConv2D)  (None, 112, 112, 32)     288       
                                                                 
 conv_dw_1_bn (BatchNormaliz  (None, 112, 112, 32)     128       
 ation)                          

In [3]:
# Universal Sentence Encoder (v4) from TF Hub, used as a frozen text
# feature extractor.
sentence_encoding_layer = hub.KerasLayer(
    "https://tfhub.dev/google/universal-sentence-encoder/4",
    name='U.S.E',
    dtype=tf.string,
    input_shape=[],
    trainable=False,
)

# The model takes one raw string per sample (scalar string input) and
# returns the encoder output for it.
inputs = tf.keras.Input(shape=(), dtype=tf.string, name='input_layer')
outputs = sentence_encoding_layer(inputs)

NLP_model = tf.keras.Model(
    inputs=inputs,
    outputs=outputs,
    name='USE_feature_extractor',
)
NLP_model.summary()


Model: "USE_feature_extractor"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_layer (InputLayer)    [(None,)]                 0         
                                                                 
 U.S.E (KerasLayer)          (None, 512)               256797824 


2023-06-20 13:23:39.398067: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype string
	 [[{{node inputs}}]]
2023-06-20 13:23:39.486837: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder' with dtype string and shape [?]
	 [[{{node Placeholder}}]]


                                                                 
Total params: 256,797,824
Trainable params: 0
Non-trainable params: 256,797,824
_________________________________________________________________


In [18]:
n_classes = 7

# Fuse the two modalities: concatenate the image and text feature vectors,
# regularize with dropout, then classify with a softmax layer.
merged = tf.keras.layers.Concatenate(name='join_embeddings')(
    [image_model.output, NLP_model.output]
)
# Optional hidden block tried during experimentation (disabled):
# merged = tf.keras.layers.Dropout(0.1)(merged)
# merged = tf.keras.layers.Dense(256, activation='relu')(merged)
merged = tf.keras.layers.Dropout(0.3, name='dropout0.3')(merged)
merged = tf.keras.layers.Dense(
    units=n_classes,
    activation='softmax',
    name='classification_layer',
)(merged)

# Two-input model: [image tensor, description string] -> class probabilities.
model = tf.keras.Model(
    inputs=[image_model.input, NLP_model.input],
    outputs=merged,
    name='Joint_embeddings_classifier',
)

In [19]:
# Inspect the assembled two-input classifier (layers, shapes, connections).
model.summary()

Model: "Joint_embeddings_classifier"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv1 (Conv2D)                 (None, 112, 112, 32  864         ['input_1[0][0]']                
                                )                                                                 
                                                                                                  
 conv1_bn (BatchNormalization)  (None, 112, 112, 32  128         ['conv1[0][0]']                  
                                )                                       

In [None]:

BATCH_SIZE = 16
# Number of optimizer steps in one epoch; used to express the decay horizon
# of the learning-rate schedule in epochs.
# NOTE(review): 97*7 is presumably 97 training samples for each of the
# 7 classes — confirm against the actual training split.
STEPS_PER_EPOCH = (97 * 7) // BATCH_SIZE

# Decreasing learning rate: starts at 0.001 and decays with the inverse of
# the step count, with `decay_steps` set to 5 epochs' worth of steps.
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=0.001,
    decay_steps=STEPS_PER_EPOCH * 5,
    decay_rate=1,
    staircase=False,
)


def get_optimizer():
    """Return a new Adam optimizer driven by the notebook-level `lr_schedule`."""
    return tf.keras.optimizers.Adam(learning_rate=lr_schedule)


model.compile(
    optimizer=get_optimizer(),
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=['accuracy'],
)

# Build the `tf.data.Dataset`

## Reorganize the directories so that each product's description and image sit in the same directory

In [20]:
# Load the product descriptions, indexed by product id; the `name` column
# is not needed below and is dropped.
df = pd.read_csv(
    'text_data.csv.gz',
    index_col='product_id',
).drop(columns='name')
df.shape

(1050, 2)

In [80]:
# Preview the first rows: one description and one category per product id.
df.head(3)

Unnamed: 0_level_0,description,category
product_id,Unnamed: 1_level_1,Unnamed: 2_level_1
55b85ea15a1536d46b7190ad6fff8ce7,Elegance Polyester Multicolor Abstract Eyelet ...,home_furnishing
7b72c92c2f6c40268628ec5f14c6d590,Sathiyas Cotton Bath Towel. Specifications of ...,baby_care
64d5d4a258243731dc7bbb1eef49ad74,Eurospa Cotton Terry Face Towel Set. Key Featu...,baby_care


In [75]:
# Take the first product id as a sample to check how images are laid out on disk.
prod_id = df.index[0]

In [76]:
# Look up the sample product's image one directory level below the image root.
glob(f'./Flipkart_products_images/*/{prod_id}*')

['./Flipkart_products_images/home_furnishing/55b85ea15a1536d46b7190ad6fff8ce7.jpg']

In [90]:
# One-off setup, left commented out so a re-run does not repeat it.
# NOTE(review): presumably this is how ./Flipkart_products (used below) was
# created from ./Flipkart_products_images2 — confirm.
# os.mkdir('./Flipkart_products')
# !cp -r ./Flipkart_products_images2/* ./Flipkart_products

In [91]:
# Within the testing, training, and validation directories, give every
# product its own sub-directory holding both modalities:
# - find the product image (`<split>/<category>/<prod_id>*`);
# - create `<split>/<category>/<prod_id>/` next to it;
# - move the image into that directory (one level deeper);
# - write the product description to `<prod_id>.txt` beside the image.
#
# The cell can be re-run safely: products that were already moved are
# skipped instead of crashing on the existing directory.
for prod_id in df.index:
    matches = glob(f'./Flipkart_products/*/*/{prod_id}*')
    # Keep regular files only: after a first run the pattern also matches
    # the `<prod_id>/` directory created below, which must not be treated
    # as the image.
    image_paths = [path for path in matches if os.path.isfile(path)]

    if not image_paths:
        if not matches:
            print(f'{prod_id} was not found in the directories')
        # Otherwise only the product directory matched: already reorganized.
        continue

    im_path = image_paths[0]
    # split into directory and image name
    dir_, im_name = os.path.split(im_path)
    # create a directory in dir_ with the product_id (no error if it exists)
    new_dir = os.path.join(dir_, prod_id)
    os.makedirs(new_dir, exist_ok=True)
    # move the image in that directory
    os.replace(im_path, os.path.join(new_dir, im_name))

    text_filename = os.path.join(new_dir, f'{prod_id}.txt')
    desc = df.description.loc[prod_id]
    # Explicit encoding so the written text does not depend on the
    # platform's default locale.
    with open(text_filename, 'w', encoding='utf-8') as f:
        f.write(desc)


In [94]:
# Check the resulting layout: one directory per product with its .jpg and .txt.
!tree --dirsfirst ./Flipkart_products/testing/baby_care

[01;34m./Flipkart_products/testing/baby_care[0m
├── [01;34m0bff0eda7a6677dc1acb0477a1f7a121[0m
│   ├── [01;35m0bff0eda7a6677dc1acb0477a1f7a121.jpg[0m
│   └── 0bff0eda7a6677dc1acb0477a1f7a121.txt
├── [01;34m27b7851ae3dfc2ae8c1958cebe93bf0b[0m
│   ├── [01;35m27b7851ae3dfc2ae8c1958cebe93bf0b.jpg[0m
│   └── 27b7851ae3dfc2ae8c1958cebe93bf0b.txt
├── [01;34m3dfdf277bccb74532c0387adfed2897d[0m
│   ├── [01;35m3dfdf277bccb74532c0387adfed2897d.jpg[0m
│   └── 3dfdf277bccb74532c0387adfed2897d.txt
├── [01;34m3e2b2a04696f7d83a7835e9894d79df7[0m
│   ├── [01;35m3e2b2a04696f7d83a7835e9894d79df7.jpg[0m
│   └── 3e2b2a04696f7d83a7835e9894d79df7.txt
├── [01;34m44218245b37581c534038cc27d081093[0m
│   ├── [01;35m44218245b37581c534038cc27d081093.jpg[0m
│   └── 44218245b37581c534038cc27d081093.txt
├── [01;34m49f7763dc9c7ece284a1ae27166f8c6c[0m
│   ├── [01;35m49f7763dc9c7ece284a1ae27166f8c6c.jpg[0m
│   └── 49f7763dc9c7ece284a1ae27166f8c6c.txt
├── [01;34m5325213d84c80ad310b6e4b37a1ac824

# 