## Train a Multi-label classification model for furniture type and style
* We will use transfer learning to train a classification model to obtain a feature layer for similarity identification.

### Dataset
* Houzz furniture dataset with 90,298 images from 6 categories of furniture across 17 different styles.
* Source: https://cvml.comp.nus.edu.sg/furniture/index.html
* We have limited the data to 200 images on 15 styles
* In total there are 21 classes for prediction (6 furniture types + 15 styles)

The model predicts the type and style of the furniture.
We then save the model and use its second-to-last layer as the embedding vector.

In [None]:
import os
from pathlib import Path
from imutils import paths
import pandas as pd
import numpy as np
import random
import argparse
import random
import pickle

import matplotlib.pyplot as plt
%matplotlib inline
          
#openCV
import cv2                                

In [None]:
#Tensor Flow
import tensorflow as tf
import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras import backend as K
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications import vgg16

from tensorflow.keras import callbacks
from tensorflow.keras.applications import xception
from tensorflow.keras.models import Model

# import the necessary packages
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, Activation, Flatten, Dropout, BatchNormalization
from tensorflow.keras.layers import Conv1D,Conv2D, MaxPooling2D
from tensorflow.keras import regularizers, optimizers
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import img_to_array

from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle


In [None]:
# Furniture categories. Each name is used both as a sub-folder of the data
# directory when loading images and as the "type" tag of every image in it.
labels = [
    'tables',
    'sofas',
    'lamps',
    'chairs',
    'dressers',
    'beds',
]

In [None]:
#load the images and their [type, style] tags from the data/<type>/<style>/ folders
pathroot = "data/"
tags = []        # one [furniture type, lower-cased style] pair per image
images = []      # decoded images, each resized to 100x100
path = []        # "<type>/<style>/<file>" relative path of each image
unreadable = []  # files that OpenCV could not decode (skipped, reported below)

for ptype in labels:
    type_dir = os.path.join(pathroot, ptype)
    if not os.path.isdir(type_dir):
        # Best effort: a missing category folder is skipped, not fatal.
        print("warning: category folder not found, skipping:", type_dir)
        continue

    # os.listdir returns entries in arbitrary order; sorting keeps the
    # seeded shuffle that follows reproducible across machines.
    for style_name in sorted(os.listdir(type_dir)):
        style_dir = os.path.join(type_dir, style_name)
        if not os.path.isdir(style_dir):
            continue

        for filename in sorted(os.listdir(style_dir)):
            if not filename.endswith(".jpg"):
                continue

            img_path = os.path.join(style_dir, filename)
            img_arr = cv2.imread(img_path)
            if img_arr is None:
                # cv2.imread signals a corrupt/unreadable file by returning
                # None; skip only this file instead of abandoning the whole
                # category.
                unreadable.append(img_path)
                continue

            path.append(ptype+"/"+style_name+"/"+filename)
            images.append(cv2.resize(img_arr, (100, 100)))
            tags.append([ptype, style_name.lower()])

if unreadable:
    print("warning: skipped", len(unreadable), "unreadable image(s)")


In [None]:
# Shuffle images, tags and their relative paths with the same permutation so
# that path[i] keeps describing images[i] / tags[i] after shuffling.
images, tags, path = shuffle(images, tags, path, random_state=20)


### Transfer learning
* Train on a smaller dataset 
* Taking features learned from ImageNet dataset
* Dataset of over 14 million images belonging to 1000 classes


### Multi-label classification
* Multi-label classification is a predictive modeling task that involves predicting zero or more mutually non-exclusive class labels
* Multi Label Binarizer: transforms each image's list of classes into a binary indicator vector

In [None]:
# Learn the set of distinct tags first, then replace every [type, style]
# pair with its binary indicator row (one column per learned class).
mlb = MultiLabelBinarizer().fit(tags)
tags = mlb.transform(tags)

In [None]:
# Persist the class names in binarizer column order so predictions can be
# mapped back to tag names later.
class_names = list(mlb.classes_)
classes_frame = pd.DataFrame(class_names)
classes_frame.to_csv('classes.csv', index=False)

### Allocation of train/test/validation data
* 70% Training 15% Testing 15% Validation

In [None]:
# Slice boundaries for the 70/15/15 split.
# NOTE: `testsize` is the END index of the test slice (train + 15%), not the
# number of test samples; everything after it becomes the validation slice.
fullsize = len(images)
trainsize = int(0.7 * fullsize)
testsize = int(trainsize + 0.15 * fullsize)

In [None]:
# Cut the shuffled data into three consecutive, non-overlapping slices and
# convert each slice to a NumPy array.
train_span = slice(0, trainsize)
test_span = slice(trainsize, testsize)
val_span = slice(testsize, None)

X_train, y_train = np.array(images[train_span]), np.array(tags[train_span])
X_test, y_test = np.array(images[test_span]), np.array(tags[test_span])
X_val, y_val = np.array(images[val_span]), np.array(tags[val_span])

In [None]:
# Training hyper-parameters
image_dims = (100, 100, 3)  # image height, width, colour channels
bs = 32                     # batch size
init_lr = 0.001             # initial learning rate for the optimizer
epo = 30                    # maximum number of training epochs

### Using Image Data Generator for Image augmentation
*  Flow method: Takes data & label arrays, generates batches of augmented data.

In [None]:
# Random augmentations applied to training images only.
augmentation = dict(
    rotation_range=25,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode="nearest",
)

# Both generators rescale pixel values by 1/255; only the training one augments.
train_datagen = ImageDataGenerator(rescale=1./255, **augmentation)
test_datagen = ImageDataGenerator(rescale=1./255)

# Batch iterators over the in-memory arrays.
train_generator = train_datagen.flow(X_train, y_train, batch_size=bs)
test_generator = test_datagen.flow(X_test, y_test, batch_size=bs)


### Model Evaluation Overview
#### VGG16 with weights imagenet
* Model 1: VGG16 with all layers frozen, plus one 256-node fully-connected layer

### Model 1 - VGG16 with all layers frozen, plus one 256-node fully-connected layer

In [None]:
from tensorflow.keras.models import Model

# ImageNet-pretrained VGG16 convolutional base without its classifier head.
# The input shape comes from `image_dims` so it cannot drift from the
# hyper-parameter cell.
base_model = vgg16.VGG16(weights='imagenet', include_top=False, input_shape=image_dims)

# Freeze the whole backbone: only the new head below is trained.
for layer in base_model.layers:
    layer.trainable = False

# One output unit per column of the binarized label matrix, so the head
# always matches the classes actually found in the data.
num_classes = y_train.shape[1]

# New classification head: dropout -> flatten -> 256-unit dense layer ->
# independent sigmoid per class (multi-label output).
x = base_model.output
x = Dropout(0.5)(x)
x = Flatten()(x)
x = Dense(256,activation='relu')(x)
predictions = Dense(num_classes, activation='sigmoid')(x)

model0 = Model(inputs=base_model.input, outputs=predictions)

# summary() prints the table itself and returns None, so it is not wrapped
# in print() (that would emit a stray "None").
model0.summary()

In [None]:
# `learning_rate` is the supported keyword; `lr` is a deprecated alias that
# newer Keras releases reject.
opt = Adam(learning_rate=init_lr)
# Sigmoid outputs + binary cross-entropy: each class is scored independently.
model0.compile(loss="binary_crossentropy", optimizer=opt,metrics=["accuracy"])

cp_callback = [tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5),   # stop after 5 epochs without val_loss improvement
               tf.keras.callbacks.ModelCheckpoint(filepath='model0.{epoch:02d}-{val_loss:.2f}.h5',verbose=1),  # one checkpoint file per epoch
               tf.keras.callbacks.TensorBoard(log_dir='./logs'),]

# Train the model with the callbacks above.
# steps_per_epoch must be a whole number of batches; len(train_generator) is
# the number of batches in one pass over the training data, whereas
# len(X_train)/bs is a float.
# Note: the "test" split generator is what serves as validation data here.
hist0 = model0.fit(train_generator,
                   steps_per_epoch=len(train_generator),
                   epochs=epo,
                   validation_data=test_generator,
                   callbacks=cp_callback,
                   verbose=1)

In [None]:
# Persist the trained model under the name "multilabel0".
model0.save("multilabel0")

# Pickle the per-epoch metric history so the curves can be re-plotted later
# without retraining.
history_bytes = pickle.dumps(hist0.history)
with open('trainhist0', 'wb') as history_file:
    history_file.write(history_bytes)

In [None]:
#   Plot data to see relationships in training and validation data
import numpy as np
import matplotlib.pyplot as plt
plt.style.use('ggplot') 
%matplotlib inline

epoch_list = list(range(1, len(hist0.history['accuracy']) + 1))  # values for x axis [1, 2, ..., # of epochs]
plt.plot(epoch_list, hist0.history['accuracy'], epoch_list, hist0.history['val_accuracy'])
plt.legend(('Training Accuracy', 'Validation Accuracy'))
plt.xlabel('epoch')
plt.show()

epoch_list = list(range(1, len(hist0.history['loss']) + 1))  # values for x axis [1, 2, ..., # of epochs]
plt.plot(epoch_list, hist0.history['loss'], epoch_list, hist0.history['val_loss'])
plt.legend(('Training Loss', 'Validation Loss'))
plt.xlabel('epoch')
plt.show()


In [None]:
# Batch iterator over the held-out split (X_val / y_val), which was not used
# during fitting; only rescaling is applied, no augmentation.
val_generator = test_datagen.flow(X_val, y_val, batch_size=bs)
val_list = []

#   Score the trained model on that held-out data: score0 is [loss, accuracy].
score0 = model0.evaluate(val_generator, verbose=0)

for caption, value in zip(('Test loss:', 'Test accuracy:'), score0):
    print(caption, value)

## From here on there were more models to try, but I deleted them because they are not necessary