In [1]:
# tf tools
import tensorflow as tf

# image processsing
from tensorflow.keras.preprocessing.image import (load_img,
                                                  img_to_array,
                                                  ImageDataGenerator)
# VGG16 model
from tensorflow.keras.applications.vgg16 import (preprocess_input,
                                                 decode_predictions,
                                                 VGG16)
# cifar10 data - 32x32
from tensorflow.keras.datasets import cifar10

# layers
from tensorflow.keras.layers import (Flatten, 
                                     Dense, 
                                     Dropout, 
                                     BatchNormalization)
# generic model object
from tensorflow.keras.models import Model

# optimizers
from tensorflow.keras.optimizers.schedules import ExponentialDecay
from tensorflow.keras.optimizers import SGD

#scikit-learn
from sklearn.preprocessing import LabelBinarizer
from sklearn.metrics import classification_report

# for plotting
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os

2023-04-19 18:50:49.840357: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


- Load images at a specific size
- Create the model with a matching input size
- Maybe normalise the images
- Binarize the labels

# JSON files

In [14]:
# Load the metadata table for each data split. The files are read as
# JSON-lines (one record per line), hence `lines=True`.
metadata_dir = os.path.join("..", "..", "..", "images", "metadata")


def _read_metadata(filename):
    """Read one JSON-lines metadata file from `metadata_dir` into a DataFrame."""
    return pd.read_json(os.path.join(metadata_dir, filename), lines=True)


test_metadata = _read_metadata("test_data.json")
train_metadata = _read_metadata("train_data.json")
val_metadata = _read_metadata("val_data.json")

In [32]:
# Display the validation metadata table to inspect its columns
# (pandas abbreviates the middle rows when rendering a long frame).
val_metadata

Unnamed: 0,image_url,image_path,brand,product_title,class_label,color
0,https://m.media-amazon.com/images/I/81LOPbFPiQ...,images/val/0.jpeg,Generic,Women's Khadi Cotton Saree With Blouse Piece (...,saree,
1,https://m.media-amazon.com/images/I/81Q8Oktw4s...,images/val/1.jpeg,Yashvi Designer,Women's Net Saree With Unstitched Blouse Piece,saree,
2,https://m.media-amazon.com/images/I/613S1YGCmo...,images/val/2.jpeg,Aarrah,Georgette Strip Print Saree[S_SHIKHA30021SR02_...,saree,
3,https://m.media-amazon.com/images/I/819budhQl1...,images/val/3.jpeg,MIMOSA,Women's Patola Style Art Silk Saree (Green),saree,
4,https://m.media-amazon.com/images/I/61cAZ94ZQV...,images/val/4.jpeg,Generic,Women's Pure Hand Block Patola Printed Cotton ...,saree,
...,...,...,...,...,...,...
7495,https://m.media-amazon.com/images/I/614GvuaIJu...,images/val/7495.jpeg,ishin,Women's Rayon Pink Printed A-Line Kurta Palazz...,women_kurta,
7496,https://m.media-amazon.com/images/I/81WAbf4-ao...,images/val/7496.jpeg,Cotton Culture,Women's Kesar Pink Cotton Straight Kurta,women_kurta,
7497,https://m.media-amazon.com/images/I/61unbmOA7k...,images/val/7497.jpeg,Bae's Wardrobe,Printed Rayon Anarkali Kurti with Plazzo Pant ...,women_kurta,
7498,https://m.media-amazon.com/images/I/71zULAnVNY...,images/val/7498.jpeg,Aarika,Girl's Regular Dress,women_kurta,


## Code with flow from dataframe

In [34]:

# Data augmentation with Keras' ImageDataGenerator.
# The generator acts as a pipeline: each image streamed through it is
# modified on the fly and then passed on to the model.
# NOTE(review): the ImageNet VGG16 weights are normally paired with
# `preprocess_input` (imported above but unused) rather than a 1/255 rescale;
# confirm which input scaling is intended before changing it.
augmentation_options = {
    "horizontal_flip": True,  # randomly mirror images horizontally
    "rotation_range": 20,     # randomly rotate within a 20-degree range
    "rescale": 1 / 255,       # multiply pixel values by 1/255 (0-255 -> 0-1)
}
datagen = ImageDataGenerator(**augmentation_options)

In [43]:
# Root folder for every split. The rest of each image's path comes from the
# `image_path` column of the metadata, so all three splits share this root.
directory_train = directory_validation = directory_test = os.path.join("..", "..", "..")



In [76]:
# Stream the training images from disk in batches through the augmenting
# generator (Keras `flow_from_dataframe`).
# `subset` is deliberately not passed: `datagen` is created without a
# `validation_split`, so `subset="training"` selected every row anyway and
# only suggested a split that does not exist. The validation generator does
# not pass `subset` either.
X_train_tf = datagen.flow_from_dataframe(
    dataframe=train_metadata,      # metadata table describing the training images
    directory=directory_train,     # root folder the relative image paths are joined to
    x_col="image_path",            # column holding the rest of each image's path
    y_col="class_label",           # column holding the label
    target_size=(80, 80),          # resize every image to 80x80 on load
    color_mode="rgb",              # load three colour channels
    class_mode="categorical",      # one-hot encode the labels
    batch_size=128,                # yield 128 images per batch
    shuffle=True,                  # shuffle the order of the images
)

Found 91166 validated image filenames belonging to 15 classes.


In [73]:
# Validation images must be scored as they are, so they get their own
# generator that only applies the same 1/255 rescaling as training.
# Reusing the augmenting `datagen` here would randomly flip and rotate the
# validation images, making validation loss/accuracy noisy and not comparable
# between epochs.
val_datagen = ImageDataGenerator(rescale=1 / 255)

val_tf = val_datagen.flow_from_dataframe(
    dataframe=val_metadata,            # metadata table describing the validation images
    directory=directory_validation,    # root folder the relative image paths are joined to
    x_col="image_path",                # column holding the rest of each image's path
    y_col="class_label",               # column holding the label
    target_size=(80, 80),              # must match the model's input size
    color_mode="rgb",
    class_mode="categorical",          # one-hot labels, as for the training data
    batch_size=128,
    shuffle=False,                     # fixed order: evaluation does not need shuffling
)

Found 7500 validated image filenames belonging to 15 classes.


In [42]:
# Generator for the test images: no augmentation, it only rescales the
# pixel values by 1/255.
test_datagen = ImageDataGenerator(rescale=1.0 / 255.0)

In [74]:
# Stream the test images in batches. No `y_col` is given and
# `class_mode=None`, so the generator yields image batches without labels.
test_tf = test_datagen.flow_from_dataframe(
    dataframe=test_metadata,
    directory=directory_test,
    x_col="image_path",
    class_mode=None,
    target_size=(80, 80),
    color_mode="rgb",
    batch_size=128,
    shuffle=False,  # keep the images in metadata order
)

Found 7500 validated image filenames.


# Model

In [77]:
# Pretrained VGG16 convolutional base, loaded without its original
# fully connected classifier (include_top=False). pooling='avg' adds global
# average pooling after the last convolutional block.
vgg_base = VGG16(input_shape=(80, 80, 3),
                 include_top=False,
                 pooling='avg')

# Freeze all pretrained layers so that only the new head is trained.
for pretrained_layer in vgg_base.layers:
    pretrained_layer.trainable = False

# New classification head, applied in order on top of the base's last layer:
# flatten -> batch normalisation -> two ReLU dense layers -> softmax output.
head_layers = [
    Flatten(),
    BatchNormalization(),  # normalise the features before the dense layers
    Dense(256, activation='relu'),
    Dense(128, activation='relu'),
    Dense(15, activation='softmax'),  # 15 classes, as reported by the data generators
]
output = vgg_base.layers[-1].output
for head_layer in head_layers:
    output = head_layer(output)

# Full model: VGG16 input through to the new softmax output.
model = Model(inputs=vgg_base.inputs,
              outputs=output)

# SGD with an exponentially decaying learning rate:
# starts at 0.01 and is scaled by 0.9 per 10000 optimizer steps.
lr_schedule = ExponentialDecay(initial_learning_rate=0.01,
                               decay_steps=10000,
                               decay_rate=0.9)
sgd = SGD(learning_rate=lr_schedule)

model.compile(loss='categorical_crossentropy',
              optimizer=sgd,
              metrics=['accuracy'])

# Print the layer-by-layer overview of the assembled model.
model.summary()

Model: "model_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_4 (InputLayer)        [(None, 80, 80, 3)]       0         
                                                                 
 block1_conv1 (Conv2D)       (None, 80, 80, 64)        1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 80, 80, 64)        36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 40, 40, 64)        0         
                                                                 
 block2_conv1 (Conv2D)       (None, 40, 40, 128)       73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 40, 40, 128)       147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 20, 20, 128)       0   

### Train model

In [78]:
# Train the model on the streamed batches. The generators read the images
# from their folders batch by batch, so the whole dataset is never loaded
# into the notebook at once; training batches are augmented in real time.
H = model.fit(
    x=X_train_tf,                       # training batches from the data generator
    epochs=10,
    steps_per_epoch=len(X_train_tf),    # one pass over every training batch per epoch
    validation_data=val_tf,             # validation batches
    validation_steps=len(val_tf),       # evaluate on every validation batch
)

Epoch 1/10


2023-04-19 20:10:25.415532: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]


  5/713 [..............................] - ETA: 1:48:35 - loss: 2.8740 - accuracy: 0.0844

KeyboardInterrupt: 