In [111]:
import numpy as np
import pandas as pd

import os # used for navigating to image path
import imageio # used for writing images

from sklearn.preprocessing import LabelEncoder

import keras

#  Keras preprocessing
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from keras.preprocessing import image_dataset_from_directory

# Keras modeling
from keras.models import Sequential
from keras.layers import  Lambda , Dense, Flatten, Dropout, Conv2D, MaxPool2D
from keras.callbacks import EarlyStopping
from keras.layers import BatchNormalization, Convolution2D , MaxPooling2D

In [112]:
train = pd.read_csv('train.csv', error_bad_lines=False, sep = '\t')
test = pd.read_csv('test.csv', error_bad_lines=False, sep = '\t')

In [113]:
train.head()

Unnamed: 0,imageid,label,productname
0,2653,Bags,Murcia Women Leather Office Grey Bag
1,55997,Others,Colorbar Velvet Matte Temptation Lipstick 24MA
2,2640,Shoes,Carlton London Men Brown Formal Shoes
3,40565,Topwear,W Women Maroon Kurta
4,38932,Bottomwear,Gini and Jony Girls Pink Leggings


In [114]:
train['image'] = train.apply(lambda row: str(row['imageid']) + ".jpg", axis=1)
test['image'] = test.apply(lambda row: str(row['imageid']) + ".jpg", axis=1)

In [115]:
path = 'images/'
batch_size = 40
target_size = (100,100)
epochs = 40
num_train_samples = train.shape[0]
num_test_samples = test.shape[0]

In [116]:

image_generator = ImageDataGenerator(
    validation_split=0.2)
print('Training data')
training_generator = image_generator.flow_from_dataframe(
    dataframe=train,
    directory=path,
    x_col="image",
    y_col="label",
    target_size=target_size,
    class_mode='categorical',
    color_mode="grayscale",
    batch_size=batch_size,
    subset="training"
)
print('Validation data')
validation_generator = image_generator.flow_from_dataframe(
    dataframe=train,
    directory=path,
    x_col="image",
    y_col="label",
    target_size=target_size,
    class_mode='categorical',
    color_mode="grayscale",
    batch_size=batch_size,
    subset="validation"
)
print('Test data')
test_image_gen = ImageDataGenerator()
test_generator = test_image_gen.flow_from_dataframe(
    dataframe=test,
    directory=path,
    x_col='image',
    y_col=None,
    class_mode=None,
    color_mode="grayscale",
    target_size=target_size,
    batch_size=batch_size,
    shuffle=False
)




Training data
Found 32353 validated image filenames belonging to 13 classes.
Validation data
Found 8088 validated image filenames belonging to 13 classes.
Test data
Found 4000 validated image filenames.


In [117]:
model = Sequential(
    [
        keras.Input(shape=(100, 100, 1)),
        Conv2D(32, kernel_size=(4, 4), activation="relu"),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.3),
        Flatten(),
        Dense(256, activation = "relu"),
        Dropout(0.3),
        Dense(13, activation="softmax"),
        
    ]
)

model.summary()

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_21 (Conv2D)           (None, 97, 97, 32)        544       
_________________________________________________________________
max_pooling2d_11 (MaxPooling (None, 48, 48, 32)        0         
_________________________________________________________________
dropout_22 (Dropout)         (None, 48, 48, 32)        0         
_________________________________________________________________
flatten_6 (Flatten)          (None, 73728)             0         
_________________________________________________________________
dense_12 (Dense)             (None, 256)               18874624  
_________________________________________________________________
dropout_23 (Dropout)         (None, 256)               0         
_________________________________________________________________
dense_13 (Dense)             (None, 13)               

In [118]:
model.compile(optimizer='rmsprop',loss='binary_crossentropy',metrics=['accuracy'])

In [119]:
model.fit_generator(
        training_generator,
        steps_per_epoch= 0.8 * num_train_samples // batch_size,
        epochs=2,# lower the computational cost
        #verbose=1,
        validation_data=validation_generator,
        validation_steps= 0.2 * num_train_samples // batch_size)



Epoch 1/2
Epoch 2/2


<tensorflow.python.keras.callbacks.History at 0x1a628d39d88>

In [120]:
predict = model.predict_generator(test_generator, steps=num_test_samples //batch_size )

# convert prediction result of integers to categorical names
predicted_class_indices=np.argmax(predict,axis=1)
labels = (training_generator.class_indices)
labels = dict((v,k) for k,v in labels.items())
predictions = [labels[k] for k in predicted_class_indices]



In [121]:
from sklearn.metrics import accuracy_score

accuracy_score(test['label'], predictions)

0.88525