In [30]:
import os
import sys
import math
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [31]:
RANDOM_STATE = 42
np.random.seed(RANDOM_STATE)


In [32]:
## Folder Locations
TEST_IMAGE_FOLDER = '../data/raw/Test/testset/'
TEST_CSV = '../data/raw/Test/testset.csv'

TRAIN_IMAGE_FOLDER = '../data/raw/Train/testset/'
TRAIN_CSV = '../data/raw/Train/testset.csv'


# VALID_IMAGE_FOLDER = '../data/raw/Train/testset/'
# VALID_CSV = '../data/raw/Valid/new_valid_set.csv'

In [33]:
train_set = pd.read_csv(TRAIN_CSV)
test_set = pd.read_csv(TEST_CSV)
train_set.head()

Unnamed: 0,Data,Label
0,600795.jpeg,10
1,627152.jpeg,10
2,119963.jpeg,10
3,118264.jpeg,10
4,199420.jpeg,10


In [34]:
test_set.head()

Unnamed: 0,Data,Label
0,632755.jpeg,10
1,496855.jpeg,10
2,155390.jpeg,10
3,265013.jpeg,10
4,496360.jpeg,10


In [35]:
test_set['Label'] = test_set['Label'].apply(str)
train_set['Label'] = train_set['Label'].apply(str)

In [88]:
single_test_data = train_set.groupby('Label').apply(lambda x: x.sample(1))
single_test_data.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Data,Label
Label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,3764,932784.jpeg,0
1,3111,383133.jpeg,1
10,4,199420.jpeg,10
11,5764,271118.jpeg,11
12,5220,251601.jpeg,12


In [91]:
len(single_test_data['Label'].unique())

48

In [36]:
test_set.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2609 entries, 0 to 2608
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Data    2609 non-null   object
 1   Label   2609 non-null   object
dtypes: object(2)
memory usage: 40.9+ KB


In [37]:
CLASSES = len(train_set['Label'].unique())

# Image Data generator

In [38]:
from keras_preprocessing.image import ImageDataGenerator

In [50]:
datagen=ImageDataGenerator(rescale=1./255.,validation_split=0.25)
#                            zoom_range=0.2,rotation_range = 5,
#                            horizontal_flip=True)

In [92]:
TARGET_SIZE=(224, 224)

single_test_data=datagen.flow_from_dataframe(
dataframe=single_test_data,
directory=TRAIN_IMAGE_FOLDER,
x_col="Data",
y_col="Label",
subset="training",
seed=RANDOM_STATE,
shuffle=True,
class_mode="categorical",
batch_size = 32,
target_size=TARGET_SIZE)


train_generator=datagen.flow_from_dataframe(
dataframe=train_set,
directory=TRAIN_IMAGE_FOLDER,
x_col="Data",
y_col="Label",
subset="training",
seed=RANDOM_STATE,
shuffle=True,
class_mode="categorical",
batch_size = 32,
target_size=TARGET_SIZE)


valid_generator=datagen.flow_from_dataframe(
dataframe=train_set,
directory=TRAIN_IMAGE_FOLDER,
x_col="Data",
y_col="Label",
subset="validation",
seed=RANDOM_STATE,
shuffle=True,
batch_size = 32,
class_mode="categorical",
target_size=TARGET_SIZE)


test_datagen=ImageDataGenerator(rescale=1./255.)
test_generator=test_datagen.flow_from_dataframe(
dataframe=test_set,
directory=TEST_IMAGE_FOLDER,
x_col="Data",
y_col=None,
seed=RANDOM_STATE,
shuffle=False,
class_mode=None,
batch_size = 32,
target_size=TARGET_SIZE)

Found 36 validated image filenames belonging to 48 classes.
Found 7200 validated image filenames belonging to 48 classes.
Found 2400 validated image filenames belonging to 48 classes.
Found 2609 validated image filenames.


In [93]:
TARGET_SIZE[1]

224

## DENSENET

In [94]:
from tensorflow.keras.layers import Dense, Activation, Flatten, Dropout, BatchNormalization
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras import regularizers, optimizers
from tensorflow.keras.models import Sequential

In [126]:
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.utils import to_categorical
import tensorflow
# create the base pre-trained model
base_model = DenseNet121(weights='imagenet', include_top=False, input_shape=(224, 224, 3), pooling='avg')

for layer in base_model.layers:
    layer.trainable = True
        
# add a global spatial average pooling layer
x = base_model.output
x = Dense(1000, kernel_regularizer=regularizers.l1_l2(0.01), activity_regularizer=regularizers.l2(0.01))(x)
x = Activation('relu')(x)
x = Dense(500, kernel_regularizer=regularizers.l1_l2(0.01), activity_regularizer=regularizers.l2(0.01))(x)
x = Activation('relu')(x)
predictions = Dense(CLASSES, activation='softmax')(x)
model = Model(inputs=base_model.input, outputs=predictions)

# first: train only the top layers (which were randomly initialized)


# compile the model (should be done *after* setting layers to non-trainable)
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['categorical_accuracy'])

# train the model on the new data for a few epochs
# model.fit(...)



In [127]:
valid_generator.n//valid_generator.batch_size

75

In [128]:
train_generator.n//train_generator.batch_size

225

In [129]:
EPOCHS = 5
BATCH_SIZE = 32
STEPS_PER_EPOCH = train_generator.n//train_generator.batch_size,
VALIDATION_STEPS = 64,
STEP_SIZE_VALID=valid_generator.n//valid_generator.batch_size

# EPOCHS = 10
# BATCH_SIZE = 32
# STEPS_PER_EPOCH = single_test_data.n//single_test_data.batch_size,
# # VALIDATION_STEPS = 64,
# # STEP_SIZE_VALID=valid_generator.n//valid_generator.batch_size

MODEL_FILE = 'filename.model'
history = model.fit_generator(
    train_generator,
    epochs=EPOCHS,
    #steps_per_epoch=STEPS_PER_EPOCH
    validation_data=valid_generator,
    validation_steps=STEP_SIZE_VALID)




  
model.save(MODEL_FILE)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
INFO:tensorflow:Assets written to: filename.model/assets


In [20]:
len(model.layers)

314

In [None]:
# # at this point, the top layers are well trained and we can start fine-tuning
# # convolutional layers from inception V3. We will freeze the bottom N layers
# # and train the remaining top layers.

# # let's visualize layer names and layer indices to see how many layers
# # we should freeze:
# for i, layer in enumerate(base_model.layers):
#    print(i, layer.name)

# # we chose to train the top 2 inception blocks, i.e. we will freeze
# # the first 249 layers and unfreeze the rest:
# for layer in model.layers[:249]:
#    layer.trainable = False
# for layer in model.layers[249:]:
#    layer.trainable = True

# # we need to recompile the model for these modifications to take effect
# # we use SGD with a low learning rate
# from tensorflow.keras.optimizers import SGD
# model.compile(optimizer=SGD(lr=0.0001, momentum=0.9), loss='categorical_crossentropy')

# # we train our model again (this time fine-tuning the top 2 inception blocks
# # alongside the top Dense layers
# model.fit(...)