In [2]:
import os
import numpy as np 
import pandas as pd 
import tensorflow as tf
from keras.preprocessing.image import ImageDataGenerator
import sklearn.model_selection as sklrn
from matplotlib import pyplot as plt
from tensorflow.python.client import device_lib 
print(device_lib.list_local_devices())


[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 13047230419634017054
xla_global_id: -1
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 9857662976
locality {
  bus_id: 1
  links {
  }
}
incarnation: 15185315770901756970
physical_device_desc: "device: 0, name: NVIDIA GeForce RTX 4080 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.9"
xla_global_id: 416903419
]


In [30]:
train_dir = '..\Dataset-fix'

train_X = []
train_labels = []

for label in os.listdir(train_dir):
    PATH_TRAIN_LABEL = os.path.join(train_dir, label)
    for img in os.listdir(PATH_TRAIN_LABEL):
        train_X.append(
            os.path.join(PATH_TRAIN_LABEL, img)
        )
        train_labels.append(label)

train_X = np.array(train_X)
train_labels = np.array(train_labels)

print ('Training shape:', train_X.shape, train_labels.shape) 
print(train_X[:5], train_labels[:5])

Training shape: (1927,) (1927,)
['..\\Dataset-fix\\busuk\\captured_image_2024-01-07_18-31-27.jpg'
 '..\\Dataset-fix\\busuk\\captured_image_2024-01-07_18-32-01.jpg'
 '..\\Dataset-fix\\busuk\\captured_image_2024-01-07_18-32-23.jpg'
 '..\\Dataset-fix\\busuk\\captured_image_2024-01-07_18-32-26.jpg'
 '..\\Dataset-fix\\busuk\\captured_image_2024-01-07_18-32-28.jpg'] ['busuk' 'busuk' 'busuk' 'busuk' 'busuk']


In [31]:
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(8, (3, 3), activation='relu', input_shape=(128, 128, 3)),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(16, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])
model.compile(optimizer=tf.keras.optimizers.RMSprop(lr=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

In [32]:
TRAIN_TEST_SPLIT_AT = 0.9
BATCH_SIZE = 100
TARGET_SIZE = (128, 128)
NO_OF_EPOCHS = 1
EXPERIMENT_SIZE = 10000
NO_OF_FOLDS = 5

In [34]:
def train_and_cross_validate (model, x_data, y_data, n_folds=NO_OF_FOLDS, epochs=NO_OF_EPOCHS, batch_size=BATCH_SIZE):
    # 
    scores = []
    
    #  Loading images through generators ...
    train_datagen = ImageDataGenerator(rescale=1. / 255.,
                                       rotation_range=40,
                                       width_shift_range=0.2,
                                       height_shift_range=0.2,
                                       shear_range=0.2,
                                       zoom_range=0.2,
                                       horizontal_flip=True)    
    validation_datagen = ImageDataGenerator(rescale=1. / 255)
          
    # prepare cross validation
    kfold = sklrn.KFold(n_folds, shuffle=True, random_state=1)
    # enumerate splits
    FoldsSetNo = 0 
    for train_ix, test_ix in kfold.split(x_data):
        print ('Folds Set # {0}'.format(FoldsSetNo))
        # select rows for train and test
        xx_train, yy_train, xx_test, yy_test = \
            x_data[train_ix], y_data[train_ix], x_data[test_ix], y_data[test_ix]

        # flow training images in batches for the current folds set
        # for training         
        train_generator = train_datagen.flow_from_dataframe(
            dataframe = pd.DataFrame({'id':xx_train,'label':yy_train}), 
            directory=train_dir, 
            x_col='id',
            y_col='label',
            batch_size=batch_size,
            target_size=TARGET_SIZE,
            class_mode='categorical',
            shuffle = False)
        
        # and for validation         
        validation_generator = validation_datagen.flow_from_dataframe(
            dataframe = pd.DataFrame({'id':xx_test,'label':yy_test}), 
            directory=train_dir, 
            x_col='id',
            y_col='label',
            batch_size=batch_size,
            target_size=TARGET_SIZE,
            class_mode='categorical',
            shuffle=False)

        # fit the model
        history = model.fit(train_generator,
                            epochs=epochs,  # The more we train the more our model fits the data
                            batch_size=batch_size,  # Smaller batch sizes = samller steps towards convergence
                            validation_data=validation_generator,
                            verbose=1)
        # store scores
        scores.append({'acc':np.average(history.history['accuracy']),'val_acc':np.average(history.history['val_accuracy'])})
        FoldsSetNo +=1

    return scores
print('Starting training and k-fold cross validation ...')
scores = train_and_cross_validate(model, train_X, train_labels)

Starting training and k-fold cross validation ...
Folds Set # 0
Found 1541 validated image filenames belonging to 4 classes.
Found 386 validated image filenames belonging to 4 classes.


In [None]:
train = []
validation = []
plt.subplot(1, 1, 1)
for s in scores:
    train.append(s['acc'])
    validation.append(s['val_acc'])
print(train)
print(validation)
plt.plot(train, color='blue', label='train')
plt.plot(validation , color='red', label='validation')
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper left')
plt.show()