In [51]:
import cv2 
import os 
import sys 
import sklearn 
import numpy as np
import pandas as pd 
import tensorflow as tf 
import matplotlib.pyplot as plt 

In [52]:
from IPython.display import SVG 
from sklearn.model_selection import train_test_split 
from tensorflow.keras.models import Model, Sequential 
from tensorflow.keras.optimizers import Adam, RMSprop, SGD 
from tensorflow.python.keras.utils.vis_utils import plot_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator 
from tensorflow.keras.utils import to_categorical, Sequence, model_to_dot 
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping, LearningRateScheduler 
from tensorflow.keras.layers import Activation, Dense, Input, Flatten, Conv2D, MaxPooling2D, Dropout, BatchNormalization, GlobalAveragePooling2D
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications import ResNet50V2
from tensorflow.keras.applications import Xception, MobileNetV2
from tensorflow.keras.applications.mobilenet import preprocess_input as mobile_preprocess_input

In [3]:
# Mount Google Drive so the dataset archive stored there is reachable from this Colab runtime.
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [4]:

import shutil

# Where the zipped dataset lives on the mounted Drive, and where it is unpacked locally.
drive_path = "/content/drive/MyDrive/"
extract_folder = "dataset/"
source_filename = drive_path + "dataset/cat-and-dog.zip"

# Unpack the archive into the local extraction folder.
shutil.unpack_archive(filename=source_filename, extract_dir=extract_folder)

In [5]:
# Number of images per batch (default batch size of the data generator).
BATCH_SIZE = 64
# Images are resized to IMAGE_SIZE x IMAGE_SIZE pixels; also the model's input height/width.
IMAGE_SIZE = 160

In [6]:
def get_show_plot_image(images, labels):
    """Display up to 15 images in a 3x5 grid, each titled with its label.

    Args:
        images: Indexable, sized collection of images accepted by ``Axes.imshow``.
        labels: Indexable, sized collection of labels; ``labels[i]`` is shown
            (via ``str``) as the title of ``images[i]``.

    Only the first 15 image/label pairs are drawn. When fewer than 15 pairs
    are supplied, the remaining grid cells are left blank instead of raising
    ``IndexError``.
    """
    n_rows, n_cols = 3, 5
    fig, axes = plt.subplots(n_rows, n_cols)
    fig.set_size_inches(12, 12)

    # Never index past the end of either input.
    n_shown = min(n_rows * n_cols, len(images), len(labels))

    # ``axes.flat`` walks the grid row by row, i.e. position i is axes[i // 5, i % 5].
    for i, ax in enumerate(axes.flat):
        if i < n_shown:
            ax.imshow(images[i])
            ax.set_title(str(labels[i]))
        # Hide ticks and frame for drawn and unused cells alike.
        ax.axis("off")

    plt.tight_layout()
    plt.show()

In [7]:
def set_create_dataframe(root_dir='dataset/'):
    """Recursively index the JPEG files under ``root_dir`` into a DataFrame.

    Args:
        root_dir: Directory to scan. Defaults to ``'dataset/'``, so calling the
            function without arguments scans the same folder as before.

    Returns:
        pandas.DataFrame with one row per file whose name ends in ``.jpg``
        (case-insensitive) and three columns:
          * ``path``    - path of the file,
          * ``dataset`` - ``'train'`` if the path contains ``/training_set/``,
                          ``'test'`` if it contains ``/test_set/``, else ``'N/A'``,
          * ``label``   - ``'DOG'`` if the path contains ``dogs``,
                          ``'CAT'`` if it contains ``cats``, else ``'N/A'``.
    """
    records = []

    for dirname, _, filenames in os.walk(root_dir):
        for filename in filenames:
            # Test the extension itself: a plain substring check would also
            # accept names such as "photo.jpg.bak".
            if not filename.lower().endswith('.jpg'):
                continue

            file_path = os.path.join(dirname, filename)
            # Match against a forward-slash form so detection does not depend
            # on the platform's path separator.
            match_path = file_path.replace(os.sep, '/')

            if '/training_set/' in match_path:
                dataset_gubun = 'train'
            elif '/test_set/' in match_path:
                dataset_gubun = 'test'
            else:
                dataset_gubun = 'N/A'

            if 'dogs' in match_path:
                label_gubun = 'DOG'
            elif 'cats' in match_path:
                label_gubun = 'CAT'
            else:
                label_gubun = 'N/A'

            records.append((file_path, dataset_gubun, label_gubun))

    # Build the frame once from the collected rows; the explicit column list
    # keeps the schema stable even when no image was found.
    data_df = pd.DataFrame(records, columns=['path', 'dataset', 'label'])
    return data_df

In [22]:
# Index every image found under the extraction folder, then preview the first rows.
data_df = set_create_dataframe()

data_df.head(n=10)

Unnamed: 0,path,dataset,label
0,dataset/training_set/training_set/dogs/dog.279...,train,DOG
1,dataset/training_set/training_set/dogs/dog.255...,train,DOG
2,dataset/training_set/training_set/dogs/dog.173...,train,DOG
3,dataset/training_set/training_set/dogs/dog.221...,train,DOG
4,dataset/training_set/training_set/dogs/dog.331...,train,DOG
5,dataset/training_set/training_set/dogs/dog.193...,train,DOG
6,dataset/training_set/training_set/dogs/dog.122...,train,DOG
7,dataset/training_set/training_set/dogs/dog.234...,train,DOG
8,dataset/training_set/training_set/dogs/dog.102...,train,DOG
9,dataset/training_set/training_set/dogs/dog.396...,train,DOG


In [24]:
def get_show_shape(images):
    """Print ``shape <shape>`` for every element of ``images``, one per line.

    Args:
        images: Sized, integer-indexable collection whose elements expose a
            ``shape`` attribute.
    """
    total = len(images)
    position = 0
    while position < total:
        current = images[position]
        print(f"shape {current.shape}")
        position += 1

In [23]:
def get_train_test_valid_separate(data_df):
    """Split the indexed images into training, validation and test arrays.

    Args:
        data_df: DataFrame with the columns ``path``, ``dataset`` and ``label``.
            Rows whose ``dataset`` is ``"train"`` feed the training/validation
            split; rows whose ``dataset`` is ``"test"`` form the test set.

    Returns:
        Tuple ``(tr_path, tr_label, val_path, val_label, test_path, test_label)``.
        Paths are arrays of file paths, labels are int64 class codes. 15% of
        the training rows are held out for validation with a fixed random
        seed (2022).

    Note:
        Class codes come from one shared, alphabetically sorted vocabulary
        built from the whole ``label`` column (e.g. ``CAT`` -> 0, ``DOG`` -> 1).
        Factorizing train and test labels independently numbers classes by
        order of first appearance, so the same class could receive different
        integers in the two splits depending on row order.
    """
    train_df = data_df[data_df["dataset"] == "train"]
    test_df = data_df[data_df["dataset"] == "test"]

    # One vocabulary for every split keeps the integer codes consistent.
    categories = sorted(data_df["label"].dropna().unique())

    def _encode(labels):
        # Categorical codes are a small int dtype; widen to int64 to match the
        # dtype pd.factorize used to return.
        return pd.Categorical(labels, categories=categories).codes.astype(np.int64)

    train_path = train_df["path"].values
    train_label = _encode(train_df["label"])

    test_path = test_df["path"].values
    test_label = _encode(test_df["label"])

    tr_path, val_path, tr_label, val_label = train_test_split(
        train_path, train_label, test_size=0.15, random_state=2022)

    return tr_path, tr_label, val_path, val_label, test_path, test_label

In [9]:
def get_show_plot_loss_accuracy(history, epochs=None):
    """Plot training vs. validation loss and accuracy curves side by side.

    Args:
        history: Object exposing a ``history`` dict with the per-epoch lists
            ``"loss"``, ``"val_loss"``, ``"accuracy"`` and ``"val_accuracy"``
            (the object returned by ``model.fit`` in ``train_model``).
        epochs: Number of leading epochs to plot. ``None`` (default), or a
            value larger than the number of recorded epochs, plots every
            recorded epoch.

    The original body referenced an undefined name ``epoch`` and raised
    ``NameError``. The x-axis is now derived from the recorded history, so a
    run that stopped early no longer causes a length mismatch.
    """
    val_loss, loss = history.history["val_loss"], history.history["loss"]
    val_acc, acc = history.history["val_accuracy"], history.history["accuracy"]

    # Clamp the requested epoch count to what was actually recorded.
    n_recorded = len(loss)
    n_epochs = n_recorded if epochs is None else max(0, min(epochs, n_recorded))
    epoch_axis = range(1, n_epochs + 1)

    fig, axes = plt.subplots(1, 2, figsize=(12, 4))

    axes[0].plot(epoch_axis, val_loss[:n_epochs], label="validation")
    axes[0].plot(epoch_axis, loss[:n_epochs], label="Training")
    axes[0].legend(loc="best")
    axes[0].set_title("loss")
    axes[0].set_xlabel("epoch")

    axes[1].plot(epoch_axis, val_acc[:n_epochs], label="validation")
    axes[1].plot(epoch_axis, acc[:n_epochs], label="Training")
    axes[1].legend(loc="best")
    axes[1].set_title("acc")
    axes[1].set_xlabel("epoch")

    fig.tight_layout()
    plt.show()

In [25]:
def create_model(model_name='mobilenet', verbose=False):
    """Build a binary image classifier on top of an ImageNet-pretrained backbone.

    Args:
        model_name: Backbone to use: ``'vgg16'``, ``'resnet50'`` (ResNet50V2),
            ``'xception'`` or ``'mobilenet'`` (MobileNetV2).
        verbose: When true, print the model summary.

    Returns:
        An uncompiled Keras ``Model`` taking ``(IMAGE_SIZE, IMAGE_SIZE, 3)``
        inputs and producing a single sigmoid output.

    Raises:
        ValueError: If ``model_name`` is not one of the supported backbones.
            The original if/elif chain left ``base_model`` unbound in that
            case and failed with ``UnboundLocalError``.
    """
    backbones = {
        'vgg16': VGG16,
        'resnet50': ResNet50V2,
        'xception': Xception,
        'mobilenet': MobileNetV2,
    }
    if model_name not in backbones:
        raise ValueError(
            f"Unknown model_name {model_name!r}; expected one of {sorted(backbones)}")

    input_tensor = Input(shape=(IMAGE_SIZE, IMAGE_SIZE, 3))

    # Backbone without its classification head, initialised with ImageNet weights.
    base_model = backbones[model_name](
        input_tensor=input_tensor, include_top=False, weights='imagenet')

    bm_output = base_model.output

    x = GlobalAveragePooling2D()(bm_output)

    # Dropout is applied for every backbone except VGG16.
    if model_name != 'vgg16':
        x = Dropout(rate=0.5)(x)
    x = Dense(50, activation='relu', name='fc1')(x)

    # Single sigmoid unit for the two-class problem.
    output = Dense(1, activation='sigmoid', name='output')(x)

    model = Model(inputs=input_tensor, outputs=output)

    if verbose:
        model.summary()

    return model

In [48]:
class Data_set(Sequence):
    """Keras ``Sequence`` that loads, resizes and preprocesses image batches from disk.

    Args:
        image_filenames: Sliceable collection of image file paths.
        labels: Sliceable collection of labels aligned with ``image_filenames``,
            or ``None`` for unlabeled data (batches then contain images only).
        batch_size: Number of images per batch.
        augmentor: Optional callable invoked as ``augmentor(image=image)`` that
            returns a mapping whose ``"image"`` entry is the augmented image.
        pre_func: Optional callable applied to each image after augmentation.
        shuffle: When true, the data is shuffled on construction and again at
            the end of every epoch.

    Fixes over the previous version:
        * the augmentor call used a misspelled, undefined name (``NameError``);
        * ``__len__`` used the global ``BATCH_SIZE`` instead of this instance's
          ``batch_size``, giving a wrong batch count for custom batch sizes;
        * ``__getitem__`` and ``set_shuffle`` failed when ``labels`` was ``None``;
        * no ``on_epoch_end`` hook existed, so data was only shuffled once;
        * an unreadable image now raises a clear ``FileNotFoundError``.
    """

    def __init__(self, image_filenames, labels, batch_size=BATCH_SIZE, augmentor=None, pre_func=None, shuffle=False):
        # Let the base class initialise any state of its own.
        super().__init__()

        self.image_filenames = image_filenames
        self.labels = labels
        self.batch_size = batch_size
        self.augmentor = augmentor
        self.pre_func = pre_func
        self.shuffle = shuffle

        if self.shuffle:
            self.set_shuffle()

    def __len__(self):
        """Return the number of batches per epoch (the last one may be partial)."""
        return int(np.ceil(len(self.image_filenames) / self.batch_size))

    def __getitem__(self, index):
        """Load batch ``index``.

        Returns:
            ``(image_batch, label_batch)`` when labels are available, otherwise
            just ``image_batch``. ``image_batch`` is a float32 array of shape
            ``(n, IMAGE_SIZE, IMAGE_SIZE, 3)``.

        Raises:
            FileNotFoundError: If an image file cannot be read.
        """
        start = index * self.batch_size
        stop = start + self.batch_size
        batch_filenames = self.image_filenames[start:stop]

        image_batch = np.zeros((len(batch_filenames), IMAGE_SIZE, IMAGE_SIZE, 3), dtype='float32')

        for image_index, filename in enumerate(batch_filenames):
            image = cv2.imread(filename)
            # cv2.imread signals failure by returning None instead of raising.
            if image is None:
                raise FileNotFoundError(f"Could not read image file: {filename}")

            # OpenCV loads images as BGR; convert to RGB before resizing.
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            image = cv2.resize(image, (IMAGE_SIZE, IMAGE_SIZE))

            if self.augmentor is not None:
                image = self.augmentor(image=image)["image"]
            if self.pre_func is not None:
                image = self.pre_func(image)

            image_batch[image_index] = image

        if self.labels is None:
            return image_batch

        label_batch = self.labels[start:stop]
        return image_batch, label_batch

    def on_epoch_end(self):
        """Reshuffle the data between epochs when shuffling is enabled."""
        self.set_shuffle()

    def set_shuffle(self):
        """Shuffle file names (and labels, in unison) if ``shuffle`` is enabled."""
        if not self.shuffle:
            return

        if self.labels is None:
            self.image_filenames = sklearn.utils.shuffle(self.image_filenames)
        else:
            self.image_filenames, self.labels = sklearn.utils.shuffle(self.image_filenames, self.labels)
    

In [49]:
def train_model(data_df, model_name, augmentor, preprocessing_func, epochs=20, batch_size=None, learning_rate=0.0001):
    """Train a binary classifier on the images indexed by ``data_df``.

    Args:
        data_df: DataFrame passed to ``get_train_test_valid_separate`` to obtain
            the training and validation paths/labels.
        model_name: Backbone name forwarded to ``create_model``.
        augmentor: Augmentation callable for the training generator, or ``None``.
            The validation generator never augments.
        preprocessing_func: Per-image preprocessing callable used by both
            generators, or ``None``.
        epochs: Number of training epochs (default 20, the previously
            hard-coded value).
        batch_size: Images per batch; ``None`` (default) uses ``BATCH_SIZE``.
        learning_rate: Adam learning rate (default 0.0001, the previously
            hard-coded value).

    Returns:
        Tuple ``(model, history)``: the trained model and the object returned
        by ``model.fit``.
    """
    if batch_size is None:
        batch_size = BATCH_SIZE

    tr_path, tr_label, val_path, val_label, test_path, test_label = get_train_test_valid_separate(data_df)

    # Print the size of every split as a quick sanity check.
    get_show_shape([tr_path, tr_label, val_path, val_label, test_path, test_label])

    tr_ds = Data_set(
        tr_path,
        tr_label,
        batch_size=batch_size,
        augmentor=augmentor,
        shuffle=True,
        pre_func=preprocessing_func)

    # Validation data is neither augmented nor shuffled.
    val_ds = Data_set(
        val_path,
        val_label,
        batch_size=batch_size,
        augmentor=None,
        shuffle=False,
        pre_func=preprocessing_func)

    model = create_model(model_name=model_name)
    model.compile(
        optimizer=Adam(learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy'])

    # Step counts are computed from the same batch size the generators use,
    # rounding up so the final partial batch is included.
    history = model.fit(
        tr_ds,
        epochs=epochs,
        steps_per_epoch=int(np.ceil(tr_path.shape[0] / batch_size)),
        validation_data=val_ds,
        validation_steps=int(np.ceil(val_path.shape[0] / batch_size)),
        verbose=1)

    return model, history

In [None]:


# Train the MobileNet-based classifier without augmentation, then plot its learning curves.
mobile_model, mobile_history = train_model(
    data_df=data_df,
    model_name="mobilenet",
    augmentor=None,
    preprocessing_func=mobile_preprocess_input,
)
get_show_plot_loss_accuracy(mobile_history, 20)