In [1]:
!pip3 install -r requirements.txt

Collecting scikit-learn
  Downloading scikit_learn-1.2.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (9.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.8/9.8 MB[0m [31m10.9 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting joblib>=1.1.1
  Downloading joblib-1.2.0-py3-none-any.whl (297 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m298.0/298.0 kB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m00:01[0m
Collecting threadpoolctl>=2.0.0
  Downloading threadpoolctl-3.1.0-py3-none-any.whl (14 kB)




Installing collected packages: threadpoolctl, joblib, scikit-learn
Successfully installed joblib-1.2.0 scikit-learn-1.2.2 threadpoolctl-3.1.0
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m23.1.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3 -m pip install --upgrade pip[0m


In [2]:
import os
import numpy as np
import tensorflow as tf

from keras.utils import load_img, img_to_array
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.layers import Flatten, Dense, Dropout
from keras import Model, callbacks

2023-04-20 08:58:00.558437: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
def init_vgg16(checkpoint_path:str=None) -> Model:
    """
    Creates a new instance of the VGG16 model with imageNet pretrained weights.
    
    args: 
        checkpoint_path: the path to the location of a tf checkpoint for resuming training
        
    returns:
        model: a tf.Model
    """

    # Download the model with weights pre-trained using ImageNet database
    vgg16 = VGG16(weights='imagenet', 
                  include_top=False,
                  input_shape=(224,224,3))
    
    
    # Freeze layers for training
    for layer in vgg16.layers:
        layer.trainable = False

    # Create a new 'top' of the model of fully-connected layers
    top_model = vgg16.output
    top_model = Flatten(name="flatten")(top_model)
    top_model = Dense(4096, activation='relu', name="top_dense_1")(top_model)
    top_model = Dropout(0.2)(top_model)
    top_model = Dense(512, activation='relu', name="top_dense_2")(top_model)
    top_model = Dropout(0.2)(top_model)
    top_model = Dense(256, activation='relu', name="top_dense_3")(top_model)
    top_model = Dropout(0.2)(top_model)
    top_model = Dense(128, activation='relu', name="top_dense_4")(top_model)
    top_model = Dropout(0.2)(top_model)
    output_layer = Dense(1, activation='softmax', name="output" )(top_model)
    
    # Group the convolutional base and new fully-connected layers into a Model object.
    model = Model(inputs=vgg16.input, outputs=output_layer)

    # Compiles the model for training.
    model.compile(optimizer='adam', 
                  loss='binary_crossentropy',
                  metrics=['binary_accuracy'])
    
    if checkpoint_path is not None:
        model.load_weights(checkpoint_path)
    
    return model


In [4]:
def load_and_preprocess_img(path: str):

    img = load_img(path, target_size=(224, 224))
    ary = img_to_array(img)
    ary = np.expand_dims(ary, axis=0)
    ary = preprocess_input(ary)
    return ary[0]


In [5]:
from os import listdir

def load_data(path: str, label: int):

    X = []
    y = []

    for filename in listdir(path):
        x = load_and_preprocess_img(path + filename)
        X.append(x)
        y.append(label)

    return X, y


In [6]:
FILEPATH_POS = "./out_pos/"
FILEPATH_NEG = "./out_neg/"

X, y = load_data(FILEPATH_POS, 1)
X_neg, y_neg = load_data(FILEPATH_NEG, 0)

X.extend(X_neg)
y.extend(y_neg)

X = np.array(X)
y = np.array(y)

print(f"X: {X.shape}")
print(f"y: {y.shape}")


X: (6767, 224, 224, 3)
y: (6767,)


In [7]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.33, random_state=42)

In [8]:
# Checkpoint during training
checkpoint_path = "classifier_chkpts/cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)

model = init_vgg16(checkpoint_path)
model.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


2023-04-20 08:58:17.262946: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-04-20 08:58:17.265734: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-04-20 08:58:17.265856: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysf

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0     

In [None]:
batch_size = 32

# Create a callback that saves the model's weights
cp_callback = callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                        save_weights_only=True,
                                        save_freq=10*batch_size,
                                        verbose=1)

model.fit(X_train,
          y_train,
          batch_size=batch_size,
          epochs=200,
          validation_data=(X_test, y_test),
          callbacks=[cp_callback])

Epoch 1/200


2023-04-20 08:58:27.401110: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8600
2023-04-20 08:58:28.841039: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:637] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.
2023-04-20 08:58:29.224788: I tensorflow/compiler/xla/service/service.cc:169] XLA service 0x7f8e7246bfb0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2023-04-20 08:58:29.224809: I tensorflow/compiler/xla/service/service.cc:177]   StreamExecutor device (0): NVIDIA GeForce RTX 3060, Compute Capability 8.6
2023-04-20 08:58:29.227387: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2023-04-20 08:58:29.303012: I ./tensorflow/compiler/jit/device_compiler.h:180] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the p

Epoch 2/200
Epoch 3/200
Epoch 3: saving model to classifier_chkpts/cp.ckpt
Epoch 4/200
Epoch 5/200
Epoch 5: saving model to classifier_chkpts/cp.ckpt
Epoch 6/200
Epoch 7/200
Epoch 7: saving model to classifier_chkpts/cp.ckpt
Epoch 8/200
Epoch 9/200
Epoch 10/200
  1/142 [..............................] - ETA: 13s - loss: 0.3871 - binary_accuracy: 0.8750
Epoch 10: saving model to classifier_chkpts/cp.ckpt
Epoch 11/200
Epoch 12/200
Epoch 12: saving model to classifier_chkpts/cp.ckpt
Epoch 13/200
Epoch 14/200
Epoch 14: saving model to classifier_chkpts/cp.ckpt
Epoch 15/200
Epoch 16/200
Epoch 16: saving model to classifier_chkpts/cp.ckpt
Epoch 17/200
Epoch 18/200
Epoch 19/200
  3/142 [..............................] - ETA: 13s - loss: 0.2868 - binary_accuracy: 0.9167
Epoch 19: saving model to classifier_chkpts/cp.ckpt
Epoch 20/200
Epoch 21/200
Epoch 21: saving model to classifier_chkpts/cp.ckpt
Epoch 22/200
Epoch 23/200
Epoch 23: saving model to classifier_chkpts/cp.ckpt
Epoch 24/200
Epoch 