In [10]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import os
from pathlib import Path #handles filesystem paths as Path objects instead of raw strings, which makes joining and globbing them simpler
from tensorflow.keras.utils import to_categorical #used to convert integer class labels into one-hot vectors 
from sklearn.utils import shuffle
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50

In [29]:
# Creating the Dataset

def collect_image_paths(root, class_names=('yes', 'no')):
    """Return the image files found under ``root/<class_name>`` for each class.

    Parameters
    ----------
    root : str or Path
        Directory that contains one sub-directory per class.
    class_names : sequence of str, optional
        Sub-directory names to scan, in the order their files should appear.
        A class whose directory does not exist is skipped.

    Returns
    -------
    list of Path
        Regular, non-hidden files, grouped by class and sorted within each
        class.
    """
    root = Path(root)
    found = []
    for class_name in class_names:
        class_dir = root / class_name
        if not class_dir.is_dir():
            continue
        # Directory listing order is filesystem-dependent; sorting keeps the
        # row order (and any seeded split derived from it) reproducible.
        # is_file() keeps nested folders out of the image list.
        found.extend(sorted(
            p for p in class_dir.glob('*')
            if p.is_file() and not p.name.startswith('.')
        ))
    return found


parent_dir = Path(r"images")

actual_img_paths = collect_image_paths(parent_dir)

# The class label is the name of the folder the image lives in.
labels = [p.parent.name for p in actual_img_paths]

df = pd.DataFrame({'filepath': actual_img_paths, 'label': labels})

In [32]:
# Display the assembled table: one row per image with its file path and class label.
df


Unnamed: 0,filepath,label
0,images\yes\Y1.jpg,yes
1,images\yes\Y10.jpg,yes
2,images\yes\Y100.JPG,yes
3,images\yes\Y101.jpg,yes
4,images\yes\Y102.jpg,yes
...,...,...
248,images\no\No18.jpg,no
249,images\no\No19.jpg,no
250,images\no\No20.jpg,no
251,images\no\No21.jpg,no


In [33]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the images for testing. Stratifying on the label keeps the
# yes/no proportion the same in both partitions, which matters for a dataset
# of only a few hundred images; random_state pins the shuffle so the split is
# repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    df['filepath'],
    df['label'],
    test_size=0.2,
    stratify=df['label'],
    random_state=42,
)


In [35]:
# Number of file paths that ended up in the training and test partitions.
len(X_train), len(X_test)

(202, 51)

In [37]:
#standardizes the input image to 224x224
from tensorflow.keras.applications.resnet50 import preprocess_input
import cv2

def load_and_preprocess_image(image_path, target_size=(224, 224)):
    """Read an image from disk and prepare it for ResNet50.

    Parameters
    ----------
    image_path : str or os.PathLike
        Location of the image file.
    target_size : tuple of int, optional
        ``(width, height)`` handed to ``cv2.resize``. Defaults to 224x224.

    Returns
    -------
    numpy.ndarray or None
        The resized image after ``preprocess_input``, or ``None`` when OpenCV
        could not read the file.
    """
    # cv2.imread expects a plain string; not every OpenCV build accepts
    # pathlib.Path objects, so convert explicitly.
    img = cv2.imread(str(image_path))
    if img is None:
        return None
    # OpenCV decodes to BGR, while preprocess_input expects RGB input and does
    # the RGB->BGR flip itself. Without this conversion the channels would end
    # up swapped relative to the per-channel means it subtracts.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, target_size)
    # Cast to float before normalising so the mean subtraction is not done on uint8.
    img = preprocess_input(img.astype("float32"))
    return img

In [38]:
def load_image_split(paths, labels):
    """Load every image in ``paths``, dropping unreadable files consistently.

    A file that ``load_and_preprocess_image`` cannot read (it returns ``None``)
    is removed from the images, the paths and the labels together, so the
    three stay row-aligned.

    Parameters
    ----------
    paths : pandas.Series
        Image file paths.
    labels : pandas.Series
        Class labels, in the same order as ``paths``.

    Returns
    -------
    tuple
        ``(images, kept_paths, kept_labels)`` where ``images`` is a numpy array
        stacking the successfully loaded images.
    """
    loaded = [load_and_preprocess_image(img_path) for img_path in paths]
    readable = np.array([img is not None for img in loaded], dtype=bool)
    if not readable.all():
        # Surface the failures instead of discarding them silently.
        print(f"Skipping {int((~readable).sum())} unreadable image(s):",
              [str(p) for p in paths[~readable]])
    images = np.array([img for img in loaded if img is not None])
    return images, paths[readable], labels[readable]


# The filtered paths/labels are written back to the split variables so that
# later cells using Y_train / Y_test see labels that line up with the image
# arrays, and so that re-running this cell gives the same result.
X_train_images, X_train, Y_train = load_image_split(X_train, Y_train)

X_test_images, X_test, Y_test = load_image_split(X_test, Y_test)


In [39]:
# Integer-encode the class labels, then one-hot encode them.
le = LabelEncoder()
# Learn the label -> integer mapping from the training labels only and reuse it
# for the test labels; re-fitting on the test set could assign different
# integers to the same class if the two splits do not contain the same classes.
y_train_encoded = le.fit_transform(Y_train)
y_test_int = le.transform(Y_test)
num_classes = len(le.classes_)

X_train_shuffled, Y_train_shuffled = shuffle(X_train_images, y_train_encoded, random_state=42)

# Fix the one-hot width explicitly so both splits get the same number of
# columns even if one of them happens to lack the highest-numbered class.
y_train_shuffled_encoded = to_categorical(Y_train_shuffled, num_classes=num_classes)
y_test_encoded = to_categorical(y_test_int, num_classes=num_classes)

In [41]:
#data augmentation

from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Spatial perturbations: rotation, shifts, shear, zoom and left-right flips.
_geometric_jitter = {
    'rotation_range': 30,
    'width_shift_range': 0.15,
    'height_shift_range': 0.15,
    'shear_range': 0.15,
    'zoom_range': 0.15,
    'horizontal_flip': True,
    'vertical_flip': False,
    'fill_mode': 'nearest',
}

# Intensity perturbations.
# NOTE(review): if this generator is fed arrays that already went through
# preprocess_input, confirm brightness_range still behaves as intended -- TODO confirm.
_photometric_jitter = {
    'brightness_range': [0.8, 1.2],
    'channel_shift_range': 20,
}

demo_datagen = ImageDataGenerator(**_geometric_jitter, **_photometric_jitter)