<a href="https://colab.research.google.com/github/C23-PS435-bangkit/MachineLearning/blob/normal-dataset-only/intial_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Import Libs

In [24]:
import os
import zipfile
import random
import shutil
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from shutil import copyfile
import matplotlib.pyplot as plt

In [25]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Import Split Datasets

check the datasets

In [8]:
import os

base_dir = '/content/drive/MyDrive/dataset anjing/normal dataset'

bd_dir = os.path.join(base_dir, 'bacterial_dermatosis')
fi_dir = os.path.join(base_dir, 'fungal_infection')
ha_dir = os.path.join(base_dir, 'hypersensitivity_allergic_dermatosis')
he_dir = os.path.join(base_dir, 'healthy')

print('total training bacterial_dermatosis images:', len(os.listdir(bd_dir)))
print('total training fungal_infection images:', len(os.listdir(fi_dir)))
print('total training hypersensitivity_allergic_dermatosis images:', len(os.listdir(ha_dir)))
print('total training healthy images:', len(os.listdir(he_dir)))

bd_files = os.listdir(bd_dir)
print(bd_files[:10])

fi_files = os.listdir(fi_dir)
print(fi_files[:10])

ha_files = os.listdir(ha_dir)
print(ha_files[:10])

he_files = os.listdir(he_dir)
print(he_files[:10])

total training bacterial_dermatosis images: 12
total training fungal_infection images: 11
total training hypersensitivity_allergic_dermatosis images: 13
total training healthy images: 26
['dog210612_03_01_13_pic0.jpg', 'dog210617_00_02_02_pic0.jpg', 'dog210615_01_02_06_pic0.jpg', 'Dog210619_01_01_44_pic0.jpg', 'Dog210624_08_02_35_pic0.jpg', 'Dog210628_04_01_17_pic0.jpg', 'Dog210628_11_01_16_pic0.jpg', 'Dog210629_09_02_13_pic0.jpg', 'Dog210630_04_02_11_pic0.jpg', 'Dog210702_09_01_09_pic0.jpg']
['dog210422_04_02_33_pic0.jpg', 'dog210424_09_02_32_pic0.jpg', 'dog210430_05_01_29_pic0.jpg', 'dog210430_08_01_30_pic0.jpg', 'dog210504_49_01_28_pic0.jpg', 'dog210610_04_02_14_pic0.jpg', 'dog210610_48_02_15_pic0.jpg', 'Dog210622_03_02_41_pic0.jpg', 'Dog210623_08_02_37_pic0.jpg', 'Dog210706_01_02_06_pic0.jpg']
['dog210424_05_01_31_pic0.jpg', 'dog210612_48_02_12_pic0.jpg', 'dog210617_02_02_01_pic0.jpg', 'dog210617_01_02_05_pic0.jpg', 'Dog210621_01_01_43_pic0.jpg', 'Dog210622_01_01_39_pic0.jpg', 'Dog

## bd

### manage new directory structure

Desired structure:

```
bd modelling
  |--- bd_dir (images of bacterial_dermatosis)
  |   |--- img1.png
  |   |--- img2.png
  |   |--- ...
  |            
  |--- non_bd_dir (images if fi, ha, he)
      |--- img1.png
      |--- img2.png
      |--- ...
```



Function to make new dir with new structured image dataset to satisfy the need of bd_modelling

In [9]:
import os
import shutil

def create_modelling_dataset(base_dir, bd_dir, fi_dir, ha_dir, he_dir):
    # Create the 'bd_modelling' directory inside 'base_dir'
    bd_modelling_dir = os.path.join(base_dir, 'bd_modelling')
    os.makedirs(bd_modelling_dir, exist_ok=True)

    # Create the 'bd_dir' directory inside 'bd_modelling'
    bd_dir_modelling = os.path.join(bd_modelling_dir, 'bd_dir')
    os.makedirs(bd_dir_modelling, exist_ok=True)

    # Copy all images from 'bd_dir' to 'bd_dir_modelling'
    bd_files = os.listdir(bd_dir)
    for file in bd_files:
        src_path = os.path.join(bd_dir, file)
        dst_path = os.path.join(bd_dir_modelling, file)
        shutil.copy(src_path, dst_path)

    # Create the 'non_bd_dir' directory inside 'bd_modelling'
    non_bd_dir_modelling = os.path.join(bd_modelling_dir, 'non_bd_dir')
    os.makedirs(non_bd_dir_modelling, exist_ok=True)

    # Copy all images from the rest of the directories to 'non_bd_dir_modelling'
    for dir_path in [fi_dir, ha_dir, he_dir]:
        files = os.listdir(dir_path)
        for file in files:
            src_path = os.path.join(dir_path, file)
            dst_path = os.path.join(non_bd_dir_modelling, file)
            shutil.copy(src_path, dst_path)

    print("Modelling dataset created successfully.")

In [11]:
create_modelling_dataset(base_dir, bd_dir, fi_dir, ha_dir, he_dir)

Modelling dataset created successfully.


### Crop & Augment Images

In [5]:
def update_progress(progress):
    bar_length = 20  # Length of the progress bar
    filled_length = int(bar_length * progress)
    bar = '█' * filled_length + '-' * (bar_length - filled_length)
    percentage = int(progress * 100)
    print(f'\rProgress: |{bar}| {percentage}% ', end='', flush=True)

In [6]:
# function to crop image
# note: the amount of croped pixels were approxed by me (nopal) 
#       by measure the excess pixels
def crop_image(image):
    crop_width = 1920 - 170 - 150  # Calculate the resulting width after trimming
    crop_height = 1080  # Height remains the same
    crop_location = (170, 0)  # Starting position of the crop

    # Crop the image using array indexing
    cropped_image = image.crop((crop_location[0], crop_location[1],
                                crop_location[0] + crop_width, crop_location[1] + crop_height))

    return cropped_image

In [7]:
# Example usage
datagen = ImageDataGenerator(
    rotation_range=30,
    # width_shift_range=0.1,
    # height_shift_range=0.1,
    shear_range=0.2,
    # zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest'
)

In [8]:
import os
import numpy as np
import shutil
from PIL import Image
from tensorflow.keras.preprocessing.image import ImageDataGenerator

def augment_and_save_images(src_dir, target_dir, datagen, amount):
    os.makedirs(target_dir, exist_ok=True)

    for file_name in os.listdir(src_dir):
        file_path = os.path.join(src_dir, file_name)

        # Load the image using PIL
        image = Image.open(file_path)

        # Crop the image using the crop_image() function
        cropped_image = crop_image(image)

        # Expand dimensions to match the expected input shape of datagen.flow
        image_array = np.expand_dims(cropped_image, axis=0)

        # Generate 20 augmented images using the data generator
        augmented_images = datagen.flow(image_array, batch_size=1, save_to_dir=target_dir, save_prefix='aug_', save_format='jpeg')

        print(f"\nAugmenting {file_name}")

        # Iterate over the augmented images and save them to the target directory
        for i, augmented_image in enumerate(augmented_images):
            if i >= amount:
                break
            update_progress((i+1)/amount)

    print("\nAugmentation completed successfully.")

In [9]:
src_dir = '/content/drive/MyDrive/dataset anjing/bd_modelling/bd_dir'
target_dir = '/content/drive/MyDrive/dataset anjing/bd_modelling_aug/bd_dir'
augment_and_save_images(src_dir, target_dir, datagen, 10)


Augmenting dog210612_03_01_13_pic0.jpg
Progress: |████████████████████| 100% 
Augmenting dog210617_00_02_02_pic0.jpg
Progress: |████████████████████| 100% 
Augmenting dog210615_01_02_06_pic0.jpg
Progress: |████████████████████| 100% 
Augmenting Dog210619_01_01_44_pic0.jpg
Progress: |████████████████████| 100% 
Augmenting Dog210624_08_02_35_pic0.jpg
Progress: |████████████████████| 100% 
Augmenting Dog210628_04_01_17_pic0.jpg
Progress: |████████████████████| 100% 
Augmenting Dog210628_11_01_16_pic0.jpg
Progress: |████████████████████| 100% 
Augmenting Dog210629_09_02_13_pic0.jpg
Progress: |████████████████████| 100% 
Augmenting Dog210630_04_02_11_pic0.jpg
Progress: |████████████████████| 100% 
Augmenting Dog210702_09_01_09_pic0.jpg
Progress: |████████████████████| 100% 
Augmenting Dog210706_48_02_05_pic0.jpg
Progress: |████████████████████| 100% 
Augmenting Dog210707_01_01_04_pic0.jpg
Progress: |████████████████████| 100% 
Augmentation completed successfully.


In [10]:
src_dir = '/content/drive/MyDrive/dataset anjing/bd_modelling/non_bd_dir'
target_dir = '/content/drive/MyDrive/dataset anjing/bd_modelling_aug/non_bd_dir'
augment_and_save_images(src_dir, target_dir, datagen, 5)


Augmenting dog210422_04_02_33_pic0.jpg
Progress: |████████████████████| 100% 
Augmenting dog210424_09_02_32_pic0.jpg
Progress: |████████████████████| 100% 
Augmenting dog210430_05_01_29_pic0.jpg
Progress: |████████████████████| 100% 
Augmenting dog210430_08_01_30_pic0.jpg
Progress: |████████████████████| 100% 
Augmenting dog210504_49_01_28_pic0.jpg
Progress: |████████████████████| 100% 
Augmenting dog210610_04_02_14_pic0.jpg
Progress: |████████████████████| 100% 
Augmenting dog210610_48_02_15_pic0.jpg
Progress: |████████████████████| 100% 
Augmenting Dog210622_03_02_41_pic0.jpg
Progress: |████████████████████| 100% 
Augmenting Dog210623_08_02_37_pic0.jpg
Progress: |████████████████████| 100% 
Augmenting Dog210706_01_02_06_pic0.jpg
Progress: |████████████████████| 100% 
Augmenting Dog210708_01_02_01_pic0.jpg
Progress: |████████████████████| 100% 
Augmenting dog210424_05_01_31_pic0.jpg
Progress: |████████████████████| 100% 
Augmenting dog210612_48_02_12_pic0.jpg
Progress: |█████████████

### Data finalization

In [13]:
import os
import shutil

# Function to copy all files from src_dir to target_dir

def copy_files(src_dir, target_dir):
    # Get a list of all files in the source directory
    files = os.listdir(src_dir)

    # Copy each file to the destination directory
    for file in files:
        src_path = os.path.join(src_dir, file)
        dest_path = os.path.join(target_dir, file)
        shutil.copy(src_path, dest_path)

In [18]:
# combine all bd images (augmented & non augmented)
copy_files('/content/drive/MyDrive/dataset anjing/bd_modelling/bd_dir', '/content/drive/MyDrive/dataset anjing/bd_modelling_final/bd_dir')
copy_files('/content/drive/MyDrive/dataset anjing/bd_modelling_aug/bd_dir', '/content/drive/MyDrive/dataset anjing/bd_modelling_final/bd_dir')

In [21]:
# combine all non_bd images (augmented & non augmented)
copy_files('/content/drive/MyDrive/dataset anjing/bd_modelling/non_bd_dir', '/content/drive/MyDrive/dataset anjing/bd_modelling_final/non_bd_dir')
copy_files('/content/drive/MyDrive/dataset anjing/bd_modelling_aug/non_bd_dir', '/content/drive/MyDrive/dataset anjing/bd_modelling_final/non_bd_dir')

In [23]:
print(len(os.listdir('/content/drive/MyDrive/dataset anjing/bd_modelling_final/bd_dir')))

143


In [22]:
print(len(os.listdir('/content/drive/MyDrive/dataset anjing/bd_modelling_final/non_bd_dir')))

348


## Modelling

### Undersample

In [28]:
import os
import random
import shutil

def perform_random_undersampling(base_dir, majority_dir, minority_dir, target_dir, desired_ratio):
    os.makedirs(target_dir, exist_ok=True)

    majority_files = os.listdir(os.path.join(base_dir, majority_dir))
    minority_files = os.listdir(os.path.join(base_dir, minority_dir))

    majority_count = len(majority_files)
    minority_count = len(minority_files)

    # Calculate the number of samples to randomly select from the majority class
    max_majority_samples = int(minority_count * desired_ratio)

    # Randomly select samples from the majority class
    selected_majority_samples = random.sample(majority_files, max_majority_samples)

    # Copy the selected majority samples to the target directory
    for file_name in selected_majority_samples:
        src_path = os.path.join(base_dir, majority_dir, file_name)
        dst_path = os.path.join(base_dir, target_dir, majority_dir, file_name)
        shutil.copy(src_path, dst_path)

    # Copy all samples from the minority class to the target directory
    for file_name in minority_files:
        src_path = os.path.join(base_dir, minority_dir, file_name)
        dst_path = os.path.join(base_dir, target_dir, minority_dir, file_name)
        shutil.copy(src_path, dst_path)

    print("Random undersampling completed successfully.")


In [31]:

# Example usage
base_dir = '/content/drive/MyDrive/dataset anjing/bd_modelling_final/'
majority_dir = 'non_bd_dir'
minority_dir = 'bd_dir'
target_dir = 'undersampled'
desired_ratio = 1.0

perform_random_undersampling(base_dir, majority_dir, minority_dir, target_dir, desired_ratio)


Random undersampling completed successfully.


In [32]:
print(len(os.listdir('/content/drive/MyDrive/dataset anjing/bd_modelling_final/undersampled/bd_dir')))

143


In [33]:
print(len(os.listdir('/content/drive/MyDrive/dataset anjing/bd_modelling_final/undersampled/non_bd_dir')))

143


### Split data into training set & validation set

In [40]:
root_dir = '/content/drive/MyDrive/dataset anjing/bd_final'

# Create train directory
train_cats_dir = os.path.join(root_dir, 'training/bd')
os.makedirs(train_cats_dir)
train_dogs_dir = os.path.join(root_dir, 'training/non_bd')
os.makedirs(train_dogs_dir)
# Create validation directory
val_cats_dir = os.path.join(root_dir, 'validation/bd')
os.makedirs(val_cats_dir)
val_dogs_dir = os.path.join(root_dir, 'validation/non_bd')
os.makedirs(val_dogs_dir)

In [41]:
for rootdir, dirs, files in os.walk(root_dir):
    for subdir in dirs:
        print(os.path.join(rootdir, subdir))

/content/drive/MyDrive/dataset anjing/bd_final/training
/content/drive/MyDrive/dataset anjing/bd_final/validation
/content/drive/MyDrive/dataset anjing/bd_final/training/bd
/content/drive/MyDrive/dataset anjing/bd_final/training/non_bd
/content/drive/MyDrive/dataset anjing/bd_final/validation/bd
/content/drive/MyDrive/dataset anjing/bd_final/validation/non_bd


In [37]:
def split_data(SOURCE_DIR, TRAINING_DIR, VALIDATION_DIR, SPLIT_SIZE):
  # get list of files
  files = os.listdir(SOURCE_DIR)

  # filter out files with zero length
  verified_files = []
  for file in files :
    if os.path.getsize(os.path.join(SOURCE_DIR, file)) == 0 :
      print(f"{file} is zero length, so ignoring.")
    else :
      verified_files.append(file)
      

  # shuffle the files
  random.shuffle(verified_files)

  # calculate the split index
  split_idx = int(SPLIT_SIZE * len(verified_files))

  # split the files
  train_files = verified_files[:split_idx]
  val_files = verified_files[split_idx:]

  # copy train files
  for file in train_files:
      src_path = os.path.join(SOURCE_DIR, file)
      dst_path = os.path.join(TRAINING_DIR, file)
      shutil.copy(src_path, dst_path)

  # copy validation files
  for file in val_files:
      src_path = os.path.join(SOURCE_DIR, file)
      dst_path = os.path.join(VALIDATION_DIR, file)
      shutil.copy(src_path, dst_path)

In [42]:
# Define paths
bd_src_dir = '/content/drive/MyDrive/dataset anjing/bd_modelling_final/undersampled/bd_dir'
non_bd_src_dir = '/content/drive/MyDrive/dataset anjing/bd_modelling_final/undersampled/non_bd_dir'

train_dir = "/content/drive/MyDrive/dataset anjing/bd_final/training"
val_dir = "/content/drive/MyDrive/dataset anjing/bd_final/validation"

bd_train_dir = os.path.join(train_dir, "bd/")
bd_val_dir = os.path.join(val_dir, "bd/")

non_bd_train_dir = os.path.join(train_dir, "non_bd/")
non_bd_val_dir = os.path.join(val_dir, "non_bd/")

# Empty directories in case you run this cell multiple times
if len(os.listdir(bd_train_dir)) > 0:
  for file in os.scandir(bd_train_dir):
    os.remove(file.path)
if len(os.listdir(non_bd_train_dir)) > 0:
  for file in os.scandir(non_bd_train_dir):
    os.remove(file.path)
if len(os.listdir(bd_val_dir)) > 0:
  for file in os.scandir(bd_val_dir):
    os.remove(file.path)
if len(os.listdir(non_bd_val_dir)) > 0:
  for file in os.scandir(non_bd_val_dir):
    os.remove(file.path)

# Define proportion of images used for training
split_size = .8

# Run the function
# NOTE: Messages about zero length images should be printed out
split_data(bd_src_dir, bd_train_dir, bd_val_dir, split_size)
split_data(non_bd_src_dir, non_bd_train_dir, non_bd_val_dir, split_size)

# Your function should perform copies rather than moving images so original directories should contain unchanged images
print(f"\n\nOriginal bd directory has {len(os.listdir(bd_src_dir))} images")
print(f"Original non_bd directory has {len(os.listdir(non_bd_src_dir))} images\n")

# Training and validation splits. Check that the number of images matches the expected output.
print(f"There are {len(os.listdir(bd_train_dir))} images of bd for training")
print(f"There are {len(os.listdir(non_bd_train_dir))} images of non_bd for training")
print(f"There are {len(os.listdir(bd_val_dir))} images of bd for validation")
print(f"There are {len(os.listdir(non_bd_val_dir))} images of non_bd for validation")



Original bd directory has 143 images
Original non_bd directory has 143 images

There are 114 images of bd for training
There are 114 images of non_bd for training
There are 29 images of bd for validation
There are 29 images of non_bd for validation


### Transfer Learning on RestNet

In [54]:
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

# Load the pre-trained ResNet-50 model without the top classification layer
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(150, 150, 3))

# Freeze the pre-trained layers
for layer in base_model.layers:
    layer.trainable = False

# Add your own classification layers on top of the pre-trained base model
x = base_model.output
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = Dense(128, activation='relu')(x)
x = Dropout(0.5)(x)
output = Dense(1, activation='sigmoid')(x)

# Create the final model
model = Model(inputs=base_model.input, outputs=output)

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

In [55]:
model.summary()

Model: "model_2"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_3 (InputLayer)           [(None, 150, 150, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv1_pad (ZeroPadding2D)      (None, 156, 156, 3)  0           ['input_3[0][0]']                
                                                                                                  
 conv1_conv (Conv2D)            (None, 75, 75, 64)   9472        ['conv1_pad[0][0]']              
                                                                                                  
 conv1_bn (BatchNormalization)  (None, 75, 75, 64)   256         ['conv1_conv[0][0]']       

In [56]:
def train_val_generators(TRAINING_DIR, VALIDATION_DIR):
  # Instantiate the ImageDataGenerator class 
  train_datagen = ImageDataGenerator(rescale=1./255)

  # Pass in the appropriate arguments to the flow_from_directory method
  train_generator = train_datagen.flow_from_directory(directory=TRAINING_DIR,
                                                      batch_size=10, 
                                                      class_mode='binary',
                                                      target_size=(150, 150))

  # Instantiate the ImageDataGenerator class
  validation_datagen = ImageDataGenerator(rescale=1./255)

  # Pass in the appropriate arguments to the flow_from_directory method
  validation_generator = validation_datagen.flow_from_directory(directory=VALIDATION_DIR,
                                                                batch_size=10, 
                                                                class_mode='binary',
                                                                target_size=(150, 150))
  return train_generator, validation_generator

In [57]:
train_dir = '/content/drive/MyDrive/dataset anjing/bd_final/training'
val_dir = '/content/drive/MyDrive/dataset anjing/bd_final/validation'
train_generator, validation_generator = train_val_generators(train_dir, val_dir)

Found 228 images belonging to 2 classes.
Found 58 images belonging to 2 classes.


In [58]:
history = model.fit(train_generator,
                    validation_data = validation_generator,
                    epochs = 10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


# ARC

In [None]:
root_dir = '/content/drive/MyDrive/Datasets/Bangkit - Capstone/normal dataset/arranged'

if os.path.exists(root_dir):
  shutil.rmtree(root_dir)

def create_train_val_dirs(root_path):

  # List of labels
  labels = ['bd', 'fi', 'ha', 'he']

  # Create train directory
  train_dir = os.path.join(root_path, 'training')
  os.makedirs(train_dir, exist_ok=True)

  for label in labels:
      label_dir = os.path.join(train_dir, label)
      os.makedirs(label_dir, exist_ok=True)

  # Create validation directory
  val_dir = os.path.join(root_path, 'validation')
  os.makedirs(val_dir, exist_ok=True)

  for label in labels:
      label_dir = os.path.join(val_dir, label)
      os.makedirs(label_dir, exist_ok=True)

  
try:
  create_train_val_dirs(root_path=root_dir)
except FileExistsError:
  print("Error")

In [None]:
for rootdir, dirs, files in os.walk(root_dir):
    for subdir in dirs:
        print(os.path.join(rootdir, subdir))

/content/drive/MyDrive/Datasets/Bangkit - Capstone/normal dataset/arranged/training
/content/drive/MyDrive/Datasets/Bangkit - Capstone/normal dataset/arranged/validation
/content/drive/MyDrive/Datasets/Bangkit - Capstone/normal dataset/arranged/training/bd
/content/drive/MyDrive/Datasets/Bangkit - Capstone/normal dataset/arranged/training/fi
/content/drive/MyDrive/Datasets/Bangkit - Capstone/normal dataset/arranged/training/ha
/content/drive/MyDrive/Datasets/Bangkit - Capstone/normal dataset/arranged/training/he
/content/drive/MyDrive/Datasets/Bangkit - Capstone/normal dataset/arranged/validation/bd
/content/drive/MyDrive/Datasets/Bangkit - Capstone/normal dataset/arranged/validation/fi
/content/drive/MyDrive/Datasets/Bangkit - Capstone/normal dataset/arranged/validation/ha
/content/drive/MyDrive/Datasets/Bangkit - Capstone/normal dataset/arranged/validation/he


## Try to Crop

In [None]:
# function to crop image
# note: the amount of croped pixels were approxed by me (nopal) 
#       by measure the excess pixels
def crop_image(image):
    crop_width = 1920 - 170 - 150  # Calculate the resulting width after trimming
    crop_height = 1080  # Height remains the same
    crop_location = (170, 0)  # Starting position of the crop

    # Crop the image using array indexing
    cropped_image = image[crop_location[1]:crop_location[1] + crop_height,
                          crop_location[0]:crop_location[0] + crop_width, :]

    return cropped_image

In [None]:
# function to split dataset into training and validation
def split_data(SOURCE_DIR, TRAINING_DIR, VALIDATION_DIR, SPLIT_SIZE):
    files = os.listdir(SOURCE_DIR)

    verified_files = []
    for file in files:
        if os.path.getsize(os.path.join(SOURCE_DIR, file)) == 0:
            print(f"{file} is zero length, so ignoring.")
        else:
            verified_files.append(file)

    # Shuffle the files
    random.shuffle(verified_files)

    # Calculate the split index
    split_idx = int(SPLIT_SIZE * len(verified_files))

    # Split the files
    train_files = verified_files[:split_idx]
    val_files = verified_files[split_idx:]

    # Copy train files
    for file in train_files:
        src_path = os.path.join(SOURCE_DIR, file)
        dst_path = os.path.join(TRAINING_DIR, file)
        # Load the image using TensorFlow
        image = tf.io.read_file(src_path)
        image = tf.image.decode_image(image)
        # Crop the image
        cropped_image = crop_image(image)
        # Save the cropped image to the destination directory
        tf.io.write_file(dst_path, tf.image.encode_png(cropped_image))

    # Copy validation files
    for file in val_files:
        src_path = os.path.join(SOURCE_DIR, file)
        dst_path = os.path.join(VALIDATION_DIR, file)
        # Load the image using TensorFlow
        image = tf.io.read_file(src_path)
        image = tf.image.decode_image(image)
        # Crop the image
        cropped_image = crop_image(image)
        # Save the cropped image to the destination directory
        tf.io.write_file(dst_path, tf.image.encode_png(cropped_image))

In [None]:
TRAINING_DIR = "/content/drive/MyDrive/Datasets/Bangkit - Capstone/normal dataset/arranged/training"
VALIDATION_DIR = "/content/drive/MyDrive/Datasets/Bangkit - Capstone/normal dataset/arranged/validation"

LABELS = ['bd', 'fi', 'ha', 'he']

TRAINING_LABEL_DIRS = [os.path.join(TRAINING_DIR, label) for label in LABELS]
VALIDATION_LABEL_DIRS = [os.path.join(VALIDATION_DIR, label) for label in LABELS]

# Empty directories in case this cell runs multiple times
for label_dir in TRAINING_LABEL_DIRS:
    if len(os.listdir(label_dir)) > 0:
        for file in os.scandir(label_dir):
            os.remove(file.path)

for label_dir in VALIDATION_LABEL_DIRS:
    if len(os.listdir(label_dir)) > 0:
        for file in os.scandir(label_dir):
            os.remove(file.path)

# Define proportion of images used for training
split_size = 0.8

# Run the function
for label in LABELS:
    source_dir = eval(f'{label}_dir')
    train_dir = os.path.join(TRAINING_DIR, label)
    val_dir = os.path.join(VALIDATION_DIR, label)
    split_data(source_dir, train_dir, val_dir, split_size)

# Check the number of images in each directory

# Original source directories should contain unchanged images
for label in LABELS:
    source_dir = eval(f'{label}_dir')
    print(f"Original {label}'s directory has {len(os.listdir(source_dir))} images")

# Training and validation splits
for i, label in enumerate(LABELS):
    print(f"There are {len(os.listdir(TRAINING_LABEL_DIRS[i]))} images of {label} for training")
    print(f"There are {len(os.listdir(VALIDATION_LABEL_DIRS[i]))} images of {label} for validation")

Original bd's directory has 12 images
Original fi's directory has 11 images
Original ha's directory has 13 images
Original he's directory has 26 images
There are 9 images of bd for training
There are 3 images of bd for validation
There are 8 images of fi for training
There are 3 images of fi for validation
There are 10 images of ha for training
There are 3 images of ha for validation
There are 20 images of he for training
There are 6 images of he for validation


## Image Data Generator

In [None]:
def train_val_generators(TRAINING_DIR, VALIDATION_DIR):

  # Instantiate the ImageDataGenerator class and set the desired augmentation parameters
  train_datagen = ImageDataGenerator(
      rescale=1.0/255.,
      shear_range=0.2,
      rotation_range=360,
      horizontal_flip=True,
      vertical_flip=True
  )

  # Pass in the appropriate arguments to the flow_from_directory method
  train_generator = train_datagen.flow_from_directory(
      directory=TRAINING_DIR,
      batch_size=10,
      class_mode='categorical',
      target_size=(128, 128)
  )

  # Instantiate the ImageDataGenerator class and set the rescale parameter
  validation_datagen = ImageDataGenerator(rescale=1.0/255.)

  # Pass in the appropriate arguments to the flow_from_directory method
  validation_generator = validation_datagen.flow_from_directory(
      directory=VALIDATION_DIR,
      batch_size=10,
      class_mode='categorical',
      target_size=(128, 128)
  )

  return train_generator, validation_generator

In [None]:
# Test the generators
train_generator, validation_generator = train_val_generators(TRAINING_DIR, VALIDATION_DIR)

Found 47 images belonging to 4 classes.
Found 15 images belonging to 4 classes.


## Try Using Restnet

In [None]:
from tensorflow.keras.applications import ResNet50

# Load the pre-trained ResNet50 model without the top layer
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(128, 128, 3))

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5


In [None]:
from tensorflow.keras import models, layers

# Create a new model by adding your own classifier on top of the pre-trained ResNet model
model = models.Sequential()
model.add(base_model)
model.add(layers.Flatten())
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(4, activation='softmax'))

In [None]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
history = model.fit(train_generator, epochs=10, validation_data=validation_generator)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


## Modelling

In [None]:
def create_model():
  model = tf.keras.models.Sequential([ 
    # This is the first convolution
    tf.keras.layers.Conv2D(64, (3,3), activation='relu', input_shape=(128, 128, 3)),
    tf.keras.layers.MaxPooling2D(2, 2),
    # The second convolution
    tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    # The third convolution
    tf.keras.layers.Conv2D(128, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    # The fourth convolution
    tf.keras.layers.Conv2D(128, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    # Flatten the results to feed into a DNN
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dropout(0.5),
    # 512 neuron hidden layer
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(4, activation='softmax')
])

  
  model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.001),
                loss=tf.keras.losses.binary_crossentropy,
                metrics=['accuracy']) 

  return model

In [None]:
model = create_model()

history = model.fit(train_generator,
                    epochs=100,
                    verbose=1,
                    validation_data=validation_generator)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100

KeyboardInterrupt: ignored

## Try to crop