In [None]:
import os
import zipfile
import random
import shutil
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from shutil import copyfile
import matplotlib.pyplot as plt

In [None]:
# Mount Google Drive at /content/drive so the dataset stored there can be read.
# force_remount=True is passed so the mount is redone even if Drive is already mounted.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
# Archive holding the raw dataset (one sub-folder per skin condition).
zip_file = '/content/drive/Shareddrives/Capstone Project/ML/Data/dataset1.zip'

# The context manager closes the archive even if extraction raises,
# which the manual open()/close() pair did not guarantee.
with zipfile.ZipFile(zip_file, 'r') as zip_ref:
    zip_ref.extractall('/content/drive/Shareddrives/Capstone Project/ML/Data/')

In [None]:
# Folder produced by extracting dataset1.zip; it contains one sub-folder per class.
source = '/content/drive/Shareddrives/Capstone Project/ML/Data/dataset1/'

# One source directory per class.
sourceAcnes, sourceBlackheads, sourceDarkSpots, sourceWrinkles = (
    os.path.join(source, class_name)
    for class_name in ('acnes', 'blackheads', 'darkspots', 'wrinkles')
)

# Report how many entries each class folder holds.
for label, class_dir in (
    ('acnes', sourceAcnes),
    ('blackheads', sourceBlackheads),
    ('darkspots', sourceDarkSpots),
    ('wrinkles', sourceWrinkles),
):
    image_count = len(os.listdir(class_dir))
    print(f"There are {image_count} images of {label}.")

There are 999 images of acnes.
There are 150 images of blackheads.
There are 303 images of darkspots.
There are 300 images of wrinkles.


In [None]:
import os
import shutil

# Root directory under which the 'training' and 'validation' folders are created
root_dir = '/content/drive/Shareddrives/Capstone Project/skin-case/'

# Start from a clean slate so re-running the notebook does not keep files from
# an earlier run.
# WARNING: this recursively deletes everything under root_dir.
if os.path.exists(root_dir):
    shutil.rmtree(root_dir)

# Create train and validation directories
def create_train_val_dirs(root_path, classes=('acnes', 'blackheads', 'darkspots', 'wrinkles')):
    """Create the training/validation directory tree for the skin-case dataset.

    The resulting layout is ``<root_path>/<split>/<class>`` for every split in
    ('training', 'validation') and every name in ``classes``.

    Args:
        root_path (str): directory under which the tree is created. The
            previous implementation ignored this argument and used the global
            ``root_dir`` instead.
        classes (iterable of str): class sub-folder names to create inside
            each split. Defaults to the four skin conditions used in this
            notebook.

    Raises:
        FileExistsError: if one of the target paths already exists and is not
            a directory (existing directories are accepted).
    """
    for split in ('training', 'validation'):
        split_dir = os.path.join(root_path, split)
        # Create the split folder itself so it exists even when `classes` is empty.
        os.makedirs(split_dir, exist_ok=True)
        for class_name in classes:
            os.makedirs(os.path.join(split_dir, class_name), exist_ok=True)

# Build the directory tree under root_dir. The function creates directories
# with exist_ok=True, so pre-existing directories do not raise; the handler
# below is only a safety net.
try:
    create_train_val_dirs(root_path=root_dir)
except FileExistsError:
    print("You should not be seeing this since the upper directory is removed beforehand")

# Print all directories
#for rootdir, dirs, files in os.walk(root_dir):
#    for subdir in dirs:
#        print(os.path.join(rootdir, subdir))

In [None]:
def split_data(SOURCE_DIR, TRAINING_DIR, VALIDATION_DIR, SPLIT_SIZE):
  """Randomly split the files of one class into training and validation copies.

  Files are copied, never moved, so SOURCE_DIR is left unchanged. Zero-length
  files are reported and excluded from both sets.

  Args:
    SOURCE_DIR (string): directory containing the images of a single class
    TRAINING_DIR (string): existing directory receiving the training copies
    VALIDATION_DIR (string): existing directory receiving the validation copies
    SPLIT_SIZE (float): fraction of the usable files assigned to training;
      the remainder goes to validation

  Returns:
    None
  """
  files = []
  for filename in os.listdir(SOURCE_DIR):
    # os.path.join works whether or not SOURCE_DIR ends with a slash.
    file_path = os.path.join(SOURCE_DIR, filename)
    if not os.path.isfile(file_path):
      # Sub-directories cannot be copied with copyfile; skip them.
      continue
    if os.path.getsize(file_path) > 0:
      files.append(filename)
    else:
      print(filename + ' is zero length, so ignoring.')

  # Split once, after the whole directory has been listed. Doing this inside
  # the loop reshuffled on every file and left train_set undefined for an
  # empty source directory.
  train_length = int(len(files) * SPLIT_SIZE)
  shuffled = random.sample(files, len(files))
  train_set = shuffled[:train_length]
  validation_set = shuffled[train_length:]

  for filename in train_set:
    copyfile(os.path.join(SOURCE_DIR, filename),
             os.path.join(TRAINING_DIR, filename))

  for filename in validation_set:
    copyfile(os.path.join(SOURCE_DIR, filename),
             os.path.join(VALIDATION_DIR, filename))

In [None]:
# Test your split_data function

# Define paths
# Source folders (one per class); the trailing slash is kept for compatibility
# with code that builds file paths by string concatenation.
ACNES_SOURCE_DIR = "/content/drive/Shareddrives/Capstone Project/ML/Data/dataset1/acnes/"
BLACKHEADS_SOURCE_DIR = "/content/drive/Shareddrives/Capstone Project/ML/Data/dataset1/blackheads/"
DARKSPOTS_SOURCE_DIR = "/content/drive/Shareddrives/Capstone Project/ML/Data/dataset1/darkspots/"
WRINKLES_SOURCE_DIR = "/content/drive/Shareddrives/Capstone Project/ML/Data/dataset1/wrinkles/"

# Destination roots for the split
TRAINING_DIR = "/content/drive/Shareddrives/Capstone Project/skin-case/training"
VALIDATION_DIR = "/content/drive/Shareddrives/Capstone Project/skin-case/validation"

TRAINING_ACNES_DIR = os.path.join(TRAINING_DIR, "acnes/")
VALIDATION_ACNES_DIR = os.path.join(VALIDATION_DIR, "acnes/")

TRAINING_BLACKHEADS_DIR = os.path.join(TRAINING_DIR, "blackheads/")
VALIDATION_BLACKHEADS_DIR = os.path.join(VALIDATION_DIR, "blackheads/")

TRAINING_DARKSPOTS_DIR = os.path.join(TRAINING_DIR, "darkspots/")
VALIDATION_DARKSPOTS_DIR = os.path.join(VALIDATION_DIR, "darkspots/")

TRAINING_WRINKLES_DIR = os.path.join(TRAINING_DIR, "wrinkles/")
VALIDATION_WRINKLES_DIR = os.path.join(VALIDATION_DIR, "wrinkles/")

# Empty EVERY class directory in case this cell is run multiple times.
# Previously only the wrinkles folders were cleared, so a re-run with a new
# random split could leave the same image in both training and validation
# for the other three classes.
for split_class_dir in (
    TRAINING_ACNES_DIR, VALIDATION_ACNES_DIR,
    TRAINING_BLACKHEADS_DIR, VALIDATION_BLACKHEADS_DIR,
    TRAINING_DARKSPOTS_DIR, VALIDATION_DARKSPOTS_DIR,
    TRAINING_WRINKLES_DIR, VALIDATION_WRINKLES_DIR,
):
  for entry in os.scandir(split_class_dir):
    os.remove(entry.path)

# Define proportion of images used for training
split_size = .8


In [None]:
# Run the function
# NOTE: Messages about zero length images should be printed out
split_data(ACNES_SOURCE_DIR, TRAINING_ACNES_DIR, VALIDATION_ACNES_DIR, split_size)
split_data(BLACKHEADS_SOURCE_DIR, TRAINING_BLACKHEADS_DIR, VALIDATION_BLACKHEADS_DIR, split_size)
split_data(DARKSPOTS_SOURCE_DIR, TRAINING_DARKSPOTS_DIR, VALIDATION_DARKSPOTS_DIR, split_size)
split_data(WRINKLES_SOURCE_DIR, TRAINING_WRINKLES_DIR, VALIDATION_WRINKLES_DIR, split_size)

# split_data copies rather than moves, so the source directory should be unchanged
print(f"\n\nOriginal acne's directory has {len(os.listdir(ACNES_SOURCE_DIR))} images")

# Training and validation splits for acnes. These lines previously counted the
# wrinkles folders while labelling the result "acnes".
print(f"There are {len(os.listdir(TRAINING_ACNES_DIR))} images of acnes for training")
print(f"There are {len(os.listdir(VALIDATION_ACNES_DIR))} images of acnes for validation")



Original acne's directory has 999 images
There are 240 images of acnes for training
There are 60 images of acnes for validation


In [None]:
def train_val_generators(TRAINING_DIR, VALIDATION_DIR,
                         train_batch_size=57,
                         validation_batch_size=20,
                         target_size=(150, 150)):
  """
  Creates the training and validation data generators

  Training images are rescaled to [0, 1] and augmented (rotation, shifts,
  shear, zoom, horizontal flip); validation images are only rescaled.

  Args:
    TRAINING_DIR (string): directory path containing the training images
    VALIDATION_DIR (string): directory path containing the testing/validation images
    train_batch_size (int): batch size of the training generator
    validation_batch_size (int): batch size of the validation generator
    target_size (tuple): (height, width) every image is resized to; must
      match the input shape of the model that consumes the generators

  Returns:
    train_generator, validation_generator - tuple containing the generators
  """
  # Augment only the training data.
  train_datagen = ImageDataGenerator(rescale=1./255.,
                                     rotation_range=40,
                                     width_shift_range=0.2,
                                     height_shift_range=0.2,
                                     shear_range=0.2,
                                     zoom_range=0.2,
                                     horizontal_flip=True,
                                     fill_mode='nearest')

  # class_mode='categorical' yields one-hot labels.
  train_generator = train_datagen.flow_from_directory(directory=TRAINING_DIR,
                                                      batch_size=train_batch_size,
                                                      class_mode='categorical',
                                                      target_size=target_size)

  # Validation data is rescaled the same way but never augmented.
  validation_datagen = ImageDataGenerator(rescale=1./255.)

  validation_generator = validation_datagen.flow_from_directory(directory=VALIDATION_DIR,
                                                                batch_size=validation_batch_size,
                                                                class_mode='categorical',
                                                                target_size=target_size)
  return train_generator, validation_generator

In [None]:
# Build the augmented training generator and the plain validation generator
train_generator, validation_generator = train_val_generators(
    TRAINING_DIR=TRAINING_DIR,
    VALIDATION_DIR=VALIDATION_DIR,
)

Found 1401 images belonging to 4 classes.
Found 351 images belonging to 4 classes.


In [None]:
# Define a Callback class that stops training once validation accuracy exceeds a threshold
class myCallback(tf.keras.callbacks.Callback):
  """Stop training as soon as `val_acc` exceeds `threshold`.

  Args:
    threshold (float): validation accuracy (0-1) above which training stops.
      Defaults to 0.87, the value that was previously hard-coded.
  """

  def __init__(self, threshold=0.87):
    super().__init__()
    self.threshold = threshold

  def on_epoch_end(self, epoch, logs=None):
    # `logs` may be None, and 'val_acc' is absent when no validation data is
    # supplied; comparing None with a float would raise TypeError.
    val_acc = (logs or {}).get('val_acc')
    if val_acc is not None and val_acc > self.threshold:
      # Report the real threshold (the old message claimed 99.9%).
      print(f"\nReached {self.threshold:.1%} validation accuracy so cancelling training!")
      self.model.stop_training = True

In [None]:
# GRADED FUNCTION: create_model
def create_model():
  """Build and compile the CNN used to classify the four skin conditions.

  The network takes 150x150 RGB images, applies three Conv2D + MaxPooling2D
  stages (32, 64 and 128 filters), one additional MaxPooling2D, then a
  256-unit dense layer and a 4-way softmax output.

  Returns:
    The compiled tf.keras Sequential model (Adam optimizer, categorical
    cross-entropy loss, 'acc' metric).
  """
  layers = tf.keras.layers
  model = tf.keras.models.Sequential()

  # Convolutional feature extractor
  model.add(layers.Conv2D(32, (3,3), activation='relu', input_shape=(150,150,3)))
  model.add(layers.MaxPooling2D(2,2))
  model.add(layers.Conv2D(64, (3,3), activation='relu'))
  model.add(layers.MaxPooling2D(2,2))
  model.add(layers.Conv2D(128, (3,3), activation='relu'))
  model.add(layers.MaxPooling2D(2,2))
  # NOTE(review): second pooling layer in a row with no convolution in
  # between; kept as-is to preserve the architecture, but confirm it is intended.
  model.add(layers.MaxPooling2D(2,2))

  # Classifier head: one output unit per class
  model.add(layers.Flatten())
  model.add(layers.Dense(256, activation='relu'))
  model.add(layers.Dense(4, activation='softmax'))

  model.compile(optimizer='Adam',
                loss='categorical_crossentropy',
                metrics=['acc'])

  return model

In [None]:
# Get the untrained model
model = create_model()

# Train the model
# Note that this may take some time.
callbacks = myCallback()
history = model.fit(train_generator,
                    epochs=50,
                    verbose=2,
                    validation_data=validation_generator,
                    # Model.fit documents `callbacks` as a list of Callback instances.
                    callbacks=[callbacks])

Epoch 1/50
25/25 - 103s - loss: 0.7514 - acc: 0.6888 - val_loss: 0.4671 - val_acc: 0.7350 - 103s/epoch - 4s/step
Epoch 2/50
25/25 - 96s - loss: 0.4952 - acc: 0.7459 - val_loss: 0.4545 - val_acc: 0.8177 - 96s/epoch - 4s/step
Epoch 3/50
25/25 - 96s - loss: 0.4656 - acc: 0.7773 - val_loss: 0.4018 - val_acc: 0.8462 - 96s/epoch - 4s/step
Epoch 4/50
25/25 - 99s - loss: 0.4542 - acc: 0.7994 - val_loss: 0.3946 - val_acc: 0.8632 - 99s/epoch - 4s/step
Epoch 5/50
25/25 - 105s - loss: 0.4264 - acc: 0.8137 - val_loss: 0.3584 - val_acc: 0.8575 - 105s/epoch - 4s/step
Epoch 6/50
25/25 - 100s - loss: 0.4280 - acc: 0.8187 - val_loss: 0.3864 - val_acc: 0.8490 - 100s/epoch - 4s/step
Epoch 7/50
25/25 - 96s - loss: 0.4272 - acc: 0.8059 - val_loss: 0.3826 - val_acc: 0.8547 - 96s/epoch - 4s/step
Epoch 8/50
25/25 - 96s - loss: 0.4426 - acc: 0.7980 - val_loss: 0.3724 - val_acc: 0.8632 - 96s/epoch - 4s/step
Epoch 9/50
25/25 - 99s - loss: 0.4085 - acc: 0.8237 - val_loss: 0.3509 - val_acc: 0.8547 - 99s/epoch - 4s/

In [None]:
# Print the layer-by-layer summary (output shapes and parameter counts) of the model
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_3 (Conv2D)           (None, 148, 148, 32)      896       
                                                                 
 max_pooling2d_4 (MaxPooling  (None, 74, 74, 32)       0         
 2D)                                                             
                                                                 
 conv2d_4 (Conv2D)           (None, 72, 72, 64)        18496     
                                                                 
 max_pooling2d_5 (MaxPooling  (None, 36, 36, 64)       0         
 2D)                                                             
                                                                 
 conv2d_5 (Conv2D)           (None, 34, 34, 128)       73856     
                                                                 
 max_pooling2d_6 (MaxPooling  (None, 17, 17, 128)     

In [None]:
## NOTE: If you are using Safari and this cell throws an error,
## please skip this block and run the next one instead.

import numpy as np
from google.colab import files
from tensorflow.keras.utils import load_img, img_to_array

uploaded = files.upload()

# Class labels ordered by the index the training generator assigned to them,
# so position i corresponds to output unit i of the model.
class_names = sorted(train_generator.class_indices,
                     key=train_generator.class_indices.get)

for fn in uploaded.keys():

  # Load the uploaded image and preprocess it the same way as the generators:
  # resize to 150x150 and rescale pixel values to [0, 1].
  path = os.path.join('/content', fn)
  img = load_img(path, target_size=(150, 150))
  x = img_to_array(img)
  x /= 255
  # Add the batch dimension (the former np.vstack([x]) on a single array was redundant).
  images = np.expand_dims(x, axis=0)

  classes = model.predict(images, batch_size=10)
  print(classes[0])
  # Also report the most probable label, not just the raw probabilities.
  print(f"{fn}: predicted class is {class_names[int(np.argmax(classes[0]))]}")

Saving WIN_20230528_21_14_27_Pro.jpg to WIN_20230528_21_14_27_Pro.jpg
[4.4545406e-05 3.7424285e-02 4.4873339e-01 5.1379776e-01]


In [None]:
# # RESIZE USING Python Imaging Library (PIL)
# # NO NEED TO RUN THIS: THE GENERATOR ABOVE ALREADY RESIZES THE IMAGES
# from PIL import Image
# import os

# SOURCE_DIR = '/content/drive/Shareddrives/Capstone Project/ML/Data/dataset1/wrinkles/'
# DEST_DIR = '/content/drive/Shareddrives/Capstone Project/ML/Data/dataset1/wrinkles_resized/'

# # Create the destination directory if it doesn't exist
# if not os.path.exists(DEST_DIR):
#     os.makedirs(DEST_DIR)

# # Set the desired size for the resized images
# new_size = (224, 224)

# # Iterate over each file in the source directory
# for filename in os.listdir(SOURCE_DIR):
#     file_path = os.path.join(SOURCE_DIR, filename)
#     dest_file_path = os.path.join(DEST_DIR, filename)

#     # Open the image
#     image = Image.open(file_path)

#     # Resize the image
#     resized_image = image.resize(new_size)

#     # Save the resized image to the destination directory
#     resized_image.save(dest_file_path)

#     print(f"Resized image saved: {dest_file_path}")


In [None]:
# # for deleting folder
# import shutil
# import os

# folder_path = '/content/drive/Shareddrives/Capstone Project/ML/Data/dataset1/blackhead/resized'

# # Remove the folder
# shutil.rmtree(folder_path)

# # Verify if the folder is removed
# if not os.path.exists(folder_path):
#     print(f"Folder {folder_path} successfully removed.")
# else:
#     print(f"Failed to remove the folder {folder_path}.")


Folder /content/drive/Shareddrives/Capstone Project/ML/Data/dataset1/blackhead/resized successfully removed.
