<a href="https://colab.research.google.com/github/OliviaHelena10/skincare_recommendations/blob/main/Colab_Predict_Skins.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Importing Libraries

In [32]:
# Basic DS libraries
import pandas as pd
import numpy as np

# Will help to list our files
from glob import glob
import pathlib
import PIL

# Will help us with the images
import cv2
import os
import tensorflow as tf
import matplotlib.pyplot as plt

# Organising our Data

 Getting our data path:

In [33]:
# Here we are accessing our data from google drive
path_train = '/content/drive/MyDrive/skin_search/train'
path_test = '/content/drive/MyDrive/skin_search/test'
path_validation = '/content/drive/MyDrive/skin_search/valid'

Saving the paths in variables:

In [34]:
train_dir = pathlib.Path(path_train)
test_dir = pathlib.Path(path_test)
validation_dir = pathlib.Path(path_validation)

Checking the categories:

In [35]:
path = '/content/drive/MyDrive/skin_search/train'
data_dir = pathlib.Path(path)


# Iterating through the train subfolders and getting their names just in case
train_subfolders = [f.name for f in data_dir.iterdir() if f.is_dir()]
print(f'train: {train_subfolders}')

train: ['dry', 'normal', 'oily']


In [36]:
for subfolder in train_subfolders:
  path =  train_dir / subfolder
  images = list(path.glob('*.JPG'))
  print(f'{subfolder}: {len(images)} imagens')

dry: 0 imagens
normal: 0 imagens
oily: 0 imagens


In [37]:
# Checking our Datas

for subfolder in train_subfolders:                # iterates through the subfolders: dry, normal and oily
  path =  train_dir / subfolder                   # gets the path of each image
  images = list(path.glob('*.jpg'))               # gets the images of each subfolder
  print(f'\n{subfolder}: {len(images)} imagens')  # prints the number of data in each subfolder

  if images:
    img = PIL.Image.open(str(images[0]))          # opens the first image in the list
    img_array = np.array(img)                     # converts the image to a numpy array
    print(f'dimensões da primeira imagem em {subfolder} : {img_array.shape}\n') # gets our image dimensions
  print("------------------------------------------------------")


dry: 662 imagens
dimensões da primeira imagem em dry : (640, 640, 3)

------------------------------------------------------

normal: 1114 imagens
dimensões da primeira imagem em normal : (640, 640, 3)

------------------------------------------------------

oily: 1015 imagens
dimensões da primeira imagem em oily : (640, 640, 3)

------------------------------------------------------


# Image preprocessing for model training

Defining Variables:

In [38]:
# We must maintain tha same proportions of the images width and height
image_width = 640
image_height = 640


# Images always have 3 color dimensions RGB pr BGR        ( R for Red  -  G for Green  -  B for Blue )
image_color_channel = 3
image_color_channel_size = 255
image_size = (image_width, image_height)
img_shape = image_size + (image_color_channel,)

batch_size = 128            # number of features that I will bring at a time from my dataset
epocas = 32                 # number of times that I will iterate trought my dataset
learning_rate = 0.0001

class_names = ['dry', 'normal', 'oily']

Separating train, test and validation data

In [39]:
train = tf.keras.utils.image_dataset_from_directory(
    train_dir,
    seed = 568,         # this will get random weights for the transformations
    image_size = (image_height,image_width),
    batch_size = batch_size
)

Found 2792 files belonging to 3 classes.


In [40]:
test = tf.keras.utils.image_dataset_from_directory(
    test_dir,
    seed = 568,         # this will get random weights for the transformations
    image_size = (image_height,image_width),
    batch_size = batch_size
)


Found 134 files belonging to 3 classes.


In [41]:
validation = tf.keras.utils.image_dataset_from_directory(
    validation_dir,
    seed = 568,         # this will get random weights for the transformations
    image_size = (image_height,image_width),
    batch_size = batch_size
)

Found 262 files belonging to 3 classes.


# Creating a Machine Learning Supervised Learning model - type Image Classification

In [42]:
model = tf.keras.models.Sequential([

    # Input:
    tf.keras.layers.Input(shape=(640, 640,  3)),                                # This are the dimensions from our input data

    # Reescaling Layers
    tf.keras.layers.Rescaling(1./255),                                          # Normalizing our data by reescaling the pixels

    # Padding Layers
    tf.keras.layers.Flatten(),                                                  # Flattening our data to 1 dimension

    # Hidden Layers:
    tf.keras.layers.Dense(128,activation=tf.nn.relu),                           # Dense Layer will do the "math" for getting our results

    # Output:
    tf.keras.layers.Dense(3, activation=tf.nn.softmax)                          # Softmax activation is used for cathegorical data and 3 are the dimensions in our dataset
])

In [43]:
model.summary()

In [44]:
model.compile(optimizer = tf.keras.optimizers.Adam(),           # Adam optimization is a stochastic gradient descent method that is based on adaptive estimation of first-order and second-order moments
              loss='sparse_categorical_crossentropy',           # Will show the lesses of our training
              metrics=['accuracy'])                             # List of metrics to be evaluated by the model during training and testing.

In [46]:
epocas = 32

history = model.fit(
    train,
    validation_data = validation,
    epochs = epocas # qtd de iterações q o algoritmo irá fazer
)

Epoch 1/32
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 797ms/step - accuracy: 0.3802 - loss: 87.2206 - val_accuracy: 0.3435 - val_loss: 26.9111
Epoch 2/32
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 654ms/step - accuracy: 0.3820 - loss: 43.3286 - val_accuracy: 0.4237 - val_loss: 82.4885
Epoch 3/32
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 616ms/step - accuracy: 0.3822 - loss: 59.9828 - val_accuracy: 0.3130 - val_loss: 22.9077
Epoch 4/32
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 641ms/step - accuracy: 0.4301 - loss: 27.3718 - val_accuracy: 0.4122 - val_loss: 33.8740
Epoch 5/32
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 624ms/step - accuracy: 0.4183 - loss: 41.1149 - val_accuracy: 0.4122 - val_loss: 34.5378
Epoch 6/32
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 613ms/step - accuracy: 0.4299 - loss: 48.2581 - val_accuracy: 0.4046 - val_loss: 23.5929
Epoch 7/32