# Gender classification using Convolutional Neural Network (ConvNet)

In [None]:
# Update the tensorflow
!pip install --upgrade tensorflow

# The tensorflow version in this project is : 2.7.0


### 1) Import the libraries

In [2]:
# Import TensorFlow and the Keras namespaces used by the rest of the notebook.
import tensorflow as tf 
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras.preprocessing.image import ImageDataGenerator
# Print the installed TensorFlow version so the saved output records which
# release produced the results below.
print(tf.__version__)
# Marker that the cell ran to completion.
print("Done!")

2.7.0
Done!


### 2) Load the dataset from directories

In [4]:
# Load the training and validation datasets from the image directory.
DATADIR = "data/train/"
CATEGORIES = ['female', 'male']

IMG_SIZE = 256
batch_size = 32  # Other batch sizes to try: 64, 128, and 256.


def _load_split(subset):
    """Build one split of the image dataset stored under DATADIR.

    Both splits are created with exactly the same options (including the same
    seed and validation fraction); only `subset` differs.

    Args:
        subset: "training" or "validation", forwarded unchanged to
            `image_dataset_from_directory`.

    Returns:
        The dataset returned by `image_dataset_from_directory` for that split.
    """
    return tf.keras.preprocessing.image_dataset_from_directory(
        DATADIR,
        labels="inferred",
        label_mode="binary",  # other label modes are: int or categorical
        class_names=CATEGORIES,
        # color_mode="grayscale",  # uncomment this to work in grayscale
        batch_size=batch_size,
        image_size=(IMG_SIZE, IMG_SIZE),  # images not in this size are resized
        shuffle=True,
        seed=123,  # keeps the split the same across runs
        validation_split=0.2,
        subset=subset,
    )


ds_train_ = _load_split("training")
ds_valid_ = _load_split("validation")

print("Done!")

Found 14000 files belonging to 2 classes.
Using 11200 files for training.
Found 14000 files belonging to 2 classes.
Using 2800 files for validation.
Done!


### 3) Preprocess the data

###### Configure the dataset for performance

Let's make sure to use buffered prefetching so you can yield data from disk without having I/O become blocking. These are two important methods you should use when loading data:

+ **Dataset.cache** keeps the images in memory after they're loaded off disk during the first epoch. This will ensure the dataset does not become a bottleneck while training your model. If your dataset is too large to fit into memory, you can also use this method to create a performant on-disk cache.
+ **Dataset.prefetch** overlaps data preprocessing and model execution while training.

> ***For more details, please visit***  [TensorFlow tutorials: classification](https://www.tensorflow.org/tutorials/images/classification)

In [5]:
# Data Pipeline
def convert_to_float(image, label):
    """Return `image` as float32 scaled to [0, 1]; `label` is passed through.

    `tf.image.convert_image_dtype` only rescales when converting from an
    integer dtype; input that is already floating point is returned with its
    values untouched. The batches produced by `image_dataset_from_directory`
    are documented as float32 tensors holding raw 0-255 intensities, so the
    previous call was effectively a no-op and the network received
    unnormalised pixels. Casting and dividing by 255 normalises both uint8
    and float 0-255 input.

    Args:
        image: Image tensor (or batch of images) with 8-bit value range,
            i.e. intensities in 0-255.
        label: Label tensor, returned unchanged.

    Returns:
        A `(image, label)` tuple where `image` is float32 in [0, 1].
    """
    image = tf.cast(image, tf.float32) / 255.0
    return image, label

# Let tf.data pick the prefetch buffer size at runtime. `tf.data.AUTOTUNE` is
# the core-API name of the deprecated `tf.data.experimental.AUTOTUNE` alias.
AUTOTUNE = tf.data.AUTOTUNE


def _prepare(dataset):
    """Apply the shared input pipeline to one dataset split.

    Steps, in order: convert every batch with `convert_to_float`, cache the
    converted batches, and prefetch so that loading overlaps with training.

    Args:
        dataset: A dataset yielding `(image, label)` pairs.

    Returns:
        The mapped, cached and prefetched dataset.
    """
    return (
        dataset
        .map(convert_to_float)
        .cache()
        .prefetch(buffer_size=AUTOTUNE)
    )


# Both splits go through exactly the same pipeline.
ds_train = _prepare(ds_train_)
ds_valid = _prepare(ds_valid_)

print("Done!")

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Done!


### 4) Create the model

The Sequential model consists of a convolutional base of three blocks, each made of a convolution layer (tf.keras.layers.Conv2D) followed by a max pooling layer (tf.keras.layers.MaxPooling2D), and a head of Dense layers: a fully-connected layer (tf.keras.layers.Dense) with 128 units activated by a ReLU activation function ('relu'), followed by a single sigmoid unit that outputs the class probability.

> *NB: This model has not been tuned for high accuracy.*


In [7]:
from tensorflow import keras
from tensorflow.keras import layers

IMG_SIZE = 256
batch_size = 32  # Other batch sizes to try: 64, 128, and 256.


def _conv_block(filters, kernel_size, **conv_kwargs):
    """Return one convolutional block as a list: [Conv2D, MaxPool2D].

    The convolution uses ReLU activation and 'same' padding; any extra keyword
    arguments (e.g. `input_shape` for the first block) go to `Conv2D`.
    """
    return [
        layers.Conv2D(filters=filters, kernel_size=kernel_size,
                      activation="relu", padding='same', **conv_kwargs),
        layers.MaxPool2D(),
    ]


model = keras.Sequential(
    # Base: three convolutional blocks. The first one declares the input
    # dimensions as [height, width, color channels (RGB)] -- recheck the
    # number of channels against your input images.
    _conv_block(32, 5, input_shape=[IMG_SIZE, IMG_SIZE, 3])
    + _conv_block(64, 3)
    + _conv_block(128, 3)
    # Classifier head
    + [
        layers.Flatten(),
        layers.Dense(units=128, activation="relu"),
        layers.Dense(units=1, activation="sigmoid"),
    ]
)
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 256, 256, 32)      2432      
                                                                 
 max_pooling2d (MaxPooling2D  (None, 128, 128, 32)     0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 128, 128, 64)      18496     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 64, 64, 64)       0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 64, 64, 128)       73856     
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 32, 32, 128)      0

In [None]:
# Model with data augmentation
#
# NOTE: this cell rebinds `model` and `history`, so any compile/fit/plot cell
# executed after it operates on the augmented network defined here.

from tensorflow import keras
from tensorflow.keras import layers

model = keras.Sequential([
    # The input size must match the images produced by the dataset loader
    # (IMG_SIZE x IMG_SIZE, 3 channels); a hard-coded 128 would not accept the
    # 256 x 256 batches in ds_train / ds_valid.
    layers.InputLayer(input_shape=[IMG_SIZE, IMG_SIZE, 3]),

    # Data Augmentation. The random preprocessing layers are taken from
    # `tensorflow.keras.layers` (available there since TF 2.6); the name
    # `preprocessing` was never imported in this notebook.
    layers.RandomContrast(factor=0.10),
    layers.RandomFlip(mode='horizontal'),  # meaning, left-to-right
    layers.RandomRotation(factor=0.10),

    # Other augmentations that were tried and left disabled:
    #   RandomContrast(factor=0.3), RandomFlip(mode='vertical') (top-to-bottom),
    #   RandomWidth(factor=0.15) (horizontal stretch), RandomRotation(factor=0.20),
    #   RandomTranslation(height_factor=0.1, width_factor=0.1)

    # Block One
    layers.BatchNormalization(renorm=True),
    layers.Conv2D(filters=64, kernel_size=3, activation='relu', padding='same'),
    layers.MaxPool2D(),

    # Block Two
    layers.BatchNormalization(renorm=True),
    layers.Conv2D(filters=128, kernel_size=3, activation='relu', padding='same'),
    layers.MaxPool2D(),

    # Block Three
    layers.BatchNormalization(renorm=True),
    layers.Conv2D(filters=256, kernel_size=3, activation='relu', padding='same'),
    layers.Conv2D(filters=256, kernel_size=3, activation='relu', padding='same'),
    layers.MaxPool2D(),

    # Head
    layers.BatchNormalization(renorm=True),
    layers.Flatten(),
    layers.Dense(8, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])


# Compile and fit the model
optimizer = tf.keras.optimizers.Adam(epsilon=0.01)
model.compile(
    optimizer=optimizer,
    loss='binary_crossentropy',
    metrics=['binary_accuracy'],
)

history = model.fit(
    ds_train,
    validation_data=ds_valid,
    epochs=50,
)

# Plot learning curves: loss and accuracy, training vs. validation
import pandas as pd
history_frame = pd.DataFrame(history.history)
history_frame.loc[:, ['loss', 'val_loss']].plot()
history_frame.loc[:, ['binary_accuracy', 'val_binary_accuracy']].plot();

### 5) Compile the model

I chose the tf.keras.optimizers.Adam optimizer and the binary_crossentropy loss function. To view training and validation accuracy for each training epoch, pass the metrics argument to Model.compile.

In [10]:
# Compile the model: Adam optimizer (epsilon=0.01), binary cross-entropy loss,
# and binary accuracy as the reported metric.
optimizer = tf.keras.optimizers.Adam(epsilon=0.01)
model.compile(loss='binary_crossentropy', metrics=['binary_accuracy'], optimizer=optimizer)

### 6) Train the model



In [None]:
# Train on ds_train for 50 epochs, validating against ds_valid; the result is
# kept in `history` so the recorded metrics can be plotted afterwards.
fit_options = dict(validation_data=ds_valid, epochs=50)
history = model.fit(ds_train, **fit_options)

Epoch 1/50
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Epoch 2/50
  4/350 [..............................] - ETA: 1:26:12 - loss: 0.5832 - binary_accuracy: 0.6484

### 7) Plot learning curves

In [None]:
import pandas as pd

# Tabulate the metrics recorded by `model.fit`.
history_frame = pd.DataFrame(history.history)

# One figure per metric family, each comparing training against validation.
for metric_pair in (['loss', 'val_loss'],
                    ['binary_accuracy', 'val_binary_accuracy']):
    history_frame[metric_pair].plot()