### ***GENDER CLASSIFICATION on CelebA***
* The Dataset used is [CelebFaces Attributes (CelebA) Dataset](https://www.kaggle.com/datasets/jessicali9530/celeba-dataset), uploaded by Jessica Li on Kaggle.



### IMPORTS

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import random
import os
import time
os.environ['TF_CPP_MIN_LOG_LEVEL']='2' 
import tensorflow as tf
from tensorflow.keras import Model, Input
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator as Idg, img_to_array
from tensorflow.keras.models import Sequential, Model 
from tensorflow.keras.layers import Dropout, Flatten, Dense, GlobalAveragePooling2D
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.applications.inception_v3 import InceptionV3,preprocess_input 
from tensorflow.keras.optimizers import RMSprop
from shutil import copy
warnings.filterwarnings("ignore")

In [3]:
# Report how many GPU devices TensorFlow can see in this session.
gpu_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))

### LOADING THE DATA

In [4]:
# Directory that holds the aligned CelebA face images.
path = '../input/celeba-dataset/img_align_celeba/img_align_celeba'
image_count = len(os.listdir(path))
print(f"Size of data: {image_count}")

### DATA VISUALIZATION

In [14]:
# Show a 2x5 grid of images drawn at random from the raw dataset directory.
fig, axes = plt.subplots(nrows=2, ncols=5, figsize=(14, 8))
sampled_names = np.random.choice(os.listdir(path), size=10)
img_path = [os.path.join(path, file_name) for file_name in sampled_names]

# axes.flat walks the grid row by row, i.e. the same cells as (i // 5, i % 5).
for cell, img_file in zip(axes.flat, img_path):
    cell.imshow(image.load_img(img_file))
fig.suptitle("10 Random Images from the Dataset")
fig.show()

In [18]:
# Load the per-image attribute table (df) and the evaluation-partition table
# (partition) that ship with the dataset.
df, partition = (
    pd.read_csv(csv_file)
    for csv_file in (
        '../input/celeba-dataset/list_attr_celeba.csv',
        '../input/celeba-dataset/list_eval_partition.csv',
    )
)

In [19]:
# Preview the first rows of the attribute table loaded from list_attr_celeba.csv.
df.head()

In [20]:
# Preview the first rows of the table loaded from list_eval_partition.csv.
partition.head()

In [23]:
# GENDER DISTRIBUTION
# Horizontal count plot of the 'Male' attribute column.
fig, axes = plt.subplots(figsize=(12, 5))
sns.countplot(data=df, y='Male', ax=axes)
# Relabel the two categories; assumes the first plotted category is the
# female (-1) value and the second the male (1) value -- TODO confirm order.
axes.set_yticklabels(['female', 'male'])
axes.set_ylabel('Gender')
fig.suptitle('Gender ratio')
fig.show()

In [24]:
# Build the ./img_align_celeba/<split>/<class> directory tree used by
# flow_from_directory later on.
#
# os.makedirs(..., exist_ok=True) creates any missing parent folders and
# tolerates folders that already exist, so re-running the cell (or starting
# from a partially created tree) still leaves every leaf directory in place.
# The previous chain of os.mkdir calls aborted on the first existing folder
# and silently skipped all remaining ones.
for split_name in ('train', 'validate', 'test'):
    for class_name in ('Male', 'Female'):
        try:
            os.makedirs(
                os.path.join('./img_align_celeba', split_name, class_name),
                exist_ok=True,
            )
        except OSError as error:
            # Still best-effort: report the problem (e.g. a file with the same
            # name, or missing permissions) and carry on with the other folders.
            print(error)

### CREATION of TRAIN, VALIDATION and TEST SETS Based on GENDER

In [25]:
# Draw candidate image ids for every split and gender.
#
# Row ranges (0-based index of df) follow the standard CelebA evaluation
# partition: train = 0..162769, validation = 162770..182636,
# test = 182637..end.  NOTE(review): verify against list_eval_partition.csv.
# Label-based .loc slices include BOTH end points, so the bounds below are
# chosen so that no row is shared between two splits.
#
# replace=False makes every drawn id unique; sampling with replacement (the
# numpy default) produced repeated ids within a split.
maleIds = df.loc[df['Male'] == 1, 'image_id']
femaleIds = df.loc[df['Male'] == -1, 'image_id']

maleTrain = np.random.choice(maleIds.loc[:162769], size=15000, replace=False)
femaleTrain = np.random.choice(femaleIds.loc[:162769], size=15000, replace=False)

maleVal = np.random.choice(maleIds.loc[162770:182636], size=3000, replace=False)
femaleVal = np.random.choice(femaleIds.loc[162770:182636], size=3000, replace=False)

maleTest = np.random.choice(maleIds.loc[182637:], size=3000, replace=False)
femaleTest = np.random.choice(femaleIds.loc[182637:], size=3000, replace=False)

In [26]:
# Sanity check: number of candidate ids drawn for each split and gender
# (the last entry previously repeated maleTest instead of femaleTest).
len(maleTrain), len(femaleTrain), len(maleVal), len(femaleVal), len(maleTest), len(femaleTest)

In [27]:
def splitData(srcDir,desDir,data,size):
    """Copy readable images from ``srcDir`` into ``desDir`` until it holds ``size`` files.

    Each candidate is first opened with ``image.load_img`` so that unreadable
    files are never copied; such files are reported and skipped.

    Args:
        srcDir: Directory that contains the source images.
        desDir: Existing destination directory.
        data: Iterable of image file names (relative to ``srcDir``).
        size: Number of files ``desDir`` should contain when copying stops.

    Returns:
        None. Progress is reported on stdout.
    """
    # Track the destination contents in memory instead of calling os.listdir
    # after every single copy (which made the loop quadratic).
    present = set(os.listdir(desDir))
    for fileName in data:
        # ">=" also stops when the folder already held enough files, which the
        # old "== size" check could step over and never trigger.
        if len(present) >= size:
            break
        if fileName in present:
            # Repeated id or leftover from an earlier run: nothing to do.
            continue
        imgPath = os.path.join(srcDir,fileName)
        try:
            image.load_img(imgPath)  # validation only; the result is discarded
            copy(imgPath,os.path.join(desDir,fileName))
        except (OSError, ValueError):
            # Missing, unreadable or undecodable file: skip it and keep going.
            print(f'{fileName} File not valid')
            continue
        present.add(fileName)

    if len(present) >= size:
        print(f'{size} Copied successfully!')
    else:
        # Do not claim success when the candidates ran out early.
        print(f'Only {len(present)} of {size} files present in {desDir}')

In [28]:
# Destination folder for every (split, gender) combination.
splitRoot = './img_align_celeba'
maleTrainDir = os.path.join(splitRoot, 'train', 'Male')
femaleTrainDir = os.path.join(splitRoot, 'train', 'Female')
maleValDir = os.path.join(splitRoot, 'validate', 'Male')
femaleValDir = os.path.join(splitRoot, 'validate', 'Female')
maleTestDir = os.path.join(splitRoot, 'test', 'Male')
femaleTestDir = os.path.join(splitRoot, 'test', 'Female')

print('Copying files to directory...')
# (destination folder, candidate ids, number of files wanted), in copy order.
copyJobs = (
    (maleTrainDir, maleTrain, 10000),
    (femaleTrainDir, femaleTrain, 10000),
    (maleValDir, maleVal, 2500),
    (femaleValDir, femaleVal, 2500),
    (maleTestDir, maleTest, 2500),
    (femaleTestDir, femaleTest, 2500),
)
for targetDir, candidateIds, wanted in copyJobs:
    splitData(path, targetDir, candidateIds, wanted)

In [38]:
# 2x5 grid of images drawn at random from the male training folder.
fig, axes = plt.subplots(nrows=2, ncols=5, figsize=(14, 8))
sampled_names = np.random.choice(os.listdir(maleTrainDir), size=10)
img_path = [os.path.join(maleTrainDir, file_name) for file_name in sampled_names]

# axes.flat visits the grid row by row, i.e. the same cells as (i // 5, i % 5).
for cell, img_file in zip(axes.flat, img_path):
    cell.imshow(image.load_img(img_file))
fig.suptitle("10 Random images from Male Training Data")
fig.show()

In [39]:
# 2x5 grid of images drawn at random from the female training folder.
fig, axes = plt.subplots(nrows=2, ncols=5, figsize=(14, 8))
sampled_names = np.random.choice(os.listdir(femaleTrainDir), size=10)
img_path = [os.path.join(femaleTrainDir, file_name) for file_name in sampled_names]

# axes.flat visits the grid row by row, i.e. the same cells as (i // 5, i % 5).
for cell, img_file in zip(axes.flat, img_path):
    cell.imshow(image.load_img(img_file))
fig.suptitle("10 random images from Female Training Data")
fig.show()

In [41]:
# 2x5 grid of images drawn at random from the male validation folder.
fig, axes = plt.subplots(nrows=2, ncols=5, figsize=(14, 8))
sampled_names = np.random.choice(os.listdir(maleValDir), size=10)
img_path = [os.path.join(maleValDir, file_name) for file_name in sampled_names]

# axes.flat visits the grid row by row, i.e. the same cells as (i // 5, i % 5).
for cell, img_file in zip(axes.flat, img_path):
    cell.imshow(image.load_img(img_file))
fig.suptitle("10 Random Images from Male Validation Data")
fig.show()

In [44]:
# 2x5 grid of images drawn at random from the female validation folder.
fig, axes = plt.subplots(nrows=2, ncols=5, figsize=(14, 8))
sampled_names = np.random.choice(os.listdir(femaleValDir), size=10)
img_path = [os.path.join(femaleValDir, file_name) for file_name in sampled_names]

# axes.flat visits the grid row by row, i.e. the same cells as (i // 5, i % 5).
for cell, img_file in zip(axes.flat, img_path):
    cell.imshow(image.load_img(img_file))
fig.suptitle("10 Random Images from Female Validation Data")
fig.show()

In [47]:
# 2x5 grid of images drawn at random from the male test folder.
fig, axes = plt.subplots(nrows=2, ncols=5, figsize=(14, 8))
sampled_names = np.random.choice(os.listdir(maleTestDir), size=10)
img_path = [os.path.join(maleTestDir, file_name) for file_name in sampled_names]

# axes.flat visits the grid row by row, i.e. the same cells as (i // 5, i % 5).
for cell, img_file in zip(axes.flat, img_path):
    cell.imshow(image.load_img(img_file))
fig.suptitle("10 Random Images from Male Testing Data")
fig.show()

In [48]:
# 2x5 grid of images drawn at random from the female test folder.
fig, axes = plt.subplots(nrows=2, ncols=5, figsize=(14, 8))
sampled_names = np.random.choice(os.listdir(femaleTestDir), size=10)
img_path = [os.path.join(femaleTestDir, file_name) for file_name in sampled_names]

# axes.flat visits the grid row by row, i.e. the same cells as (i // 5, i % 5).
for cell, img_file in zip(axes.flat, img_path):
    cell.imshow(image.load_img(img_file))
fig.suptitle("10 Random Images from Female Testing Data")
fig.show()

### DATA AUGMENTATION

In [50]:
# Generator used for the augmentation preview: pixel values are rescaled by
# 1/255 and random shifts, rotation, shear, zoom and flips are applied.
dataGen = Idg(
    rescale=1/255.0,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

# Training generator: InceptionV3 preprocessing plus random augmentation.
trainDataGen = Idg(
    preprocessing_function=preprocess_input,
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)

# Validation and test generators: InceptionV3 preprocessing only, no augmentation.
valDataGen = Idg(preprocessing_function=preprocess_input)
# NOTE: the name "testDataFen" (sic) is read by the cell that builds the test iterator.
testDataFen = Idg(preprocessing_function=preprocess_input)

In [54]:
# Preview the augmentation pipeline: 15 random variants of one sample image.
img = image.load_img('../input/celeba-dataset/img_align_celeba/img_align_celeba/000042.jpg')
x = img_to_array(img)
x = x.reshape((1,)+x.shape)  # add a leading batch axis, as flow() expects
fig, axes = plt.subplots(3,5,figsize=(28,14))
# flow() yields batches endlessly.  Zipping with the axes grid (axes first)
# stops as soon as every cell is filled, without requesting an extra batch.
for cell, batch in zip(axes.flat, dataGen.flow(x, batch_size=1)):
    # batch holds a single image; indexing it avoids hard-coding the image
    # height and width, so any sample image size works.
    cell.imshow(batch[0])
fig.suptitle('Data Augmentation Sample')
fig.show()

In [55]:
# Directory iterators for the three splits.  Every image is resized to
# 150x150; training uses batches of 256, validation and test batches of 64.
imageSize = (150, 150)

train = trainDataGen.flow_from_directory(
    './img_align_celeba/train',
    target_size=imageSize,
    batch_size=256,
)
val = valDataGen.flow_from_directory(
    './img_align_celeba/validate',
    target_size=imageSize,
    batch_size=64,
)
test = testDataFen.flow_from_directory(
    './img_align_celeba/test',
    target_size=imageSize,
    batch_size=64,
)

### MODEL CREATION

In [57]:
# Transfer-learning model: a frozen ImageNet InceptionV3 backbone followed by
# a small dense classification head with a 2-way softmax output.
inputs = Input(shape=(150, 150, 3))
baseModel = InceptionV3(
    include_top=False,
    weights='imagenet',
    input_shape=(150, 150, 3),
)
baseModel.trainable = False  # the backbone weights are not updated
print('Adding custom layers...')
# training=False is passed explicitly when calling the backbone.
x = baseModel(inputs, training=False)
headLayers = (
    GlobalAveragePooling2D(),
    Dense(1024, activation='relu'),
    Dropout(0.5),
    Dense(512, activation='relu'),
)
for headLayer in headLayers:
    x = headLayer(x)
output = Dense(2, activation='softmax')(x)
model = Model(inputs=inputs, outputs=output)
print('Model Created!')

In [58]:
# Print the layer-by-layer summary of the assembled model.
print('Model Summary:-')
model.summary()

In [59]:
# Configure training: RMSprop optimizer, categorical cross-entropy loss and
# accuracy as the reported metric.
print('Compiling model...')
optimizer = RMSprop(learning_rate=0.01)
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

### MODEL TRAINING

In [None]:

# Callbacks, both driven by the validation loss:
#  * ModelCheckpoint keeps only the best weights seen so far on disk.
#  * EarlyStopping ends training after 5 epochs without an improvement of at
#    least 1e-3 and restores the best weights, so the model evaluated
#    afterwards is the best one rather than the one from the last epoch.
callbacks = [
    ModelCheckpoint(filepath='weights.best.inc.male.hdf5',
                    monitor='val_loss', verbose=1, save_best_only=True),
    EarlyStopping(monitor='val_loss', mode='min', patience=5,
                  min_delta=1e-3, restore_best_weights=True),
]

# perf_counter() measures wall-clock time.  process_time() only counts the CPU
# time of this process and therefore misses time spent waiting on the GPU or I/O.
t = time.perf_counter()
print('Training Started...')
history = model.fit(train,
                    validation_data=val,
                    epochs=50,
                    callbacks=callbacks,
                    verbose=1)
# Spelling of "elaspedTime" is kept because the reporting cell reads this name.
elaspedTime = time.perf_counter() - t

In [None]:
# Report the duration that was measured around the model.fit() call.
print(f'Time Taken for training: {elaspedTime} sec')

### MODEL EVALUATION

In [None]:
# Plot training vs. validation curves: accuracy on the left, loss on the right.
fig, axes = plt.subplots(1, 2, figsize=(14, 8))
recorded = history.history
acc, val_acc = recorded['accuracy'], recorded['val_accuracy']
loss, val_loss = recorded['loss'], recorded['val_loss']
epochs = range(len(acc))

accAxis, lossAxis = axes
for panel, trainCurve, valCurve in ((accAxis, acc, val_acc), (lossAxis, loss, val_loss)):
    # Dashed line for the training curve, dash-dot for the validation curve.
    sns.lineplot(x=epochs, y=trainCurve, linestyle='--', ax=panel)
    sns.lineplot(x=epochs, y=valCurve, linestyle='-.', ax=panel)

accAxis.legend(['Training Acc', 'Validation Acc'])
lossAxis.legend(['Training Loss', 'Validation Loss'])
lossAxis.set_ylim([0, 1])  # fix the visible loss range to [0, 1]
fig.suptitle('Training Metrics')
fig.show()

In [None]:
# Evaluate the model on all three splits.
# batch_size is deliberately not passed: train/val/test are directory iterators
# that already yield batches, and Keras documents that batch_size must not be
# specified for generator-style inputs.
# Note: `train` still applies random augmentation, so the training figures are
# measured on augmented images.
results = [model.evaluate(split) for split in (train, val, test)]
# Each result is [loss, accuracy], matching the metrics set in model.compile().
for label, (lossValue, accuracy) in zip(('Train', 'Val', 'Test'), results):
    print(f'{label}: loss = {lossValue}, accuracy = {accuracy*100:.2f}%')