# Required Paths and Libraries

In [None]:
from google.colab import drive
drive.mount('/content/drive') 

Mounted at /content/drive


In [None]:
# Unzip the files from google drive...
#!unzip "/content/drive/MyDrive/Data-BFW/bfw-faces-noncrop.zip" -d "/content/drive/MyDrive/images/"

In [1]:
# Importing the primary libraries.
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import keras
import tensorflow as tf
import cv2
from random import shuffle
from tqdm import tqdm
from google.colab.patches import cv2_imshow

In [2]:
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from tensorflow.keras.models import Sequential

from google.colab.patches import cv2_imshow
from sklearn import model_selection

# Data Prep - 00(CSV)

In [None]:
df = pd.DataFrame(columns = ['Path', 'Gender'])
print(df)

Empty DataFrame
Columns: [Path, Gender]
Index: []


In [None]:
for root, dirs, files in os.walk("/content/drive/MyDrive/images/"):
  for name in files:
    path = os.path.join(root, name)
    #print(path)
    #print(name)
    lst = path.split('/')
    lst1 = lst[9]
    lst2 = lst1.split('_')
    lst3 = lst2[1]
    if path.endswith('.jpg'):
        if lst3 == 'males':
            gender = int(1)
            df = df.append({'Path': path, 'Gender': gender}, ignore_index=True)
        elif lst3 == 'females':
            gender = int(0)
            df = df.append({'Path': path, 'Gender': gender}, ignore_index=True)

In [None]:
df.head(2)

In [None]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20000 entries, 0 to 19999
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Path    20000 non-null  object
 1   Gender  20000 non-null  object
dtypes: object(2)
memory usage: 312.6+ KB


In [None]:
df1 = df.drop('Path', 1)

df1 = df1.astype(float)

In [None]:
df = pd.concat([df['Path'],df1], axis=1)

In [None]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20000 entries, 0 to 19999
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Path    20000 non-null  object 
 1   Gender  20000 non-null  float64
dtypes: float64(1), object(1)
memory usage: 312.6+ KB


In [None]:
df.isna().sum()

Path      0
Gender    0
dtype: int64

In [None]:
df.to_csv('bfw_data_gender_final.csv')

# Data Prep - 01 (From CSV)

In [None]:
# Read the CSV.
daf = pd.read_csv('/content/bfw_data_gender_final.csv',
                        usecols=['Path', 'Gender']).values

In [None]:
daf

In [None]:
def my_label(path):
  lst = path.split('/')
  lst1 = lst[9]
  lst2 = lst1.split('_')
  lst3 = lst2[1]
  if path.endswith('.jpg'):
      if lst3 == 'males':
          gender = int(1)
          return gender
      elif lst3 == 'females':
          gender = int(0)
          return gender

In [None]:
X = []  # the features, or inputs
y = []  # the labels, or outputs


for row in daf:
    image_name = row[0] 
    image_smoker = row[1]
    # resizing our original images to 224x224, and turning them into numpy arrays
    image = load_img( image_name, target_size=(224,224,3))
    input_arr = img_to_array(image)
    input_arr = np.array(input_arr).reshape(-1)
    X.append(input_arr)
    y.append(my_label(image_name))

# Data Prep - 02(Direct from path) For **ML Model**

In [3]:
def my_label(path):          # This function is based on my directory.
  lst = path.split('/')
  lst1 = lst[9]
  lst2 = lst1.split('_')
  lst3 = lst2[1]
  if path.endswith('.jpg'):
      if lst3 == 'males':
          gender = int(1)
          return gender
      elif lst3 == 'females':
          gender = int(0)
          return gender

In [None]:
data = []  # empty list to store the image.
i = 0

# Considering only --- (1. asian females, 2.asian males) # We don't


for root, dirs, files in os.walk('/content/drive/MyDrive/images/'): # Path to the images.
    for name in files:                  
        path = os.path.join(root, name)
        img = cv2.imread(path) 
        if img is not None: 
          img = cv2.resize(img, (224, 224))
          img = np.array(img).reshape(-1)
          #print(img.shape)
          data.append([np.array(img), my_label(path)])  # Storing the images as an array with their labels in a list
                                                        # using the "my_label" function (defined on upper cell).
          i = i+1
          print(i)
          print(path)

In [5]:
len(data)

5000

In [6]:
# Shuffling the data randomly for training.
shuffle(data)

In [7]:
# Total 20000 data. (80% training data, 20% testing data)
train_data = data[:4500]
test_data = data[4500:]

# training data.

x_train = np.array([i[0] for i in train_data]) 
print(x_train.shape)
y_train = np.array([i[1] for i in train_data])

# testing data.

x_test = np.array([i[0] for i in test_data])   
print(x_test.shape)
y_test = np.array([i[1] for i in test_data])

(4500, 150528)
(500, 150528)


In [8]:
x_train.shape, y_train.shape, x_test.shape, y_test.shape

((4500, 150528), (4500,), (500, 150528), (500,))

# Simple ML Approach

In [9]:
# Using Normalization.

# normalizing the image array inputs
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

In [10]:
x_train.shape, x_test.shape

((4500, 150528), (500, 150528))

## Random Forest(C)

In [None]:
# importing random forest classifier from ensemble module.
from sklearn.ensemble import RandomForestClassifier

In [None]:
model = RandomForestClassifier(n_estimators=100, 
                               bootstrap = True)

In [None]:
# Fit on training data
model.fit(x_train, y_train)

RandomForestClassifier()

In [None]:
# Performing predictions on the test dataset
y_pred = model.predict(x_test)

In [None]:
# metrics are used to find accuracy or error
from sklearn import metrics  
  
# using metrics module for accuracy calculation
print("ACCURACY OF THE MODEL: ", metrics.accuracy_score(y_test, y_pred))

ACCURACY OF THE MODEL:  0.7


In [None]:
from sklearn.metrics import classification_report, confusion_matrix

print("=== Confusion Matrix ===")
print(confusion_matrix(y_test, y_pred))
print('\n')

print("=== Classification Report ===")
print(classification_report(y_test, y_pred))
print('\n')

=== Confusion Matrix ===
[[176  72]
 [ 78 174]]


=== Classification Report ===
              precision    recall  f1-score   support

           0       0.69      0.71      0.70       248
           1       0.71      0.69      0.70       252

    accuracy                           0.70       500
   macro avg       0.70      0.70      0.70       500
weighted avg       0.70      0.70      0.70       500





In [None]:
#from sklearn.externals import joblib
import joblib
smoke_model = 'Simple_GENDER_detector_model_Random_Forest'
joblib.dump(model, smoke_model)

['Simple_GENDER_detector_model_Random_Forest']

## SVM(C)

In [None]:
x_train.shape, x_test.shape

((4500, 150528), (500, 150528))

In [None]:
#Import svm model
from sklearn import svm

#Create a svm Classifier
clf = svm.SVC(kernel='rbf') # Linear Kernel

#Train the model using the training sets
clf.fit(x_train, y_train)

In [None]:
y_pred = clf.predict(x_test)

In [None]:
from sklearn import metrics

print("Accuracy:",metrics.accuracy_score(y_test, y_pred))

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

print("=== Confusion Matrix ===")
print(confusion_matrix(y_test, y_pred))
print('\n')

print("=== Classification Report ===")
print(classification_report(y_test, y_pred))
print('\n')

In [None]:
print("Precision:",metrics.precision_score(y_test, y_pred))

print("Recall:",metrics.recall_score(y_test, y_pred))

In [None]:
import joblib
smoke_model = 'Simple_Gender_detector_model_SVM'
joblib.dump(clf, smoke_model)

## XGBoost(C)

In [11]:
import xgboost as xgb
from xgboost import XGBClassifier

In [12]:
model = XGBClassifier(use_label_encoder=False, eval_metric='mlogloss')

In [None]:
model.fit(x_train, y_train)

In [None]:
y_pred = model.predict(x_test)

In [None]:
from sklearn import metrics

accuracy = metrics.accuracy_score(y_test, y_pred)
accuracy

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

print("=== Confusion Matrix ===")
print(confusion_matrix(y_test, y_pred))
print('\n')

print("=== Classification Report ===")
print(classification_report(y_test, y_pred))
print('\n')

In [None]:
print("Precision:",metrics.precision_score(y_test, y_pred))

print("Recall:",metrics.recall_score(y_test, y_pred))

In [None]:
from sklearn.externals import joblib
smoke_model = 'Simple_Gender_detector_model_XGBoost'
joblib.dump(model, smoke_model)