In [1]:

# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

CHUNK_SIZE = 40960
DATA_SOURCE_MAPPING = 'gender-classification-dataset:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F446365%2F844929%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240217%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240217T133522Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D361bdbc6315d829610576d74731efb0ce37bddbcd1036c07afa44a77ce9e163dd8768b0bd5bedf5354e3a179faa9bd117a19b72e3adf36602866f03584fb02b6ae964804e663adf749ac299103ed5592ff69b23b8a25426585c196099d9d1db8f9510b9c1f5e8107a6c9851031d26e111da47d95fd742a94b69fec942360e26e92fc0bcb02c50bb88f11c9bf4f0cbcb7a30a2b1ffb5f7cd4962fb663627f962921a496dc1f666d5f462d75029d176c45842554ef00da35896e9799422fe7180ff6b8f503f9d37249176af16e6d16d352fbcbb559462e05356c12ab49405ebe0c905079b06b7987a6c66f7c625ec0f07392fafe0f8edd835eab53c6749a0548ef,resnet50:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F504915%2F936546%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240217%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240217T133522Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D0a69c62f2ec7b863b9382edbb17ed6a59290827def295292aa4d2d1b28fa371d209859b85dfd4acd15517bd3328de490db9a132db0a01a2aa9d096b914192d83848f46a412c1585c75097f2741d2580d42a8bcbf9442410bc1bc4ae08b6e5365cfaeea18c84741a41d3415240507a424b7cddb7282357e3f04551af92476c0e67112c2df61fcfef0d7ccc69461387425243326a8cab454efa38f1da11e68138007a7c8c6477c5f1849854816db25bd7feaab20781a7cc2ae74d89c38c966a73e2647838b05586cebb997cd31e53cab14f765a31ff3cb06281cdc3b1ca6e9c5d5516f6a69eaa64432984604c7f480d5a39d0b30f5bea88f00c21f9ec6155d174f'

KAGGLE_INPUT_PATH='/kaggle/input'
KAGGLE_WORKING_PATH='/kaggle/working'
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

try:
  os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
  pass
try:
  os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
  pass

for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    directory, download_url_encoded = data_source_mapping.split(':')
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            total_length = fileres.headers['content-length']
            print(f'Downloading {directory}, {total_length} bytes compressed')
            dl = 0
            data = fileres.read(CHUNK_SIZE)
            while len(data) > 0:
                dl += len(data)
                tfile.write(data)
                done = int(50 * dl / int(total_length))
                sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                sys.stdout.flush()
                data = fileres.read(CHUNK_SIZE)
            if filename.endswith('.zip'):
              with ZipFile(tfile) as zfile:
                zfile.extractall(destination_path)
            else:
              with tarfile.open(tfile.name) as tarfile:
                tarfile.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError as e:
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
        continue
    except OSError as e:
        print(f'Failed to load {download_url} to path {destination_path}')
        continue

print('Data source import complete.')


Downloading gender-classification-dataset, 282512091 bytes compressed
Downloaded and uncompressed: gender-classification-dataset
Downloading resnet50, 87541466 bytes compressed
Downloaded and uncompressed: resnet50
Data source import complete.


In [2]:
# do the necessary imports

import matplotlib.pyplot as plt
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import Adam
from keras.preprocessing.image import img_to_array
from keras.utils import to_categorical

from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
import random
import cv2
import os
from PIL import Image

from keras.layers import *
from keras.models import *
import keras


In [3]:
# Hyper - parameters

epochs = 100
lr = 1e-3
batch_size = 64
#img_dims = (96,96,3)

data = []
labels = []

In [4]:
size = 224

In [5]:
from keras.preprocessing.image import ImageDataGenerator

**RESNET 50**

In [6]:
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, GlobalAveragePooling2D, BatchNormalization
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing.image import load_img, img_to_array

resnet_weights_path = '../input/resnet50/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5'


In [7]:
# CALLBACKS

from keras.callbacks import EarlyStopping
from keras.callbacks import ReduceLROnPlateau

es = EarlyStopping(patience=5, monitor = 'val_accuracy')
rlp = ReduceLROnPlateau(patience=5, monitor = 'val_accuracy')

callbacks = [es, rlp]

In [8]:
train_datagen = ImageDataGenerator(horizontal_flip=True,
                                   width_shift_range = 0.4,
                                   height_shift_range = 0.4,
                                   zoom_range=0.3,
                                   rotation_range=20,
                                   rescale = 1./255
                                   )

test_gen = ImageDataGenerator(rescale = 1./255)

image_size = 224
batch_size = 64

train_generator = train_datagen.flow_from_directory(
        '../input/gender-classification-dataset/Training',
        target_size=(image_size, image_size),
        batch_size=batch_size,
        class_mode='binary')

validation_generator = test_gen.flow_from_directory(
    '../input/gender-classification-dataset/Validation',
    target_size = (image_size, image_size),
    batch_size = batch_size,
    class_mode = 'binary'
)

num_classes = len(train_generator.class_indices)
print('Numer of classes:' ,num_classes)
print('Class labels: ', train_generator.class_indices)




"""
train_datagen = ImageDataGenerator(
    rescale = 1./255,
    shear_range = 0.2,
    zoom_range = 0.2,
    horizontal_flip = True
)

test_datagen = ImageDataGenerator(rescale = 1./255)

train_generator = train_datagen.flow_from_directory(
    train_path,
    target_size = (size, size),
    batch_size = batch_size,
    class_mode = 'binary'
)

validation_generator = test_datagen.flow_from_directory(
    valid_path,
    target_size = (size, size),
    batch_size = batch_size,
    class_mode = 'binary'
)

"""

Found 47009 images belonging to 2 classes.
Found 11649 images belonging to 2 classes.
Numer of classes: 2
Class labels:  {'female': 0, 'male': 1}


"\ntrain_datagen = ImageDataGenerator(\n    rescale = 1./255, \n    shear_range = 0.2, \n    zoom_range = 0.2,\n    horizontal_flip = True\n)\n\ntest_datagen = ImageDataGenerator(rescale = 1./255)\n\ntrain_generator = train_datagen.flow_from_directory(\n    train_path, \n    target_size = (size, size),\n    batch_size = batch_size,\n    class_mode = 'binary'\n)\n\nvalidation_generator = test_datagen.flow_from_directory(\n    valid_path,\n    target_size = (size, size),\n    batch_size = batch_size, \n    class_mode = 'binary'\n)\n\n"

In [9]:
model = Sequential()

model.add(ResNet50(include_top=False, pooling='avg', weights='imagenet'))
model.add(Flatten())
model.add(BatchNormalization())
model.add(Dense(2048, activation='relu'))
model.add(BatchNormalization())
model.add(Dense(1024, activation='relu'))
model.add(BatchNormalization())
model.add(Dense(1, activation='sigmoid'))

model.layers[0].trainable = False

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5


In [10]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 resnet50 (Functional)       (None, 2048)              23587712  
                                                                 
 flatten (Flatten)           (None, 2048)              0         
                                                                 
 batch_normalization (Batch  (None, 2048)              8192      
 Normalization)                                                  
                                                                 
 dense (Dense)               (None, 2048)              4196352   
                                                                 
 batch_normalization_1 (Bat  (None, 2048)              8192      
 chNormalization)                                                
                                                                 
 dense_1 (Dense)             (None, 1024)              2

In [11]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [None]:
model.fit_generator(train_generator, steps_per_epoch = int(47000/64) + 1  , epochs = 30, validation_data = validation_generator, callbacks = callbacks)

  model.fit_generator(train_generator, steps_per_epoch = int(47000/64) + 1  , epochs = 30, validation_data = validation_generator, callbacks = callbacks)


Epoch 1/30
  8/735 [..............................] - ETA: 2:39:27 - loss: 2.3650 - accuracy: 0.5059

In [None]:
model.save('model3.h5')

w/o pretrained weights

In [None]:
model2 = Sequential()

model2.add(ResNet50(include_top=False, pooling='avg', weights=None))
model2.add(Flatten())
model2.add(BatchNormalization())
model2.add(Dense(2048, activation='relu'))
model2.add(BatchNormalization())
model2.add(Dense(1024, activation='relu'))
model2.add(BatchNormalization())
model2.add(Dense(1, activation='sigmoid'))

model2.layers[0].trainable = True

In [None]:
model2.summary()

In [None]:
model2.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
model2.fit_generator(train_generator, steps_per_epoch = int(47000/64) + 1  , epochs = 50, validation_data = validation_generator, callbacks = callbacks)

In [None]:
model2.save('model4.h5')