<a href="https://colab.research.google.com/github/JanjaTomic/AgeGender/blob/main/agegender.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
import os
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
import tensorflow as tf
from keras.models import Sequential, Model
from keras.layers import Dense, Conv2D, Dropout, Flatten, MaxPooling2D, Input, BatchNormalization
from PIL import Image
from keras.preprocessing.image import load_img, img_to_array
import zipfile

In [2]:
# Install the Kaggle CLI with the %pip magic so it lands in the environment of
# the running kernel (a plain `!pip` shell call may target a different one).
%pip install -q kaggle



In [3]:
from google.colab import files

# Ask the user for the Kaggle API token; files.upload() returns a dict that is
# keyed by the name under which each uploaded file was saved.
uploaded = files.upload()

# Fail right here with a clear message: the next cell expects a file called
# exactly "kaggle.json" in the working directory.
if "kaggle.json" not in uploaded:
    raise FileNotFoundError(
        f"Expected an upload named 'kaggle.json', got: {sorted(uploaded)}"
    )

Saving kaggle.json to kaggle.json


In [4]:
# Source and destination paths
source_path = '/content/kaggle.json'
destination_path = '/root/.kaggle/kaggle.json'

# Create the .kaggle directory if it doesn't exist
kaggle_dir = '/root/.kaggle/'
os.makedirs(kaggle_dir, exist_ok=True)

# Move the kaggle.json file to /root/.kaggle/.
# The cell is safe to re-run: if the token was already moved by an earlier run
# the move is skipped, and a missing token is reported with a clear error.
if os.path.exists(source_path):
    os.replace(source_path, destination_path)
elif not os.path.exists(destination_path):
    raise FileNotFoundError(
        f"Kaggle token not found at {source_path} or {destination_path}; "
        "upload kaggle.json first."
    )

# Keep the API token private to the current user (the Kaggle CLI warns about
# tokens that are readable by other users).
os.chmod(destination_path, 0o600)

In [5]:
# Download the jangedoo/utkface-new dataset with the Kaggle CLI.
# Per the recorded output the archive is saved as /content/utkface-new.zip.
!kaggle datasets download -d jangedoo/utkface-new

Downloading utkface-new.zip to /content
100% 331M/331M [00:16<00:00, 23.0MB/s]
100% 331M/331M [00:16<00:00, 20.6MB/s]


In [6]:
# Unpack the downloaded archive into /content (read mode is ZipFile's default).
with zipfile.ZipFile('/content/utkface-new.zip') as zip_ref:
    zip_ref.extractall(path='/content')

In [18]:
## Function returns paths to images and their associated age and gender labels
def get_image_paths_age_gender_labels(image_directory):
    """Collect image paths and the age/gender labels encoded in their filenames.

    Each filename is split on "_"; the first field is read as the age and the
    second as the gender, both as integers.

    Entries whose name does not start with two integer fields (stray files,
    malformed names) are skipped and reported instead of aborting the whole
    scan with a ValueError/IndexError.

    Args:
        image_directory: Directory whose entries are scanned (non-recursive).

    Returns:
        Tuple ``(image_paths, age_labels, gender_labels)`` of three lists with
        matching order and length.
    """
    image_paths = []
    age_labels = []
    gender_labels = []
    skipped = []
    for filename in tqdm(os.listdir(image_directory)):
        fields = filename.split("_")
        try:
            age = int(fields[0])
            gender = int(fields[1])
        except (IndexError, ValueError):
            # Not of the form "<age>_<gender>_...": leave it out of the dataset.
            skipped.append(filename)
            continue
        image_paths.append(os.path.join(image_directory, filename))
        age_labels.append(age)
        gender_labels.append(gender)
    if skipped:
        # print() rather than warnings.warn(): the notebook silences warnings.
        print(f"Skipped {len(skipped)} file(s) with unparsable names, e.g. {skipped[:3]}")
    return image_paths, age_labels, gender_labels

### EDA ###

## Function displays 16 consecutive images of the dataset in a 4x4 grid
def show_examples(df, gender_dictionary, start_image_index):
    """Display up to 16 images, starting at row ``start_image_index``.

    Args:
        df: DataFrame whose rows unpack to (image path, age, gender).
        gender_dictionary: Mapping from gender label to the text shown in titles.
        start_image_index: Positional index of the first row to display.
    """
    number_of_images = 16
    plt.figure(figsize=(number_of_images, number_of_images))
    selection = df.iloc[start_image_index:start_image_index + number_of_images]
    # itertuples() yields (index, image, age, gender); the index is not needed.
    for position, (_, image_path, age, gender) in enumerate(selection.itertuples(), start=1):
        plt.subplot(4, 4, position)
        pixels = np.array(load_img(image_path))
        plt.imshow(pixels)
        plt.title(f"Age: {age} Gender: {gender_dictionary[gender]}")
        plt.axis("off")
    plt.show()

# Plotting bar graph for gender
def plot_gender(df):
    """Show a bar chart with the number of samples per gender label.

    Args:
        df: DataFrame with a ``gender`` column.
    """
    plt.figure(figsize=(10, 5))
    # value_counts() orders by frequency, which would tie the colours to
    # "most common / least common". Sorting by label keeps the mapping fixed:
    # the lowest label is always blue, the next one pink.
    counts = df['gender'].value_counts().sort_index()
    counts.plot(kind='bar', color=['blue', 'pink'])
    plt.title('Gender Distribution')
    plt.xlabel('Gender')
    plt.ylabel('Count')
    plt.show()

# Plotting age histogram
def plot_age(df):
    """Show a 20-bin histogram of the ``age`` column of ``df``."""
    plt.figure(figsize=(15, 5))
    ages = df['age']
    ages.plot.hist(bins=20, edgecolor='black')
    plt.title('Age Distribution')
    plt.xlabel('Age')
    plt.ylabel('Count')
    plt.show()

# Extracting features from the images
def extract_features(images, height, width):
    """Load images as normalized grayscale arrays.

    Args:
        images: Iterable of image file paths (list, pandas Series, ...).
        height: Target image height in pixels.
        width: Target image width in pixels.

    Returns:
        float32 array of shape ``(number of images, height, width, 1)`` with
        pixel values scaled to [0, 1].
    """
    # Materialize once so generators work too and the count is known up front.
    images = list(images)
    # Preallocate the result in float32 instead of collecting float64 arrays
    # in a list and copying them twice; this cuts peak memory considerably.
    features = np.empty((len(images), height, width, 1), dtype=np.float32)
    for position, image in enumerate(tqdm(images)):
        # color_mode replaces the deprecated `grayscale=True` flag.
        var_img = load_img(image, color_mode="grayscale")
        # PIL expects size as (width, height). LANCZOS is the same filter that
        # used to be exposed as Image.ANTIALIAS (removed in Pillow 10).
        var_img = var_img.resize((width, height), Image.LANCZOS)
        ## Normalization of images
        features[position, :, :, 0] = np.asarray(var_img, dtype=np.float32) / 255.0
    return features

### OLD VERSION

# Creating a custom model
def create_model(input_shape, age_activation='linear'):
    """Build a two-headed CNN that predicts gender and age from one image.

    Four Conv2D + MaxPooling2D stages feed a shared Flatten layer. From there
    two independent Dense + Dropout branches lead to the two outputs.

    Args:
        input_shape: Shape of a single input image, passed to ``Input``.
        age_activation: Activation of the age output. Defaults to 'linear':
            a ReLU on a single-unit regression head can get stuck at 0 with a
            zero gradient and never recover. Pass 'relu' for the old behaviour.

    Returns:
        Uncompiled Keras ``Model`` with outputs ``gender_out`` (sigmoid) and
        ``age_out`` (single unit), in that order.
    """
    inputs = Input(input_shape)

    # Convolutional feature extractor: filters double at every stage.
    conv_1 = Conv2D(32, kernel_size=(3,3), activation='relu')(inputs)
    maxp_1 = MaxPooling2D(pool_size=(2,2))(conv_1)
    conv_2 = Conv2D(64, kernel_size=(3,3), activation='relu')(maxp_1)
    maxp_2 = MaxPooling2D(pool_size=(2,2))(conv_2)
    conv_3 = Conv2D(128, kernel_size=(3,3), activation='relu')(maxp_2)
    maxp_3 = MaxPooling2D(pool_size=(2,2))(conv_3)
    conv_4 = Conv2D(256, kernel_size=(3,3), activation='relu')(maxp_3)
    maxp_4 = MaxPooling2D(pool_size=(2,2))(conv_4)

    flatten = Flatten()(maxp_4)

    # Two parallel branches, both fed directly from the flattened features.
    dense_1 = Dense(1024, activation='relu')(flatten)
    dense_2 = Dense(512, activation='relu')(flatten)

    dropout_1 = Dropout(0.3)(dense_1)
    dropout_2 = Dropout(0.3)(dense_2)

    output_1 = Dense(1, activation='sigmoid', name="gender_out")(dropout_1)
    output_2 = Dense(1, activation=age_activation, name="age_out")(dropout_2)

    model = Model(inputs=[inputs], outputs=[output_1, output_2])
    return model



## Note (translated from Croatian): good gender results, very, very poor age results.
## Disabled alternative create_model: it is wrapped in a bare string literal, so
## it is never executed. It adds BatchNormalization after every Conv2D, pools
## only after the 2nd and 4th convolution, and chains dense_2 after dropout_1.
"""
def create_model(input_shape):
    inputs = Input(input_shape)

    conv_1 = Conv2D(32, kernel_size=(3, 3), activation='relu')(inputs)
    conv_1 = BatchNormalization()(conv_1)

    conv_2 = Conv2D(64, kernel_size=(3, 3), activation='relu')(conv_1)
    conv_2 = BatchNormalization()(conv_2)
    maxp_1 = MaxPooling2D(pool_size=(2, 2))(conv_2)

    conv_3 = Conv2D(128, kernel_size=(3, 3), activation='relu')(maxp_1)
    conv_3 = BatchNormalization()(conv_3)

    conv_4 = Conv2D(256, kernel_size=(3, 3), activation='relu')(conv_3)
    conv_4 = BatchNormalization()(conv_4)
    maxp_2 = MaxPooling2D(pool_size=(2, 2))(conv_4)

    conv_5 = Conv2D(512, kernel_size=(3, 3), activation='relu')(maxp_2)
    conv_5 = BatchNormalization()(conv_5)

    flatten = Flatten()(conv_5)

    dense_1 = Dense(1024, activation='relu')(flatten)
    dropout_1 = Dropout(0.5)(dense_1)

    dense_2 = Dense(512, activation='relu')(dropout_1)
    dropout_2 = Dropout(0.5)(dense_2)

    output_1 = Dense(1, activation='sigmoid', name="gender_out")(dropout_1)
    output_2 = Dense(1, activation='relu', name="age_out")(dropout_2)

    model = Model(inputs=[inputs], outputs=[output_1, output_2])
    return model
"""

## Note (translated from Croatian): 65% and 3%
## (presumably gender accuracy and age accuracy of this variant - TODO confirm).
## Disabled alternative create_model: it is wrapped in a bare string literal, so
## it is never executed. It uses six 'same'-padded Conv2D layers, each followed
## by BatchNormalization, with four MaxPooling2D stages.
"""
def create_model(input_shape):
    inputs = Input(input_shape)

    conv_1 = Conv2D(32, kernel_size=(3, 3), activation='relu', padding='same')(inputs)
    conv_1 = BatchNormalization()(conv_1)

    conv_2 = Conv2D(64, kernel_size=(3, 3), activation='relu', padding='same')(conv_1)
    conv_2 = BatchNormalization()(conv_2)
    maxp_1 = MaxPooling2D(pool_size=(2, 2))(conv_2)

    conv_3 = Conv2D(128, kernel_size=(3, 3), activation='relu', padding='same')(maxp_1)
    conv_3 = BatchNormalization()(conv_3)

    conv_4 = Conv2D(256, kernel_size=(3, 3), activation='relu', padding='same')(conv_3)
    conv_4 = BatchNormalization()(conv_4)
    maxp_2 = MaxPooling2D(pool_size=(2, 2))(conv_4)

    conv_5 = Conv2D(512, kernel_size=(3, 3), activation='relu', padding='same')(maxp_2)
    conv_5 = BatchNormalization()(conv_5)
    maxp_3 = MaxPooling2D(pool_size=(2, 2))(conv_5)

    conv_6 = Conv2D(1024, kernel_size=(3, 3), activation='relu', padding='same')(maxp_3)
    conv_6 = BatchNormalization()(conv_6)
    maxp_4 = MaxPooling2D(pool_size=(2, 2))(conv_6)

    # Flatten and Dense Layers
    flatten = Flatten()(maxp_4)

    dense_1 = Dense(1024, activation='relu')(flatten)
    dropout_1 = Dropout(0.5)(dense_1)

    dense_2 = Dense(512, activation='relu')(dropout_1)
    dropout_2 = Dropout(0.5)(dense_2)

    # Output Layers
    output_1 = Dense(1, activation='sigmoid', name="gender_out")(dropout_1)
    output_2 = Dense(1, activation='relu', name="age_out")(dropout_2)

    # Creating the Model
    model = Model(inputs=[inputs], outputs=[output_1, output_2])
    return model
"""



def display_acc_graph(history):
    """Plot training vs. validation accuracy of the gender output per epoch.

    Args:
        history: Object returned by ``model.fit``; its ``history`` dict must
            contain "gender_out_accuracy" and "val_gender_out_accuracy".
    """
    acc = history.history["gender_out_accuracy"]
    val_acc = history.history["val_gender_out_accuracy"]
    epochs = range(len(acc))
    plt.plot(epochs, acc, 'b', label="Training Accuracy")
    plt.plot(epochs, val_acc, 'r', label="Validation Accuracy")
    plt.title("Accuracy Graph")
    plt.xlabel("Epoch")
    plt.ylabel("Accuracy")
    plt.legend()
    # Render the figure here, as display_loss_graph does, instead of opening a
    # new empty figure that is left behind when this function is used alone.
    plt.show()

def display_loss_graph(history):
    """Plot training vs. validation loss of the gender output per epoch.

    Args:
        history: Object returned by ``model.fit``; its ``history`` dict must
            contain "gender_out_loss" and "val_gender_out_loss".
    """
    recorded = history.history
    train_curve = recorded["gender_out_loss"]
    val_curve = recorded["val_gender_out_loss"]
    epoch_axis = range(len(train_curve))
    curves = (
        (train_curve, 'b', "Training Loss"),
        (val_curve, 'r', "Validation Loss"),
    )
    for values, line_format, legend_label in curves:
        plt.plot(epoch_axis, values, line_format, label=legend_label)
    plt.title("Loss Graph")
    plt.legend()
    plt.show()

In [19]:
## Directory that holds the extracted images
image_directory = "/content/UTKFace"

## Human-readable names of the gender labels
gender_dictionary = {0: "Male", 1: "Female"}

## Paths and labels, parsed from the filenames
image_paths, age_labels, gender_labels = get_image_paths_age_gender_labels(image_directory)

## Structure of df: INDEX, PICTURE FILEPATH, AGE LABEL, GENDER LABEL
df = pd.DataFrame(
    {
        "image": image_paths,
        "age": age_labels,
        "gender": gender_labels,
    }
)
##print(df)
##print(df)

  0%|          | 0/23708 [00:00<?, ?it/s]

In [None]:
### EDA ###

##show_examples(df, gender_dictionary, 500)

##plot_age(df)

##plot_gender(df)

In [20]:
## Size (in pixels) every image is resized to before it is fed to the model.
## Ideas to try: a smaller height/width, and float64 instead of float32 in
## the extract_features function.
image_height, image_width = 124, 124

In [None]:
## Pixel values are scaled to [0, 1] inside extract_features, so no extra
## X = X/255.0 step is needed here.
X = extract_features(df["image"], image_height, image_width)

## Labels as independent NumPy arrays, in the same row order as X
y_gender = df["gender"].to_numpy(copy=True)
y_age = df["age"].to_numpy(copy=True)

  0%|          | 0/23708 [00:00<?, ?it/s]

In [17]:
input_shape = (image_height, image_width, 1)

model = create_model(input_shape)

# Key losses and metrics by output-layer name so each head gets a fitting one.
# Accuracy only makes sense for the binary gender head; the age head is a
# regression and is tracked with mean absolute error instead.
model.compile(
    loss={"gender_out": "binary_crossentropy", "age_out": "mae"},
    optimizer="adam",
    metrics={"gender_out": ["accuracy"], "age_out": ["mae"]},
)

# validation_split takes the LAST 20% of the arrays exactly as given, so shuffle
# once with a fixed seed to make the split independent of directory order.
shuffled_order = np.random.default_rng(42).permutation(len(X))
X_shuffled = X[shuffled_order]
y_shuffled = [y_gender[shuffled_order], y_age[shuffled_order]]

# NOTE(review): the InternalError recorded for this cell ("Dst tensor is not
# initialized") usually points to exhausted GPU memory - TODO confirm; if it
# recurs, restart the runtime before training or lower the image size.
history = model.fit(x=X_shuffled, y=y_shuffled, batch_size=32, epochs=5, validation_split=0.2)

InternalError: Failed copying input tensor from /job:localhost/replica:0/task:0/device:CPU:0 to /job:localhost/replica:0/task:0/device:GPU:0 in order to run _EagerConst: Dst tensor is not initialized.

In [None]:
# Raw gender-accuracy curves, kept in the namespace for ad-hoc inspection.
acc, val_acc = (
    history.history["gender_out_accuracy"],
    history.history["val_gender_out_accuracy"],
)

# Training vs. validation curves of the gender output.
display_acc_graph(history)
display_loss_graph(history)