# Imports

In [1]:
import os
import pandas as pd
import numpy as np

In [22]:
import tensorflow
from tensorflow import keras
from tensorflow.keras import Model
from tensorflow.keras.layers import *
from tensorflow.keras.applications.vgg16 import VGG16

from tensorflow.keras.preprocessing.image import ImageDataGenerator

from tensorflow.keras.optimizers import *

# Import Dataset

#### Local

In [26]:
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/

!pip install kaggle kagglehub

!kaggle datasets download -d jangedoo/utkface-new

import zipfile
zip_ref = zipfile.ZipFile('utkface-new.zip', 'r')
zip_ref.extractall('.')
zip_ref.close()

!rm -rf 'utkface-new.zip'

The syntax of the command is incorrect.
'cp' is not recognized as an internal or external command,
operable program or batch file.


Dataset URL: https://www.kaggle.com/datasets/jangedoo/utkface-new
License(s): copyright-authors


'rm' is not recognized as an internal or external command,
operable program or batch file.


#### Colab

# Prepare dataset for model training and testing

The images are named as age, gender, ethnicity

In [3]:
folder_path = 'data_problem3'

In [4]:
df = pd.DataFrame(columns=['file_name', 'age', 'gender'])
df.head()

Unnamed: 0,file_name,age,gender


In [5]:
i = 0
for file in os.listdir(folder_path):
    print(file)
    i += 1
    if i > 5:
        break

100_1_0_20170110183726390.jpg.chip.jpg
100_1_2_20170105174847679.jpg.chip.jpg
101_1_2_20170105174739309.jpg.chip.jpg
10_0_0_20161220222308131.jpg.chip.jpg
10_0_0_20170103200329407.jpg.chip.jpg
10_0_0_20170103200522151.jpg.chip.jpg


In [6]:
for file in os.listdir(folder_path):
    
    if not(file.endswith('.jpg')):
        continue

    parts = file.split('_')
    age = parts[0]
    gender = parts[1]
    df = pd.concat([df, pd.DataFrame({'file_name': [file], 'age': [age], 'gender': [gender]})], ignore_index=True)

In [7]:
df['age'] = df['age'].astype(int)
df['gender'] = df['gender'].astype(int)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9780 entries, 0 to 9779
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   file_name  9780 non-null   object
 1   age        9780 non-null   int64 
 2   gender     9780 non-null   int64 
dtypes: int64(2), object(1)
memory usage: 229.3+ KB


In [8]:
df.head()

Unnamed: 0,file_name,age,gender
0,100_1_0_20170110183726390.jpg.chip.jpg,100,1
1,100_1_2_20170105174847679.jpg.chip.jpg,100,1
2,101_1_2_20170105174739309.jpg.chip.jpg,101,1
3,10_0_0_20161220222308131.jpg.chip.jpg,10,0
4,10_0_0_20170103200329407.jpg.chip.jpg,10,0


In [9]:
df.to_csv('data_problem3.csv')

In [10]:
df.head()

Unnamed: 0,file_name,age,gender
0,100_1_0_20170110183726390.jpg.chip.jpg,100,1
1,100_1_2_20170105174847679.jpg.chip.jpg,100,1
2,101_1_2_20170105174739309.jpg.chip.jpg,101,1
3,10_0_0_20161220222308131.jpg.chip.jpg,10,0
4,10_0_0_20170103200329407.jpg.chip.jpg,10,0


In [11]:
df.shape

(9780, 3)

In [12]:
train_df = df.sample(frac=1,random_state=0).iloc[:7000]
test_df = df.sample(frac=1,random_state=0).iloc[7000:]

In [13]:
train_df.shape

(7000, 3)

In [14]:
test_df.shape

(2780, 3)

In [15]:
train_df.to_csv('train_data.csv')
test_df.to_csv('test_data.csv')

# Generate Data for Training

In [16]:
train_datagen = ImageDataGenerator(rescale=1./255,
                                   rotation_range=30,
                                   width_shift_range=0.2,
                                   height_shift_range=0.2,
                                   shear_range=0.2,
                                   zoom_range=0.2,
                                   horizontal_flip=True)

test_datagen = ImageDataGenerator(rescale=1./255)

In [19]:
train_dataset = train_datagen.flow_from_dataframe(train_df,
                                                    directory=folder_path,
                                                    x_col='file_name',
                                                    y_col=['age','gender'],
                                                    target_size=(224,224),
                                                    class_mode='multi_output')

test_dataset = test_datagen.flow_from_dataframe(test_df,
                                                    directory=folder_path,
                                                    x_col='file_name',
                                                    y_col=['age','gender'],
                                                    target_size=(224,224),
                                                  class_mode='multi_output')

Found 7000 validated image filenames.
Found 2780 validated image filenames.


# Define model architecture

In [20]:
conv_base = VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=(224, 224, 3)
)

conv_base.trainable = False

conv_base.summary()

In [23]:
flatten = Flatten(name='flatten_layer')(conv_base.output)

fc_l1 = Dense(units=512, activation='relu', name='fc_l1')(flatten)
fc_l2 = Dense(units=256, activation='relu', name='fc_l2')(fc_l1)
fc_l3 = Dense(units=128, activation='relu', name='fc_l3')(fc_l2)
fc_l4 = Dense(units=64, activation='relu', name='fc_l4')(fc_l3)
fc_l5 = Dense(units=32, activation='relu', name='fc_l5')(fc_l4)
output_age = Dense(units=1, activation='linear', name='output_layer_age')(fc_l5)
output_gender = Dense(units=1, activation='sigmoid', name='output_layer_gender')(fc_l5)

model = Model(inputs=conv_base.input, outputs=[output_age, output_gender])
model.name = 'vgg16_transfer_learning'
model.summary()

# Train model

In [24]:
model.compile(
    loss={'output_age': 'mse', 'output_gender': 'binary_crossentropy'},
    optimizer=RMSprop(learning_rate=1e-5),
    metrics={'output_age': 'mae', 'output_gender': 'accuracy'}
    )

In [25]:
history = model.fit(train_dataset, epochs=10, validation_data=test_dataset).history
model.save('model_problem3.keras')

  self._warn_if_super_not_called()


TypeError: `output_signature` must contain objects that are subclass of `tf.TypeSpec` but found <class 'list'> which is not.

# Test Results

In [None]:
plt.plot(history['loss'], label='Training', color='red')
plt.plot(history['val_loss'], label='Validation', color='blue')
plt.legend()
plt.show()
plt.savefig('loss.png')