# Imports

In [1]:
import os
import pandas as pd
import numpy as np

# Import Dataset

In [None]:
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/

In [None]:
# Install the Kaggle command-line client used by the download cell below.
# NOTE(review): versions are not pinned, and kagglehub is not used in the cells shown here — confirm it is needed.
!pip install kaggle kagglehub

In [None]:
# Download the jangedoo/utkface-new dataset archive into the working directory
# (the next cell expects it as 'utkface-new.zip').
!kaggle datasets download -d jangedoo/utkface-new

In [None]:
import zipfile

# Unpack the downloaded archive into the current directory. The context
# manager guarantees the archive handle is closed even if extraction fails.
with zipfile.ZipFile('utkface-new.zip', 'r') as zip_ref:
    zip_ref.extractall('.')

In [None]:
# Delete the archive now that its contents have been extracted.
!rm -rf 'utkface-new.zip'

# Prepare dataset for model training and testing

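Each image file name encodes its labels as `age_gender_ethnicity_timestamp.jpg.chip.jpg` (for example `9_1_2_20161219204347420.jpg.chip.jpg`). Only the age and gender fields are used below.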

In [2]:
# Start from an empty table with one column per label parsed from the file names.
label_columns = ['file_name', 'age', 'gender']
df = pd.DataFrame(columns=label_columns)
df.head()

Unnamed: 0,file_name,age,gender


In [3]:
# Folder (relative to the notebook) that holds the face images.
dataset_folder_path = 'data_problem3'
# Names of every entry in that folder; os.listdir returns them in arbitrary order.
files = os.listdir(dataset_folder_path)

In [4]:
# Preview the first six file names to see how the labels are encoded.
# Slicing replaces the manual counter/break and prints the same six names.
for file in files[:6]:
    print(file)

9_1_2_20161219204347420.jpg.chip.jpg
1_1_2_20161219154612988.jpg.chip.jpg
16_0_0_20170104003740977.jpg.chip.jpg
1_1_3_20161219230734016.jpg.chip.jpg
26_1_1_20170103181931657.jpg.chip.jpg
2_0_2_20161219141650121.jpg.chip.jpg


In [5]:
# Parse the labels out of every image file name in a single pass.
# Names are '_'-separated and start with the age and then the gender
# (e.g. '9_1_2_20161219204347420.jpg.chip.jpg' -> age '9', gender '1').
records = []
for file in files:
    # Ignore anything in the folder that is not a JPEG image.
    if not file.endswith('.jpg'):
        continue

    parts = file.split('_')
    records.append({'file_name': file, 'age': parts[0], 'gender': parts[1]})

# Build the frame once from the collected records. Growing it with pd.concat
# inside the loop copies the whole frame on every iteration (quadratic time)
# and appends duplicate rows whenever the cell is re-run.
df = pd.DataFrame(records, columns=['file_name', 'age', 'gender'])

In [6]:
# The labels were parsed from file names as strings; convert both to integers
# in one call, then print the frame summary to confirm the dtypes.
df = df.astype({'age': int, 'gender': int})
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9780 entries, 0 to 9779
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   file_name  9780 non-null   object
 1   age        9780 non-null   int64 
 2   gender     9780 non-null   int64 
dtypes: int64(2), object(1)
memory usage: 229.3+ KB


In [7]:
# Show the first rows to check the parsed age and gender values.
df.head()

Unnamed: 0,file_name,age,gender
0,9_1_2_20161219204347420.jpg.chip.jpg,9,1
1,1_1_2_20161219154612988.jpg.chip.jpg,1,1
2,16_0_0_20170104003740977.jpg.chip.jpg,16,0
3,1_1_3_20161219230734016.jpg.chip.jpg,1,1
4,26_1_1_20170103181931657.jpg.chip.jpg,26,1


In [8]:
# Persist the file-name/age/gender table to CSV in the working directory.
# NOTE(review): with the default index=True the row index is written as an
# unnamed first column — confirm whether downstream readers expect it.
df.to_csv('data_problem3.csv')

In [9]:
# Same preview as before saving; df is not modified by to_csv.
# NOTE(review): this repeats the earlier df.head() cell — consider removing it.
df.head()

Unnamed: 0,file_name,age,gender
0,9_1_2_20161219204347420.jpg.chip.jpg,9,1
1,1_1_2_20161219154612988.jpg.chip.jpg,1,1
2,16_0_0_20170104003740977.jpg.chip.jpg,16,0
3,1_1_3_20161219230734016.jpg.chip.jpg,1,1
4,26_1_1_20170103181931657.jpg.chip.jpg,26,1


# Define model architecture

conv_base = VGG16(  # NOTE(review): VGG16 is not imported in the cells shown, and this snippet has no `In [ ]:` marker — confirm it lives in an executed code cell
    weights='imagenet',  # start from the ImageNet-pretrained weights
    include_top=False,  # leave out VGG16's own fully connected classifier layers
    input_shape=(224, 224, 3)  # presumably (height, width, channels) — confirm against the data pipeline
)

conv_base.trainable = False  # freeze the base so training only updates the layers added on top

conv_base.summary()  # print the layer-by-layer overview of the frozen base

In [None]:
# Head stacked on top of the VGG16 base: flatten its output feature maps
# into a single vector per image.
flatten = Flatten(name='flatten_layer')(conv_base.output)

# Shared fully connected trunk whose width halves at each layer (512 -> 32).
fc_l1 = Dense(units=512, activation='relu', name='fc_l1')(flatten)
fc_l2 = Dense(units=256, activation='relu', name='fc_l2')(fc_l1)
fc_l3 = Dense(units=128, activation='relu', name='fc_l3')(fc_l2)
fc_l4 = Dense(units=64, activation='relu', name='fc_l4')(fc_l3)
fc_l5 = Dense(units=32, activation='relu', name='fc_l5')(fc_l4)

# Two heads on the shared trunk: a linear unit for age (regression) and a
# sigmoid unit for gender (binary classification). The layer names below are
# the keys Keras uses for per-output losses and metrics.
output_age = Dense(units=1, activation='linear', name='output_layer_age')(fc_l5)
output_gender = Dense(units=1, activation='sigmoid', name='output_layer_gender')(fc_l5)

# Name the model through the constructor instead of assigning `model.name`
# afterwards: on Keras versions where `name` is a read-only property, that
# assignment raises AttributeError.
model = Model(
    inputs=conv_base.input,
    outputs=[output_age, output_gender],
    name='vgg16_transfer_learning',
)
model.summary()

In [None]:
# Configure one loss and one metric per output head. The dict keys must be
# the names of the output layers ('output_layer_age', 'output_layer_gender');
# the previous keys 'output_age'/'output_gender' were the Python variable
# names and do not match any output of the model.
model.compile(
    loss={
        'output_layer_age': 'mse',                      # age is a regression target
        'output_layer_gender': 'binary_crossentropy',   # gender is a 0/1 label
    },
    optimizer=RMSprop(learning_rate=1e-5),
    metrics={
        'output_layer_age': 'mae',
        'output_layer_gender': 'accuracy',
    },
)

In [None]:
# Train for 10 epochs and keep only the per-epoch metrics dict (`.history`),
# which the plotting cell indexes by 'loss' and 'val_loss'.
# NOTE(review): the original call had an empty `validation_data=` argument,
# which is a SyntaxError. `val_data` is a placeholder for the validation set —
# confirm its real name; like `train_data`, it must be defined in an earlier cell.
history = model.fit(
    train_data,
    epochs=10,
    validation_data=val_data,
).history

# Save the trained model in the native Keras format.
model.save('model_problem3.keras')

# Train model

# Test Results

In [None]:
# Plot the training and validation loss recorded for each epoch.
plt.plot(history['loss'], label='Training', color='red')
plt.plot(history['val_loss'], label='Validation', color='blue')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training vs. validation loss')
plt.legend()
# Save before showing: once plt.show() has rendered the figure, a later
# plt.savefig() writes out a new, empty figure instead of this plot.
plt.savefig('loss.png')
plt.show()