In [10]:
import tensorflow as tf
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import keras 
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder,MinMaxScaler, LabelEncoder
import cv2
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
import os
#for penalizing wrong weights heavily
from keras.regularizers import l2

In [11]:
from tensorflow.keras.applications import VGG16, ResNet50
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

In [12]:
import os
import numpy as np
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.model_selection import train_test_split


In [13]:
data_dir = 'crop_part1'

In [14]:
def load_utkface_dataset(data_dir):
    images = []
    age_labels = []
    gender_labels = []
    
    for img_name in os.listdir(data_dir):
        # Parse filename to extract age and gender
        age, gender, _ = img_name.split('_')[:3]
        
        img_path = os.path.join(data_dir, img_name)
        img = cv2.imread(img_path)
        
        if img is not None:
            # Resize image to (128, 128)
            img = cv2.resize(img, (128, 128))
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            img_color = cv2.merge([img, img, img])
            img_color = img_color / 255.0  # Normalize pixel values
            
            images.append(img_color)
            age_labels.append(int(age))  # Age as a continuous variable
            gender_labels.append(int(gender))  # Gender as 0 or 1

    
    
    images = np.array(images)
    age = np.array(age_labels)
    gender = np.array(gender_labels)
    #images = images.reshape(-1, 128, 128, 1)
    return images ,age , gender


In [15]:
images, ages, genders = load_utkface_dataset(data_dir)

print(f'Number of images: {len(images)}')
print(f'Number of ages: {len(ages)}')
print(f'Number of genders: {len(genders)}')

assert len(images) == len(ages) == len(genders), "The lengths of images, ages, and genders must be the same."


Number of images: 9780
Number of ages: 9780
Number of genders: 9780


In [16]:
#images, ages, genders = load_utkface_dataset(data_dir)

In [17]:
X_train, X_test, y_age_train, y_age_test, y_gender_train, y_gender_test = train_test_split(
    images, ages, genders, test_size=0.2, random_state=42
)

In [18]:
IMAGE_SIZE = (128, 128)  # Resize images to this size
BATCH_SIZE = 16
EPOCHS = 50

In [19]:
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(128, 128, 3))

for layer in base_model.layers:
    layer.trainable = False

x = Flatten()(base_model.output)
x = Dense(256, activation='relu')(x)
x = Dropout(0.5)(x)

# Age output (regression)
age_output = Dense(1, name='age_output')(x)

# Gender output (classification)
gender_output = Dense(1, activation='sigmoid', name='gender_output')(x)

# Create the final model
model = Model(inputs=base_model.input, outputs=[age_output, gender_output])    

In [20]:
model.compile(optimizer=Adam(),
              loss={'age_output': 'mean_squared_error', 'gender_output': 'binary_crossentropy'},
              metrics={'age_output': 'accuracy', 'gender_output': 'accuracy'})

In [21]:
# Use EarlyStopping to avoid overfitting
early_stopping = EarlyStopping(monitor='val_loss', patience=10)

# Train the model
history = model.fit(X_train, 
          {'age_output': y_age_train, 'gender_output': y_gender_train}, 
          validation_split=0.2,
          epochs=EPOCHS, 
          batch_size=BATCH_SIZE,
          callbacks=[early_stopping])


Epoch 1/50
[1m392/392[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m309s[0m 783ms/step - age_output_accuracy: 0.1004 - age_output_loss: 408.5904 - gender_output_accuracy: 0.5100 - gender_output_loss: 1.4328 - loss: 410.0234 - val_age_output_accuracy: 0.1022 - val_age_output_loss: 174.6275 - val_gender_output_accuracy: 0.6741 - val_gender_output_loss: 0.6043 - val_loss: 175.4202
Epoch 2/50
[1m392/392[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m312s[0m 797ms/step - age_output_accuracy: 0.0810 - age_output_loss: 190.3491 - gender_output_accuracy: 0.5781 - gender_output_loss: 0.8412 - loss: 191.1910 - val_age_output_accuracy: 0.0562 - val_age_output_loss: 179.2603 - val_gender_output_accuracy: 0.6166 - val_gender_output_loss: 0.6548 - val_loss: 180.0381
Epoch 3/50
[1m392/392[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m343s[0m 876ms/step - age_output_accuracy: 0.0763 - age_output_loss: 156.2971 - gender_output_accuracy: 0.5799 - gender_output_loss: 0.7059 - loss: 157.0039 - val

KeyboardInterrupt: 

In [None]:
history_frame = pd.DataFrame(history.history)

# Plot loss and val_loss
history_frame.loc[:, ['loss', 'val_loss']].plot()

# Plot accuracy and val_accuracy
history_frame.loc[:, ['accuracy', 'val_accuracy']].plot()
plt.show()

In [None]:
model.save('')