In [1]:
import os
import cv2
import numpy as np
from tensorflow.keras.utils import to_categorical

In [2]:
dataset_path = r"C:\Users\bhard\ROHAN [ML & DS]\DL Practice Works\Glora_ face ethnicity\UTKFace"
img_size = (48, 48)
ethnicities = 5

# Initialize lists
images = []
ethnicity_labels = []

# Loop through images
for file in os.listdir(dataset_path):
    try:
        file_name = file.split(".")[0]
        parts = file_name.split("_")

        ethnicity = int(parts[2])

        if ethnicity < 0 or ethnicity >= ethnicities:
            print(f"Skipping {file}: Invalid ethnicity value {ethnicity}")
            continue
        img_path = os.path.join(dataset_path, file)
        img = cv2.imread(img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, img_size)
        img = img / 255.0

        # Append to lists
        images.append(img)
        ethnicity_labels.append(ethnicity)

    except Exception as e:
        print(f"Skipped {file} due to error: {e}")


Skipping 39_1_20170116174525125.jpg.chip.jpg: Invalid ethnicity value 20170116174525125
Skipping 61_1_20170109142408075.jpg.chip.jpg: Invalid ethnicity value 20170109142408075
Skipping 61_1_20170109150557335.jpg.chip.jpg: Invalid ethnicity value 20170109150557335


In [4]:
images = np.array(images).reshape(-1, 48, 48, 3)  
ethnicity_labels = to_categorical(ethnicity_labels, num_classes=5)

In [5]:
print(f"images shape: {len(images)}")
print(f"ethnicity_labels shape: {len(ethnicity_labels)}")

images = np.array(images[:23705])
ethnicity_labels = np.array(ethnicity_labels[:23705])


images shape: 23705
ethnicity_labels shape: 23705


In [7]:
ethnicity_labels = np.argmax(ethnicity_labels, axis=1)
unique_classes, counts = np.unique(ethnicity_labels, return_counts=True)
for cls, count in zip(unique_classes, counts):
    print(f"Ethnicity {cls}: {count} samples")


Ethnicity 0: 10078 samples
Ethnicity 1: 4526 samples
Ethnicity 2: 3434 samples
Ethnicity 3: 3975 samples
Ethnicity 4: 1692 samples


In [8]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    images, ethnicity_labels, test_size=0.2, random_state=42
)


In [9]:
from sklearn.utils.class_weight import compute_class_weight
import numpy as np

ethnicity_classes = np.array([0, 1, 2, 3, 4])

ethnicity_counts = np.array([10078, 4526, 3434, 3975, 1692])

class_weights = compute_class_weight(class_weight="balanced", classes=ethnicity_classes, y=np.repeat(ethnicity_classes, ethnicity_counts))

class_weight_dict = {i: weight for i, weight in enumerate(class_weights)}

print(class_weight_dict)


{0: 0.47043064100019844, 1: 1.0475033141847105, 2: 1.3806057076295866, 3: 1.1927044025157232, 4: 2.8020094562647753}


In [20]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam

# Input
input_layer = Input(shape=(48, 48, 3))

# CNN Layers
x = Conv2D(32, (3, 3), activation='relu', kernel_regularizer='l2')(input_layer)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2))(x)

x = Conv2D(64, (3, 3), activation='relu', kernel_regularizer='l2')(x)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2))(x)

x = Conv2D(128, (3, 3), activation='relu', kernel_regularizer='l2')(x)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2))(x)

x = Flatten()(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.5)(x)

# Output for ethnicity
ethnicity_output = Dense(5, activation='softmax', name='ethnicity_output')(x)

# Model
model = Model(inputs=input_layer, outputs=ethnicity_output)

# Compile
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

model.summary()




Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 48, 48, 3)]       0         
                                                                 
 conv2d_3 (Conv2D)           (None, 46, 46, 32)        896       
                                                                 
 batch_normalization_3 (Batc  (None, 46, 46, 32)       128       
 hNormalization)                                                 
                                                                 
 max_pooling2d_3 (MaxPooling  (None, 23, 23, 32)       0         
 2D)                                                             
                                                                 
 conv2d_4 (Conv2D)           (None, 21, 21, 64)        18496     
                                                                 
 batch_normalization_4 (Batc  (None, 21, 21, 64)       256 

In [17]:
y_train

array([0, 2, 0, ..., 0, 1, 1], dtype=int64)

In [21]:
from tensorflow.keras.utils import to_categorical

y_train_onehot = to_categorical(y_train, num_classes=5)
y_test_onehot = to_categorical(y_test, num_classes=5)

sample_weights_ethnicity = np.array([class_weight_dict[label] for label in y_train])

fitting = model.fit(
    X_train,
    y_train_onehot, 
    validation_data=(X_test, y_test_onehot),  
    sample_weight=sample_weights_ethnicity,
    epochs=20,
    batch_size=32
)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [22]:
model.save("ethnicity_model.h5")
