In [1]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical
import tensorflow as tf


In [2]:
data_dir = r"Path_of_data_image"


In [3]:
def load_all_data(data_dir):
    images = []
    labels = []
    for race_label in race_labels:
        race_dir = os.path.join(data_dir, race_label)
        for image_file in os.listdir(race_dir):
            if image_file.endswith('.jpg'):
                image_path = os.path.join(race_dir, image_file)
                image = load_img(image_path, target_size=(128, 128))
                image = img_to_array(image)
                images.append(image)
                labels.append(race_label)
    return np.array(images), np.array(labels)

In [4]:
# Load the training dataset
train_images, train_labels = load_all_data(train_data_dir)

# Load the testing dataset
test_images, test_labels = load_all_data(test_data_dir)

# Normalize the images
train_images = train_images / 255.0
test_images = test_images / 255.0

Loaded 12151 training images
Loaded 3041 testing images


In [5]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=42)

# Data augmentation
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True
)

datagen.fit(X_train)


In [6]:
# Define the model
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(np.unique(labels)), activation='softmax')
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [9]:
class_weights = {i: len(labels) / (len(np.unique(labels)) * np.bincount(labels)[i]) for i in np.unique(labels)}

history = model.fit(datagen.flow(X_train, y_train, batch_size=32), epochs=45, validation_data=(X_test, y_test), class_weight=class_weights)


Epoch 1/45


  self._warn_if_super_not_called()


[1m380/380[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m169s[0m 435ms/step - accuracy: 0.1999 - loss: 1.6417 - val_accuracy: 0.2818 - val_loss: 1.6093
Epoch 2/45
[1m380/380[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m140s[0m 365ms/step - accuracy: 0.1905 - loss: 1.6108 - val_accuracy: 0.2121 - val_loss: 1.6096
Epoch 3/45
[1m380/380[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m146s[0m 381ms/step - accuracy: 0.2017 - loss: 1.6155 - val_accuracy: 0.2690 - val_loss: 1.6081
Epoch 4/45
[1m380/380[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m183s[0m 479ms/step - accuracy: 0.2533 - loss: 1.6053 - val_accuracy: 0.1891 - val_loss: 1.6087
Epoch 5/45
[1m380/380[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m124s[0m 324ms/step - accuracy: 0.2374 - loss: 1.6037 - val_accuracy: 0.3058 - val_loss: 1.5901
Epoch 6/45
[1m380/380[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m189s[0m 494ms/step - accuracy: 0.3005 - loss: 1.5910 - val_accuracy: 0.3676 - val_loss: 1.5201
Epoch 7/45
[1m

In [10]:
#  Evaluate the model
loss, accuracy = model.evaluate(test_images, test_labels)
print(f'Test accuracy: {accuracy}')

# Save the entire model
model.save(r'Path_of_file\ethnicity_detection_model.keras')



[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 69ms/step - accuracy: 0.4795 - loss: 1.2971
Test accuracy: 0.5251561999320984


In [13]:
# Predict on new data
def predict_image(image_path):
    image = load_img(image_path, target_size=(128, 128))
    image = img_to_array(image) / 255.0
    image = np.expand_dims(image, axis=0)
    prediction = model.predict(image)
    predicted_label = label_encoder.inverse_transform([np.argmax(prediction)])
    return predicted_label[0]

# Example usage
image_path = r"Path_of_test_image\1.jpeg"
predicted_ethnicity = predict_image(image_path)
print(f'Predicted Ethnicity: {predicted_ethnicity}')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 206ms/step
Predicted Ethnicity: White
