In [1]:
import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [2]:
# Directory holding the UTKFace images. Set the UTKFACE_DIR environment
# variable to point the notebook at another location; when it is unset the
# original local path below is used unchanged.
dataset_path = os.environ.get(
    'UTKFACE_DIR',
    r'C:\Users\Dell\Desktop\MANI\ndp\UTKFace',
)


In [3]:
# Display name for each ethnicity code (the code is the third "_"-separated
# field of an image file name). Keys are the codes as strings, in numeric order.
ethnicity_map = {
    str(code): name
    for code, name in enumerate([
        'US',        # code 0: White replaced with US
        'African',   # code 1: Black replaced with African
        'Asian',     # code 2
        'Indian',    # code 3
        'Others',    # code 4
    ])
}

In [4]:
def load_images_from_folder(dataset_path):
    """Load the labelled ``.jpg`` files in ``dataset_path`` as 48x48 images.

    File names are expected to hold at least four ``_``-separated fields with
    the ethnicity code in the third one. A file is skipped when its name does
    not follow that pattern, when its code is not a key of ``ethnicity_map``,
    or when OpenCV cannot decode it.

    Args:
        dataset_path: Directory containing the image files.

    Returns:
        Tuple ``(images, labels)``. ``images`` is a float32 array of the
        resized images scaled to [0, 1]; ``labels`` is an array with the
        matching ``ethnicity_map`` name for each image, in the same order.
    """
    images = []
    labels = []

    # os.listdir() returns entries in arbitrary order; sorting gives a stable
    # sample order so a seeded train/validation split is reproducible.
    for filename in sorted(os.listdir(dataset_path)):
        if not filename.endswith(".jpg"):
            continue

        parts = filename.split('_')
        if len(parts) < 4:
            continue  # Skip files that don't follow the expected naming pattern

        ethnicity_label = parts[2]  # Ethnicity code is the third field
        if ethnicity_label not in ethnicity_map:
            continue

        img = cv2.imread(os.path.join(dataset_path, filename))
        if img is None:
            # cv2.imread signals an unreadable/corrupt file by returning None
            # rather than raising; resizing None would abort the whole load.
            continue

        img = cv2.resize(img, (48, 48))  # Resize to 48x48
        img = img.astype('float32') / 255.0  # Normalize the image to [0, 1]

        images.append(img)
        labels.append(ethnicity_map[ethnicity_label])  # Use the ethnicity name as label

    return np.array(images), np.array(labels)


In [5]:
images, labels = load_images_from_folder(dataset_path)

# Fail fast with a direct message: with no samples the pipeline would only
# break later (e.g. when splitting), with a less obvious error.
if len(images) == 0:
    raise ValueError(f"No usable images found in {dataset_path!r}")


In [6]:
# Fit the encoder on every name in ethnicity_map instead of only the labels
# that happen to be present in the loaded data. The class indices then always
# span the same set that `num_classes` below is sized for, so a predicted
# index can always be mapped back to a name even if one ethnicity is missing
# from the dataset.
label_encoder = LabelEncoder()
label_encoder.fit(list(ethnicity_map.values()))
labels_encoded = label_encoder.transform(labels)

# One-hot encode the labels
labels_one_hot = to_categorical(labels_encoded, num_classes=len(ethnicity_map))


In [7]:
# Hold out 20% of the samples for validation; the fixed seed makes the split
# repeatable for a given sample order.
X_train, X_val, y_train, y_val = train_test_split(
    images,
    labels_one_hot,
    test_size=0.2,
    random_state=42,
)


In [8]:
def build_nationality_model(input_shape=(48, 48, 3), num_classes=None):
    """Build and compile the CNN used for nationality classification.

    The network is three Conv2D(3x3, ReLU) + MaxPooling2D(2x2) stages with
    32, 64 and 128 filters, followed by Flatten, a 128-unit ReLU Dense layer
    and a softmax output layer.

    Args:
        input_shape: Shape of one input image. Defaults to ``(48, 48, 3)``
            (48x48 with 3 color channels).
        num_classes: Number of output classes. Defaults to
            ``len(ethnicity_map)``.

    Returns:
        The Sequential model, compiled with the Adam optimizer,
        categorical cross-entropy loss and the accuracy metric.
    """
    if num_classes is None:
        num_classes = len(ethnicity_map)

    model = Sequential([
        # Declare the input with an explicit Input object: passing
        # `input_shape=` to the first layer of a Sequential model makes Keras
        # emit a UserWarning when the model is built.
        tf.keras.Input(shape=input_shape),
        Conv2D(32, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Conv2D(128, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Flatten(),
        Dense(128, activation='relu'),
        Dense(num_classes, activation='softmax'),  # Output layer for nationality classification
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [9]:
# Create the compiled (still untrained) CNN returned by build_nationality_model().
nationality_model = build_nationality_model()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [10]:

from tensorflow.keras.callbacks import EarlyStopping

# Stop training once val_loss has failed to improve for 5 consecutive epochs
# and restore the weights from the best epoch.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Keep the History object returned by fit(): it holds the per-epoch metrics
# for later inspection, and assigning it prevents its bare repr from being
# echoed as the cell output.
history = nationality_model.fit(
    X_train,
    y_train,
    epochs=50,  # Upper bound; early stopping may end training sooner
    batch_size=32,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping],
)



Epoch 1/50
[1m593/593[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 42ms/step - accuracy: 0.5007 - loss: 1.2818 - val_accuracy: 0.6826 - val_loss: 0.8829
Epoch 2/50
[1m593/593[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 55ms/step - accuracy: 0.7199 - loss: 0.7977 - val_accuracy: 0.7435 - val_loss: 0.7384
Epoch 3/50
[1m593/593[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 55ms/step - accuracy: 0.7633 - loss: 0.6848 - val_accuracy: 0.7593 - val_loss: 0.6971
Epoch 4/50
[1m593/593[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 63ms/step - accuracy: 0.7884 - loss: 0.6153 - val_accuracy: 0.7669 - val_loss: 0.6830
Epoch 5/50
[1m593/593[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 43ms/step - accuracy: 0.8079 - loss: 0.5618 - val_accuracy: 0.7775 - val_loss: 0.6455
Epoch 6/50
[1m593/593[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 41ms/step - accuracy: 0.8198 - loss: 0.5166 - val_accuracy: 0.7802 - val_loss: 0.6443
Epoch 7/50
[1m5

<keras.src.callbacks.history.History at 0x22b63532fd0>

In [11]:
# evaluate() returns the loss followed by the compiled metric (accuracy).
# NOTE(review): X_val/y_val is the same split that drove early stopping during
# training, so this is not an estimate on independent held-out data.
loss, accuracy = nationality_model.evaluate(X_val, y_val)
print(f"Validation Accuracy: {accuracy*100:.2f}%")


[1m149/149[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 13ms/step - accuracy: 0.7764 - loss: 0.6577
Validation Accuracy: 78.02%


In [12]:
# Persist the trained network to an .h5 file in the current working directory.
# NOTE(review): only the model is saved here; `label_encoder`, which
# predict_nationality_from_image() needs to map class indices back to names,
# is not persisted by this cell.
nationality_model.save('nationality_detection_model.h5')




In [13]:
def predict_nationality_from_image(model, image_path):
    """Predict the nationality label for a single image file.

    Args:
        model: Object exposing ``predict(batch)`` that returns class scores.
        image_path: Path of the image to classify.

    Returns:
        The label obtained by decoding the highest-scoring class index with
        the module-level ``label_encoder``, or ``None`` (after printing an
        error message) when the image cannot be read.
    """
    raw = cv2.imread(image_path)
    if raw is None:
        print(f"Error: Unable to load image at {image_path}")
        return None

    # Resize to 48x48, scale pixel values to [0, 1], then add a leading
    # batch axis so the model receives a batch of one image.
    scaled = cv2.resize(raw, (48, 48)).astype('float32') / 255.0
    batch = scaled[np.newaxis, ...]

    class_index = np.argmax(model.predict(batch))
    return label_encoder.inverse_transform([class_index])[0]

In [15]:
# Sample image from the dataset folder. The path is built from `dataset_path`
# so the dataset location is not repeated as a second hard-coded literal.
# Replace the file name with your own image.
test_image_path = os.path.join(dataset_path, "1_0_0_20170110212733875.jpg.chip.jpg")
predicted_nationality = predict_nationality_from_image(nationality_model, test_image_path)

# The helper returns None when the image could not be read.
if predicted_nationality is not None:
    print(f"The predicted nationality is: {predicted_nationality}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
The predicted nationality is: US
