In [2]:
import os
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics.pairwise import euclidean_distances
from tqdm import tqdm

# Load predefined skin tones dataset
df_skin_tones = pd.read_csv("skin_shades_india.csv")

# Convert HEX to RGB
def hex_to_rgb(hex_code):
    hex_code = hex_code.lstrip('#')  # Remove '#' if present
    return [int(hex_code[i:i+2], 16) for i in (0, 2, 4)]

df_skin_tones['RGB'] = df_skin_tones['HEX Code'].apply(hex_to_rgb)


In [3]:
def extract_skin_region(image):
    """Apply skin detection using HSV color filtering."""
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    
    # Define skin color range in HSV
    lower_skin = np.array([0, 20, 70], dtype=np.uint8)
    upper_skin = np.array([20, 255, 255], dtype=np.uint8)

    # Apply mask to detect skin region
    mask = cv2.inRange(hsv, lower_skin, upper_skin)
    
    # Extract the skin region
    skin = cv2.bitwise_and(image, image, mask=mask)
    return skin

def get_average_skin_color(image):
    """Extract the skin region and compute the average RGB color."""
    skin = extract_skin_region(image)
    
    # Convert to RGB (from BGR)
    skin_rgb = cv2.cvtColor(skin, cv2.COLOR_BGR2RGB)
    
    # Reshape the image to a list of pixels
    pixels = skin_rgb.reshape((-1, 3))
    
    # Remove black pixels (background)
    pixels = pixels[np.all(pixels != [0, 0, 0], axis=1)]
    
    if len(pixels) == 0:
        return None  # No skin detected
    
    # Compute the mean RGB color
    avg_color = np.mean(pixels, axis=0).astype(int)
    return avg_color.tolist()


In [4]:
dataset_path = "./train"  # Update with your dataset path

X = []  # Features (RGB values)
y = []  # Labels (Skin tone class)
hex_codes = []  # Corresponding HEX codes

# Iterate through all images in the dataset
for label in os.listdir(dataset_path):  # Assuming folders are named by skin tone
    label_path = os.path.join(dataset_path, label)
    
    if not os.path.isdir(label_path):
        continue  # Skip if it's not a folder
    
    for img_name in tqdm(os.listdir(label_path)):
        img_path = os.path.join(label_path, img_name)
        
        # Read image
        img = cv2.imread(img_path)
        if img is None:
            continue  # Skip unreadable images
        
        # Get average skin color
        avg_color = get_average_skin_color(img)
        if avg_color is None:
            continue  # Skip images where no skin is detected
        
        # Find the closest predefined skin tone
        distances = euclidean_distances([avg_color], np.array(df_skin_tones['RGB'].tolist()))
        closest_index = np.argmin(distances)
        
        # Get corresponding skin tone and HEX code
        skin_tone_class = df_skin_tones.iloc[closest_index]['Skin Shade Description']
        hex_code = df_skin_tones.iloc[closest_index]['HEX Code']
        
        # Append to dataset
        X.append(avg_color)
        y.append(skin_tone_class)
        hex_codes.append(hex_code)

# Convert lists to numpy arrays
X = np.array(X)
y = np.array(y)
hex_codes = np.array(hex_codes)


100%|██████████| 500/500 [00:17<00:00, 28.47it/s]
100%|██████████| 500/500 [00:18<00:00, 27.41it/s]
100%|██████████| 500/500 [00:16<00:00, 31.14it/s]


In [5]:
# Encode labels as numbers
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split dataset into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train Random Forest model
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# Evaluate model
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy * 100:.2f}%")


Model Accuracy: 90.33%


In [6]:
def predict_skin_tone(image_path):
    img = cv2.imread(image_path)
    avg_color = get_average_skin_color(img)
    
    if avg_color is None:
        return "No skin detected", None
    
    # Predict skin tone class
    predicted_label = clf.predict([avg_color])[0]
    predicted_skin_tone = label_encoder.inverse_transform([predicted_label])[0]
    
    # Get closest HEX code
    distances = euclidean_distances([avg_color], np.array(df_skin_tones['RGB'].tolist()))
    closest_index = np.argmin(distances)
    predicted_hex_code = df_skin_tones.iloc[closest_index]['HEX Code']
    
    return predicted_skin_tone, predicted_hex_code

# Test on a new image
image_path = "./train/Brown/10_0_1_20170110223455893.jpg.chip.jpg"
predicted_tone, predicted_hex = predict_skin_tone(image_path)
print(f"Predicted Skin Tone: {predicted_tone}")
print(f"Predicted HEX Code: {predicted_hex}")


Predicted Skin Tone: Medium Skin
Predicted HEX Code: #D19A6A


In [7]:
import joblib

# Save the trained model
joblib.dump(clf, "skin_tone_model.pkl")

# Save the label encoder as well
joblib.dump(label_encoder, "label_encoder.pkl")


['label_encoder.pkl']