# Multitask Age / Gender / Ethnicity Model (On-Device Pipeline)
This notebook trains a multi-head CNN on the Kaggle Age-Gender-Ethnicity face dataset CSV (pixels as 48x48 grayscale).
Heads: (1) Age bucket classification, (2) Gender classification, (3) Ethnicity classification.
Output: single TFLite model suitable for on-device inference in Flutter.

## Ethical & Bias Advisory
Ethnicity classification can amplify societal biases; deployment MUST be *explicitly opt-in* and clearly disclosed. Provide UI disclaimers, never use for sensitive decisions, and allow instant disable. Consider *not* exposing ethnicity unless a compelling, user-beneficial feature exists (e.g., adaptive lighting).

## Dataset Expectations
CSV columns: `age`, `gender` (0=Male,1=Female), `ethnicity` (0..4), `pixels` (space-separated 2304 grayscale values).
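Place the CSV at `assets/models/age_gender_ethnicity.csv` before running. The loading cell reads it from `../assets/models/age_gender_ethnicity.csv`, i.e. one directory above the notebook's working directory; adjust `CSV_PATH` if your layout differs.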

In [4]:
import os, numpy as np, pandas as pd, tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
CSV_PATH = '../assets/models/age_gender_ethnicity.csv'  # adjust if needed
# Stop right away with a readable message when the dataset file is missing.
assert os.path.exists(CSV_PATH), f'CSV not found at {CSV_PATH}'
df = pd.read_csv(CSV_PATH)
print(df.head())
print(f'Rows: {len(df)}')
# Summary statistics for the three label columns
label_columns = ['age', 'gender', 'ethnicity']
label_stats = df[label_columns].describe()
print(label_stats)

ModuleNotFoundError: No module named 'numpy'

In [5]:
# Decode the CSV's pixel strings into normalized image tensors
IMG_H, IMG_W = 48, 48
def parse_pixels(pstr):
    """Turn one space-separated pixel string into an (IMG_H, IMG_W, 3) float32 image.

    The values are parsed as uint8, scaled by 1/255, and the single grayscale
    plane is copied into three identical channels.

    Raises:
        AssertionError: if the string does not hold exactly IMG_H*IMG_W values.
    """
    flat = np.fromstring(pstr, sep=' ', dtype=np.uint8)
    assert flat.size == IMG_H*IMG_W, 'Unexpected pixel length'
    gray = flat.astype('float32').reshape(IMG_H, IMG_W) / 255.0
    # Add a trailing channel axis and repeat it so all three channels carry the same plane.
    return np.repeat(gray[:, :, np.newaxis], 3, axis=-1)
# Stack every parsed image into one array with the sample index as the first axis.
X = np.stack([parse_pixels(p) for p in df['pixels']], axis=0)
ages_raw = df['age'].values
# Age buckets (coarse): [..,13) [13,19) [19,30) [30,50) [50,..)
age_bins = [0,13,19,30,50,120]
# Digitize against the interior edges only. Using the full edge list and
# subtracting 1 maps ages >= 120 to 5 and negative ages to -1, which the
# 5-way age head cannot represent; with interior edges those ages land in
# the outermost buckets, and every age in [0, 120) keeps its previous bucket.
age_bucket = np.digitize(ages_raw, age_bins[1:-1])  # always 0..4
gender = df['gender'].values.astype('int32')  # 0/1
ethnicity = df['ethnicity'].values.astype('int32')  # 0..4
print('Shapes:', X.shape, age_bucket.shape, gender.shape, ethnicity.shape)

NameError: name 'np' is not defined

In [None]:
# Hold out 15% of the samples for validation, stratified on the age bucket
from sklearn.model_selection import train_test_split
split_arrays = train_test_split(
    X, age_bucket, gender, ethnicity,
    test_size=0.15,
    random_state=42,
    stratify=age_bucket,
)
# train_test_split returns a (train, val) pair per input array, in input order.
(X_train, X_val,
 age_tr, age_val,
 gen_tr, gen_val,
 eth_tr, eth_val) = split_arrays
print(f'Train size: {X_train.shape} Val size: {X_val.shape}')

In [6]:
# Data pipeline with simple augmentation
BATCH = 128
train_ds = tf.data.Dataset.from_tensor_slices((X_train, age_tr, gen_tr, eth_tr))
val_ds = tf.data.Dataset.from_tensor_slices((X_val, age_val, gen_val, eth_val))

def augment(img, a, g, e):
    """Randomly flip and brighten one training image; the labels pass through unchanged."""
    img = tf.image.random_flip_left_right(img)
    img = tf.image.random_brightness(img, 0.2)
    # The brightness shift can push pixels outside [0, 1]; clamp so augmented
    # images stay in the same range as the un-augmented validation images.
    img = tf.clip_by_value(img, 0.0, 1.0)
    return img, a, g, e

def to_keras_sample(img, a, g, e):
    """Repack a flat (img, age, gender, ethnicity) element as (inputs, targets).

    Keras reads dataset elements as (x, y) or (x, y, sample_weight), so a flat
    4-tuple cannot be fed to fit/evaluate. The targets dict is keyed by the
    model's output layer names so each label reaches the matching head.
    """
    return img, {'age_out': a, 'gender_out': g, 'ethnicity_out': e}

train_ds = (
    train_ds.shuffle(2048)
    .map(augment)
    .map(to_keras_sample)
    .batch(BATCH)
    .prefetch(tf.data.AUTOTUNE)
)
# No augmentation on validation data: only repack, batch and prefetch.
val_ds = val_ds.map(to_keras_sample).batch(BATCH).prefetch(tf.data.AUTOTUNE)

NameError: name 'tf' is not defined

In [7]:
# Model architecture: a small shared convolutional trunk feeding three softmax heads
inputs = keras.Input(shape=(48, 48, 3))

# Shared feature extractor, applied in list order.
trunk_layers = [
    layers.Conv2D(32, 3, activation='relu'),
    layers.Conv2D(32, 3, activation='relu'),
    layers.MaxPool2D(),
    layers.Conv2D(64, 3, activation='relu'),
    layers.MaxPool2D(),
    layers.Conv2D(128, 3, activation='relu'),
    layers.GlobalAveragePooling2D(),
]
x = inputs
for layer in trunk_layers:
    x = layer(x)
# Dropout on the pooled features; all three heads branch from here.
base = layers.Dropout(0.3)(x)

# Age head: 5 classes
age_head = layers.Dense(64, activation='relu')(base)
age_out = layers.Dense(5, activation='softmax', name='age_out')(age_head)

# Gender head: 2 classes
gender_head = layers.Dense(32, activation='relu')(base)
gender_out = layers.Dense(2, activation='softmax', name='gender_out')(gender_head)

# Ethnicity head: 5 classes
eth_head = layers.Dense(64, activation='relu')(base)
eth_out = layers.Dense(5, activation='softmax', name='ethnicity_out')(eth_head)

model = keras.Model(inputs, [age_out, gender_out, eth_out])
model.summary()

NameError: name 'keras' is not defined

In [8]:
# Every head uses the same loss and metric, keyed by its output layer name.
output_names = ('age_out', 'gender_out', 'ethnicity_out')
losses = {name: 'sparse_categorical_crossentropy' for name in output_names}
metrics = {name: 'accuracy' for name in output_names}

optimizer = keras.optimizers.Adam(1e-3)
model.compile(optimizer=optimizer, loss=losses, metrics=metrics)

EPOCHS = 25
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=EPOCHS,
)

NameError: name 'model' is not defined

In [9]:
# Evaluate on the validation dataset.
# return_dict=True labels every value with its loss/metric name, so the
# printed result is readable without knowing the order of a bare list.
eval_res = model.evaluate(val_ds, return_dict=True)
print('Eval:', eval_res)

NameError: name 'model' is not defined

In [10]:
# TFLite conversion
converter = tf.lite.TFLiteConverter.from_keras_model(model)
# Optimize.DEFAULT with no representative dataset applies post-training
# dynamic-range quantization; this is not quantization-aware training.
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model = converter.convert()
# Use a context manager so the file is flushed and closed even if the write fails.
with open('../assets/models/age_gender_ethnicity.tflite', 'wb') as tflite_file:
    tflite_file.write(tflite_model)
print('Saved TFLite model size (KB):', len(tflite_model)/1024)

NameError: name 'tf' is not defined