## Analyzing Fairness of the Model
**NOTE:** Feel free to change the code template. 

In [1]:
import cv2
import os
import numpy as np
import tensorflow as tf

from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.applications.inception_v3 import InceptionV3

In [2]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [3]:
!tar -xvf "/content/gdrive/MyDrive/UTKface_inthewild/part1.tar.gz"

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
part1/14_0_2_20170103201051263.jpg
part1/30_1_0_20170104170158777.jpg
part1/62_0_0_20170104212928133.jpg
part1/27_1_0_20170103230354912.jpg
part1/65_1_0_20170110123252108.jpg
part1/60_1_0_20170110151441645.jpg
part1/44_1_3_20170109141426511.jpg
part1/62_0_3_20170104220837477.jpg
part1/25_1_0_20170111182452788.jpg
part1/10_1_0_20170109204244904.jpg
part1/27_1_0_20170105183939496.jpg
part1/1_0_2_20161219212557190.jpg
part1/9_0_0_20170110221659430.jpg
part1/67_1_0_20170110140730634.jpg
part1/62_1_0_20170110160643751.jpg
part1/1_1_3_20161220220534186.jpg
part1/39_0_2_20170104204400674.jpg
part1/27_1_3_20170104223400455.jpg
part1/37_0_4_20170104000748917.jpg
part1/2_0_2_20161219162357438.jpg
part1/2_0_2_20161219222714623.jpg
part1/52_0_2_20170104184356222.jpg
part1/60_1_0_20170110122626700.jpg
part1/32_1_0_20170103181503793.jpg
part1/41_0_0_20170109012220690.jpg
part1/1_0_4_20161221201411850.jpg
part1/60_0_0_20170111203255875.

In [4]:
path= '/content/gdrive/MyDrive/part1'

In [5]:
# Function to check if a string can be converted to an integer
def is_integer(string):
    try:
        int(string)
        return True
    except ValueError:
        return False

# Define function to load UTKFace dataset
def load_dataset(path):
    data = []
    labels = []
    for image in os.listdir(path):
        img_path = os.path.join(path, image)
        if img_path.endswith('.jpg'):
            img = cv2.imread(img_path)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = cv2.resize(img, (100, 100)) 
            label = (image.split('/')[-1].split('_')[2])
            # Check if the extracted label is an integer
            if is_integer(label):
                label = int(label)
                data.append(img)
                labels.append(label)
    return np.array(data), np.array(labels)

# Define function to split dataset into train and test sets
def split_train_test(X_train, y_train):
    test_size = 0.2
    return train_test_split(X_train, y_train, test_size=test_size, random_state=42)

# Define function to load InceptionV3 model
def load_model():
    return InceptionV3(weights='imagenet', include_top=False, input_shape=(100, 100, 3))

In [6]:
# Load UTKFace dataset
X_train, y_train = load_dataset('part1')

# Preprocess image data and labels
X_train = X_train.astype('float32') / 255.0
y_train = tf.keras.utils.to_categorical(y_train, num_classes=5)

X_train, X_val, y_train, y_val = split_train_test(X_train, y_train)

# Load InceptionV3 model with custom classifier
base_model = load_model()
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
predictions = Dense(5, activation='softmax')(x)
model = Model(inputs=base_model.input, outputs=predictions)

# Define hyperparameters
loss_func = 'categorical_crossentropy'
optim = 'SGD'
batch_size = 20
epochs = 15

# Compile and train the model
model.compile(optimizer=optim, loss=loss_func, metrics=['accuracy'])
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, validation_data=(X_val, y_val))

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x7f34ee55dc60>

In [7]:
# Load the validation set and preprocess it
X_val, y_val = load_dataset('part1')
X_val = X_val.astype('float32') / 255.0
y_val = tf.keras.utils.to_categorical(y_val, num_classes=5)

# Make predictions using the trained model
y_pred = model.predict(X_val)

# Convert the one-hot encoded vectors to integer labels
y_val_labels = np.argmax(y_val, axis=1)
y_pred_labels = np.argmax(y_pred, axis=1)

# Initialize an empty dictionary to store the results
results_dict = {}

# Compute the number of positive/negative predictions for each race and store in the dictionary
races = ['White', 'Black', 'Asian', 'Indian', 'Others']
for i, race in enumerate(races):
    race_indices = np.where(y_val_labels == i)[0]
    race_predictions = y_pred_labels[race_indices]
    num_positives = np.sum(race_predictions == i)
    num_negatives = len(race_indices) - num_positives
    results_dict[race] = {'positive': num_positives, 'negative': num_negatives}

print(results_dict)

{'White': {'positive': 4423, 'negative': 973}, 'Black': {'positive': 381, 'negative': 41}, 'Asian': {'positive': 1553, 'negative': 150}, 'Indian': {'positive': 1196, 'negative': 297}, 'Others': {'positive': 953, 'negative': 168}}


In [8]:
# Evaluate the 'statistical parity' of the model by evaluating the conditional probability of positive predictions across each race.
total_positives = np.sum([results_dict[race]['positive'] for race in races])
total_negatives = np.sum([results_dict[race]['negative'] for race in races])
for race in races:
    positive_prob = results_dict[race]['positive'] / (results_dict[race]['positive'] + results_dict[race]['negative'])
    print(f"Race: {race}, Probability of positive prediction: {positive_prob}")

Race: White, Probability of positive prediction: 0.8196812453669384
Race: Black, Probability of positive prediction: 0.9028436018957346
Race: Asian, Probability of positive prediction: 0.9119201409277745
Race: Indian, Probability of positive prediction: 0.8010716677829873
Race: Others, Probability of positive prediction: 0.8501338090990187


Since the probability of positive prediction doesn't differ much among the races, so we can conclude that statistical parity is present.

In [9]:
# Evaluate the 'equalized odds' of the model by evaluating the conditional probability of positive predictions across each race.
for race in races:
    race_indices = np.where(y_val_labels == races.index(race))[0]
    race_predictions = y_pred_labels[race_indices]
    num_true_positives = np.sum((race_predictions == races.index(race)) & (y_val_labels[race_indices] == races.index(race)))
    num_false_positives = np.sum((race_predictions == races.index(race)) & (y_val_labels[race_indices] != races.index(race)))
    true_positive_prob = num_true_positives / (num_true_positives + num_false_positives)
    print(f"Race: {race}, Probability of true positive prediction: {true_positive_prob}")

Race: White, Probability of true positive prediction: 1.0
Race: Black, Probability of true positive prediction: 1.0
Race: Asian, Probability of true positive prediction: 1.0
Race: Indian, Probability of true positive prediction: 1.0
Race: Others, Probability of true positive prediction: 1.0


So, equalized odds value of 1 means that the model achieves perfect equality in the rates of true positive (TPR) and false positive (FPR) predictions across different races.

In [10]:
##TODO: Evaluate the 'calibration' of the model by evaluating the conditional probability of positive predictions across each race.
for race in races:
    race_indices = np.where(y_val_labels == i)[0]
    race_predictions = y_pred_labels[race_indices]
    num_positives = np.sum(race_predictions == i)
    positive_prob = num_positives / len(race_indices)
    print(f"Race: {race}, Probability of positive prediction: {positive_prob}")

Race: White, Probability of positive prediction: 0.8501338090990187
Race: Black, Probability of positive prediction: 0.8501338090990187
Race: Asian, Probability of positive prediction: 0.8501338090990187
Race: Indian, Probability of positive prediction: 0.8501338090990187
Race: Others, Probability of positive prediction: 0.8501338090990187


A calibration value of 0.85: suggests that the model's predicted probabilities are somewhat underconfident or slightly overconfident compared to the actual probabilities of the predicted outcomes.