# Quickend

## 0) Initial Setup

We start by loading the required packages.

In [None]:
# !pip install tensorflow
import numpy as np
import pandas as pd
import json
import os
from tqdm import tqdm

from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix

from PIL import Image, ImageFilter,ImageEnhance

import tensorflow as tf
import tensorflow.keras as K
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications.vgg19 import VGG19


import tensorflow_datasets as tfds
from tensorflow.keras.utils import image_dataset_from_directory

## 1) Set paths to data & models

In [None]:
# Mount Google Drive inside the Colab runtime; the score file written at the
# end of this notebook lives under this mount point.
from google.colab import drive

GDRIVE_MOUNT_POINT = '/content/gdrive'
drive.mount(GDRIVE_MOUNT_POINT)

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [None]:
# Root folder plus the sub-folders for input data and saved models
base = '.'
LOADPATH = f'{base}/data/'
SAVEPATH = f'{base}/models/'

# Attributes to predict, with one LabelBinarizer per attribute
cat = ['skin_tone', 'age', 'gender']
lbs = [LabelBinarizer() for _ in cat]

# Image height/width used when loading images and building the VGG19 input
length = 224
width = 224

## 2) Load Models

In [None]:
def initializeModel():
    """Build a frozen VGG19 base wrapped in a Sequential model.

    The base is created without its classification top, with ImageNet
    weights, for inputs of shape (length, width, 3) read from the
    notebook-level `length` and `width`.

    Returns:
        A `K.models.Sequential` whose only layer is the frozen VGG19 base.
    """
    input_tensor = K.Input(shape=[length, width, 3])
    backbone = VGG19(include_top=False, weights='imagenet', input_tensor=input_tensor)

    # Freeze every layer of the pretrained base.
    for frozen_layer in backbone.layers:
        frozen_layer.trainable = False

    wrapper = K.models.Sequential()
    wrapper.add(backbone)
    return wrapper


vgg_model = initializeModel()

In [None]:
class PredictionModel():
    """Combine a shared feature extractor with one saved classifier per attribute.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self, vgg_model, model_dir='./models/'):
        """Store the feature extractor and load one saved model per category.

        Args:
            vgg_model: Object exposing ``predict``; its output is fed to every
                per-category model.
            model_dir: Directory holding the ``model_<category>5.h5`` files.
                Defaults to the location the notebook originally hard-coded.
        """
        self.vgg_model = vgg_model

        # Categories handled by this predictor; also fixes the order of
        # self.models and of the list returned by predict().
        self.cat = ['skin_tone', 'age', 'gender']
        # Size the per-category settings from self.cat rather than from the
        # notebook-level `cat` global, so the class does not depend on it.
        self.loss = ['categorical_crossentropy' for _ in self.cat]
        self.metrics = [['accuracy'] for _ in self.cat]

        # Load the checkpoints in category order so models[i] always
        # corresponds to cat[i].
        self.models = [
            tf.keras.models.load_model(os.path.join(model_dir, f'model_{name}5.h5'))
            for name in self.cat
        ]

    def predict(self, newX, prepped=False):
        """Return one prediction per loaded model, in ``self.models`` order.

        Args:
            newX: Input passed to ``vgg_model.predict`` first, or directly to
                the per-category models when ``prepped`` is true.
            prepped: Set to True when ``newX`` has already been run through
                the feature extractor.

        Returns:
            A list with the ``predict`` output of each model in ``self.models``.
        """
        if not prepped:
            newX = self.vgg_model.predict(newX)
        return [model.predict(newX) for model in self.models]

In [None]:
# Build the predictor around the frozen VGG19 feature extractor created above.
mymodel = PredictionModel(vgg_model)

## 3) Predict Test Data

In [None]:
# Locations of the test images and of the table holding their labels
TESTPATH = base + '/data/test/'
df_test = pd.read_csv(base+'/test_labels.csv')

# Encode each label column as an indicator matrix.
# NOTE(review): the binarizers are fitted on the *test* labels here, so their
# `classes_` order only matches the model outputs if the test set contains the
# same classes the models were trained on — confirm.
print("Converting test labels to np array")
testY = []
for column, binarizer in zip(cat, lbs):
    encoded = binarizer.fit_transform(df_test[column])
    # A single-column encoding (binary label) is expanded to two columns:
    # the complement first, then the original column.
    if encoded.shape[1] == 1:
        encoded = np.hstack((1 - encoded, encoded))
    testY.append(encoded)

# Read every image listed in the 'name' column, resized to (length, width)
print("Loading test images")
nt = df_test.shape[0]
all_imgs = [
    image.load_img(TESTPATH + file_name, target_size=(length, width))
    for file_name in df_test['name']
]

# Copy the images into one array and apply the VGG19 input preprocessing.
# NOTE(review): dtype=float allocates a float64 buffer; float32 would halve the
# memory but may change results in the last digits — confirm before switching.
print("Converting test images to np array")
testX = np.empty([nt, length, width, 3], dtype=float)
for row, pil_img in enumerate(all_imgs):
    testX[row, :] = image.img_to_array(pil_img)
testX = K.applications.vgg19.preprocess_input(testX)

Converting test labels to np array
Loading test images
Converting test images to np array


In [None]:
# Run the full pipeline on the test images: one prediction array per attribute
pred = mymodel.predict(testX)
nt = len(df_test)

# Index of the highest-scoring class for every test image, per attribute
# (assumes each prediction array is 2-D: one row per image — TODO confirm)
predY = [list(np.argmax(pred[k], axis=1)) for k in range(len(cat))]

# Translate class indices back to the label values held by each binarizer
predLabels = [
    [binarizer.classes_[class_idx] for class_idx in indices]
    for binarizer, indices in zip(lbs, predY)
]



## 4) Score Model

We now calculate the accuracy and disparity of the predictions for each attribute and combine them into the score used to evaluate your submission. This score will be displayed on the public leaderboard.

In [None]:
# Accuracy of the predicted labels, keyed by attribute name
acc = {
    attribute: accuracy_score(df_test[attribute], predicted)
    for attribute, predicted in zip(cat, predLabels)
}

# calculate disparity
def disparity_score(ytrue, ypred):
    """Return the gap between the best and worst per-class accuracy.

    For every class present in ``ytrue`` the fraction of its samples that
    were predicted correctly is computed; the score is the maximum of these
    fractions minus the minimum.

    Only classes that occur in ``ytrue`` are considered. A label that shows
    up solely in ``ypred`` has no true samples, so a per-class accuracy for
    it would be 0/0 (NaN) and would corrupt the max/min.

    Args:
        ytrue: Sequence of true labels.
        ypred: Sequence of predicted labels, same length as ``ytrue``.

    Returns:
        The disparity as a float in [0, 1].

    Raises:
        ValueError: If the inputs are empty or differ in length.
    """
    true_labels = np.asarray(ytrue)
    pred_labels = np.asarray(ypred)
    if true_labels.shape[0] != pred_labels.shape[0]:
        raise ValueError("ytrue and ypred must have the same length")
    if true_labels.shape[0] == 0:
        raise ValueError("cannot compute disparity on empty input")

    per_class_acc = []
    for label in np.unique(true_labels):
        members = true_labels == label
        # Share of this class's samples whose prediction equals the class.
        per_class_acc.append(np.mean(pred_labels[members] == label))
    return max(per_class_acc) - min(per_class_acc)

# Disparity of the predicted labels, keyed by attribute name
disp = {
    attribute: disparity_score(df_test[attribute], predicted)
    for attribute, predicted in zip(cat, predLabels)
}

# Bundle both metric tables; the trailing expression displays them in the notebook
results = {'accuracy': acc, 'disparity': disp}
results

{'accuracy': {'skin_tone': 0.5106666666666667,
  'age': 0.7256666666666667,
  'gender': 0.8943333333333333},
 'disparity': {'skin_tone': 0.6515679442508711,
  'age': 0.23909145088551076,
  'gender': 0.06804756425009595}}

In [None]:
def getScore(results):
    """Combine per-attribute accuracy and disparity into a single score.

    Each attribute contributes ``weight * accuracy * (1 - disparity ** power)``
    with (weight, power) of (2, 1) for gender, (4, 2) for age and (10, 5)
    for skin_tone.

    Args:
        results: Dict with 'accuracy' and 'disparity' sub-dicts, each keyed
            by 'gender', 'age' and 'skin_tone'.

    Returns:
        The sum of the three weighted terms.
    """
    accuracy = results['accuracy']
    disparity = results['disparity']

    gender_term = 2 * accuracy['gender'] * (1 - disparity['gender'])
    age_term = 4 * accuracy['age'] * (1 - disparity['age'] ** 2)
    skin_tone_term = 10 * accuracy['skin_tone'] * (1 - disparity['skin_tone'] ** 5)

    return gender_term + age_term + skin_tone_term
    
# Final payload: the overall score together with the metrics it was computed from
submission = dict(score=getScore(results), metrics=results)
submission

{'score': 8.910652680775119,
 'metrics': {'accuracy': {'skin_tone': 0.5106666666666667,
   'age': 0.7256666666666667,
   'gender': 0.8943333333333333},
  'disparity': {'skin_tone': 0.6515679442508711,
   'age': 0.23909145088551076,
   'gender': 0.06804756425009595}}}

In [None]:
# Save the submission as indented JSON under the mounted Google Drive
SCORE_FILE = "/content/gdrive/MyDrive/Richard/Stressed/BiasBounty/rilu979_score.json"
with open(SCORE_FILE, "w") as out_file:
    out_file.write(json.dumps(submission, indent=4))