In [None]:
# Load the TensorBoard notebook extension, then start TensorBoard pointed at
# the "logs" directory (the same directory the training cells write their
# per-run TensorBoard callback output into).
%load_ext tensorboard
%tensorboard --logdir logs

In [None]:
import datetime, os
import numpy as np
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from tensorflow.keras import optimizers
from tensorflow.keras import preprocessing
import pandas as pd
from PIL import Image
from keras.utils import load_img
from keras.utils import img_to_array
# I don't think we need this function
# from keras.utils import array_to_img

In [None]:
def load_grayscale_images(file_paths):
    """Read every image in ``file_paths`` as an 8-bit grayscale NumPy array.

    Parameters
    ----------
    file_paths : iterable of str
        Paths of the image files, resolved relative to the working directory.

    Returns
    -------
    list of numpy.ndarray
        One 2-D array per image, in the same order as ``file_paths``.
    """
    images = []
    for path in file_paths:
        # The context manager closes the underlying file as soon as the pixel
        # data has been converted; convert("L") returns an independent image.
        with Image.open(path) as image:
            images.append(np.asarray(image.convert("L")))
    return images


# Small training subset used for quick experiments.
# NOTE(review): iloc[1:200] keeps rows 1..199 and skips row 0 -- confirm this is
# intended rather than iloc[:200].
# NOTE(review): the models below expect 32x32 inputs; this assumes the files on
# disk are already that size (no resize happens here) -- TODO confirm.
training_csv = pd.read_csv('fairface_label_train.csv')
training_csv = training_csv.iloc[1:200]
training_csv = training_csv.reset_index()
training_images = load_grayscale_images(training_csv['file'])

# Matching validation subset (rows 1..49 of the validation label file).
testing_csv = pd.read_csv('fairface_label_val.csv')
testing_csv = testing_csv.iloc[1:50]
testing_csv = testing_csv.reset_index()
testing_images = load_grayscale_images(testing_csv['file'])

In [None]:
def min_max_scale_images(images):
    """Min-max scale each image to [0, 1] using that image's own min and max.

    Scaling is per image (not a global division by 255), as required by the
    assignment discussion.

    Parameters
    ----------
    images : sequence of array-like
        Grayscale images, one 2-D array per image.

    Returns
    -------
    numpy.ndarray
        Stacked float64 array with one scaled image per entry.
    """
    scaled = []
    for image in images:
        pixels = np.asarray(image, dtype=np.float64)
        lowest = pixels.min()
        value_range = pixels.max() - lowest
        if value_range == 0:
            # A constant image has no contrast to stretch; map it to all zeros
            # instead of dividing by zero and filling the array with NaN.
            scaled.append(np.zeros_like(pixels))
        else:
            scaled.append((pixels - lowest) / value_range)
    return np.array(scaled)


# Scale the training and testing images the same way.
training_images = min_max_scale_images(training_images)
testing_images = min_max_scale_images(testing_images)

In [None]:
# Build the one-hot label arrays used for training / validation.

def one_hot_gender(gender_column):
    """One-hot encode gender labels as [Female, not-Female] rows.

    A label counts as female when it contains the substring "Female"; every
    other label sets the second column.

    Parameters
    ----------
    gender_column : iterable of str
        Gender labels, one per sample.

    Returns
    -------
    numpy.ndarray
        Float array of shape (num_samples, 2).
    """
    # Iterate over values (not index labels) so the result does not depend on
    # the DataFrame having a 0..n-1 index.
    is_female = np.array(["Female" in label for label in gender_column], dtype=bool)
    encoded = np.zeros((len(is_female), 2))
    encoded[is_female, 0] = 1
    encoded[~is_female, 1] = 1
    return encoded


def one_hot_labels(label_column, categories):
    """One-hot encode ``label_column`` against the ordered ``categories`` list.

    Parameters
    ----------
    label_column : iterable
        Labels, one per sample; each must appear in ``categories``.
    categories : list
        Ordered class labels; position in this list is the one-hot column.

    Returns
    -------
    numpy.ndarray
        Float array of shape (num_samples, len(categories)).
    """
    column_of = {label: position for position, label in enumerate(categories)}
    columns = [column_of[label] for label in label_column]
    encoded = np.zeros((len(columns), len(categories)))
    encoded[np.arange(len(columns)), columns] = 1
    return encoded


#Gender
genders = one_hot_gender(training_csv["gender"])
gendersVal = one_hot_gender(testing_csv["gender"])

#Race
# Classes are ordered by first appearance in the training labels. Any label
# that occurs only in the validation set is appended afterwards, so encoding
# the validation labels can no longer fail on an unseen class.
racs = list(pd.unique(training_csv["race"]))
for label in pd.unique(testing_csv["race"]):
    if label not in racs:
        racs.append(label)
numRaces = len(racs)

races = one_hot_labels(training_csv["race"], racs)
racesVal = one_hot_labels(testing_csv["race"], racs)

## Task 1: Fully Connected Neural Network
##### 1. Build a feed forward neural network with the following specs (for 2 separate tasks)
###### a. Hidden layer 1: 1024 neurons with tanh activation function in each 
###### b. Hidden layer 2: 512 neurons with sigmoid activation function in each 
###### c. Hidden layer 3: 100 neurons with ReLU activation function in each
###### d. Output layer: n (depending on the task) representing n classes, using softmax activation

In [None]:
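# Template for the Task 1 network (kept for reference; the task cells below build the real models).
# fmodel = Sequential()
# fmodel.add(keras.Input(shape=(32,32,1)))
# fmodel.add(layers.Flatten())  # flatten the image before the Dense layers, as the task cells do
# fmodel.add(layers.Dense(1024, activation='tanh'))
# fmodel.add(layers.Dense(512, activation='sigmoid'))
# fmodel.add(layers.Dense(100, activation='relu'))
# # find n for the task, add this layer
# fmodel.add(layers.Dense(n, activation='softmax'))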

### Task 1, Classification Task A: Gender

In [None]:
# Task 1A: fully connected gender classifier.
# Architecture: flatten -> 1024 tanh -> 512 sigmoid -> 100 relu -> n-way softmax.
n = 2
famodel = Sequential([
    keras.Input(shape=(32, 32, 1)),
    layers.Flatten(),
    layers.Dense(units=1024, activation='tanh'),
    layers.Dense(units=512, activation='sigmoid'),
    layers.Dense(units=100, activation='relu'),
    layers.Dense(units=n, activation='softmax'),
])

# The inputs are already min-max scaled. Train with mini-batch gradient descent
# on categorical cross entropy; per-epoch accuracy and loss for the training and
# validation sets are recorded through the TensorBoard callback.
# Assignment note: the confusion matrix should be graphed as well.
opt = keras.optimizers.SGD(learning_rate=0.01)
famodel.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

# One timestamped log directory per run so TensorBoard keeps runs separate.
logdir = os.path.join(
    "logs",
    datetime.datetime.now().strftime("%Y%m%d-%H%M%S_Task1Genders"),
)
tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir, histogram_freq=1)

famodel.fit(
    training_images,
    genders,
    batch_size=10,
    epochs=50,
    validation_data=(testing_images, gendersVal),
    callbacks=[tensorboard_callback],
)

### Task 1, Classification Task B: Race

In [None]:
# Task 1B: fully connected race classifier.
# Architecture: flatten -> 1024 tanh -> 512 sigmoid -> 100 relu -> n-way softmax,
# where n is the number of race classes found when the labels were encoded.
n = numRaces
fbmodel = Sequential([
    keras.Input(shape=(32, 32, 1)),
    layers.Flatten(),
    layers.Dense(units=1024, activation='tanh'),
    layers.Dense(units=512, activation='sigmoid'),
    layers.Dense(units=100, activation='relu'),
    layers.Dense(units=n, activation='softmax'),
])

# The inputs are already min-max scaled. Train with mini-batch gradient descent
# on categorical cross entropy; per-epoch accuracy and loss for the training and
# validation sets are recorded through the TensorBoard callback.
# Assignment note: the confusion matrix should be graphed as well.
opt = keras.optimizers.SGD(learning_rate=0.01)
fbmodel.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

# One timestamped log directory per run so TensorBoard keeps runs separate.
logdir = os.path.join(
    "logs",
    datetime.datetime.now().strftime("%Y%m%d-%H%M%S_Task1Races"),
)
tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir, histogram_freq=1)

# Keep model construction and training in this one cell: if fit() lived in its
# own cell it could be re-run by accident, stacking extra training onto an
# already-trained model.
fbmodel.fit(
    training_images,
    races,
    batch_size=10,
    epochs=100,
    validation_data=(testing_images, racesVal),
    callbacks=[tensorboard_callback],
)

## Task 2: Convolutional Network
#### 2. Build a convolutional network (also for 2 tasks)
###### a. conv layer with 40 feature detectors, kernel size 5x5, ReLU activation, stride=1, no padding
###### b. max pooling layer size 2x2
###### c. fully connected layer, 100 neurons, ReLU activation
###### d. output layer: n neurons (depending on task), using softmax activation

In [None]:
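# Template for the Task 2 network (kept for reference; the task cells below build the real models).
# cmodel = Sequential()
# cmodel.add(layers.Conv2D(filters=40, kernel_size=(5,5), activation='relu', input_shape=(32,32,1), strides=1, padding='valid'))
# cmodel.add(layers.MaxPooling2D(pool_size=2, strides=2, padding='valid'))
# cmodel.add(layers.Flatten())  # flatten the feature maps before the Dense layers, as the task cells do
# cmodel.add(layers.Dense(100, activation='relu'))
# #find n for the task
# cmodel.add(layers.Dense(n, activation='softmax'))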

### Task 2, Classification Task A: Gender

In [None]:
# Task 2A: convolutional gender classifier.
# Architecture: 40 5x5 conv filters (relu, stride 1, no padding) -> 2x2 max pool
# -> flatten -> 100 relu -> n-way softmax.
n = 2
camodel = Sequential([
    layers.Conv2D(
        filters=40,
        kernel_size=(5, 5),
        strides=1,
        padding='valid',
        activation='relu',
        input_shape=(32, 32, 1),
    ),
    layers.MaxPooling2D(pool_size=2, strides=2, padding='valid'),
    layers.Flatten(),
    layers.Dense(units=100, activation='relu'),
    layers.Dense(units=n, activation='softmax'),
])

# Plain SGD on categorical cross entropy, tracking accuracy.
opt = keras.optimizers.SGD(learning_rate=0.01)
camodel.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

# One timestamped log directory per run so TensorBoard keeps runs separate.
logdir = os.path.join(
    "logs",
    datetime.datetime.now().strftime("%Y%m%d-%H%M%S_Task2Genders"),
)
tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir, histogram_freq=1)

camodel.fit(
    training_images,
    genders,
    batch_size=10,
    epochs=50,
    validation_data=(testing_images, gendersVal),
    callbacks=[tensorboard_callback],
)

### Task 2, Classification Task B: Race

In [None]:
# Task 2B: convolutional race classifier.
# Architecture: 40 5x5 conv filters (relu, stride 1, no padding) -> 2x2 max pool
# -> flatten -> 100 relu -> n-way softmax over the race classes.
n = numRaces
cbmodel = Sequential([
    layers.Conv2D(
        filters=40,
        kernel_size=(5, 5),
        strides=1,
        padding='valid',
        activation='relu',
        input_shape=(32, 32, 1),
    ),
    layers.MaxPooling2D(pool_size=2, strides=2, padding='valid'),
    layers.Flatten(),
    layers.Dense(units=100, activation='relu'),
    layers.Dense(units=n, activation='softmax'),
])

# Plain SGD on categorical cross entropy, tracking accuracy.
opt = keras.optimizers.SGD(learning_rate=0.01)
cbmodel.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

# One timestamped log directory per run so TensorBoard keeps runs separate.
logdir = os.path.join(
    "logs",
    datetime.datetime.now().strftime("%Y%m%d-%H%M%S_Task2Races"),
)
tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir, histogram_freq=1)

cbmodel.fit(
    training_images,
    races,
    batch_size=10,
    epochs=50,
    validation_data=(testing_images, racesVal),
    callbacks=[tensorboard_callback],
)

## Task 3: Our own Convolutional Network
### Can we build our own network architecture to do better than either cmodel?

### Classification Task A: Gender

In [None]:
# Task 3A: custom convolutional gender classifier.
# Architecture: 50 7x7 conv filters (relu, same padding) -> 2x2 max pool
# -> 100 3x3 conv filters (relu, no padding) -> flatten -> 256 relu -> 64 relu
# -> n-way softmax.
n = 2
task3amodel = Sequential([
    layers.Conv2D(
        filters=50,
        kernel_size=(7, 7),
        strides=1,
        padding='same',
        activation='relu',
        input_shape=(32, 32, 1),
    ),
    layers.MaxPooling2D(pool_size=2, strides=2, padding='valid'),
    layers.Conv2D(
        filters=100,
        kernel_size=(3, 3),
        strides=1,
        padding='valid',
        activation='relu',
    ),
    layers.Flatten(),
    layers.Dense(units=256, activation='relu'),
    # An intermediate Dense(128, relu) layer was tried here and is currently disabled.
    layers.Dense(units=64, activation='relu'),
    layers.Dense(units=n, activation='softmax'),
])

# Plain SGD on categorical cross entropy, tracking accuracy.
opt = keras.optimizers.SGD(learning_rate=0.01)
task3amodel.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

# One timestamped log directory per run so TensorBoard keeps runs separate.
logdir = os.path.join(
    "logs",
    datetime.datetime.now().strftime("%Y%m%d-%H%M%S_Task3Genders"),
)
tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir, histogram_freq=1)

task3amodel.fit(
    training_images,
    genders,
    batch_size=10,
    epochs=50,
    validation_data=(testing_images, gendersVal),
    callbacks=[tensorboard_callback],
)

### Classification Task B: Race

In [None]:
# Task 3B: custom convolutional race classifier.
# Architecture: 50 7x7 conv filters (relu, same padding) -> 2x2 max pool
# -> 50 3x3 conv filters (relu, no padding) -> flatten -> 120 relu
# -> n-way softmax over the race classes.
#
# The model is bound to task3bmodel (parallel to task3amodel) rather than
# cbmodel, so the Task 2 race model stays available for comparison instead of
# being silently overwritten by this cell.
n = numRaces
task3bmodel = Sequential([
    layers.Conv2D(
        filters=50,
        kernel_size=(7, 7),
        strides=1,
        padding='same',
        activation='relu',
        input_shape=(32, 32, 1),
    ),
    layers.MaxPooling2D(pool_size=2, strides=2, padding='valid'),
    layers.Conv2D(
        filters=50,
        kernel_size=(3, 3),
        strides=1,
        padding='valid',
        activation='relu',
    ),
    layers.Flatten(),
    layers.Dense(units=120, activation='relu'),
    layers.Dense(units=n, activation='softmax'),
])

# Plain SGD on categorical cross entropy, tracking accuracy.
opt = keras.optimizers.SGD(learning_rate=0.01)
task3bmodel.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

# One timestamped log directory per run so TensorBoard keeps runs separate.
logdir = os.path.join(
    "logs",
    datetime.datetime.now().strftime("%Y%m%d-%H%M%S_Task3Races"),
)
tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir, histogram_freq=1)

task3bmodel.fit(
    training_images,
    races,
    batch_size=10,
    epochs=50,
    validation_data=(testing_images, racesVal),
    callbacks=[tensorboard_callback],
)

## Task 4: Our own Convolutional Network to do both classifications simultaneously
### We can't use Sequential() for this one

In [69]:
# Task 4: one network that classifies gender and race simultaneously.
# A shared convolutional trunk feeds two independent Dense heads, which is why
# the functional API is used instead of Sequential().
task4inputs = keras.Input(shape=(32,32,1))

# Shared trunk: conv -> max pool -> conv -> flatten.
task4mainbranch = layers.Conv2D(filters=50, kernel_size=(7,7), activation='relu', strides=1, padding='same')(task4inputs)
task4mainbranch = layers.MaxPooling2D(pool_size=2, strides=2, padding='valid')(task4mainbranch)
task4mainbranch = layers.Conv2D(filters=100, kernel_size=(3,3), activation='relu', strides=1, padding='valid')(task4mainbranch)
task4mainbranch = layers.Flatten()(task4mainbranch)

# Gender head; the output layer name "gender" is the key used in fit() below.
task4gender = layers.Dense(100, activation='relu')(task4mainbranch)
task4gender = layers.Dense(2, activation='softmax', name="gender")(task4gender)

# Race head; the output layer name "race" is the key used in fit() below.
task4race = layers.Dense(100, activation='relu')(task4mainbranch)
task4race = layers.Dense(numRaces, activation='softmax', name="race")(task4race)

task4model = keras.Model(inputs=task4inputs, outputs=[task4gender,task4race], name="task4_model")
# summary() prints the table itself and returns None, so it is not wrapped in
# print() (which would add a stray "None" line to the output).
task4model.summary()

# The single loss / metric specification is applied to both outputs.
opt = keras.optimizers.SGD(learning_rate=0.01)
task4model.compile(
    optimizer=opt,
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# One timestamped log directory per run so TensorBoard keeps runs separate.
logdir = os.path.join("logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S_Task4"))
tensorboard_callback = keras.callbacks.TensorBoard(logdir, histogram_freq=1)

# Targets are passed as dicts keyed by the output layer names.
task4model.fit(training_images, {"gender":genders,"race":races}, epochs=50, batch_size=10, validation_data=(testing_images, {"gender":gendersVal,"race":racesVal}), callbacks=[tensorboard_callback])

Model: "task4_model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_13 (InputLayer)          [(None, 32, 32, 1)]  0           []                               
                                                                                                  
 conv2d_108 (Conv2D)            (None, 32, 32, 50)   2500        ['input_13[0][0]']               
                                                                                                  
 max_pooling2d_53 (MaxPooling2D  (None, 16, 16, 50)  0           ['conv2d_108[0][0]']             
 )                                                                                                
                                                                                                  
 conv2d_109 (Conv2D)            (None, 14, 14, 100)  45100       ['max_pooling2d_53[0][0

KeyboardInterrupt: 

## Task 5: Variational Auto-Encoder
### Use these specs (with some flexibility)
###### a. at least 2 convolutional layers and 2 deconvolution layers
###### b. latent dimension should be at least 5
###### c. loss should be either MSE or binary cross entropy

In [None]:
def load_grayscale_images_with_progress(file_paths):
    """Read every image as an 8-bit grayscale array, printing coarse progress.

    The completed percentage is printed at the first image and then each time
    it crosses a new whole percent. The total is taken from the number of
    paths, so the report stays correct if the label file changes size.

    Parameters
    ----------
    file_paths : iterable of str
        Paths of the image files, resolved relative to the working directory.

    Returns
    -------
    list of numpy.ndarray
        One 2-D array per image, in the same order as ``file_paths``.
    """
    file_paths = list(file_paths)
    total = len(file_paths)
    images = []
    last_whole_percent = -1
    for position, path in enumerate(file_paths):
        percent = position / total * 100
        if int(percent) > last_whole_percent:
            print(percent)
            last_whole_percent = int(percent)
        # The context manager closes the underlying file as soon as the pixel
        # data has been converted; convert("L") returns an independent image.
        with Image.open(path) as image:
            images.append(np.asarray(image.convert("L")))
    return images


# Load the full training set for the auto-encoder.
# (For a quick run, slice the frame first, e.g. training_csv.iloc[1:25].reset_index().)
training_csv = pd.read_csv('fairface_label_train.csv')
training_images = load_grayscale_images_with_progress(training_csv['file'])

# Load the full validation set the same way.
# (For a quick run, slice the frame first, e.g. testing_csv.iloc[1:25].reset_index().)
testing_csv = pd.read_csv('fairface_label_val.csv')
testing_images = load_grayscale_images_with_progress(testing_csv['file'])