In [20]:
#!pip install tensorflow
import pandas as pd
import json
import imageio
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras

In [21]:
users = pd.read_csv("labeled_users.csv")
users.dropna(inplace=True)
users = users[users['race'] != 5]
users['race'] -= 1
raceDict = users.set_index('user_id').to_dict()['race']
users['race'].value_counts()

3.0    3114
0.0     363
1.0     234
2.0     134
Name: race, dtype: int64

In [25]:
with open("User demo profiles.json", encoding="utf8") as file:
    text = file.read()
    profiles = json.loads(text)

In [26]:
pics = []
labels = []
for profile in profiles:
    id = profile['id']
    if id in raceDict:
        path = profile['img_path']
        try:
            im = imageio.imread(path)
            pics.append(im)
            race = raceDict[id]
            labels.append(race)
        except FileNotFoundError:
            continue
pics = np.array(pics)
labels = np.array(labels)

In [27]:
pics.shape, labels.shape

((3274, 224, 224, 3), (3274,))

In [28]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(pics, labels, test_size=.2)

In [29]:
model = keras.Sequential()

model.add(keras.layers.Conv2D(filters=3, kernel_size=(3,3), activation='relu', input_shape=(224,224,3)))
model.add(keras.layers.Dropout(.25))
model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(20, activation='relu'))
model.add(keras.layers.Dense(4, activation='softmax'))
model.summary()
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 222, 222, 3)       84        
                                                                 
 dropout (Dropout)           (None, 222, 222, 3)       0         
                                                                 
 flatten (Flatten)           (None, 147852)            0         
                                                                 
 dense (Dense)               (None, 20)                2957060   
                                                                 
 dense_1 (Dense)             (None, 4)                 84        
                                                                 
Total params: 2,957,228
Trainable params: 2,957,228
Non-trainable params: 0
_________________________________________________________________


2021-12-04 19:02:41.138272: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [30]:
model.fit(x_train, y_train, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f7f18ad24c0>

In [36]:
from sklearn.model_selection import KFold

acc_per_fold = []
loss_per_fold = []

# Merge inputs and targets
inputs = np.concatenate((x_train, x_test), axis=0)
targets = np.concatenate((y_train, y_test), axis=0)

# Define the K-fold Cross Validator
kfold = KFold(n_splits=5, shuffle=True)

# K-fold Cross Validation model evaluation
fold_no = 1
for train, test in kfold.split(inputs, targets):
    
    # generate model architecture
    model = keras.Sequential()
    model.add(keras.layers.Conv2D(filters=3, kernel_size=(3,3), activation='relu', input_shape=(224,224,3)))
    model.add(keras.layers.Dropout(.25))
    model.add(keras.layers.Flatten())
    model.add(keras.layers.Dense(20, activation='relu'))
    model.add(keras.layers.Dense(4, activation='softmax'))
    model.summary()
    
    # compile the model
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])


  # Generate a print
    print('------------------------------------------------------------------------')
    print(f'Training for fold {fold_no} ...')

  # Fit data to model
    history = model.fit(inputs[train], targets[train],
              batch_size=32,
              epochs=10)

  # Generate generalization metrics
    scores = model.evaluate(inputs[test], targets[test], verbose=0)
    print(f'Score for fold {fold_no}: {model.metrics_names[0]} of {scores[0]}; {model.metrics_names[1]} of {scores[1]*100}%')
    acc_per_fold.append(scores[1] * 100)
    loss_per_fold.append(scores[0])

  # Increase fold number
    fold_no = fold_no + 1




Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_4 (Conv2D)           (None, 222, 222, 3)       84        
                                                                 
 dropout_4 (Dropout)         (None, 222, 222, 3)       0         
                                                                 
 flatten_4 (Flatten)         (None, 147852)            0         
                                                                 
 dense_8 (Dense)             (None, 20)                2957060   
                                                                 
 dense_9 (Dense)             (None, 4)                 84        
                                                                 
Total params: 2,957,228
Trainable params: 2,957,228
Non-trainable params: 0
_________________________________________________________________
--------------------------------------------

Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Score for fold 4: loss of 3.176032781600952; accuracy of 76.1832058429718%
Model: "sequential_8"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_8 (Conv2D)           (None, 222, 222, 3)       84        
                                                                 
 dropout_8 (Dropout)         (None, 222, 222, 3)       0         
                                                                 
 flatten_8 (Flatten)         (None, 147852)            0         
                                                                 
 dense_16 (Dense)            (None, 20)                2957060   
                                                                 
 dense_17 (Dense)            (None, 4)                 84        
                                                                 
Total params: 2,957,22

In [17]:
from sklearn.metrics import accuracy_score
y_pred = np.argmax(model.predict(x_test), axis=1)
accuracy_score(y_pred, y_test)

0.815267175572519

In [18]:
!pip install pydot
!pip install graphviz
!pip install pydotplus
tf.keras.utils.plot_model(model)

('You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) ', 'for plot_model/model_to_dot to work.')
