In [9]:
import pandas as pd
import json
import imageio
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras

In [10]:
# Load the labelled users, drop incomplete rows and rows whose race code is 5,
# then shift the remaining codes down by one so that they start at 0.
users = (
    pd.read_csv("labeled_users.csv")
    .dropna()
    .loc[lambda frame: frame['race'] != 5]
    # assign() builds a new frame, so nothing is written into a filtered slice.
    .assign(race=lambda frame: frame['race'] - 1)
)
# Lookup table user_id -> race label; only the one needed column is converted.
raceDict = users.set_index('user_id')['race'].to_dict()
users['race'].value_counts()

3.0    3114
0.0     363
1.0     234
2.0     134
Name: race, dtype: int64

In [11]:
# Parse the profile records straight from the file handle; json.load reads and
# decodes in one call, so no intermediate string has to be kept around.
with open("User demo profiles.json", encoding="utf8") as file:
    profiles = json.load(file)

In [12]:
# Pair every profile picture with the race label of its user.
# Profiles without a label and profiles whose image file is missing are skipped.
pics = []
labels = []
missing_paths = []  # image files that could not be found, kept for inspection
for profile in profiles:
    user_id = profile['id']  # named user_id so the builtin id() is not shadowed
    if user_id not in raceDict:
        continue
    path = profile['img_path']
    try:
        # Only the read can raise FileNotFoundError, so only the read is guarded.
        im = imageio.imread(path)
    except FileNotFoundError:
        missing_paths.append(path)
        continue
    pics.append(im)
    labels.append(raceDict[user_id])
# NOTE(review): stacking into one 4-D array assumes every image has the same
# shape; the recorded run produced (3274, 224, 224, 3).
pics = np.array(pics)
labels = np.array(labels)

In [13]:
# Sanity check before modelling: the images must have stacked into a single
# 4-D array, with exactly one label per image.
assert pics.ndim == 4, f"expected a 4-D image array, got shape {pics.shape}"
assert len(pics) == len(labels), "number of images and labels differ"
pics.shape, labels.shape

((3274, 224, 224, 3), (3274,))

In [14]:
from sklearn.model_selection import train_test_split

# Fixed seed so that the split can be reproduced from a fresh kernel; stratify
# keeps the strongly imbalanced class proportions the same in both parts.
x_train, x_test, y_train, y_test = train_test_split(
    pics, labels, test_size=.2, random_state=42, stratify=labels
)

In [15]:
# Small CNN: one 3x3 convolution, dropout, then two dense layers ending in a
# 4-way softmax.
model = keras.Sequential([
    keras.layers.Conv2D(filters=3, kernel_size=(3,3), activation='relu', input_shape=(224,224,3)),
    keras.layers.Dropout(.25),
    keras.layers.Flatten(),
    keras.layers.Dense(20, activation='relu'),
    keras.layers.Dense(4, activation='softmax'),
])
model.summary()
# Sparse loss: the targets are class indices rather than one-hot vectors.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_1 (Conv2D)           (None, 222, 222, 3)       84        
                                                                 
 dropout_1 (Dropout)         (None, 222, 222, 3)       0         
                                                                 
 flatten_1 (Flatten)         (None, 147852)            0         
                                                                 
 dense_2 (Dense)             (None, 20)                2957060   
                                                                 
 dense_3 (Dense)             (None, 4)                 84        
                                                                 
Total params: 2,957,228
Trainable params: 2,957,228
Non-trainable params: 0
_________________________________________________________________


In [16]:
# Train for ten passes over the training split.
model.fit(x=x_train, y=y_train, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1762421a2e0>

In [17]:
from sklearn.metrics import accuracy_score

# Predicted class = index of the largest softmax output.
y_pred = np.argmax(model.predict(x_test), axis=1)
# accuracy_score expects (y_true, y_pred). Accuracy itself is symmetric, but the
# documented order matters as soon as another metric is swapped in.
# NOTE(review): about 81% of the labelled users belong to class 3, so compare
# this score against an always-predict-class-3 baseline before trusting it.
accuracy_score(y_test, y_pred)

0.815267175572519

In [18]:
!pip install pydot
!pip install graphviz
!pip install pydotplus
tf.keras.utils.plot_model(model)

('You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) ', 'for plot_model/model_to_dot to work.')
