In [34]:
import h5py
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import balanced_accuracy_score
from PIL import Image
import pickle
from tensorflow.keras.models import Sequential, Model, load_model, model_from_json
from tensorflow.keras.layers import Dense, Dropout, Input, Activation, Conv2D, Conv1D, MaxPooling2D, Flatten
from tensorflow.keras.optimizers import RMSprop, SGD, Adagrad, Adadelta, Adam
from tensorflow.keras.utils import plot_model
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.applications import ResNet50, ResNet101, ResNet152
from tensorflow.keras import metrics
from tensorflow.keras import losses


# from collections import Counter
# counter_object = Counter(labels_list)
# keys = counter_object.keys()
# print(keys, counter_object.values())
# len(labels_list)

WOMEN = ['Ewelina', 'Kasia']
SEXIST = ['Ewelina', 'Kasia', 'przemek', 'Mikolaj']
x = []
y = []

def preprocess_dataset(path_to_pickle, n_classes=0, commands=True, women=True):
    global x, y
    with open(path_to_pickle, 'rb') as f:
        data = pickle.load(f)

    samples = [t[0] for t in data]
    commands = [t[1] for t in data]
    names = [t[2] for t in data]

    labels = commands if commands else names

    if women:
        labels = names
        selected_labels = SEXIST
    elif n_classes > 0:
        selected_labels = np.random.choice(np.unique(labels), n_classes)
    else:
        selected_labels = labels

    x = []
    y = []
    for sample, label in zip(samples, labels):
        if label in selected_labels:
            x.append(sample.T)
            if label in WOMEN and women is True:
                y.append('Woman')
            elif women:
                y.append('Man')
            else:
                y.append(label)

    # x = np.array(x, dtype=object)

    # samples_array = np.zeros(shape=len(x), dtype=object)
    # labels_list = []
    #
    # for sample in range(len(samples)):
    #     samples_array[sample] = samples[str(sample)][()]
    #     labels_list.append(labels[str(sample)][()].decode('utf-8'))

    label_encoder = LabelEncoder()
    labels_encoded = label_encoder.fit_transform(y)

    x_image_size = int(np.amin([sample.shape[1] for sample in x]) + np.amax([sample.shape[1] for sample in x]) / 2)

    resized_dataset = []
    for index, sample in enumerate(x):
        resized_dataset.append(np.asarray(Image.fromarray(sample).resize((int(x_image_size), int(32)))))

    reshaped_dataset = np.array(resized_dataset)

    # reshaped_dataset = np.zeros((resized_dataset.shape[0], resized_dataset[0].shape[0], resized_dataset[0].shape[1]))
    # for index, sample in enumerate(resized_dataset):
    #     reshaped_dataset[index] = sample

    reshaped_dataset = np.reshape(reshaped_dataset,
                                  (reshaped_dataset.shape[0], reshaped_dataset.shape[1], reshaped_dataset.shape[2], 1))

    reshaped_dataset_additional_channels = np.repeat(reshaped_dataset, 3, axis=3)

    x_train, x_test, y_train, y_test = train_test_split(reshaped_dataset_additional_channels, labels_encoded,
                                                        test_size=0.2, shuffle=True)
    x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.2, shuffle=True)

    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    x_val = x_val.astype('float32')

    y_train = keras.utils.to_categorical(y_train, len(label_encoder.classes_))
    y_test = keras.utils.to_categorical(y_test, len(label_encoder.classes_))
    y_val = keras.utils.to_categorical(y_val, len(label_encoder.classes_))

    return x_train, x_test, x_val, y_train, y_test, y_val, len(label_encoder.classes_)


def train_model(x_train, x_test, x_val, y_train, y_test, y_val, n_classes, path, epochs=50, save=True):
    model = Sequential()
    pretrained_model_for_demo = tf.keras.applications.ResNet50(include_top=False,
                                                                input_shape=x_train.shape[1:],
                                                                pooling='avg', classes=n_classes,
                                                                weights='imagenet')

    for each_layer in pretrained_model_for_demo.layers:
        each_layer.trainable = False

    model.add(pretrained_model_for_demo)
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dense(n_classes, activation='softmax'))
    model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
    datagen = tf.keras.preprocessing.image.ImageDataGenerator(validation_split=0.2)

    model.fit(datagen.flow(x_train, y_train, batch_size=16, subset='training', shuffle=True),
              validation_data=datagen.flow(x_val, y_val, batch_size=16, subset='validation', shuffle=True),
              epochs=epochs)

    image_pred = model.predict(x_test)

    y_categorized = []
    y_tested = []
    for row in range(len(image_pred)):
        y_categorized.append(np.argmax(image_pred[row]))
        y_tested.append(np.argmax(y_test[row]))

    y_categorized = np.reshape(y_categorized, newshape=(len(y_categorized), 1))
    y_tested = np.reshape(y_tested, newshape=(len(y_categorized), 1))

    print(f'The balanced accuracy score of the model is: {balanced_accuracy_score(y_tested, y_categorized)}')

    if save:
        model.save(path)

In [32]:
x_train, x_test, x_val, y_train, y_test, y_val, n_classes = preprocess_dataset(
    path_to_pickle='../data/commands_aug.pickle', n_classes=0, commands=True, women=True)

In [35]:
train_model(x_train, x_test, x_val, y_train, y_test, y_val, 2,
            path='..\models\\resnet152\\peoples_aug_pickle', save=True, epochs=30)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
The balanced accuracy score of the model is: 0.8823076923076922




INFO:tensorflow:Assets written to: ..\models\resnet152\peoples_aug_pickle\assets


INFO:tensorflow:Assets written to: ..\models\resnet152\peoples_aug_pickle\assets
