# Neural Network

In [1]:
!pip install pyyaml numpy scipy matplotlib ipython jupyter pandas sympy nose opencv-python opencv-contrib-python imutils

Collecting pyyaml
[?25l  Downloading https://files.pythonhosted.org/packages/e3/e8/b3212641ee2718d556df0f23f78de8303f068fe29cdaa7a91018849582fe/PyYAML-5.1.2.tar.gz (265kB)
[K     |████████████████████████████████| 266kB 4.9MB/s eta 0:00:01
Collecting scipy
[?25l  Downloading https://files.pythonhosted.org/packages/80/72/a26272b99220804038d8ac4aabe8383cfd969ec548695b0df258058ee919/scipy-1.3.2-cp36-cp36m-manylinux1_x86_64.whl (25.2MB)
[K     |████████████████████████████████| 25.2MB 31.8MB/s eta 0:00:01
Collecting pandas
[?25l  Downloading https://files.pythonhosted.org/packages/52/3f/f6a428599e0d4497e1595030965b5ba455fd8ade6e977e3c819973c4b41d/pandas-0.25.3-cp36-cp36m-manylinux1_x86_64.whl (10.4MB)
[K     |████████████████████████████████| 10.4MB 16.4MB/s eta 0:00:01    |████████████████████▎           | 6.6MB 16.4MB/s eta 0:00:01
[?25hCollecting sympy
[?25l  Downloading https://files.pythonhosted.org/packages/21/21/f4105795ca7f35c541d82c5b06be684dd2f5cb4f508fb487cd7aea4de776/sy

Building wheels for collected packages: pyyaml, imutils, mpmath
  Building wheel for pyyaml (setup.py) ... [?25ldone
[?25h  Created wheel for pyyaml: filename=PyYAML-5.1.2-cp36-cp36m-linux_x86_64.whl size=45393 sha256=aaf6725e2e693cdfa305c5742c54b522d3bb503be8370354071dda92d0568700
  Stored in directory: /root/.cache/pip/wheels/d9/45/dd/65f0b38450c47cf7e5312883deb97d065e030c5cca0a365030
  Building wheel for imutils (setup.py) ... [?25ldone
[?25h  Created wheel for imutils: filename=imutils-0.5.3-cp36-none-any.whl size=26734 sha256=d8a837b0849f7e111ee2aeb7c46ed429e7ebf5f9b57253211963b4bf889eb9fc
  Stored in directory: /root/.cache/pip/wheels/16/84/1f/bf88641293cda2c8be81a5c4b8ca973dd9125a6dc3767417fd
  Building wheel for mpmath (setup.py) ... [?25ldone
[?25h  Created wheel for mpmath: filename=mpmath-1.1.0-cp36-none-any.whl size=532006 sha256=4bf41778a0dd8d674b142ed6212ec02a1d5754a31cc9882dd0123eb6ce2b431a
  Stored in directory: /root/.cache/pip/wheels/63/9d/8e/37c3f6506ed3f152733

In [1]:
import pandas as pd

import tensorflow as tf
import tensorflow.keras as keras
import numpy as np

import yaml

import matplotlib.pyplot as plt

In [2]:
hparams = yaml.safe_load(open('../src/models/hparams.yaml'))

hparams

{'learning_rate': 0.01, 'batch_size': 16, 'num_hidden_layers': 4}

In [6]:
# todo: change this to binary format.

wiki_df = pd.read_csv('../data/processed/wiki_df.csv', sep=';')

wiki_df.head()

Unnamed: 0,full_path,gender,age,img_array
0,17/10000217_1981-05-05_2009.jpg,1.0,28,[255 255 255 ... 144 78 27]
1,12/100012_1948-07-03_2008.jpg,1.0,60,[92 98 93 ... 35 31 30]
2,16/10002116_1971-05-31_2012.jpg,0.0,41,[ 10 30 61 ... 231 237 255]
3,02/10002702_1960-11-09_2012.jpg,0.0,52,[178 122 97 ... 168 112 83]
4,41/10003541_1937-09-27_1971.jpg,1.0,34,[194 189 190 ... 101 103 104]


In [7]:
wiki_df = wiki_df.drop(['img_array'], axis=1)

In [31]:
import cv2
import math
import numpy as np
from tensorflow.keras.utils import Sequence

class WIKISequence(Sequence):
    """Base object for fitting to a sequence of data, such as a dataset.
    Every `Sequence` must implement the `__getitem__` and the `__len__` methods.
    If you want to modify your dataset between epochs you may implement
    `on_epoch_end`.
    The method `__getitem__` should return a complete batch.

    Example: https://www.tensorflow.org/api_docs/python/tf/keras/utils/Sequence
    """

    def __init__(self, dataset_df, batch_size, base_path = '../data/raw/wiki_crop/'):
        self.base_path = base_path
        self.dataset_df = dataset_df
        self.batch_size = batch_size

    def load_img(self, file_path):
        """Load single image from disk and resize and convert to np array
        :return:
        """
        im = cv2.imread(self.base_path + file_path)
        im = cv2.resize(im, (224, 224), interpolation=cv2.INTER_LINEAR)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        return (np.array(im) / 255.0).astype(np.float32)

    def __getitem__(self, idx):
        """Gets batch at position `index`.
        Arguments:
            index: position of the batch in the Sequence.
        Returns:
            A batch
        """
        batch_df = self.dataset_df[idx * self.batch_size:(idx + 1) * self.batch_size]

        return np.array([self.load_img(full_path) for full_path in batch_df['full_path']]), np.array(batch_df['age'])


    def __len__(self):
        """Number of batch in the Sequence.
        Returns:
            The number of batches in the Sequence.
        """
        return math.ceil(len(self.dataset_df) / self.batch_size)

    def on_epoch_end(self):
        """Method called at the end of every epoch.
        """
        pass

wiki_generator = WIKISequence(wiki_df, 16)
    
print("len", len(wiki_generator))
print("i", wiki_generator[0])

len 1412
i (array([[[[1.        , 1.        , 1.        ],
         [1.        , 1.        , 1.        ],
         [1.        , 1.        , 1.        ],
         ...,
         [0.32941177, 0.34117648, 0.3764706 ],
         [0.34509805, 0.35686275, 0.39215687],
         [0.3647059 , 0.3764706 , 0.4117647 ]],

        [[1.        , 1.        , 1.        ],
         [1.        , 1.        , 1.        ],
         [1.        , 1.        , 1.        ],
         ...,
         [0.3254902 , 0.3372549 , 0.37254903],
         [0.34117648, 0.3529412 , 0.3882353 ],
         [0.36862746, 0.38039216, 0.41568628]],

        [[1.        , 1.        , 1.        ],
         [1.        , 1.        , 1.        ],
         [1.        , 1.        , 1.        ],
         ...,
         [0.32156864, 0.33333334, 0.36862746],
         [0.34117648, 0.3529412 , 0.3882353 ],
         [0.37254903, 0.38431373, 0.41960785]],

        ...,

        [[0.16862746, 0.30980393, 0.52156866],
         [0.1254902 , 0.27058825,

In [5]:
import cv2

# Noramalize image values on range <0.0;1.0>
# check: https://www.tensorflow.org/tutorials/images/cnn
#wiki_df['img_array'] = wiki_df['img_array'] / 255.0
#wiki_df['img_array'] = wiki_df['full_path'].apply(lambda x: np.array(cv2.resize(cv2.imread('../data/raw/wiki_crop/' + x), (224, 224), interpolation=cv2.INTER_LINEAR).reshape(1, -1)[0]))

# TODO CHECK:
# wiki_df['img_array'] = wiki_df['full_path'].apply(lambda x: cv2.resize(cv2.imread('../data/raw/wiki_crop/' + x), (224, 224), interpolation=cv2.INTER_LINEAR).reshape(1, -1)[0])

wiki_df = wiki_df.drop(['img_array'], axis=1)

## Experiment 1

* Ako prvé sa pokúsime vytvoriť NN podobnú VGG. Podľa [WEEK_7 lab](https://github.com/matus-pikuliak/neural_networks_at_fiit/blob/92b24eef8e6444c43a22e8fa51a349b3b1043a7c/week_7/week_7.ipynb), alebo iného tutoriálu
* Natrénujeme ju na už predspracovanom datasete
* Jej výsledok pou%zijeme ako štartovaciu čiaru
* Túto sieť budeme rozširovať o ďalšie vrstvy a parametre
* V projekte ponecháme sieť s najlepším skóre
* Dole pripájam referenčnú ukážku siete. [Zdroj](https://www.pyimagesearch.com/2019/10/28/3-ways-to-create-a-keras-model-with-tensorflow-2-0-sequential-functional-and-model-subclassing/?__s)

In [32]:
from tensorflow.keras.layers import Conv2D, Dense, MaxPooling2D, Flatten
from tensorflow.keras.layers import Activation, BatchNormalization, Dropout


class MiniVGGNetModel(keras.Model):
    def __init__(self, classes, chanDim=-1):
        # call the parent constructor
        super(MiniVGGNetModel, self).__init__()

        # initialize the layers in the first (CONV => RELU) * 2 => POOL
        # layer set
        self.conv1A = Conv2D(32, (3, 3), padding="same")
        self.act1A = Activation("relu")
        self.bn1A = BatchNormalization(axis=chanDim)
        self.conv1B = Conv2D(32, (3, 3), padding="same")
        self.act1B = Activation("relu")
        self.bn1B = BatchNormalization(axis=chanDim)
        self.pool1 = MaxPooling2D(pool_size=(2, 2))

        # initialize the layers in the second (CONV => RELU) * 2 => POOL
        # layer set
        self.conv2A = Conv2D(32, (3, 3), padding="same")
        self.act2A = Activation("relu")
        self.bn2A = BatchNormalization(axis=chanDim)
        self.conv2B = Conv2D(32, (3, 3), padding="same")
        self.act2B = Activation("relu")
        self.bn2B = BatchNormalization(axis=chanDim)
        self.pool2 = MaxPooling2D(pool_size=(2, 2))

        # initialize the layers in our fully-connected layer set
        self.flatten = Flatten()
        self.dense3 = Dense(512)
        self.act3 = Activation("relu")
        self.bn3 = BatchNormalization()
        self.do3 = Dropout(0.5)

        # initialize the layers in the softmax classifier layer set
        self.dense4 = Dense(classes)
        self.softmax = Activation("softmax")

    def call(self, inputs):
        # build the first (CONV => RELU) * 2 => POOL layer set
        x = self.conv1A(inputs)
        x = self.act1A(x)
        x = self.bn1A(x)
        x = self.conv1B(x)
        x = self.act1B(x)
        x = self.bn1B(x)
        x = self.pool1(x)

        # build the second (CONV => RELU) * 2 => POOL layer set
        x = self.conv2A(x)
        x = self.act2A(x)
        x = self.bn2A(x)
        x = self.conv2B(x)
        x = self.act2B(x)
        x = self.bn2B(x)
        x = self.pool2(x)

        # build our FC layer set
        x = self.flatten(x)
        x = self.dense3(x)
        x = self.act3(x)
        x = self.bn3(x)
        x = self.do3(x)

        # build the softmax classifier
        x = self.dense4(x)
        x = self.softmax(x)

        # return the constructed model
        return x

In [34]:
#Generate target classes

classes = 101 #0 to 100
target = wiki_df['age'].values           ### train/valid Y
target_classes = keras.utils.to_categorical(target, classes)

file_paths = wiki_df['full_path'].values ### train/valid X

N_SAMPLES = len(target)

assert(len(target) == len(file_paths))

len(target_classes)

22578

In [40]:
# Load single image from disk to memmory
# resize and convert to np array
def load_img(x):
    im = cv2.imread('../data/raw/wiki_crop/' + x)
    im = cv2.resize(im, (224, 224), interpolation=cv2.INTER_LINEAR)
    im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
    #im = im.reshape(1, -1)[0]
    return (np.array(im) / 255.0).astype(np.float32)


In [35]:
model = MiniVGGNetModel(
    classes = classes)

model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'])

callbacks = [
    #keras.callbacks.TensorBoard(
    #    log_dir=os.path.join("logs", timestamp()),
    #    histogram_freq=1,
    #    profile_batch=0)
]

# callbacks = []  # If you do not want to log results into TensorBoard

scores = []
epochs = 60
batch_size = 64

In [12]:
print("samples", N_SAMPLES)

train = 3000
validation = 1000

def load_image_data(files):
    data = []
    for file in files:
        data.append(load_img(file))
    return np.array(data)

def load_data(range=(0,0)):
    x = file_paths[range[0]:range[1]]
    x = load_image_data(x)

    # https://stackoverflow.com/questions/49083984/valueerror-can-not-squeeze-dim1-expected-a-dimension-of-1-got-3-for-sparse
    # sparse_categorical_crossentropy
    y = target[range[0]:range[1]] # target_classes
    
    return (x, y)
    
train_images, train_labels = load_data((0, train))
test_images, test_labels = load_data((train, train+validation))

samples 22578


In [10]:
# from: https://stackoverflow.com/a/55666861
def change_to_right(wrong_labels):
    right_labels=[]
    for x in wrong_labels:
        for i in range(0,len(wrong_labels[0])):
            if x[i]==1:
                right_labels.append(i)
    return right_labels

#train_labels = tf.convert_to_tensor(np.array(change_to_right(train_labels)))
#test_labels = tf.convert_to_tensor(np.array(change_to_right(test_labels)))

In [13]:
### train

score = model.fit(
    x=train_images,
    y=train_labels,
    batch_size = batch_size,
    validation_data = (test_images, test_labels),
    callbacks = callbacks,
    epochs = epochs)

scores.append(score)

model.summary()  # Writes number of parameters for each layer at the end of the training

Train on 3000 samples, validate on 1000 samples
Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
 128/3000 [>.............................] - ETA: 3:45 - loss: 0.0014 - accuracy: 1.0000

KeyboardInterrupt: 

In [38]:
wiki_generator = WIKISequence(wiki_df[0:1000], 32)
wiki_generator2 = WIKISequence(wiki_df[1000:1500], 32)

model.fit_generator(
    wiki_generator,
     epochs=1,
    validation_data=wiki_generator2
)



<tensorflow.python.keras.callbacks.History at 0x7f4bfc7781d0>

In [39]:
model.summary()

Model: "mini_vgg_net_model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              multiple                  896       
_________________________________________________________________
activation (Activation)      multiple                  0         
_________________________________________________________________
batch_normalization (BatchNo multiple                  128       
_________________________________________________________________
conv2d_1 (Conv2D)            multiple                  9248      
_________________________________________________________________
activation_1 (Activation)    multiple                  0         
_________________________________________________________________
batch_normalization_1 (Batch multiple                  128       
_________________________________________________________________
max_pooling2d (MaxPooling2D) multiple           

In [48]:
x = np.array([ load_img(wiki_df['full_path'][0]) ])
y = model.predict(x)

np.argmax(y)

6

In [14]:
#Save model

tf.keras.models.save_model(
    model,
    filepath = "../models/mini-vgg-1.tf",
    overwrite=True,
    include_optimizer=True,
    save_format="tf"
)

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: ../models/mini-vgg-1.tf/assets


# Experiment 2
Cely VGG model.

Zdroj: https://sefiks.com/2018/08/06/deep-face-recognition-with-keras/

In [6]:
model = keras.models.Sequential()
model.add(keras.layers.ZeroPadding2D((1,1),input_shape=(224,224, 3)))
model.add(keras.layers.Convolution2D(64, (3, 3), activation='relu'))
model.add(keras.layers.ZeroPadding2D((1,1)))
model.add(keras.layers.Convolution2D(64, (3, 3), activation='relu'))
model.add(keras.layers.MaxPooling2D((2,2), strides=(2,2)))
 
model.add(keras.layers.ZeroPadding2D((1,1)))
model.add(keras.layers.Convolution2D(128, (3, 3), activation='relu'))
model.add(keras.layers.ZeroPadding2D((1,1)))
model.add(keras.layers.Convolution2D(128, (3, 3), activation='relu'))
model.add(keras.layers.MaxPooling2D((2,2), strides=(2,2)))
 
model.add(keras.layers.ZeroPadding2D((1,1)))
model.add(keras.layers.Convolution2D(256, (3, 3), activation='relu'))
model.add(keras.layers.ZeroPadding2D((1,1)))
model.add(keras.layers.Convolution2D(256, (3, 3), activation='relu'))
model.add(keras.layers.ZeroPadding2D((1,1)))
model.add(keras.layers.Convolution2D(256, (3, 3), activation='relu'))
model.add(keras.layers.MaxPooling2D((2,2), strides=(2,2)))
 
model.add(keras.layers.ZeroPadding2D((1,1)))
model.add(keras.layers.Convolution2D(512, (3, 3), activation='relu'))
model.add(keras.layers.ZeroPadding2D((1,1)))
model.add(keras.layers.Convolution2D(512, (3, 3), activation='relu'))
model.add(keras.layers.ZeroPadding2D((1,1)))
model.add(keras.layers.Convolution2D(512, (3, 3), activation='relu'))
model.add(keras.layers.MaxPooling2D((2,2), strides=(2,2)))
 
model.add(keras.layers.ZeroPadding2D((1,1)))
model.add(keras.layers.Convolution2D(512, (3, 3), activation='relu'))
model.add(keras.layers.ZeroPadding2D((1,1)))
model.add(keras.layers.Convolution2D(512, (3, 3), activation='relu'))
model.add(keras.layers.ZeroPadding2D((1,1)))
model.add(keras.layers.Convolution2D(512, (3, 3), activation='relu'))
model.add(keras.layers.MaxPooling2D((2,2), strides=(2,2)))
 
model.add(keras.layers.Convolution2D(4096, (7, 7), activation='relu'))
model.add(keras.layers.Dropout(0.5))
model.add(keras.layers.Convolution2D(4096, (1, 1), activation='relu'))
model.add(keras.layers.Dropout(0.5))
model.add(keras.layers.Convolution2D(2622, (1, 1)))
model.add(keras.layers.Flatten())
model.add(keras.layers.Activation('softmax'))

In [7]:
model.load_weights('../data/vgg_face_weights.h5')

In [8]:
vgg_face_descriptor = keras.Model(inputs=model.layers[0].input, outputs=model.layers[-1].output)

Convert image as array using opencv

In [9]:
classes = 101 #0 to 100
target = wiki_df['age'].values
target_classes = keras.utils.to_categorical(target, classes)

features = []
 
for i in range(0, wiki_df.shape[0]):
    features.append(wiki_df['img_array'].values[i])
 
features = np.array(features)
features = features.reshape(features.shape[0], 224, 224, 3)

In [12]:
!pip install scikit-learn

Collecting scikit-learn
[?25l  Downloading https://files.pythonhosted.org/packages/a0/c5/d2238762d780dde84a20b8c761f563fe882b88c5a5fb03c056547c442a19/scikit_learn-0.21.3-cp36-cp36m-manylinux1_x86_64.whl (6.7MB)
[K     |████████████████████████████████| 6.7MB 4.7MB/s eta 0:00:01
Collecting joblib>=0.11 (from scikit-learn)
[?25l  Downloading https://files.pythonhosted.org/packages/8f/42/155696f85f344c066e17af287359c9786b436b1bf86029bb3411283274f3/joblib-0.14.0-py2.py3-none-any.whl (294kB)
[K     |████████████████████████████████| 296kB 28.7MB/s eta 0:00:01
[?25hInstalling collected packages: joblib, scikit-learn
Successfully installed joblib-0.14.0 scikit-learn-0.21.3
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [13]:
from sklearn.model_selection import train_test_split

train_x, test_x, train_y, test_y = train_test_split(features, target_classes, test_size=0.30)

In [14]:
for layer in model.layers[:-7]:
    layer.trainable = False

In [15]:
base_model_output = keras.models.Sequential()
base_model_output = keras.layers.Convolution2D(101, (1, 1), name='predictions')(model.layers[-4].output)
base_model_output = keras.layers.Flatten()(base_model_output)
base_model_output = keras.layers.Activation('softmax')(base_model_output)

In [16]:
age_model = keras.Model(inputs=model.input, outputs=base_model_output)

In [17]:
age_model.compile(loss='categorical_crossentropy', optimizer=keras.optimizers.Adam(), metrics=['accuracy'])

In [None]:
scores = []
epochs = 250; batch_size = 50
 
for i in range(epochs):
    print("epoch ",i)
 
    ix_train = np.random.choice(train_x.shape[0], size=batch_size)
 
    score = age_model.fit(train_x[ix_train], train_y[ix_train], epochs=1, validation_data=(test_x, test_y))
 
    scores.append(score)

epoch  0
