In [5]:
import cv2
import numpy as np
from pandas.io.parsers import read_csv
import matplotlib.pyplot as plt
from PIL import Image
from sklearn.utils import shuffle
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPool2D, Dropout
from keras import models
import keras_tuner as kt

In [None]:
def load_data(validation_split, csv_path="training.csv", image_size=96, seed=None):
    """Load the facial-landmark CSV and split it into training and validation arrays.

    The CSV must contain an ``Image`` column holding space-separated pixel
    values; every other column is treated as a landmark coordinate. Rows with
    any missing value are dropped. Pixels are scaled by 1/255 and coordinates
    by 1/``image_size``, both stored as float32. The rows are shuffled before
    being split.

    Args:
        validation_split: Fraction of the rows, in [0, 1], assigned to the
            validation set. The training size is truncated to an integer, so
            the validation set receives the remainder.
        csv_path: Path of the CSV file to read.
        image_size: Side length in pixels of the square, single-channel images.
        seed: Optional integer seed for the shuffle. With ``None`` the
            permutation is drawn from NumPy's global random state, so the
            split differs between runs unless that state was seeded.

    Returns:
        Tuple ``(X_train, Y_train, X_val, Y_val)`` where the ``X`` arrays have
        shape ``(n, image_size, image_size, 1)`` and the ``Y`` arrays have
        shape ``(n, number_of_label_columns)``.

    Raises:
        ValueError: If ``validation_split`` is outside [0, 1], if no row is
            left after dropping incomplete rows, or if the pixel data does not
            fit ``image_size`` x ``image_size`` images.
    """
    # Fail fast: a fraction above 1 would yield a negative split index and a
    # silently wrong slice further down.
    if not 0.0 <= validation_split <= 1.0:
        raise ValueError(
            f"validation_split must be between 0 and 1, got {validation_split!r}"
        )

    # Load data from csv file into data frame, drop all rows that have missing values
    df = read_csv(csv_path)
    print(df["Image"].count())
    df = df.dropna()
    print(df["Image"].count())

    if df.empty:
        raise ValueError(f"No complete rows found in {csv_path!r}")

    # Parse each space-separated pixel string and stack the rows into one
    # matrix, without writing the parsed arrays back into the data frame.
    pixels = np.vstack([np.fromstring(img, sep=" ") for img in df["Image"]])
    # Normalize pixel values to (0, 1) range and convert to float32, which is
    # the default for Keras.
    X_data = (pixels / 255).astype("float32")
    # Reshape each row to (height, width, num_channels).
    X_data = X_data.reshape(-1, image_size, image_size, 1)

    # Select the label columns by name rather than by position, so the result
    # does not depend on "Image" being the last column of the file.
    Y_data = df.drop(columns="Image").to_numpy()
    # Normalize coordinates to (0, 1) range
    Y_data = (Y_data / image_size).astype("float32")

    # Shuffle images and labels with one shared permutation so that the pairs
    # stay aligned; an explicit seed makes the split reproducible.
    rng = np.random if seed is None else np.random.RandomState(seed)
    order = rng.permutation(X_data.shape[0])
    X_data = X_data[order]
    Y_data = Y_data[order]

    # Split data into training set and validation set
    split_index = int(X_data.shape[0] * (1 - validation_split))
    X_train, X_val = X_data[:split_index], X_data[split_index:]
    Y_train, Y_val = Y_data[:split_index], Y_data[split_index:]

    return X_train, Y_train, X_val, Y_val