08227986

In [4]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mapimg
import pandas as pd
import cv2
import os
import dlib
import pickle
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from keras.preprocessing import image
from sklearn import decomposition
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from sklearn import metrics
from skimage.color import rgb2gray
from skimage.feature import hog
from scipy.spatial import procrustes

In [5]:
def shape_to_np(shape, dtype="int"):
    coords = np.zeros((shape.num_parts, 2), dtype=dtype)
    for i in range(0, shape.num_parts):
        coords[i] = (shape.part(i).x, shape.part(i).y)
    return coords

In [6]:
def rect_to_dim(rect):
    w = rect.right() - rect.left()
    h = rect.top() - rect.bottom()
    return (w, h)

In [7]:
def create_feature(img):
    face_detect = dlib.get_frontal_face_detector()
    shape_predict = dlib.shape_predictor('shape_predictor_68_face_landmarks.dat')
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    gray = gray.astype('uint8')
    rects = face_detect(gray, 1)
    num_faces = len(rects)
    
    if num_faces == 0:
        return None

    face_areas = np.zeros((1, num_faces))
    face_shapes = np.zeros((136, num_faces), dtype=np.int64)
    
    for (i, rect) in enumerate(rects):
        temp_shape = shape_predict(gray, rect)
        temp_shape = shape_to_np(temp_shape)
        (w, h) = rect_to_dim(rect)
        face_shapes[:, i] = np.reshape(temp_shape, [136])
        face_areas[0, i] = w * h
        dlibout = np.reshape(np.transpose(face_shapes[:, np.argmax(face_areas)]), [68, 2])
    return dlibout

In [20]:
def create_feature_matrix(file_path, df):
    features = []
    labels = []
    image_paths = [os.path.join(file_path, l) for l in os.listdir(file_path)]
    for file_name in df["file_names"]:
        img_path = os.path.join(file_path, file_name)
        img = image.img_to_array(image.load_img(img_path, target_size=None, interpolation='bicubic'))
        feature = create_feature(img)
        if feature is not None:
            features.append(feature)
            temp = file_name.split(".")[0]
            labels.append(df.loc[int(temp), "face_shape"])
    features = np.array(features)
    return features, labels

In [9]:
def calc_label(data_matrix, center_matrix):
    no_data = data_matrix.shape[0]
    K = center_matrix.shape[0]
    label = np.zeros(no_data)
    for i in range(no_data):
        datum = data_matrix[i, :]
        dist2centers = np.zeros(K)
        for k in range(K):
            center=center_matrix[k, :]
            dist=procrustes(datum, center)
            dist2centers[k]=dist
        label[i] = np.argmin(dist2centers)
    return label.astype(np.int)

In [10]:
def cost_func(data_matrix, center_matrix, labels):
    K = center_matrix.shape[0]
    cost = 0.
    for k in range(K):
        data_index = np.where(labels==k)
        cluster_data=data_matrix[data_index,:].reshape((-1, 2))
        cluster_center=center_matrix[k,:]
        for i in range(cluster_data.shape[0]):
            t1, t2, disp = procrustes(cluster_data, cluster_center)
            cost += disp
    cost = cost/K
    return cost

In [11]:
def update_centers(data_matrix, label, K):
    dim = data_matrix.shape[1]
    center_matrix = np.zeros([K, dim])
    for k in range(K):
        index = np.where(label == k)
        cluster_data = data_matrix[index]
        new_center = cluster_data.mean(axis=0)
        center_matrix[k,:] = new_center
    return center_matrix

In [23]:
def newKMeans(data_matrix, K, max_iter):
    no_data = data_matrix.shape[0]
    index = np.arange(no_data)
    np.random.shuffle(index)
    center_matrix = data_matrix[index[:,K]]
    label = calc_label(data_matrix, center_matrix)
    cost = cost_func(data_matrix, center_matrix, label)
    Train = True
    iter = 0
    while Train:
        iter += 1
        center_matrix = update_centers(data_matrix, label, K)
        label = calc_label(data_matrix, center_matrix)
        cost = cost_func(data_matrix, center_matrix, label)
        print(f"iter {iter}: cost = {cost}")
        if iter >= max_iter:
            Train = False
    return label, center_matrix

In [13]:
file_path = "D:\Admin\Documents\Year_4\AMLS\Assessment\dataset_AMLS_20-21\cartoon_set\img"
labels_path = "D:\Admin\Documents\Year_4\AMLS\Assessment\dataset_AMLS_20-21\cartoon_set\labels.csv"

In [14]:
df = pd.read_csv(labels_path)

df = df.drop(columns="Unnamed: 0")
temp = df["file_name"]
df.insert(loc=0, column="file_names", value=temp)
df = df.drop(columns="file_name")
df = df.drop(df.index[1000:])

In [15]:
train, validation, test = \
              np.split(df.sample(frac=1), 
                       [int(.6*len(df)), int(.8*len(df))])

In [16]:
train

Unnamed: 0,file_names,eye_color,face_shape
686,686.png,1,4
177,177.png,3,0
420,420.png,4,3
887,887.png,4,4
535,535.png,4,3
...,...,...,...
360,360.png,1,1
346,346.png,1,2
450,450.png,1,3
648,648.png,4,4


In [17]:
validation

Unnamed: 0,file_names,eye_color,face_shape
555,555.png,2,2
169,169.png,0,2
953,953.png,0,4
933,933.png,0,2
736,736.png,4,3
...,...,...,...
685,685.png,1,1
575,575.png,4,4
181,181.png,2,3
670,670.png,4,3


In [18]:
test

Unnamed: 0,file_names,eye_color,face_shape
366,366.png,0,1
28,28.png,0,0
403,403.png,4,0
308,308.png,4,2
148,148.png,2,2
...,...,...,...
39,39.png,4,4
630,630.png,2,2
8,8.png,1,1
700,700.png,2,1


In [25]:
temp = train.filter(axis='columns', items= ["file_names", "face_shape"])

filter0 = train.loc[:,"face_shape"] == 0
filter0 = train[filter0]
img0 = filter0["file_names"].iloc[0]
img_path = os.path.join(file_path, img0)
img0 = image.img_to_array(image.load_img(img_path, target_size=None, interpolation='bicubic'))
feature0 = create_feature(img0)

filter1 = train.loc[:,"face_shape"] == 1
filter1 = train[filter1]
img1 = filter1["file_names"].iloc[0]
img_path = os.path.join(file_path, img1)
img1 = image.img_to_array(image.load_img(img_path, target_size=None, interpolation='bicubic'))
feature1 = create_feature(img1)

filter2 = train.loc[:,"face_shape"] == 2
filter2 = train[filter2]
img2 = filter2["file_names"].iloc[0]
img_path = os.path.join(file_path, img2)
img2 = image.img_to_array(image.load_img(img_path, target_size=None, interpolation='bicubic'))
feature2 = create_feature(img2)

filter3 = train.loc[:,"face_shape"] == 3
filter3 = train[filter3]
img3 = filter3["file_names"].iloc[0]
img_path = os.path.join(file_path, img3)
img3 = image.img_to_array(image.load_img(img_path, target_size=None, interpolation='bicubic'))
feature3 = create_feature(img3)

filter4 = train.loc[:,"face_shape"] == 4
filter4 = train[filter4]
img4 = filter4["file_names"].iloc[0]
img_path = os.path.join(file_path, img4)
img4 = image.img_to_array(image.load_img(img_path, target_size=None, interpolation='bicubic'))
feature4 = create_feature(img4)

In [21]:
features, labels = create_feature_matrix(file_path, train)

In [24]:
label, center_matrix = newKMeans(features, 5, 20)

IndexError: too many indices for array: array is 1-dimensional, but 2 were indexed