In [116]:
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications import MobileNetV2
import tensorflow as tf

from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

import os
import numpy as np
import matplotlib.pyplot as plt
from random import randint
import pandas as pd
import pickle

# Ask TensorFlow's native logging to stay quiet.
# NOTE(review): TensorFlow has already been imported above; this variable is
# normally set *before* `import tensorflow` -- confirm it still has the
# intended effect when assigned here.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 

In [117]:
'''Define Model'''
# Build MobileNetV2 with its default arguments, then re-wrap it so the model
# outputs the activations of its second-to-last layer instead of the final
# layer. Those activations are what extract_features() returns.
model = MobileNetV2()
model = tf.keras.models.Model(inputs = model.inputs,
                              outputs = model.layers[-2].output)

'''Path to Folder'''
path = 'mst_dataset'
# List the folders from `path` itself (rather than repeating the literal) and
# keep only sub-directories, so stray files in the dataset root are never
# treated as image folders by the loops that iterate over `face_dir`.
face_dir = sorted(entry for entry in os.listdir(path)
                  if os.path.isdir(os.path.join(path, entry)))

'''Feature Extraction'''
def extract_features(file,model):
    """Load one image and return the model's output for it.

    Parameters
    ----------
    file : path of the image, handed to ``load_img`` with
        ``target_size = (224,224)``.
    model : object exposing ``predict``; it is called on a batch that holds
        only this image.

    Returns
    -------
    Whatever ``model.predict`` returns for that single-image batch.
    """
    img = load_img(file, target_size = (224,224))
    # Add a leading batch axis of length 1.
    batch = np.expand_dims(np.array(img), 0)
    # NOTE(review): pixels are scaled by 1/255 here. The Keras documentation
    # for MobileNetV2 points to `mobilenet_v2.preprocess_input` as the expected
    # preprocessing -- confirm that /255 is intended. Left unchanged so that
    # features stay comparable with any previously computed ones.
    x = batch/255
    # `use_multiprocessing` is intentionally not passed: it only concerns
    # generator / Sequence inputs, and newer Keras versions reject it with a
    # TypeError (which the callers' broad `except` would swallow per image).
    features = model.predict(x)
    return features

'''Perform PCA'''
def perform_PCA(n_comp = 50, features = None):
    """Project `features` onto `n_comp` principal components.

    A new PCA (``random_state = 22``) is fitted on `features` at every call
    and the transformed `features` are returned. The fitted PCA object itself
    is discarded, so the projection cannot be re-applied to other data.
    """
    reducer = PCA(n_components = n_comp, random_state = 22)
    return reducer.fit_transform(features)

'''Z Normalization and Removing Outliers'''
def ZNorm(comp, threshold = 3):
    """Drop every row that holds an outlier in at least one column.

    Each column is z-scored with its own mean and (population) standard
    deviation; a row is removed when the absolute z-score of any of its
    entries exceeds `threshold`.

    Parameters
    ----------
    comp : 2-D array-like, one sample per row.
    threshold : absolute z-score above which an entry counts as an outlier.

    Returns
    -------
    np.ndarray containing the rows of `comp` that were kept, in their
    original order and with their original values (not the z-scores).
    """
    # Accept plain nested lists too; boolean-mask indexing below needs an array.
    comp = np.asarray(comp)

    deviation = comp - np.mean(comp, axis = 0)
    spread = np.std(comp, axis = 0)

    # A constant column has zero spread, and dividing by it would produce
    # 0/0 = NaN plus a RuntimeWarning. Such a column cannot contain an
    # outlier, so its z-scores are simply left at 0.
    z_score = np.divide(deviation, spread,
                        out = np.zeros_like(deviation),
                        where = spread != 0)

    outliers = (np.abs(z_score) > threshold).any(axis = 1)
    return comp[~outliers]

'''K Means to Retrieve Center'''
def center_c(comp_cleaned, random_state = 22):
    """Fit a single-cluster KMeans on `comp_cleaned` and return its centers.

    The returned value is the fitted estimator's ``cluster_centers_``
    attribute; with ``n_clusters=1`` it holds exactly one center.
    """
    fitted = KMeans(n_clusters=1, random_state=random_state).fit(comp_cleaned)
    return fitted.cluster_centers_

In [None]:
def get_cluster(path = path, dir_list = face_dir, model = model):
    """Compute one center per image folder.

    For every entry of `dir_list` found under `path`, each file is passed
    through `extract_features`, the stacked features are reduced with
    `perform_PCA`, outlier rows are dropped with `ZNorm`, and `center_c`
    supplies the folder's center.

    Parameters
    ----------
    path : root folder containing the entries of `dir_list`.
    dir_list : iterable of sub-folder names to process, in order.
    model : model forwarded to `extract_features`.

    Returns
    -------
    center_point : np.ndarray stacking the `center_c` result of every
        processed folder, in processing order.
    error_file : dict mapping each processed folder name to the list of file
        names whose feature extraction raised.

    Raises
    ------
    ValueError
        If not a single file of a folder could be processed.

    NOTE(review): `perform_PCA` fits a fresh PCA for every folder, so each
    center is expressed in its own folder's component basis. Centers of
    different folders are therefore not directly comparable -- confirm that
    this is intended before measuring distances between them.
    """
    center_point = []
    error_file = {}
    for dir_ in dir_list:
        dir_path = os.path.join(path, dir_)
        if not os.path.isdir(dir_path):
            # Stray files in the dataset root would make os.listdir() raise.
            print(f'Skipping {dir_}: not a directory')
            continue

        # Sorted so the processing order does not depend on the filesystem.
        images = sorted(os.listdir(dir_path))
        error_file[dir_] = []

        print(f'Processing {dir_}')

        features = []
        for image in images:
            # Best effort: an unreadable file is recorded and skipped.
            try:
                image_path = os.path.join(dir_path, image)
                feat = extract_features(image_path, model)
                features.append(feat)
            except Exception as e:
                error_file[dir_].append(image)
                print(f'\nError {e} at\n{image}')

        if not features:
            raise ValueError(f'No features could be extracted from {dir_path}')

        # Every `feat` is the prediction for a one-image batch; join them
        # along the batch axis and flatten the rest, giving one row per image
        # without hard-coding the feature width.
        features = np.concatenate(features, axis = 0)
        features = features.reshape(len(features), -1)

        comp = perform_PCA(features = features)
        comp_cleaned = ZNorm(comp)

        center = center_c(comp_cleaned)

        center_point.append(center)
        print('Process Completed')
    center_point = np.array(center_point)
    return center_point, error_file

In [108]:
# Run the whole pipeline over every folder with the default arguments.
# NOTE(review): this cell's execution count (108) is lower than those of the
# cells that define the model and the helpers (116 / 117), so its output was
# likely produced against earlier definitions -- re-check after a
# Restart & Run All.
center_point, error = get_cluster()

Processing Tone01
Process Completed
Processing Tone02
Process Completed
Processing Tone03
Process Completed
Processing Tone04
Process Completed
Processing Tone05
Process Completed
Processing Tone06
Process Completed
Processing Tone07
Process Completed
Processing Tone08
Process Completed
Processing Tone09
Process Completed
Processing Tone10
Process Completed


In [141]:
# Replace the in-memory `center_point` with centers read from disk.
# NOTE(review): no cell shown in this notebook writes './cluster_center.npy';
# presumably it was saved from get_cluster()'s output -- verify.
center_point = np.load('./cluster_center.npy')
def centroid_distance(path = path, dir_list = face_dir, model = model):
    """Return the PCA components of the images in the first folder of `dir_list`.

    Despite its name, this function does not compute any distance: it
    extracts features for every file of the first folder, reduces them with
    `perform_PCA` and returns the result. Remaining folders are never visited.

    Parameters
    ----------
    path : root folder containing the entries of `dir_list`.
    dir_list : iterable of sub-folder names; only the first one is used.
    model : model forwarded to `extract_features`.

    Returns
    -------
    The `perform_PCA` output for the first folder, or None when `dir_list`
    is empty.
    """
    for dir_ in dir_list:
        dir_path = os.path.join(path, dir_)
        images = os.listdir(dir_path)

        print(f'Processing {dir_}')

        features = []
        for image in images:
            # Best effort: a file that cannot be processed is reported and skipped.
            try:
                image_path = os.path.join(dir_path, image)
                features.append(extract_features(image_path, model))
            except Exception as e:
                print(f'\nError {e} at\n{image}')

        features = np.array(features)
        features = features.reshape(-1,1280)

        # Returning here ends the loop after its first iteration.
        return perform_PCA(features = features)

    return None

In [142]:
# PCA components of the first folder only: centroid_distance() returns from
# inside the first iteration of its folder loop.
this_comp = centroid_distance()

Processing Tone01


In [146]:
# Shape of the first row of this_comp (one sample).
this_comp[0].shape

(50,)

In [147]:
# Shape of the first stored center. center_point carries a length-1 axis at
# position 1, hence the second [0].
center_point[0][0].shape

(50,)

In [160]:
# Euclidean distance from one sample (row 2 of this_comp) to every stored center.
# NOTE(review): this_comp comes from a PCA fitted inside centroid_distance(),
# and get_cluster() fits a separate PCA per folder. If center_point was
# produced by get_cluster(), the two sides live in different component bases
# and these distances may not be meaningful -- confirm.
centers = np.squeeze(center_point, axis = 1)  # drop the length-1 axis once, outside the loop
query = this_comp[2]
for center in centers:  # every center, instead of a hard-coded count of 10
    dist = (center - query)**2
    print(np.sqrt(np.sum(dist)))

12.968511
13.24963
12.361787
12.255261
12.864468
13.168463
12.4302225
12.107332
12.038676
12.2505665
