# Check t-SNE on Person 2

In [8]:
import argparse
import os
import json

import numpy as np
import tensorflow as tf

import model.create_dataset as create_dataset
from model.utils import Params
from model.model_fn import model_fn

In [9]:
# Model directory: params.json is read from here and it is handed to RunConfig as model_dir.
model_dir = "experiments/p1_training"
# Root folder of the images to embed; the same folder is scanned by load_metadata for labels.
landmark_dir = "data_for_model_person_2/train"

In [10]:
def _get_dataset(landmark_dir, params, class_dict_dir):
    """Build the batched input pipeline that is fed to the estimator.

    Args:
        landmark_dir: forwarded unchanged to ``create_dataset.dataset``.
        params: forwarded to ``create_dataset.dataset``; ``params.batch_size``
            sets the batch size.
        class_dict_dir: forwarded unchanged to ``create_dataset.dataset``.

    Returns:
        The dataset from ``create_dataset.dataset``, batched and then given a
        prefetch buffer of 1.
    """
    batched = create_dataset.dataset(landmark_dir, params, class_dict_dir).batch(params.batch_size)
    return batched.prefetch(1)


def _get_dataset_size(landmark_dir, image_type):
    """Count the files below ``landmark_dir`` whose name ends in ``.<image_type>``.

    The directory tree is walked recursively. The match is on the file-name
    suffix; the previous substring test also counted names such as
    ``a.jpg.bak``, which inflates the count (the caller uses the count to size
    the embeddings array, so extra entries become all-zero rows).

    Args:
        landmark_dir: root directory to walk.
        image_type: file extension without the leading dot, e.g. ``"jpg"``.

    Returns:
        The number of matching files as an int.
    """
    suffix = "." + image_type

    size = 0
    for _root, _dirs, files in os.walk(landmark_dir):
        size += sum(1 for name in files if name.endswith(suffix))

    tf.logging.info("Found {} {} landmarks in {}".format(size, image_type, landmark_dir))

    return size


def _get_embeddings(landmark_dir, estimator, params, class_dict_dir, landmark_size):
    """Run the estimator over ``landmark_dir`` and collect one embedding per prediction.

    Args:
        landmark_dir: image folder handed to ``_get_dataset``; its basename
            appears in the log output.
        estimator: object whose ``predict(input_fn)`` yields dicts that contain
            an ``'embeddings'`` entry.
        params: provides ``embedding_size`` (row width of the result) and is
            forwarded to ``_get_dataset``.
        class_dict_dir: forwarded to ``_get_dataset``.
        landmark_size: number of rows allocated for the result.

    Returns:
        Array of shape ``(landmark_size, params.embedding_size)``; row ``i``
        holds the ``i``-th prediction. Rows past the last prediction keep their
        initial zeros.
    """
    tf.logging.info("Predicting on " + landmark_dir)

    def input_fn():
        return _get_dataset(landmark_dir, params, class_dict_dir)

    predictions = estimator.predict(input_fn)

    # Pre-allocate the result and fill it row by row as predictions arrive.
    embeddings = np.zeros((landmark_size, params.embedding_size))
    for row, prediction in enumerate(predictions):
        embeddings[row] = prediction['embeddings']

    shape_text = ": {}".format(embeddings.shape)
    tf.logging.info("Embeddings shape in " + os.path.basename(landmark_dir) + shape_text)

    return embeddings

In [None]:
if __name__ == '__main__':

    # Load the parameters from <model_dir>/params.json
    json_path = os.path.join(model_dir, 'params.json')
    # Explicit raise instead of assert: asserts are stripped when Python runs with -O.
    if not os.path.isfile(json_path):
        raise FileNotFoundError("No json configuration file found at {}".format(json_path))
    params = Params(json_path)

    tf.reset_default_graph()
    tf.logging.set_verbosity(tf.logging.INFO)

    # Define the model
    tf.logging.info("Creating the model...")
    config = tf.estimator.RunConfig(tf_random_seed=230,
                                    model_dir=model_dir,
                                    save_summary_steps=params.save_summary_steps)
    estimator = tf.estimator.Estimator(model_fn, params=params, config=config)

    # Make sure <model_dir>/landmarks exists (no separate existence check needed)
    embeddings_dir = os.path.join(model_dir, "landmarks")
    os.makedirs(embeddings_dir, exist_ok=True)

    # Count the landmark images; the count sizes the embeddings array
    landmark_size = _get_dataset_size(os.path.normpath(landmark_dir), params.image_type)

    # Compute the embeddings, then round them to 6 decimals
    # NOTE(review): embeddings_dir is passed in the class_dict_dir position of
    # _get_embeddings -- confirm this is the folder create_dataset expects.
    embeddings = _get_embeddings(landmark_dir, estimator, params, embeddings_dir, landmark_size)
    embeddings = np.round(embeddings, 6)

In [12]:
embeddings

array([[ 0.032304, -0.013719, -0.002834, ..., -0.046816, -0.026536,
        -0.033028],
       [ 0.032123, -0.01372 , -0.003483, ..., -0.0451  , -0.027991,
        -0.03247 ],
       [ 0.032567, -0.013745, -0.003429, ..., -0.045663, -0.027912,
        -0.032821],
       ...,
       [ 0.032138, -0.014209, -0.003598, ..., -0.045272, -0.027055,
        -0.033075],
       [ 0.033194, -0.015614, -0.003484, ..., -0.045428, -0.027075,
        -0.032875],
       [ 0.033047, -0.014464, -0.0044  , ..., -0.0447  , -0.026596,
        -0.033183]])

In [13]:
import numpy as np
import os.path

class IdentityMetadata:
    """Location of one image, stored as ``<base>/<name>/<file>``.

    Attributes:
        base: dataset base directory.
        name: identity name (the sub-folder under ``base``).
        file: image file name inside that sub-folder.
    """

    def __init__(self, base, name, file):
        self.base, self.name, self.file = base, name, file

    def image_path(self):
        """Return ``base``, ``name`` and ``file`` joined into one path."""
        parts = (self.base, self.name, self.file)
        return os.path.join(*parts)

    def __repr__(self):
        # The repr is simply the image path.
        return self.image_path()
# Accepted file-name endings. They are matched with str.endswith and carry no
# leading dot.
s = 'jpg'
a = "jpeg"
b = "JPG"
c = "png"


def load_metadata(path):
    """Collect an IdentityMetadata entry for every image under ``path``.

    ``path`` is expected to contain one sub-directory per identity. Entries of
    ``path`` that are not directories (stray files such as ``.DS_Store``) are
    skipped; previously they made ``os.listdir`` raise. Entries are returned in
    the order ``os.listdir`` reports them.

    Args:
        path: dataset base directory.

    Returns:
        NumPy array built from the list of IdentityMetadata objects whose file
        name ends in one of the accepted suffixes (``s``, ``a``, ``b``, ``c``).
    """
    # Read the module-level suffixes at call time, as the original checks did.
    suffixes = (s, a, b, c)
    metadata = []
    for identity in os.listdir(path):
        identity_dir = os.path.join(path, identity)
        if not os.path.isdir(identity_dir):
            continue
        metadata.extend(
            IdentityMetadata(path, identity, f)
            for f in os.listdir(identity_dir)
            if f.endswith(suffixes)
        )
    return np.array(metadata)

# Index every image found under landmark_dir and report how many there are.
metadata = load_metadata(landmark_dir)
print(len(metadata))

# Identity (sub-folder) name of each image, in the same order as metadata.
targets = np.array([entry.name for entry in metadata])

67


In [None]:
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt

%matplotlib inline
# Project the embeddings to 2-D with t-SNE for plotting.
X_embedded = TSNE(n_components=2).fit_transform(embeddings)

fig = plt.figure(figsize=(15, 8))
# NOTE(review): the boolean mask below assumes row k of `embeddings` belongs to
# targets[k], i.e. that load_metadata and the prediction dataset enumerate the
# images in the same order -- confirm against create_dataset.
# np.unique returns the labels sorted, so every identity gets the same colour
# and legend position on each run; iterating a set gives no such guarantee.
for t in np.unique(targets):
    idx = targets == t
    plt.scatter(X_embedded[idx, 0], X_embedded[idx, 1], label=t)

plt.legend(bbox_to_anchor=(1, 1));
fig.savefig("cluster_500_epoch.jpg")

# Check t-SNE on Persons 1, 2, 3 and 5

In [15]:
# Model directory: params.json is read from here and it is handed to RunConfig as model_dir.
model_dir = "experiments/p1_training"
# Root folder of the images to embed; the same folder is scanned by load_metadata for labels.
landmark_dir = "Images/p1_2"

In [None]:
if __name__ == '__main__':

    # Load the parameters from <model_dir>/params.json
    json_path = os.path.join(model_dir, 'params.json')
    # Explicit raise instead of assert: asserts are stripped when Python runs with -O.
    if not os.path.isfile(json_path):
        raise FileNotFoundError("No json configuration file found at {}".format(json_path))
    params = Params(json_path)

    tf.reset_default_graph()
    tf.logging.set_verbosity(tf.logging.INFO)

    # Define the model
    tf.logging.info("Creating the model...")
    config = tf.estimator.RunConfig(tf_random_seed=230,
                                    model_dir=model_dir,
                                    save_summary_steps=params.save_summary_steps)
    estimator = tf.estimator.Estimator(model_fn, params=params, config=config)

    # Make sure <model_dir>/landmarks exists (no separate existence check needed)
    embeddings_dir = os.path.join(model_dir, "landmarks")
    os.makedirs(embeddings_dir, exist_ok=True)

    # Count the landmark images; the count sizes the embeddings array
    landmark_size = _get_dataset_size(os.path.normpath(landmark_dir), params.image_type)

    # Compute the embeddings, then round them to 6 decimals
    # NOTE(review): embeddings_dir is passed in the class_dict_dir position of
    # _get_embeddings -- confirm this is the folder create_dataset expects.
    embeddings = _get_embeddings(landmark_dir, estimator, params, embeddings_dir, landmark_size)
    embeddings = np.round(embeddings, 6)

In [17]:
import numpy as np
import os.path

class IdentityMetadata:
    """Location of one image, stored as ``<base>/<name>/<file>``.

    Attributes:
        base: dataset base directory.
        name: identity name (the sub-folder under ``base``).
        file: image file name inside that sub-folder.
    """

    def __init__(self, base, name, file):
        self.base, self.name, self.file = base, name, file

    def image_path(self):
        """Return ``base``, ``name`` and ``file`` joined into one path."""
        parts = (self.base, self.name, self.file)
        return os.path.join(*parts)

    def __repr__(self):
        # The repr is simply the image path.
        return self.image_path()
# Accepted file-name endings. They are matched with str.endswith and carry no
# leading dot.
s = 'jpg'
a = "jpeg"
b = "JPG"
c = "png"


def load_metadata(path):
    """Collect an IdentityMetadata entry for every image under ``path``.

    ``path`` is expected to contain one sub-directory per identity. Entries of
    ``path`` that are not directories (stray files such as ``.DS_Store``) are
    skipped; previously they made ``os.listdir`` raise. Entries are returned in
    the order ``os.listdir`` reports them.

    Args:
        path: dataset base directory.

    Returns:
        NumPy array built from the list of IdentityMetadata objects whose file
        name ends in one of the accepted suffixes (``s``, ``a``, ``b``, ``c``).
    """
    # Read the module-level suffixes at call time, as the original checks did.
    suffixes = (s, a, b, c)
    metadata = []
    for identity in os.listdir(path):
        identity_dir = os.path.join(path, identity)
        if not os.path.isdir(identity_dir):
            continue
        metadata.extend(
            IdentityMetadata(path, identity, f)
            for f in os.listdir(identity_dir)
            if f.endswith(suffixes)
        )
    return np.array(metadata)

# Index every image found under landmark_dir and report how many there are.
metadata = load_metadata(landmark_dir)
print(len(metadata))

# Identity (sub-folder) name of each image, in the same order as metadata.
targets = np.array([entry.name for entry in metadata])

774


In [None]:
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt

%matplotlib inline
# Project the embeddings to 2-D with t-SNE for plotting.
X_embedded = TSNE(n_components=2).fit_transform(embeddings)

fig = plt.figure(figsize=(15, 8))
# NOTE(review): the boolean mask below assumes row k of `embeddings` belongs to
# targets[k], i.e. that load_metadata and the prediction dataset enumerate the
# images in the same order -- confirm against create_dataset.
# np.unique returns the labels sorted, so every identity gets the same colour
# and legend position on each run; iterating a set gives no such guarantee.
for t in np.unique(targets):
    idx = targets == t
    plt.scatter(X_embedded[idx, 0], X_embedded[idx, 1], label=t)

plt.legend(bbox_to_anchor=(1, 1));
fig.savefig("cluster_p1_2_3_5.jpg")