In [1]:
# S3 prefix
prefix = 'facerecogproj'

import sagemaker
from sagemaker import get_execution_role

sagemaker_session = sagemaker.Session()

# Get a SageMaker-compatible role used by this Notebook Instance.
role = get_execution_role()

In [2]:
# Change this to Lifecycle Configurations
!pip install --upgrade pip
!pip install facenet_pytorch

Collecting pip
  Using cached https://files.pythonhosted.org/packages/54/0c/d01aa759fdc501a58f431eb594a17495f15b88da142ce14b5845662c13f3/pip-20.0.2-py2.py3-none-any.whl
[31mfastai 1.0.60 requires nvidia-ml-py3, which is not installed.[0m
Installing collected packages: pip
  Found existing installation: pip 10.0.1
    Uninstalling pip-10.0.1:
      Successfully uninstalled pip-10.0.1
Successfully installed pip-20.0.2
[33mYou are using pip version 20.0.2, however version 20.1b1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m
Collecting facenet_pytorch
  Downloading facenet_pytorch-2.2.9-py3-none-any.whl (1.9 MB)
[K     |████████████████████████████████| 1.9 MB 39.3 MB/s eta 0:00:01
Installing collected packages: facenet-pytorch
Successfully installed facenet-pytorch-2.2.9


In [3]:
from facenet_pytorch import InceptionResnetV1, MTCNN

## Download the images from bucket

In [4]:
import boto3
import botocore

BUCKET_NAME = 'images-facerecogproj' # replace with your bucket name
s3_res = boto3.resource('s3')

In [14]:
import re
import boto3
import os

def get_s3_keys(bucket):
    """Get a list of keys in an S3 bucket."""
    keys = []
    s3 = boto3.client('s3')
    resp = s3.list_objects_v2(Bucket= bucket)
    for obj in resp['Contents']:
        keys.append(obj['Key'])
    return keys

def download_keys(keys, file, bucket, verbose = False):
    """ Download all the keys in a list to file"""
    create_dirs(keys, file)
    s3 = boto3.client('s3')
    for i,key in enumerate(keys):
        if verbose:
            print(key)
        try:
            # download as local file
            s3.download_file(bucket, keyot, os.path.join(file,key))
            
        except botocore.exceptions.ClientError as e:
            if e.response['Error']['Code'] == "404":
                print("The object does not exist.")
            else:
                raise
    return print("{} files were downloaded!".format(i))

def create_dirs(keys, file):
    """ Given a set of keys will create the needed files"""
    if not os.path.exists(file):
        os.mkdir(file)
        
    folders = [re.split("/", key)[:-1] for key in keys]
    unique_folders = [list(x) for x in set(tuple(x) for x in folders)]
    success = 0
    for folders in unique_folders:
        path = os.path.join(file,"/".join(folders))
        if not os.path.exists(path):
            os.makedirs(path)
            success += 1
    return print("{} Folders were created".format(success))

In [12]:
import cv2

def crop(input_img):
    '''
      :return:  a cropped face in numpy array format
    '''
    img = cv2.imread(input_img)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    mtcnn = MTCNN(select_largest=False, post_process = False, margin = 50)
    img_cropped = mtcnn(img)
    #simple message notifying if a face was detected or not
    if img_cropped is None:
        print(f'Face not detected in file {input_img}')
        return
    img_cropped = img_cropped.permute(1, 2, 0).int().numpy() 
    return img_cropped
  
def plot_crop_face(img_cropped, output_img):
    '''
    :params:
    :return:
    '''
    plt.imshow(img_cropped)  
    plt.axis('off')
    plt.savefig(output_img)

def cropFace(input_img, output_img = None):
    '''
    :params:
    :return:
    '''
    if output_img is None:
        return crop(input_img)
    else:
        img_cropped = crop(input_img)
        plot_crop_face(img_cropped,output_img)

def cropAllAux(file_dir, output_dir, keys = None):
    """
    Crop all the functions given a directory and save it locally or in a bucket

    Args:
       file_dir (str): Name of the directory that contains all images
       output_dir (str): Name of the directory or bucket were the images should be saved
       keys (json): Json with credential keys


    Returns:
       Print statement
    """
     # It creates the folder if it does not exist
    if not keys:
        os.makedirs(output_dir, exist_ok= True)
        
    for file in os.listdir(file_dir):
        if ".jpg" in file:
            user = re.sub("_.*$","",file)
            img_cropped = crop(os.path.join(file_dir,file))
            if img_cropped is not None:
                if not keys:
                    os.makedirs(os.path.join(output_dir,user), exist_ok= True)
                    #print("Saving file {file} in directory {out}".format(file = file, out = output_dir))
                    cv2.imwrite(os.path.join(output_dir,user,file),img_cropped)
                else:
                    tmp_file = "tmp_"+file
                    tmp_path = os.path.join(os.getcwd(),tmp_file)
                    cv2.imwrite(tmp_file, img_cropped) 
                    try:
                        # Uploading to the bucket
                        print("Saving file {file} in bucket {out}".format(file = file, out = output_dir))
                        uploadBucket(tmp_path, output_dir, file, keys = keys)
                        os.remove(tmp_path) 
                    except: 
                        print("error")        
    return print("Done!")

def cropAll(file_dir, output_dir, keys = None):
    """
    Crop all the functions given a directory and save it locally or in a bucket

    Args:
       file_dir (str): Name of the directory that contains all images
       output_dir (str): Name of the directory or bucket were the images should be saved
       keys (json): Json with credential keys


    Returns:
       Print statement
    """
     # It creates the folder if it does not exist
    for roots,dirs,files in os.walk(file_dir):
        cropAllAux(roots, output_dir, keys = None)
    return print("Done!")

In [7]:
keys = get_s3_keys(BUCKET_NAME)

In [18]:
download_keys(keys,"Images2","images-facerecogproj")

3 Folders were created
216 files were downloaded!


In [13]:
cropAll("Images", "Face2", keys = None)

Done!
Done!
Done!
Face not detected in file Images/Azucena/Azucena_53.jpg
Face not detected in file Images/Azucena/Azucena_16.jpg
Face not detected in file Images/Azucena/Azucena_9.jpg
Face not detected in file Images/Azucena/Azucena_34.jpg
Face not detected in file Images/Azucena/Azucena_36.jpg
Face not detected in file Images/Azucena/Azucena_33.jpg
Face not detected in file Images/Azucena/Azucena_17.jpg
Face not detected in file Images/Azucena/Azucena_56.jpg
Face not detected in file Images/Azucena/Azucena_18.jpg
Face not detected in file Images/Azucena/Azucena_32.jpg
Face not detected in file Images/Azucena/Azucena_25.jpg
Face not detected in file Images/Azucena/Azucena_35.jpg
Face not detected in file Images/Azucena/Azucena_14.jpg
Face not detected in file Images/Azucena/Azucena_6.jpg
Face not detected in file Images/Azucena/Azucena_15.jpg
Face not detected in file Images/Azucena/Azucena_55.jpg
Face not detected in file Images/Azucena/Azucena_23.jpg
Face not detected in file Images

## Ready to have some fun!

In [15]:
from facenet_pytorch import InceptionResnetV1, MTCNN
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import os
import argparse
import cv2
import torch
from PIL import Image
import torchvision.transforms as transforms
import re
import random 

In [16]:
resnet = InceptionResnetV1(pretrained='vggface2').eval()

Downloading parameters (1/2)
Downloading parameters (2/2)


In [17]:
def readFaces(file,model,tensor = False):
    face_dict = {}
    for roots,dirs,files in os.walk(file):
        emb_list = []
        for file in files:
            if '.jpg' in file:
                print(file)
                path = os.path.join(roots,file)
                img_emb = embeddings(path,model)
                if not tensor:
                    img_emb = img_emb.detach().numpy()
                    emb_list.append(img_emb)
        face_dict[re.sub("_.*$","",file)] = emb_list
    train, label = [], []
    for key, values in face_dict.items():
        for val in values:
            train.append(val)
            label.append(key)
    return train, label
        

def embeddings(file, model):
    img = Image.open(file).convert('RGB')
    img_tensor = transforms.functional.to_tensor(img)
    embedding = model(img_tensor.unsqueeze(0))[0]
    return embedding

In [21]:
train, label = readFaces("./Face2/", resnet)

Joaquin_59.jpg
Joaquin_34.jpg
Joaquin_35.jpg
Joaquin_31.jpg
Joaquin_19.jpg
Joaquin_69.jpg
Joaquin_42.jpg
Joaquin_33.jpg
Joaquin_49.jpg
Joaquin_54.jpg
Joaquin_60.jpg
Joaquin_6.jpg
Joaquin_39.jpg
Joaquin_11.jpg
Joaquin_53.jpg
Joaquin_65.jpg
Joaquin_15.jpg
Joaquin_37.jpg
Joaquin_21.jpg
Joaquin_1.jpg
Joaquin_4.jpg
Joaquin_13.jpg
Joaquin_10.jpg
Joaquin_40.jpg
Joaquin_27.jpg
Joaquin_68.jpg
Joaquin_32.jpg
Joaquin_52.jpg
Joaquin_29.jpg
Joaquin_44.jpg
Joaquin_26.jpg
Joaquin_9.jpg
Joaquin_41.jpg
Joaquin_47.jpg
Joaquin_46.jpg
Joaquin_30.jpg
Joaquin_70.jpg
Joaquin_57.jpg
Joaquin_20.jpg
Joaquin_58.jpg
Joaquin_28.jpg
Joaquin_22.jpg
Joaquin_16.jpg
Joaquin_61.jpg
Joaquin_23.jpg
Joaquin_8.jpg
Joaquin_51.jpg
Joaquin_66.jpg
Joaquin_62.jpg
Joaquin_43.jpg
Joaquin_45.jpg
Joaquin_17.jpg
Joaquin_63.jpg
Joaquin_3.jpg
Joaquin_67.jpg
Joaquin_24.jpg
Joaquin_18.jpg
Joaquin_64.jpg
Joaquin_36.jpg
Joaquin_25.jpg
Joaquin_55.jpg
Joaquin_7.jpg
Joaquin_48.jpg
Joaquin_50.jpg
Joaquin_14.jpg
Joaquin_38.jpg
Joaquin_5.jpg
Joa

In [None]:
# Shuffling the lists
temp = list(zip(train, label)) 
random.shuffle(temp) 
train_rnd, label_rnd = zip(*temp) 
train_rnd, label_rnd = list(train_rnd), list(label_rnd)

In [None]:
#Splitting training and test
test_num = 10
testX, testY = train_rnd[-test_num:],label_rnd[-test_num:]
trainX, trainY = train_rnd[:-test_num],label_rnd[:-test_num]

In [115]:
# Fitting an SVM model
#model = SVC(kernel='linear', probability=True)
#model.fit(trainX, trainY)
#model.predict_proba(testX)

array([[0.16303605, 0.78415599, 0.05280796],
       [0.94992948, 0.02739326, 0.02267726],
       [0.03150116, 0.94707056, 0.02142828],
       [0.9561988 , 0.01776587, 0.02603533],
       [0.95415555, 0.01496364, 0.03088081],
       [0.01980741, 0.94492273, 0.03526986],
       [0.06191662, 0.90872056, 0.02936282],
       [0.02056926, 0.9596588 , 0.01977194],
       [0.93503805, 0.02733405, 0.03762789],
       [0.01768651, 0.95730366, 0.02500983]])

In [110]:
from sklearn.neural_network import MLPClassifier

NN = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(10,), random_state=1)
NN.fit(trainX, trainY)
NN.predict_proba(testX)

array([[3.33503112e-03, 9.93693898e-01, 2.97107076e-03],
       [9.99999203e-01, 1.17902160e-08, 7.84874196e-07],
       [8.26047911e-07, 9.99990639e-01, 8.53449766e-06],
       [9.99999515e-01, 6.50850760e-10, 4.84818208e-07],
       [9.99999399e-01, 2.41660661e-10, 6.00387974e-07],
       [1.35347293e-07, 9.99978007e-01, 2.18578115e-05],
       [1.56510482e-05, 9.99944676e-01, 3.96729747e-05],
       [5.36867391e-07, 9.99991938e-01, 7.52545384e-06],
       [9.99996318e-01, 1.15813507e-08, 3.67043117e-06],
       [1.40728274e-07, 9.99984012e-01, 1.58471141e-05]])

In [95]:
!wget https://upload.wikimedia.org/wikipedia/commons/c/c1/Lionel_Messi_20180626.jpg

--2020-04-23 20:53:14--  https://upload.wikimedia.org/wikipedia/commons/c/c1/Lionel_Messi_20180626.jpg
Resolving upload.wikimedia.org (upload.wikimedia.org)... 208.80.154.240, 2620:0:861:ed1a::2:b
Connecting to upload.wikimedia.org (upload.wikimedia.org)|208.80.154.240|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 578583 (565K) [image/jpeg]
Saving to: ‘Lionel_Messi_20180626.jpg’


2020-04-23 20:53:14 (28.9 MB/s) - ‘Lionel_Messi_20180626.jpg’ saved [578583/578583]



In [97]:
mtcnn = MTCNN(select_largest=False, post_process = False, margin = 50)

In [107]:
img_test = cv2.imread("Lionel_Messi_20180626.jpg")
img_test = cv2.cvtColor(img_test, cv2.COLOR_BGR2RGB)
img_test_cropped = mtcnn(img_test)
test_emb = resnet(img_test_cropped.unsqueeze(0))[0]
test_emb = test_emb.detach().numpy()

In [112]:
NN.predict_proba(test_emb.reshape(1,-1))

array([[7.57191977e-01, 3.97304437e-04, 2.42410719e-01]])

## Upload the data

In [55]:
import numpy as np
import os
from sklearn import datasets
import pickle as pkl 

# Load data and join it in a dataframe
data = {'data': train, "label": label}

# Create directory and write csv
os.makedirs('./data', exist_ok=True)
#data.to_csv("./data/data.csv", index = False)

with open('./data/data.pickle', 'wb') as handle:
    pkl.dump(data, handle, protocol=pkl.HIGHEST_PROTOCOL)

In [6]:
with open(os.path.join(args.train, "data.pickle"), 'rb') as handle:
    data = pkl.load(handle)

NameError: name 'os' is not defined

In [82]:
import argparse
import pandas as pd
import os
import pickle as pkl 

from sklearn.neural_network import MLPClassifier
from sklearn.externals import joblib


if __name__ == '__main__':
    
    parser = argparse.ArgumentParser()

    # Hyperparameters are described here. In this simple example we are just including one hyperparameter.
    parser.add_argument('--solver', type=str, default='lbfgs')
    parser.add_argument('--alpha', type=float, default= .00001)
    parser.add_argument('--hidden_layer_sizes', type=tuple, default= (10,))
    parser.add_argument('--random_state', type=int, default= 1)
    

    # Sagemaker specific arguments. Defaults are set in the environment variables.
    parser.add_argument('--output-data-dir', type=str, default=os.environ['SM_OUTPUT_DATA_DIR'])
    parser.add_argument('--model-dir', type=str, default=os.environ['SM_MODEL_DIR'])
    parser.add_argument('--train', type=str, default=os.environ['SM_CHANNEL_TRAIN'])

    args, unknown = parser.parse_known_args()

    # Take the set of files and read them all into a single pandas dataframe
    with open(os.path.join(args.train, "data.pickle"), 'rb') as handle:
        data = pkl.load(handle)

    # labels are in the first column
    train_y = data['label']
    train_X = data['data']

    # Now use scikit-learn's NN to train the model.
    model = MLPClassifier(solver=args.solver, 
                       alpha=args.alpha, 
                       hidden_layer_sizes=args.hidden_layer_sizes, 
                       random_state=args.random_state)
    
    model = model.fit(train_X, train_y)

    # Print the coefficients of the trained classifier, and save the coefficients
    joblib.dump(model, os.path.join(args.model_dir, "model.joblib"))
    
def model_fn(model_dir):
    """Deserialized and return fitted model

    Note that this should have the same name as the serialized model in the main method
    """
    model = joblib.load(os.path.join(model_dir, "model.joblib"))
    return model

In [3]:
from sagemaker.sklearn.estimator import SKLearn

script_path = './model/model.py'

sklearn = SKLearn(
    entry_point=script_path,
    train_instance_type="ml.m4.xlarge",
    role=role,
    sagemaker_session=sagemaker_session
    #hyperparameters={'max_leaf_nodes': 30}
)

In [4]:
import sagemaker
from sagemaker import get_execution_role

sagemaker_session = sagemaker.Session()
role=sagemaker.get_execution_role()
inputs = sagemaker_session.upload_data(path='data', key_prefix='data/data.pickle', bucket='video-facerecogproj')


In [5]:
sklearn.fit({'train': inputs})

2020-04-25 05:05:05 Starting - Starting the training job...
2020-04-25 05:05:07 Starting - Launching requested ML instances......
2020-04-25 05:06:15 Starting - Preparing the instances for training......
2020-04-25 05:07:11 Downloading - Downloading input data...
2020-04-25 05:07:54 Training - Downloading the training image..[34m2020-04-25 05:08:13,628 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training[0m
[34m2020-04-25 05:08:13,634 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2020-04-25 05:08:13,658 sagemaker_sklearn_container.training INFO     Invoking user training script.[0m
[34m2020-04-25 05:08:13,961 sagemaker-containers INFO     Module model does not provide a setup.py. [0m
[34mGenerating setup.py[0m
[34m2020-04-25 05:08:13,961 sagemaker-containers INFO     Generating setup.cfg[0m
[34m2020-04-25 05:08:13,961 sagemaker-containers INFO     Generating MANIFEST.in[0m
[34m2020-04-25 05:08:13,962 

In [7]:
predictor = sklearn.deploy(initial_instance_count=1, instance_type="ml.m4.xlarge")

---------------!

In [13]:
import os
import pickle as pkl
with open(os.path.join("data/", "data.pickle"), 'rb') as handle:
    data = pkl.load(handle)

In [16]:
train = data['data']
label = data['label']

In [19]:
print(predictor.predict(train[:10]))

['Joaquin' 'Joaquin' 'Joaquin' 'Joaquin' 'Joaquin' 'Joaquin' 'Joaquin'
 'Joaquin' 'Joaquin' 'Joaquin']


In [20]:
print(label[:10])

['Joaquin', 'Joaquin', 'Joaquin', 'Joaquin', 'Joaquin', 'Joaquin', 'Joaquin', 'Joaquin', 'Joaquin', 'Joaquin']


In [21]:
sklearn.delete_endpoint()