In [1]:
# Change this to Lifecycle Configurations
!pip install --upgrade pip
!pip install facenet_pytorch

Requirement already up-to-date: pip in /home/ec2-user/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages (20.0.2)


In [2]:
from facenet_pytorch import InceptionResnetV1, MTCNN

## Download images from bucket

In [3]:
import boto3
import botocore

BUCKET_NAME = 'images-facerecogproj'
s3_res = boto3.resource('s3')

In [4]:
import re
import boto3
import os

def get_s3_keys(bucket):
    """Get a list of keys in an S3 bucket."""
    keys = []
    s3 = boto3.client('s3')
    resp = s3.list_objects_v2(Bucket= bucket)
    for obj in resp['Contents']:
        keys.append(obj['Key'])
    return keys

def download_keys(keys, file, bucket, verbose = False):
    """ Download all the keys in a list to file"""
    create_dirs(keys, file)
    s3 = boto3.client('s3')
    for i,key in enumerate(keys):
        if verbose:
            print(key)
        try:
            # download as local file
            s3.download_file(bucket, key, os.path.join(file,key))
            
        except botocore.exceptions.ClientError as e:
            if e.response['Error']['Code'] == "404":
                print("The object does not exist.")
            else:
                raise
    return print("{} files were downloaded!".format(i))

def create_dirs(keys, file):
    """ Given a set of keys will create the needed files"""
    if not os.path.exists(file):
        os.mkdir(file)
        
    folders = [re.split("/", key)[:-1] for key in keys]
    unique_folders = [list(x) for x in set(tuple(x) for x in folders)]
    success = 0
    for folders in unique_folders:
        path = os.path.join(file,"/".join(folders))
        if not os.path.exists(path):
            os.makedirs(path)
            success += 1
    return print("{} Folders were created".format(success))

In [5]:
keys = get_s3_keys(BUCKET_NAME)

In [6]:
download_keys(keys,"Images","images-facerecogproj")

0 Folders were created
170 files were downloaded!


## Preprocessing

### Cropping the faces of the images

In [9]:
import cv2

def crop(input_img):
    '''
    Crop an image given a file 
    
    Args:
       input_img (str): Name of the directory of the image

    Returns:
       img_cropped (numpy): A numpy array of a cropped image
    '''
    img = cv2.imread(input_img)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    mtcnn = MTCNN(select_largest=False, post_process = False, margin = 50)
    img_cropped = mtcnn(img)
    if img_cropped is None:
        print(f'Face not detected in file {input_img}')
        return
    img_cropped = img_cropped.permute(1, 2, 0).int().numpy() 
    return img_cropped

def cropAllAux(file_dir, output_dir, keys = None):
    """
    Crop all the functions given a directory and save it locally or in a bucket

    Args:
       file_dir (str): Name of the directory that contains all images
       output_dir (str): Name of the directory or bucket were the images should be saved
       keys (json): Json with credential keys


    Returns:
       Print statement
    """
     # It creates the folder if it does not exist
    if not keys:
        os.makedirs(output_dir, exist_ok= True)
        
    for file in os.listdir(file_dir):
        if ".jpg" in file:
            user = re.sub("_.*$","",file)
            img_cropped = crop(os.path.join(file_dir,file))
            if img_cropped is not None:
                if not keys:
                    os.makedirs(os.path.join(output_dir,user), exist_ok= True)
                    #print("Saving file {file} in directory {out}".format(file = file, out = output_dir))
                    cv2.imwrite(os.path.join(output_dir,user,file),img_cropped)
                else:
                    tmp_file = "tmp_"+file
                    tmp_path = os.path.join(os.getcwd(),tmp_file)
                    cv2.imwrite(tmp_file, img_cropped) 
                    try:
                        # Uploading to the bucket
                        print("Saving file {file} in bucket {out}".format(file = file, out = output_dir))
                        uploadBucket(tmp_path, output_dir, file, keys = keys)
                        os.remove(tmp_path) 
                    except: 
                        print("error")        
    return print("Done!")

def cropAll(file_dir, output_dir, keys = None):
    """
    Crop all the functions given a directory and save it locally or in a bucket

    Args:
       file_dir (str): Name of the directory that contains all images
       output_dir (str): Name of the directory or bucket were the images should be saved
       keys (json): Json with credential keys


    Returns:
       Print statement
    """
     # It creates the folder if it does not exist
    for roots,dirs,files in os.walk(file_dir):
        cropAllAux(roots, output_dir, keys = None)
    return print("Done!")

In [10]:
cropAll("Images", "Face", keys = None)

Done!
Face not detected in file Images/Azucena/Azucena_6.jpg
Face not detected in file Images/Azucena/Azucena_36.jpg
Face not detected in file Images/Azucena/Azucena_8.jpg
Face not detected in file Images/Azucena/Azucena_56.jpg
Face not detected in file Images/Azucena/Azucena_16.jpg
Face not detected in file Images/Azucena/Azucena_14.jpg
Face not detected in file Images/Azucena/Azucena_33.jpg
Face not detected in file Images/Azucena/Azucena_15.jpg
Face not detected in file Images/Azucena/Azucena_9.jpg
Face not detected in file Images/Azucena/Azucena_34.jpg
Face not detected in file Images/Azucena/Azucena_17.jpg
Face not detected in file Images/Azucena/Azucena_18.jpg
Face not detected in file Images/Azucena/Azucena_55.jpg
Face not detected in file Images/Azucena/Azucena_25.jpg
Face not detected in file Images/Azucena/Azucena_23.jpg
Face not detected in file Images/Azucena/Azucena_54.jpg
Face not detected in file Images/Azucena/Azucena_35.jpg
Face not detected in file Images/Azucena/Azuc

### Get the embbedings of the faces

In [11]:
from facenet_pytorch import InceptionResnetV1, MTCNN
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import os
import argparse
import cv2
import torch
from PIL import Image
import torchvision.transforms as transforms
import re
import random 

In [12]:
resnet = InceptionResnetV1(pretrained='vggface2').eval()

In [13]:
def readFaces(file,model,tensor = False):
    face_dict = {}
    for roots,dirs,files in os.walk(file):
        emb_list = []
        for file in files:
            if '.jpg' in file:
                print(file)
                path = os.path.join(roots,file)
                img_emb = embeddings(path,model)
                if not tensor:
                    img_emb = img_emb.detach().numpy()
                    emb_list.append(img_emb)
        face_dict[re.sub("_.*$","",file)] = emb_list
    train, label = [], []
    for key, values in face_dict.items():
        for val in values:
            train.append(val)
            label.append(key)
    return train, label
        

def embeddings(file, model):
    img = Image.open(file).convert('RGB')
    img_tensor = transforms.functional.to_tensor(img)
    embedding = model(img_tensor.unsqueeze(0))[0]
    return embedding

In [14]:
train, label = readFaces("./Face/", resnet)

Azucena_12.jpg
Azucena_52.jpg
Azucena_59.jpg
Azucena_11-checkpoint.jpg
Azucena_47.jpg
Azucena_64.jpg
Azucena_61.jpg
Azucena_26.jpg
Azucena_60.jpg
Azucena_11.jpg
Azucena_58.jpg
Azucena_62.jpg
Azucena_20.jpg
Azucena_44.jpg
Azucena_45.jpg
Azucena_7.jpg
Azucena_4.jpg
Azucena_27.jpg
Azucena_1.jpg
Azucena_22.jpg
Azucena_38.jpg
Azucena_42.jpg
Azucena_13.jpg
Azucena_50.jpg
Azucena_49.jpg
Azucena_31.jpg
Azucena_57.jpg
Azucena_40.jpg
Azucena_39.jpg
Azucena_43.jpg
Azucena_10.jpg
Azucena_29.jpg
Azucena_2.jpg
Azucena_51.jpg
Azucena_46.jpg
Azucena_65.jpg
Azucena_21.jpg
Azucena_41.jpg
Azucena_3.jpg
Azucena_37.jpg
Azucena_24.jpg
Azucena_19.jpg
Azucena_63.jpg
Azucena_5.jpg
Azucena_48.jpg
Azucena_30.jpg
Azucena_28.jpg
Iuliia_4.jpg
Iuliia_28.jpg
Iuliia_24.jpg
Iuliia_31.jpg
Iuliia_21.jpg
Iuliia_13.jpg
Iuliia_12.jpg
Iuliia_26.jpg
Iuliia_36.jpg
Iuliia_22.jpg
Iuliia_5.jpg
Iuliia_2.jpg
Iuliia_33.jpg
Iuliia_29.jpg
Iuliia_34.jpg
Iuliia_17.jpg
Iuliia_23.jpg
Iuliia_16.jpg
Iuliia_11.jpg
Iuliia_20.jpg
Iuliia_25.jpg

In [15]:
# Shuffling the lists
temp = list(zip(train, label)) 
random.shuffle(temp) 
train_rnd, label_rnd = zip(*temp) 
train_rnd, label_rnd = list(train_rnd), list(label_rnd)

In [16]:
#Splitting training and test
test_num = 10
testX, testY = train_rnd[-test_num:],label_rnd[-test_num:]
trainX, trainY = train_rnd[:-test_num],label_rnd[:-test_num]

In [17]:
# Fitting an SVM model
model = SVC(kernel='linear', probability=True)
model.fit(trainX, trainY)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='linear', max_iter=-1, probability=True, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

## Testing

#### Testing in the data test

In [18]:
model.predict_proba(testX)

array([[0.97401945, 0.0114553 , 0.01452525],
       [0.9654119 , 0.02120769, 0.01338041],
       [0.02165868, 0.01471799, 0.96362333],
       [0.0110714 , 0.00649179, 0.98243681],
       [0.0210554 , 0.01826512, 0.96067948],
       [0.01463834, 0.01244852, 0.97291314],
       [0.11329819, 0.85028664, 0.03641517],
       [0.97549695, 0.00819438, 0.01630867],
       [0.01201156, 0.01241341, 0.97557502],
       [0.04396155, 0.92436595, 0.0316725 ]])

In [15]:
#testY

#### Testing in an image from a different person

In [16]:
!wget https://upload.wikimedia.org/wikipedia/commons/c/c1/Lionel_Messi_20180626.jpg

In [19]:
mtcnn = MTCNN(select_largest=False, post_process = False, margin = 50)

In [20]:
img_test = cv2.imread("Lionel_Messi_20180626.jpg")
img_test = cv2.cvtColor(img_test, cv2.COLOR_BGR2RGB)
img_test_cropped = mtcnn(img_test)
test_emb = resnet(img_test_cropped.unsqueeze(0))[0]
test_emb = test_emb.detach().numpy()

In [21]:
model.predict_proba(test_emb.reshape(1,-1))

array([[0.22798799, 0.04523323, 0.72677878]])

### Upload the data to the bucket

In [22]:
import numpy as np
import os
from sklearn import datasets
import pickle as pkl 

# Load data and join it in a dataframe
data = {'data': trainX, "label": trainY}

# Create directory and write csv
os.makedirs('./data', exist_ok=True)

with open('./data/data.pickle', 'wb') as handle:
    pkl.dump(data, handle, protocol=pkl.HIGHEST_PROTOCOL)

### Save the data from the bucket

In [33]:
with open(os.path.join("./data", "data.pickle"), 'rb') as handle:
    data = pkl.load(handle)

# Training the model

In [177]:
from sagemaker.sklearn.estimator import SKLearn

script_path = './model/model.py'

sklearn = SKLearn(
    entry_point=script_path,
    train_instance_type="ml.m4.xlarge",
    role=role,
    sagemaker_session=sagemaker_session
    #hyperparameters={'max_leaf_nodes': 30}
)

In [178]:
import sagemaker
from sagemaker import get_execution_role


sagemaker_session = sagemaker.Session()
role=sagemaker.get_execution_role()
inputs = sagemaker_session.upload_data(path='data', key_prefix='data/data.pickle', bucket='video-facerecogproj')


In [179]:
sklearn.fit({'train': inputs})

2020-04-27 18:11:40 Starting - Starting the training job...
2020-04-27 18:11:41 Starting - Launching requested ML instances......
2020-04-27 18:12:48 Starting - Preparing the instances for training......
2020-04-27 18:13:45 Downloading - Downloading input data...
2020-04-27 18:14:40 Training - Training image download completed. Training in progress..[34m2020-04-27 18:14:41,168 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training[0m
[34m2020-04-27 18:14:41,171 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2020-04-27 18:14:41,183 sagemaker_sklearn_container.training INFO     Invoking user training script.[0m
[34m2020-04-27 18:14:41,450 sagemaker-containers INFO     Module model does not provide a setup.py. [0m
[34mGenerating setup.py[0m
[34m2020-04-27 18:14:41,450 sagemaker-containers INFO     Generating setup.cfg[0m
[34m2020-04-27 18:14:41,450 sagemaker-containers INFO     Generating MANIFEST.in[0m
[34

In [180]:
predictor = sklearn.deploy(initial_instance_count=1,
                           instance_type="ml.m4.xlarge"
                              )

---------------!

### Testing predictor

In [29]:
import os
import pickle as pkl
with open(os.path.join("data/", "data.pickle"), 'rb') as handle:
    data = pkl.load(handle)

In [30]:
train = data['data']
label = data['label']

In [31]:
#print(predictor.predict(train[0].reshape(1,-1)))

## Testing invoke method

In [32]:
import io
import numpy as np

def np2npy(arr):
    npy = io.BytesIO()
    np.save(npy, arr)
    return npy

In [34]:
npy = np2npy(testX[0].reshape(1,-1))

In [35]:
npy.seek(0)

0

In [36]:
import boto3

runtime= boto3.client('runtime.sagemaker')

response = runtime.invoke_endpoint(EndpointName="sagemaker-scikit-learn-2020-04-27-18-11-39-809",
                                   ContentType = 'application/x-npy',
                                   Body=npy)


In [37]:
response['Body'].read().decode()

'["Azucena"]'