In [1]:
# S3 prefix
prefix = 'facerecogproj'

import sagemaker
from sagemaker import get_execution_role

sagemaker_session = sagemaker.Session()

# Get a SageMaker-compatible role used by this Notebook Instance.
role = get_execution_role()

In [2]:
# Change this to Lifecycle Configurations
!pip install --upgrade pip
!pip install facenet_pytorch

Collecting pip
  Using cached https://files.pythonhosted.org/packages/54/0c/d01aa759fdc501a58f431eb594a17495f15b88da142ce14b5845662c13f3/pip-20.0.2-py2.py3-none-any.whl
[31mfastai 1.0.60 requires nvidia-ml-py3, which is not installed.[0m
Installing collected packages: pip
  Found existing installation: pip 10.0.1
    Uninstalling pip-10.0.1:
      Successfully uninstalled pip-10.0.1
Successfully installed pip-20.0.2
[33mYou are using pip version 20.0.2, however version 20.1b1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m
Collecting facenet_pytorch
  Downloading facenet_pytorch-2.2.9-py3-none-any.whl (1.9 MB)
[K     |████████████████████████████████| 1.9 MB 12.9 MB/s eta 0:00:01
Installing collected packages: facenet-pytorch
Successfully installed facenet-pytorch-2.2.9


In [3]:
from facenet_pytorch import InceptionResnetV1, MTCNN

## Download the images from bucket

In [4]:
import boto3
import botocore

BUCKET_NAME = 'images-facerecogproj'
s3_res = boto3.resource('s3')

In [9]:
import re
import boto3
import os

def get_s3_keys(bucket):
    """Get a list of keys in an S3 bucket."""
    keys = []
    s3 = boto3.client('s3')
    resp = s3.list_objects_v2(Bucket= bucket)
    for obj in resp['Contents']:
        keys.append(obj['Key'])
    return keys

def download_keys(keys, file, bucket, verbose = False):
    """ Download all the keys in a list to file"""
    create_dirs(keys, file)
    s3 = boto3.client('s3')
    for i,key in enumerate(keys):
        if verbose:
            print(key)
        try:
            # download as local file
            s3.download_file(bucket, key, os.path.join(file,key))
            
        except botocore.exceptions.ClientError as e:
            if e.response['Error']['Code'] == "404":
                print("The object does not exist.")
            else:
                raise
    return print("{} files were downloaded!".format(i))

def create_dirs(keys, file):
    """ Given a set of keys will create the needed files"""
    if not os.path.exists(file):
        os.mkdir(file)
        
    folders = [re.split("/", key)[:-1] for key in keys]
    unique_folders = [list(x) for x in set(tuple(x) for x in folders)]
    success = 0
    for folders in unique_folders:
        path = os.path.join(file,"/".join(folders))
        if not os.path.exists(path):
            os.makedirs(path)
            success += 1
    return print("{} Folders were created".format(success))

In [6]:
import cv2

def crop(input_img):
    '''
      :return:  a cropped face in numpy array format
    '''
    img = cv2.imread(input_img)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    mtcnn = MTCNN(select_largest=False, post_process = False, margin = 50)
    img_cropped = mtcnn(img)
    #simple message notifying if a face was detected or not
    if img_cropped is None:
        print(f'Face not detected in file {input_img}')
        return
    img_cropped = img_cropped.permute(1, 2, 0).int().numpy() 
    return img_cropped
  
def plot_crop_face(img_cropped, output_img):
    '''
    :params:
    :return:
    '''
    plt.imshow(img_cropped)  
    plt.axis('off')
    plt.savefig(output_img)

def cropFace(input_img, output_img = None):
    '''
    :params:
    :return:
    '''
    if output_img is None:
        return crop(input_img)
    else:
        img_cropped = crop(input_img)
        plot_crop_face(img_cropped,output_img)

def cropAllAux(file_dir, output_dir, keys = None):
    """
    Crop all the functions given a directory and save it locally or in a bucket

    Args:
       file_dir (str): Name of the directory that contains all images
       output_dir (str): Name of the directory or bucket were the images should be saved
       keys (json): Json with credential keys


    Returns:
       Print statement
    """
     # It creates the folder if it does not exist
    if not keys:
        os.makedirs(output_dir, exist_ok= True)
        
    for file in os.listdir(file_dir):
        if ".jpg" in file:
            user = re.sub("_.*$","",file)
            img_cropped = crop(os.path.join(file_dir,file))
            if img_cropped is not None:
                if not keys:
                    os.makedirs(os.path.join(output_dir,user), exist_ok= True)
                    #print("Saving file {file} in directory {out}".format(file = file, out = output_dir))
                    cv2.imwrite(os.path.join(output_dir,user,file),img_cropped)
                else:
                    tmp_file = "tmp_"+file
                    tmp_path = os.path.join(os.getcwd(),tmp_file)
                    cv2.imwrite(tmp_file, img_cropped) 
                    try:
                        # Uploading to the bucket
                        print("Saving file {file} in bucket {out}".format(file = file, out = output_dir))
                        uploadBucket(tmp_path, output_dir, file, keys = keys)
                        os.remove(tmp_path) 
                    except: 
                        print("error")        
    return print("Done!")

def cropAll(file_dir, output_dir, keys = None):
    """
    Crop all the functions given a directory and save it locally or in a bucket

    Args:
       file_dir (str): Name of the directory that contains all images
       output_dir (str): Name of the directory or bucket were the images should be saved
       keys (json): Json with credential keys


    Returns:
       Print statement
    """
     # It creates the folder if it does not exist
    for roots,dirs,files in os.walk(file_dir):
        cropAllAux(roots, output_dir, keys = None)
    return print("Done!")

In [7]:
keys = get_s3_keys(BUCKET_NAME)

In [10]:
download_keys(keys,"Images","images-facerecogproj")

0 Folders were created
170 files were downloaded!


In [20]:
cropAll("Images", "Face", keys = None)

NameError: name 'cropAll' is not defined

## Ready to have some fun!

In [4]:
from facenet_pytorch import InceptionResnetV1, MTCNN
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import os
import argparse
import cv2
import torch
from PIL import Image
import torchvision.transforms as transforms
import re
import random 

In [5]:
resnet = InceptionResnetV1(pretrained='vggface2').eval()

Downloading parameters (1/2)
Downloading parameters (2/2)


In [6]:
def readFaces(file,model,tensor = False):
    face_dict = {}
    for roots,dirs,files in os.walk(file):
        emb_list = []
        for file in files:
            if '.jpg' in file:
                print(file)
                path = os.path.join(roots,file)
                img_emb = embeddings(path,model)
                if not tensor:
                    img_emb = img_emb.detach().numpy()
                    emb_list.append(img_emb)
        face_dict[re.sub("_.*$","",file)] = emb_list
    train, label = [], []
    for key, values in face_dict.items():
        for val in values:
            train.append(val)
            label.append(key)
    return train, label
        

def embeddings(file, model):
    img = Image.open(file).convert('RGB')
    img_tensor = transforms.functional.to_tensor(img)
    embedding = model(img_tensor.unsqueeze(0))[0]
    return embedding

In [7]:
train, label = readFaces("./Face/", resnet)

Azucena_12.jpg
Azucena_52.jpg
Azucena_59.jpg
Azucena_47.jpg
Azucena_64.jpg
Azucena_61.jpg
Azucena_26.jpg
Azucena_60.jpg
Azucena_11.jpg
Azucena_58.jpg
Azucena_62.jpg
Azucena_20.jpg
Azucena_44.jpg
Azucena_45.jpg
Azucena_7.jpg
Azucena_4.jpg
Azucena_27.jpg
Azucena_1.jpg
Azucena_22.jpg
Azucena_38.jpg
Azucena_42.jpg
Azucena_13.jpg
Azucena_50.jpg
Azucena_49.jpg
Azucena_31.jpg
Azucena_57.jpg
Azucena_40.jpg
Azucena_39.jpg
Azucena_43.jpg
Azucena_10.jpg
Azucena_29.jpg
Azucena_2.jpg
Azucena_51.jpg
Azucena_46.jpg
Azucena_65.jpg
Azucena_21.jpg
Azucena_41.jpg
Azucena_3.jpg
Azucena_37.jpg
Azucena_24.jpg
Azucena_19.jpg
Azucena_63.jpg
Azucena_5.jpg
Azucena_48.jpg
Azucena_30.jpg
Azucena_28.jpg
Iuliia_4.jpg
Iuliia_28.jpg
Iuliia_24.jpg
Iuliia_31.jpg
Iuliia_21.jpg
Iuliia_13.jpg
Iuliia_12.jpg
Iuliia_26.jpg
Iuliia_36.jpg
Iuliia_22.jpg
Iuliia_5.jpg
Iuliia_2.jpg
Iuliia_33.jpg
Iuliia_29.jpg
Iuliia_34.jpg
Iuliia_17.jpg
Iuliia_23.jpg
Iuliia_16.jpg
Iuliia_11.jpg
Iuliia_20.jpg
Iuliia_25.jpg
Iuliia_19.jpg
Iuliia_18.j

In [8]:
# Shuffling the lists
temp = list(zip(train, label)) 
random.shuffle(temp) 
train_rnd, label_rnd = zip(*temp) 
train_rnd, label_rnd = list(train_rnd), list(label_rnd)

In [9]:
#Splitting training and test
test_num = 10
testX, testY = train_rnd[-test_num:],label_rnd[-test_num:]
trainX, trainY = train_rnd[:-test_num],label_rnd[:-test_num]

In [10]:
# Fitting an SVM model
model = SVC(kernel='linear', probability=True)
model.fit(trainX, trainY)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='linear', max_iter=-1, probability=True, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

## Testing

In [14]:
model.predict_proba(testX)
#model.predict(testX)

In [15]:
#testY

In [108]:
# Test
import json
import ast
import numpy as np

with open("new1.txt") as json_data:
    test = json.load(json_data)
    
res = ast.literal_eval(test['body']) 
model.predict(np.asarray(res).reshape(1,-1))

In [111]:
model.predict_proba(np.asarray(res).reshape(1,-1))

array([[0.00995952, 0.00784139, 0.98219909]])

In [16]:
#!wget https://upload.wikimedia.org/wikipedia/commons/c/c1/Lionel_Messi_20180626.jpg

In [34]:
mtcnn = MTCNN(select_largest=False, post_process = False, margin = 50)

In [35]:
img_test = cv2.imread("Lionel_Messi_20180626.jpg")
img_test = cv2.cvtColor(img_test, cv2.COLOR_BGR2RGB)
img_test_cropped = mtcnn(img_test)
test_emb = resnet(img_test_cropped.unsqueeze(0))[0]
test_emb = test_emb.detach().numpy()

In [107]:
model.predict_proba(test_emb.reshape(1,-1))

array([[0.20291716, 0.04471621, 0.75236663]])

## Upload the data

In [32]:
import numpy as np
import os
from sklearn import datasets
import pickle as pkl 

# Load data and join it in a dataframe
data = {'data': trainX, "label": trainY}

# Create directory and write csv
os.makedirs('./data', exist_ok=True)
#data.to_csv("./data/data.csv", index = False)

with open('./data/data.pickle', 'wb') as handle:
    pkl.dump(data, handle, protocol=pkl.HIGHEST_PROTOCOL)

In [33]:
with open(os.path.join("./data", "data.pickle"), 'rb') as handle:
    data = pkl.load(handle)

In [91]:
import argparse
import pandas as pd
import os
import pickle as pkl 
import numpy as np
from sklearn.svm import SVC
from sklearn.externals import joblib
import sklearn
from io import StringIO


if __name__ == '__main__':
    
    parser = argparse.ArgumentParser()

    # Hyperparameters are described here. In this simple example we are just including one hyperparameter.
    parser.add_argument('--C', type=float, default= 1)
    parser.add_argument('--kernel', type=str, default='linear')
    parser.add_argument('--gamma', type= str, default='scale')
    parser.add_argument('--probability', type= bool, default= True)

    

    # Sagemaker specific arguments. Defaults are set in the environment variables.
    #parser.add_argument('--output-data-dir', type=str, default=os.environ['SM_OUTPUT_DATA_DIR'])
    #parser.add_argument('--model-dir', type=str, default=os.environ['SM_MODEL_DIR'])
    #parser.add_argument('--train', type=str, default=os.environ['SM_CHANNEL_TRAIN'])

    args, unknown = parser.parse_known_args()

    # Take the set of files and read them all into a single pandas dataframe
#     with open(os.path.join(args.train, "data.pickle"), 'rb') as handle:
#         data = pkl.load(handle)

    # labels are in the first column
    train_y = data['label']
    train_X = data['data']

    # Now use scikit-learn's NN to train the model.
    model = model = SVC(C = args.C,
                        kernel= args.kernel,
                        gamma = args.gamma,
                        probability= args.probability
                       )



    model = model.fit(train_X, train_y)

    # Print the coefficients of the trained classifier, and save the coefficients
    joblib.dump(model, os.path.join(args.model_dir, "model.joblib"))
    
def model_fn(model_dir):
    """Deserialized and return fitted model

    Note that this should have the same name as the serialized model in the main method
    """
    model = joblib.load(os.path.join(model_dir, "model.joblib"))
    return model


def input_fn(request_body, request_content_type):
    """An input_fn that loads a pickled numpy array"""
    if request_content_type == "application/python-pickle":
        array = np.load(StringIO(request_body))
        return array
    elif request_content_type == "application/x-npy":
        array = np.load(StringIO(request_body))
        return array
    else:
        raise ValueError("The model does not support that content type")


def predict_fn(input_data, model):
    prediction = model.predict(input_data.reshape(1,-1))
    pred_prob = model.predict_proba(input_data.reshape(1,-1))
    return np.array([prediction, pred_prob])



AttributeError: 'Namespace' object has no attribute 'model_dir'

In [56]:
from sagemaker.sklearn.estimator import SKLearn

script_path = './model/model.py'

sklearn = SKLearn(
    entry_point=script_path,
    train_instance_type="ml.m4.xlarge",
    role=role,
    sagemaker_session=sagemaker_session
    #hyperparameters={'max_leaf_nodes': 30}
)

In [57]:
import sagemaker
from sagemaker import get_execution_role


sagemaker_session = sagemaker.Session()
role=sagemaker.get_execution_role()
inputs = sagemaker_session.upload_data(path='data', key_prefix='data/data.pickle', bucket='video-facerecogproj')


In [58]:
sklearn.fit({'train': inputs})

2020-04-27 14:45:47 Starting - Starting the training job...
2020-04-27 14:45:50 Starting - Launching requested ML instances.........
2020-04-27 14:47:29 Starting - Preparing the instances for training......
2020-04-27 14:48:27 Downloading - Downloading input data...
2020-04-27 14:48:59 Training - Downloading the training image.[34m2020-04-27 14:49:20,412 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training[0m
[34m2020-04-27 14:49:20,415 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2020-04-27 14:49:20,428 sagemaker_sklearn_container.training INFO     Invoking user training script.[0m
[34m2020-04-27 14:49:21,015 sagemaker-containers INFO     Module model does not provide a setup.py. [0m
[34mGenerating setup.py[0m
[34m2020-04-27 14:49:21,015 sagemaker-containers INFO     Generating setup.cfg[0m
[34m2020-04-27 14:49:21,015 sagemaker-containers INFO     Generating MANIFEST.in[0m
[34m2020-04-27 14:49:21,01

In [59]:
predictor = sklearn.deploy(initial_instance_count=1,
                           instance_type="ml.m4.xlarge"
                              )

---------------!

In [13]:
import os
import pickle as pkl
with open(os.path.join("data/", "data.pickle"), 'rb') as handle:
    data = pkl.load(handle)

In [16]:
train = data['data']
label = data['label']

In [49]:
print(predictor.predict(train[0]))

ModelError: An error occurred (ModelError) when calling the InvokeEndpoint operation: Received server error (500) from model with message "<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
<title>500 Internal Server Error</title>
<h1>Internal Server Error</h1>
<p>The server encountered an internal error and was unable to complete your request. Either the server is overloaded or there is an error in the application.</p>
". See https://us-east-1.console.aws.amazon.com/cloudwatch/home?region=us-east-1#logEventViewer:group=/aws/sagemaker/Endpoints/sagemaker-scikit-learn-2020-04-27-14-14-44-763 in account 706015522303 for more information.

In [26]:
type(train)

list

In [20]:
print(label[:10])

['Joaquin', 'Joaquin', 'Joaquin', 'Joaquin', 'Joaquin', 'Joaquin', 'Joaquin', 'Joaquin', 'Joaquin', 'Joaquin']


In [62]:
sklearn.delete_endpoint()

In [35]:
import io
import numpy as np
def np2csv(arr):
    csv = io.BytesIO()
    np.savetxt(csv, arr, delimiter=',', fmt='%g')
    return csv.getvalue().decode().rstrip()

In [54]:
csv = np2csv(testX[0].reshape((1, -1)))

In [55]:
csv

'-0.0276967,-0.00947405,-0.0437801,-0.00265858,0.0659367,-0.0583536,0.0515616,-0.0571609,0.0152766,-0.0283148,0.0513925,-0.0626204,0.0454505,-0.0574892,-0.051499,-0.0289718,-0.0649302,0.052929,0.0365728,0.0483517,-0.000967004,1.59277e-05,0.0607418,0.0108548,0.0150728,-0.0374967,0.0137491,-0.133851,-0.0824334,0.0435455,0.0533719,0.0382914,-0.0632364,-0.0352004,0.08106,0.0039423,0.118406,-0.052887,-0.00549808,0.00700636,-0.0702291,0.037005,-0.0142568,-0.00580784,0.000908085,0.00844869,-0.0418347,0.0548378,-0.0324489,0.0117128,0.0233124,-0.100277,-0.0826881,0.082128,-0.022932,-0.0436458,0.000652619,0.00654165,-0.0482634,-0.00501404,0.0456865,0.047958,0.0145354,-0.0282688,0.00468444,-0.0396894,0.00947171,-0.0296179,0.0590882,0.0229156,0.0203455,-0.00783025,-0.0115986,-0.000327578,1.4225e-07,-0.0583341,0.041384,-0.00976306,0.0501623,0.027557,0.0223692,0.0727697,0.0429439,-0.0354119,-0.00747092,0.0426174,0.032778,0.012745,0.0534812,0.00144818,0.0154812,0.0304081,0.0257239,-0.00314719,0.01125

In [46]:
testX[0].shape

(512,)

In [111]:
# Test
import json
import ast
import numpy as np

with open("new1.txt") as json_data:
    test = json.load(json_data)

In [31]:
import json 

test = list(testX[0])
to_json= json.dumps(test)

TypeError: Object of type 'float32' is not JSON serializable

In [50]:
data = testX[0]
#payload = json.dumps(data.tolist())

In [20]:
type(data)

numpy.ndarray

In [117]:
from six import BytesIO
test = BytesIO(data)

In [134]:
data.shape

(512,)

In [18]:
# from sagemaker.sklearn.model import SKLearnPredictor

# predictor = SKLearnPredictor("sagemaker-scikit-learn-2020-04-27-05-21-00-478", sagemaker_session=None)
# result = predictor.predict(data)

In [61]:
csv

'-0.0276967,-0.00947405,-0.0437801,-0.00265858,0.0659367,-0.0583536,0.0515616,-0.0571609,0.0152766,-0.0283148,0.0513925,-0.0626204,0.0454505,-0.0574892,-0.051499,-0.0289718,-0.0649302,0.052929,0.0365728,0.0483517,-0.000967004,1.59277e-05,0.0607418,0.0108548,0.0150728,-0.0374967,0.0137491,-0.133851,-0.0824334,0.0435455,0.0533719,0.0382914,-0.0632364,-0.0352004,0.08106,0.0039423,0.118406,-0.052887,-0.00549808,0.00700636,-0.0702291,0.037005,-0.0142568,-0.00580784,0.000908085,0.00844869,-0.0418347,0.0548378,-0.0324489,0.0117128,0.0233124,-0.100277,-0.0826881,0.082128,-0.022932,-0.0436458,0.000652619,0.00654165,-0.0482634,-0.00501404,0.0456865,0.047958,0.0145354,-0.0282688,0.00468444,-0.0396894,0.00947171,-0.0296179,0.0590882,0.0229156,0.0203455,-0.00783025,-0.0115986,-0.000327578,1.4225e-07,-0.0583341,0.041384,-0.00976306,0.0501623,0.027557,0.0223692,0.0727697,0.0429439,-0.0354119,-0.00747092,0.0426174,0.032778,0.012745,0.0534812,0.00144818,0.0154812,0.0304081,0.0257239,-0.00314719,0.01125

In [60]:
import boto3

runtime= boto3.client('runtime.sagemaker')

#payload = np2csv(test_X)
response = runtime.invoke_endpoint(EndpointName="sagemaker-scikit-learn-2020-04-27-14-45-47-186",
                                   ContentType= 'text/csv',
                                    Body=csv)
#result = json.loads(response['Body'].read().decod


ModelError: An error occurred (ModelError) when calling the InvokeEndpoint operation: Received server error (500) from model with message "<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
<title>500 Internal Server Error</title>
<h1>Internal Server Error</h1>
<p>The server encountered an internal error and was unable to complete your request. Either the server is overloaded or there is an error in the application.</p>
". See https://us-east-1.console.aws.amazon.com/cloudwatch/home?region=us-east-1#logEventViewer:group=/aws/sagemaker/Endpoints/sagemaker-scikit-learn-2020-04-27-14-45-47-186 in account 706015522303 for more information.

In [105]:
test

<_io.BytesIO at 0x7f4569cf2518>

In [None]:
from sagemaker.tensorflow import TensorFlowPredictor

predictor = TensorFlowPredictor('myexistingendpoint')
result = predictor.predict(['my request body'])