<a href="https://colab.research.google.com/github/TLazarevic/BazePodataka2Projekat/blob/master/ColabManual.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from collections import defaultdict
from glob import glob
from random import choice, sample

import cv2
import numpy as np
import pandas as pd
import sklearn
from sklearn import svm, metrics
import pickle
from imutils import face_utils
import dlib
import matplotlib.pyplot  as plt
from sklearn.neighbors import KNeighborsClassifier

# ------------------------------- DATA --------------------------------

# Family-name fragments that can be held out for validation. Each entry is
# meant to be passed to get_train_val, which matches it as a substring; the
# training script in this file only uses index 0.
val_families_list = ["F09", "F04", "F08", "F06", "F02"]


def get_train_val(
    family_name,
    train_file_path="drive/My Drive/train_relationships.csv",
    train_folders_path="drive/My Drive/train/",
):
    """Split image paths and kinship pairs into training and validation sets.

    Images are discovered with the glob ``<train_folders_path>*/*/*.jpg`` and
    grouped per person, where a person is identified by the last two folder
    names of the image path joined as ``"<family>/<person>"``.

    Args:
        family_name: Substring selecting the validation families. A person
            whose ``"<family>/<person>"`` key contains it goes to validation;
            everyone else goes to training.
        train_file_path: CSV file with columns ``p1`` and ``p2`` holding
            person keys of related pairs.
        train_folders_path: Root folder of the images. It is concatenated
            directly with the glob pattern, so it must end with ``/``.

    Returns:
        A tuple ``(train, val, train_person_to_images_map,
        val_person_to_images_map)``. ``train`` and ``val`` are lists of
        ``(p1, p2)`` pairs for which both people have at least one image;
        the two maps go from person key to the list of that person's images.
    """
    # Normalise Windows separators so the "/"-based splitting below works.
    all_images = [p.replace('\\', '/') for p in glob(train_folders_path + "*/*/*.jpg")]

    train_person_to_images_map = defaultdict(list)
    val_person_to_images_map = defaultdict(list)
    for image_path in all_images:
        parts = image_path.split("/")
        person = parts[-3] + "/" + parts[-2]
        # Match on the person key rather than the whole path, so the location
        # of the dataset on disk can never influence the split. This is the
        # same key the relationship filter below is matched against.
        if family_name in person:
            val_person_to_images_map[person].append(image_path)
        else:
            train_person_to_images_map[person].append(image_path)

    # A set gives O(1) membership tests while filtering the relationships.
    known_people = set(train_person_to_images_map) | set(val_person_to_images_map)

    relationships = pd.read_csv(train_file_path)
    pairs = [
        pair
        for pair in zip(relationships.p1.values, relationships.p2.values)
        if pair[0] in known_people and pair[1] in known_people
    ]

    # A pair is assigned according to its first member only.
    train = [pair for pair in pairs if family_name not in pair[0]]
    val = [pair for pair in pairs if family_name in pair[0]]
    return train, val, train_person_to_images_map, val_person_to_images_map

def euclidianDistance(a, b):
    """Return the Euclidean distance between two 2-D points.

    Args:
        a: Indexable point; ``a[0]`` and ``a[1]`` are read as x and y.
        b: Indexable point with the same layout as ``a``.

    Returns:
        The distance as a Python float.
    """
    # np.hypot replaces the np.math alias, which NumPy 2.0 removed.
    return float(np.hypot(a[0] - b[0], a[1] - b[1]))

def angle(a, b, c):
    """Return the angle at vertex ``b`` from ray b->a to ray b->c, in degrees.

    Args:
        a: Indexable 2-D point (x at index 0, y at index 1).
        b: The vertex of the angle, same layout as ``a``.
        c: Indexable 2-D point, same layout as ``a``.

    Returns:
        The angle as a Python float in the range [0, 360).
    """
    # NumPy ufuncs replace the np.math alias, which NumPy 2.0 removed.
    heading_c = np.arctan2(c[1] - b[1], c[0] - b[0])
    heading_a = np.arctan2(a[1] - b[1], a[0] - b[0])
    degrees = float(np.degrees(heading_c - heading_a))
    # Negative differences are shifted into the positive range.
    return degrees + 360 if degrees < 0 else degrees

def _clamped_pixel(image, x, y):
    """Return ``image[y, x]`` with the coordinates clamped to the image bounds."""
    height, width = image.shape[:2]
    row = min(max(int(y), 0), height - 1)
    col = min(max(int(x), 0), width - 1)
    return image[row, col]


def extractFeatures(dots, img, imgcolor):
    """Build an L2-normalised feature vector from landmarks and pixel samples.

    The vector holds, in order: two landmark distances divided by the face
    width, the grayscale value at landmark 29, the three channel values of the
    colour image at the centre of the eye region, and the grayscale value at
    that same centre.

    Args:
        dots: Indexable landmark collection where ``dots[i]`` is an ``(x, y)``
            pair; indices up to 51 are read. Presumably the 68-point dlib
            layout produced in ``read_img`` -- confirm against the caller.
        img: Grayscale image as an array indexed ``[row, column]``.
        imgcolor: Colour image of the same scene, indexed
            ``[row, column, channel]``.

    Returns:
        A 1-D float array of length 7 scaled to unit Euclidean norm, or all
        zeros if every feature is zero.
    """
    # Scale all distances relative to the face width so they do not depend on
    # how large the face appears in the image.
    scaleFactor = dots[15][0] - dots[1][0]
    if scaleFactor == 0:
        # Degenerate landmarks: leave the distances unscaled instead of
        # producing inf/nan features.
        scaleFactor = 1

    distances = [
        euclidianDistance(dots[39], dots[27]),  # eye-to-nose distance
        euclidianDistance(dots[33], dots[51]),  # nose-to-mouth distance
    ]
    # Build a new list: assigning to the loop variable would discard the
    # scaled values.
    features = [d / scaleFactor for d in distances]

    # Landmarks are (x, y) but images are indexed [row=y, column=x]. They may
    # also fall slightly outside the image, hence the clamping.
    nose_x, nose_y = dots[29][0], dots[29][1]
    features.append(_clamped_pixel(img, nose_x, nose_y))  # skin tone at nose

    # Centre of the box spanned by landmarks 37/38 (x) and 38/40 (y).
    eye_left, eye_top, eye_bottom = dots[37], dots[38], dots[40]
    xCenter = int(eye_left[0] + (eye_top[0] - eye_left[0]) / 2)
    yCenter = int(eye_top[1] + (eye_bottom[1] - eye_top[1]) / 2)

    eye_color = _clamped_pixel(imgcolor, xCenter, yCenter)
    features.append(eye_color[0])
    features.append(eye_color[1])
    features.append(eye_color[2])
    features.append(_clamped_pixel(img, xCenter, yCenter))

    feature_array = np.asarray(features, dtype=float)
    norm = np.linalg.norm(feature_array)
    if norm == 0:
        # Nothing to normalise; avoid dividing by zero.
        return feature_array
    return feature_array / norm

def read_img(path, detector, predictor):
    """Load an image and return the feature vector of the first detected face.

    Args:
        path: Path of the image file.
        detector: Callable invoked as ``detector(gray_image, 1)`` that returns
            an iterable of face rectangles.
        predictor: Callable invoked as ``predictor(gray_image, rect)`` whose
            result is converted with ``face_utils.shape_to_np``.

    Returns:
        A NumPy array of features from ``extractFeatures``, or ``None`` when
        the image cannot be read or no face is detected.
    """
    # The image is decoded twice on purpose: once as grayscale for detection
    # and landmarks, once in colour for the colour features.
    img = cv2.imread(path, 0)
    imgcolor = cv2.imread(path)
    if img is None or imgcolor is None:
        # cv2.imread signals an unreadable file by returning None.
        return None

    for rect in detector(img, 1):
        shape = face_utils.shape_to_np(predictor(img, rect))
        features = extractFeatures(shape, img, imgcolor)
        # Only the first detected face is used.
        return np.array(features)

    return None

def gen(list_tuples, person_to_images_map, batch_size=16):
    """Build one batch of pairwise feature differences with kinship labels.

    Half of the batch (``batch_size // 2``) is sampled from the related pairs
    in ``list_tuples`` and labelled 1. The rest is filled with random pairs of
    distinct people that are not listed as related in either order, labelled
    0. For each pair one random image per person is run through ``read_img``
    and the element-wise absolute difference of the two feature vectors
    becomes the sample.

    Despite the name this is a plain function: it returns a single batch.

    Args:
        list_tuples: List of ``(p1, p2)`` person-key pairs that are related.
        person_to_images_map: Mapping from person key to a list of image
            paths for that person.
        batch_size: Number of pairs to draw before unusable ones are dropped.

    Returns:
        A tuple ``(distances, labels)`` of equally long lists where
        ``labels[i]`` belongs to ``distances[i]``. Pairs for which a person
        has no images or ``read_img`` returned ``None`` are dropped, so the
        lists may be shorter than ``batch_size``.

    Raises:
        ValueError: If ``list_tuples`` has fewer than ``batch_size // 2``
            entries (raised by ``random.sample``), or if negatives are needed
            but fewer than two people are available.
    """
    ppl = list(person_to_images_map.keys())
    # Set lookup keeps the rejection sampling below cheap.
    related = set(map(tuple, list_tuples))

    batch_tuples = sample(list_tuples, batch_size // 2)
    labels = [1] * len(batch_tuples)

    if len(batch_tuples) < batch_size and len(ppl) < 2:
        # With fewer than two people the loop below could never terminate.
        raise ValueError("need at least two people to sample unrelated pairs")

    while len(batch_tuples) < batch_size:
        p1 = choice(ppl)
        p2 = choice(ppl)

        if p1 != p2 and (p1, p2) not in related and (p2, p1) not in related:
            batch_tuples.append((p1, p2))
            labels.append(0)

    detector = dlib.get_frontal_face_detector()
    predictor = dlib.shape_predictor('drive/My Drive/shape_predictor_68_face_landmarks.dat')

    distances = []
    kept_labels = []
    # Features and labels are collected together in one forward pass so that
    # dropping a pair can never shift a label onto the wrong sample.
    for pair, label in zip(batch_tuples, labels):
        images_1 = person_to_images_map.get(pair[0])
        images_2 = person_to_images_map.get(pair[1])
        if not images_1 or not images_2:
            # Report the person without images and skip the pair.
            print(pair[0] if not images_1 else pair[1])
            continue

        features_1 = read_img(choice(images_1), detector, predictor)
        if features_1 is None:
            continue
        features_2 = read_img(choice(images_2), detector, predictor)
        if features_2 is None:
            continue

        distances.append(np.absolute(np.asarray(features_1) - np.asarray(features_2)))
        kept_labels.append(label)

    return distances, kept_labels


def computeHOG(image):
    """Compute a HOG descriptor of ``image`` at one fixed window location.

    Args:
        image: Image passed unchanged to ``cv2.HOGDescriptor.compute``.

    Returns:
        The descriptor returned by ``compute`` for the window at ``(10, 20)``.
    """
    descriptor = cv2.HOGDescriptor(
        (64, 64),  # winSize
        (16, 16),  # blockSize
        (8, 8),    # blockStride
        (8, 8),    # cellSize
        9,         # nbins
        1,         # derivAperture
        4.0,       # winSigma
        0,         # histogramNormType
        0.2,       # L2HysThreshold
        0,         # gammaCorrection
        64,        # nlevels
    )
    # compute(img[, winStride[, padding[, locations]]]) -> descriptors
    stride = (8, 8)
    pad = (8, 8)
    sample_points = ((10, 20),)
    return descriptor.compute(image, stride, pad, sample_points)


# ------------------------------- MODEL --------------------------------

model = sklearn.neighbors.KNeighborsClassifier()

# Only the first entry of val_families_list is held out; looping over the
# whole list was left disabled in the original notebook.
train, val, train_person_to_images_map, val_person_to_images_map = get_train_val(val_families_list[0])

# gen returns (features, labels); one large batch serves as the whole dataset.
data = gen(train, train_person_to_images_map, batch_size=2000)
val_data = gen(val, val_person_to_images_map, batch_size=200)
print(len(data[0]))
print(len(val_data[0]))

model.fit(data[0], data[1])

filename = 'finalized_model.sav'
# The context manager makes sure the file is flushed and closed.
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)

y_pred = model.predict(val_data[0])
print("Accuracy:", metrics.accuracy_score(val_data[1], y_pred))

# print(all_images.__len__())
# # history = model.fit(gen(train, train_person_to_images_map, batch_size=16))
#
# n_samples = all_images.__len__()
# cv = ShuffleSplit(n_splits=5, test_size=0.1, random_state=0)
# cross_val_score(model, all_images, all_person_to_images_map, cv=cv)


1872
191
Accuracy: 0.49214659685863876


In [None]:
!pip install dlib

