# PROJEKAT

In [1]:
from deepface import DeepFace
import numpy as np
import cv2
import matplotlib.pyplot as plt
import dlib
import os
import csv
from sklearn.svm import SVC

In [2]:
def loadImageAnd2Gray(path):
    img_haar = cv2.imread(path)
    return cv2.cvtColor(img_haar, cv2.COLOR_BGR2GRAY)

In [3]:
def display_gray_image(image):
    plt.imshow(image, 'gray')
    
def display_image(image):
    plt.imshow(image)

In [4]:
def draw_coords(image, coords):
    cv2.drawContours(image, [coords], -1, (255,255,255), 3)

In [5]:
def to_np_array(pose_landmarks, el_num=68, dtype="int"):
    coords = np.zeros((el_num, 2), dtype=dtype)
    
    for i in range(0, el_num):
        coords[i] = (pose_landmarks.part(i).x, pose_landmarks.part(i).y)
    
    return coords

In [6]:
def draw_circle(image, coords):
    cv2.circle(image, (coords[0], coords[1]), 25, (255, 255, 255), 2)

In [7]:
def show_faces_with_contures(path):
    face_cascade_classifier = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
    
    img = loadImageAnd2Gray(path)

    faces = face_cascade_classifier.detectMultiScale(img, scaleFactor=1.3, minNeighbors=4, minSize=(30, 30), 
                                              flags=cv2.CASCADE_SCALE_IMAGE)

    for (x, y , w ,h) in faces:
        face_rect = dlib.rectangle(x, y, x + w, y + h)
        aligned_face = align_face(img, face_rect)

    display_gray_image(img)

# Korisne stvari
cv2.rectangle(img, (x,y), (x+w, y+h), (0, 0 , 255), 3) -> iscrtavanje faca na ulaznoj slici

# Pronalazenje 68 tacaka i cetriranje

In [8]:
predictor_model = "shape_predictor_68_face_landmarks.dat"
face_pose_predictor = dlib.shape_predictor(predictor_model)
DESIRED_LEFT_EYE = (0.35, 0.35)
DESIRED_RIGHT_EYE = (0.65, 0.35)
DESIRED_FACE_WIDTH = 110
DESIRED_FACE_HEIGHT = 110

In [9]:
def align_face(img, face_rect, verbose=False):

    pose_landmarks = face_pose_predictor(img, face_rect)
    landmark_coords = to_np_array(pose_landmarks)

    left_eye_coords = landmark_coords[36:41]
    right_eye_coords = landmark_coords[42:47]
    
    if verbose:
        draw_coords(img, landmark_coords)
        draw_coords(img, left_eye_coords)
        draw_coords(img, right_eye_coords)

    left_eye_center = left_eye_coords.mean(axis=0).astype("int")
    right_eye_center = right_eye_coords.mean(axis=0).astype("int")

    dx = right_eye_center[1] - left_eye_center[1]
    dy = right_eye_center[0] - left_eye_center[0]
    angle = np.degrees(np.arctan2(dx, dy)) # - 180

    dist = np.sqrt((dx * dx) + (dy * dy))
    desiredDist = (DESIRED_RIGHT_EYE[0] - DESIRED_LEFT_EYE[0])
    desiredDist *= DESIRED_FACE_WIDTH
    scale = desiredDist / dist

    eyes_center = (int((left_eye_center[0] + right_eye_center[0]) // 2), int((left_eye_center[1] + right_eye_center[1]) // 2))
    M = cv2.getRotationMatrix2D(eyes_center, angle, scale)
    tx = DESIRED_FACE_WIDTH * 0.5
    ty = DESIRED_FACE_HEIGHT * DESIRED_LEFT_EYE[1]
    M[0, 2] += (tx - eyes_center[0])
    M[1, 2] += (ty - eyes_center[1])

    (w, h) = (DESIRED_FACE_WIDTH, DESIRED_FACE_HEIGHT)
    aligned_face = cv2.warpAffine(img, M, (w, h), flags=cv2.INTER_CUBIC)

    return aligned_face

# Ekstrakcija obelezja hog-om

In [10]:
IMAGE_ROWS = 110
IMAGE_COLS = 110
NBINS = 9             # broj binova
CELL_SIZE = (8, 8)    # broj piksela po celiji
BLOCK_SIZE = (3, 3)   # broj celija po bloku

In [11]:
hog = cv2.HOGDescriptor(_winSize=(IMAGE_COLS // CELL_SIZE[1] * CELL_SIZE[1], 
                                  IMAGE_ROWS // CELL_SIZE[0] * CELL_SIZE[0]),
                        _blockSize=(BLOCK_SIZE[1] * CELL_SIZE[1],
                                    BLOCK_SIZE[0] * CELL_SIZE[0]),
                        _blockStride=(CELL_SIZE[1], CELL_SIZE[0]),
                        _cellSize=(CELL_SIZE[1], CELL_SIZE[0]),
                        _nbins=NBINS)

In [12]:
def extract_hog_features(hog, aligned_image):
    return hog.compute(aligned_image)

# Detekcija face sa haar-om

In [13]:
face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')

In [14]:
def detect_faces_haar(img, scaleFactor_=1.3, minNeighbors_=4, minSize_=(100, 100), flags_=cv2.CASCADE_SCALE_IMAGE):
    face_rects = []
    
    faces = face_cascade.detectMultiScale(img, scaleFactor=scaleFactor_, minNeighbors=minNeighbors_, minSize=minSize_, 
                                          flags=flags_)
    for (x, y , w ,h) in faces:
        face_rect = dlib.rectangle(x, y, x + w, y + h)
        face_rects.append(face_rect)
    
    return face_rects

# Detekcija face sa hog-om

In [15]:
face_detector = dlib.get_frontal_face_detector()

In [16]:
def detect_faces_hog(img):
    faces = face_detector(img, 1)
    return faces

# Ekstrakcija obelezja

In [17]:
def extract_faces(faces, img):
    extracted_faces_features = []
    
    for face_rect in faces:   
        aligned_face = align_face(img, face_rect)

        extracted_face_features = extract_hog_features(hog, aligned_face)
        extracted_faces_features.append(extracted_face_features.flatten())
    return extracted_faces_features

# Treniranje modela

In [18]:
image_dir = "Train"

x_train = []
y_train = []

current_id = 0
label_ids = {}

for root, dirs, files in os.walk(image_dir):
    for file in files:
        path = os.path.join(root, file)
        underscore_idx = os.path.basename(path).find('_')
        label = os.path.basename(path)[ : underscore_idx]
        
        if not label in label_ids.values():
            label_ids[current_id] = label
            current_id += 1
        id_ = label_ids[current_id-1]
        
        image = loadImageAnd2Gray(path)
        faces = detect_faces_haar(image)
        
        if(len(faces)) != 1:
            continue
        
        hoged = extract_faces(faces, image)
        x_train.append(hoged[0])
        y_train.append(id_)

svm_model_linear = SVC(kernel = 'linear', C = 1)
svm_model_linear.probability = True
svm_model_linear.fit(x_train, y_train)

# print(svm_model_linear.get_params())

SVC(C=1, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='linear',
    max_iter=-1, probability=True, random_state=None, shrinking=True, tol=0.001,
    verbose=False)

# Klasifikatori

In [19]:
MODEL_MEAN_VALUES=(78.4263377603, 87.7689143744, 114.895847746)

def create_blob(crop_img):
    return cv2.dnn.blobFromImage(crop_img, 1.0, (227,227), MODEL_MEAN_VALUES, swapRB=False)

# Klasifikator pola

In [20]:
genderProto="Classifiers/gender_deploy.prototxt"
genderModel="Classifiers/gender_net.caffemodel"

genderList=['Male','Female']

genderNet=cv2.dnn.readNet(genderModel,genderProto)

In [21]:
def predict_gender(img_colored, x, y, w, h):
        
    crop_img = img_colored[y:y+h, x:x+w]
    
    blob=create_blob(crop_img)
    
    genderNet.setInput(blob)
    genderPreds=genderNet.forward()
    gender=genderList[genderPreds[0].argmax()]
    
    return gender

# Klasifikator starosti

In [22]:
ageProto="Classifiers/age_deploy.prototxt"
ageModel="Classifiers/age_net.caffemodel"

ageList=['(0-2)', '(4-6)', '(8-12)', '(15-20)', '(25-32)', '(38-43)', '(48-53)', '(60-100)']

ageNet=cv2.dnn.readNet(ageModel,ageProto)

In [23]:
def predict_age(img_colored, x, y, w, h):
        
    crop_img = img_colored[y:y+h, x:x+w]
    
    blob=create_blob(crop_img)
    
    ageNet.setInput(blob)
    agePreds=ageNet.forward()
    age=ageList[agePreds[0].argmax()]
    
    return age

# Klasifikator za rasu

In [24]:
def predict_ethnicity(img_colored, x, y, w, h):
    crop_img = img_colored[y:y+h, x:x+w]
    try:
        result = DeepFace.analyze(crop_img, actions=['race'])
    except ValueError:
        result = {'dominant_race':"Unknown"}
    return result

# Main 

In [26]:
video_path = "Videos/Scarlett_Downey_video2.mp4"

cap = cv2.VideoCapture(video_path)
last_frame_num = cap.get(cv2.CAP_PROP_FRAME_COUNT)

actors = {}

fps_trash_hold = 30
current_frame = 0

while True:

    ret, frame = cap.read()
    gray  = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    frame_colored = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    
    faces = detect_faces_haar(gray)
    
    for face_rect in faces:        
        w = face_rect.width()
        h = face_rect.height()
        x = face_rect.left()
        y = face_rect.top()
        coords = np.array([[x, y], [x + w, y], [x + w, y + h], [x, y + h]])
        draw_coords(frame, coords)
        
        hoged = extract_faces([face_rect], gray)
        
        svm_predictions = svm_model_linear.predict_proba(np.array(hoged))
        max_idx = np.argmax(svm_predictions)
        max_value = svm_predictions[0][max_idx]
        
        if max_value >= 0.6:
            font = cv2.FONT_HERSHEY_SIMPLEX
            name = label_ids[max_idx]
            name = name.replace('-', ' ')
            if name not in actors:
                actors[name] = ["", "", ""] 
            if current_frame == 3 or current_frame%fps_trash_hold == 0:
                actors[name][0] = predict_gender(frame_colored, x, y, w, h)
                actors[name][1] = predict_age(frame_colored, x, y, w, h)
                ethinicity_result = predict_ethnicity(frame_colored, x, y, w, h)
                actors[name][2] = ethinicity_result['dominant_race']
            color = (255, 255, 255)
            stroke = 2
            text1 = "{}".format(name)
            text2 = "{} {} {}".format(actors[name][0], actors[name][1], actors[name][2])
            cv2.putText(frame, text1, (x,y - 40), font, 0.5, color, stroke, cv2.LINE_AA)
            cv2.putText(frame, text2, (x,y - 20), font, 0.5, color, stroke, cv2.LINE_AA)
            
    cv2.imshow('frame',frame)
    if last_frame_num == current_frame:
        break
    current_frame += 1
    if cv2.waitKey(20) & 0xFF == ord('q'):
        break
        
cap.release()
cv2.destroyAllWindows()

error: OpenCV(4.5.5) D:\a\opencv-python\opencv-python\opencv\modules\imgproc\src\color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'


# Rename script

In [27]:
def rename_pictures(image_dir, name, begin_idx = 1):
    name = name + '_'
    jpeg = ".jpeg"
    i = begin_idx
    for filename in os.listdir(image_dir):
        file_to_rename_path = image_dir + "/" + filename
        new_name = image_dir + "/" + name + str(i) + jpeg
        os.rename(file_to_rename_path, new_name)
        i += 1

In [28]:
rename_pictures("Videos/Boseman", "Chadwick-Boseman")

# Convert video to images

In [3]:
def video_to_images(video_path, dest_dir):
    last_frame_num = None
    cap = cv2.VideoCapture(video_path)
    image_name = video_path[video_path.find('/')+1:video_path.find('.')-1]
    print(image_name)
    i = 1
    last_frame_num = cap.get(cv2.CAP_PROP_FRAME_COUNT)
    print(last_frame_num)
    while True:
        ret, frame = cap.read()

        cv2.imwrite(dest_dir +"/"+ image_name + "_" + str(i) +".jpeg", frame)
        if last_frame_num == i:
            break
        i += 1
    cap.release()
    #video_to_images("Videos/Tom-Holland-video1_Trimmed2.mp4", "Videos/Frames")

In [4]:
video_to_images("Videos/Scarlett_Downey_video2.mp4", "Videos/Frames")

Scarlett_Downey_video
423.0


# Evaluacija

In [25]:
def load_csv(csv_path, full_evaluation=False):
    frames = {}
    
    with open(csv_path, mode ='r') as file:
        
        csvFile = csv.reader(file)

        for line in csvFile:
            img_name = line[5]
            label = line[0]
            underscore_idx = img_name.rfind('_')
            dot_idx = img_name.rfind('.')
            id = eval(img_name[underscore_idx + 1 : dot_idx])
            
            if id not in frames:
                frames[id] = {}
            if full_evaluation:
                frames[id][label] = [line[1], line[2], line[3], line[4], line[8], line[9], line[10]]
            else:
                frames[id][label] = [line[1], line[2], line[3], line[4]]   
            
    return frames

In [26]:
def bb_intersection_over_union(boxA, boxB):
    xA = max(boxA[0][0], boxB[0][0])
    yA = max(boxA[1][1], boxB[1][1])
    xB = min(boxA[2][0], boxB[2][0])
    yB = min(boxA[3][1], boxB[3][1])

    interArea = (xB - xA) * (yB - yA)

    boxAArea = (boxA[2][0] - boxA[0][0]) * (boxA[3][1] - boxA[1][1])
    boxBArea = (boxB[2][0] - boxB[0][0]) * (boxB[3][1] - boxB[1][1])

    iou = interArea / float(boxAArea + boxBArea - interArea)

    return iou

In [27]:
def display_evaluation_results(tp, tn, fp, fn):
    print("True positive: {}".format(tp))
    print("True negative: {}".format(tn))
    print("False positive: {}".format(fp))
    print("False negative: {}".format(fn))
    
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    f_score = 2 * precision * recall / (precision + recall)
    
    print("Precision: {}".format(precision))
    print("Recall: {}".format(recall))
    print("F score: {}".format(f_score))
    
    return precision, recall, f_score

In [40]:
def full_evaluation_condition(annotated_values, frame_colored, x, y, w, h, full_evaluation=False):
    if not full_evaluation: 
        return True
    
    gender = predict_gender(frame_colored, x, y, w, h)
    age = predict_age(frame_colored, x, y, w, h)
    ethnicity_result = predict_ethnicity(frame_colored, x, y, w, h)
    ethnicity = ethnicity_result['dominant_race']
    
    return annotated_values[4] == gender and annotated_values[5].contains(age) and annotated_values[6].contains(ethnicity)

In [43]:
def evaluate(csv_path, video_path, classification_prob = 0.6, detetion_prob = 0.7, full_evaluation=False):
    
    frames = load_csv(csv_path)
    
    cap = cv2.VideoCapture(video_path)
    last_frame_num = cap.get(cv2.CAP_PROP_FRAME_COUNT)

    i = 1 # current_frame_id
    
    tp = 0
    tn = 0
    fp = 0
    fn = 0
    
    while True:        
        ret, frame = cap.read()
        
        gray  = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        frame_colored = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        faces = detect_faces_haar(gray)

        if i not in frames and len(faces) == 0: # nema labeliranih i nema detektovanih lica
            tn += 1
            i += 1
            continue
        if i in frames and len(faces) == 0: # ima labeliranih ali nema detektovanih lica
            fn += len(frames[i].keys()) # sva lica koja nije nasao
            i += 1
            continue
        if i not in frames and len(faces) > 0: # nema labeliranih ali ima detektovanih
            fp += len(faces) # sva lica koja je nasao
            i += 1
            continue
            
        # postoji bar jedno detektovano lice i  postoji bar jedno labelirano lice
        for face_rect in faces:        
            w1 = face_rect.width()
            h1 = face_rect.height()
            x1 = face_rect.left()
            y1 = face_rect.top()
            
            hoged = extract_faces([face_rect], gray)

            svm_predictions = svm_model_linear.predict_proba(np.array(hoged))
            max_idx = np.argmax(svm_predictions)
            max_value = svm_predictions[0][max_idx]
 
            if max_value >= classification_prob:                # Nas klasifikator je nasao nesto
                name = label_ids[max_idx]
                if name in frames[i]:                   # Nas klasifikator je nasao nesto sto je anotirano
                    x2 = eval(frames[i][name][0])
                    y2 = eval(frames[i][name][1])
                    w2 = eval(frames[i][name][2])
                    h2 = eval(frames[i][name][3])
                    iou = bb_intersection_over_union([(x1, y1), (x1 + w1, y1), (x1 + w1, y1 + h1), (x1, y1 + h1)],
                                                     [(x2, y2), (x2 + w2, y2), (x2 + w2, y2 + h2), (x2, y2 + h2)])
                    
                    if iou > detetion_prob and full_evaluation_condition(
                            frames[i][name], frame_colored, x1, y1, w1, h1, full_evaluation):                         
                                                                
                        tp += 1 # postoji detektovano lice, labelirano je i jeste zadovoljio uslov
                    else:                                      
                        fp += 1 # postoji detektovano lice i labelirano je, ali nije zadovoljio uslov
                else:
                    fp += 1 # postoji detektovano lice ali ono nije labelirano
            
            else:
                fn += 1 # postoji labelirano lice ali klasifikator nije siguran sta je
                
        if last_frame_num == i:
            break
        
        i += 1
    
    cap.release()
    display_evaluation_results(tp, tn, fp, fn)

In [44]:
evaluate("Labels/base-evaluation/dinner_labels.csv", "Videos/test/Dinner_scene_Trimmed_37.mp4")
print("-"*100)
evaluate("Labels/full-evaluation/dinner_labels_refactored.csv", "Videos/test/Dinner_scene_Trimmed_37.mp4", 0.6, 0.7, True)

True positive: 590
True negative: 97
False positive: 149
False negative: 278
Precision: 0.7983761840324763
Recall: 0.6797235023041475
F score: 0.7342874922215308
----------------------------------------------------------------------------------------------------


IndexError: list index out of range

# Refactoring labels.csv

In [24]:
def list_to_string(line):
    line_string = ""
    for el in line: 
        line_string += el + ','
    return line_string[:-1]

def refactor_labels_csv(csv_path, dest_path, actors):
    dest_file_path = dest_path + '/' + csv_path[csv_path.rfind('/'): csv_path.find('.')] + "_refactored.csv"
    with open(csv_path, mode ='r') as file1:
        with open(dest_file_path, mode ='w') as file2:
            csvFile = csv.reader(file1)
            for line in csvFile:
                if line[0] in actors:
                    line.append(actors[line[0]][0])
                    line.append(actors[line[0]][1])
                    line.append(actors[line[0]][2])
                    file2.write(list_to_string(line) +'\n')
            

In [34]:
#ageList=['(0-2)', '(4-6)', '(8-12)', '(15-20)', '(25-32)', '(38-43)', '(48-53)', '(60-100)']
actors = {
#         'Tom-Holland': ['Male', '(15-20)', 'white'],
        'Scarlett-Johansson': ['Female', '(25-32)', 'white'],
        'Robert-Downey-Junior': ['Male', '(38-43)|(48-53)', 'white|latino hispanic'],
#         'Benedict-Cumberbatch' :['Male', '(38-43)', 'white']
    }
# actors = {
#         'Emma-Watson': ['Female', '(15-20)|(25-32)', 'white']
#     }
refactor_labels_csv('Labels/base-evaluation/scarlett_downey_video2_labels.csv', "Labels/full-evaluation", actors)