# 1. 이미지 전처리
## 1.1. 정면 얼굴 찾기

In [23]:
# -*- coding:utf-8 -*-

In [None]:
# vision api로 정면 얼굴 찾기
from googleapiclient import  discovery
from oauth2client.client  import GoogleCredentials
import sys
import io
import base64
from PIL import Image
from PIL import ImageDraw
from genericpath import isfile
import os
import hashlib
from oauth2client.service_account import ServiceAccountCredentials


# NOTE(review): NUM_THREADS is not referenced by the code visible in this file
NUM_THREADS = 70000
# Maximum number of results requested from the Vision API for each feature type
MAX_FACE = 2
MAX_LABEL = 2
# Size that every cropped face image is resized to
IMAGE_SIZE = 156,156
# Largest accepted absolute roll / tilt / pan angle; faces beyond it are rejected
MAX_ROLL = 15
MAX_TILT = 15
MAX_PAN = 15

# Running index used to transform an image's string label into a number
global_label_index = 0 
# Number of training images written for each label index (pre-sized for 13 labels)
global_label_number = [0 for x in range(13)]
# MD5 digests of the images cropped so far, used to detect duplicates
global_image_hash = []

class FaceDetector():
    """Locate frontal faces with the Google Cloud Vision API and crop them.

    Each image is sent to the Vision ``images.annotate`` endpoint. Images
    without exactly one face, with a strongly turned head or with sunglasses
    are rejected. Accepted faces are cropped, resized to ``IMAGE_SIZE`` and
    written into ``training``/``validate`` folders together with label list
    files. Progress is tracked in the module-level ``global_label_index``,
    ``global_label_number`` and ``global_image_hash`` variables.
    """

    def __init__(self, keyfile='./still-sensor-325313-e544dd4f101f.json'):
        """Build the Vision API client from a service-account key file.

        Args:
            keyfile: Path of the service-account JSON key. The default is the
                key file that used to be hard-coded here.
        """
        scopes = ['https://www.googleapis.com/auth/cloud-platform']
        credentials = ServiceAccountCredentials.from_json_keyfile_name(
            keyfile, scopes=scopes)
        self.service = discovery.build('vision', 'v1', credentials=credentials)

    def skew_angle(self):
        """Placeholder; always returns None."""
        return None

    def detect_face(self, image_file):
        """Return the face rectangle of ``image_file`` or None if unusable.

        Args:
            image_file: Path of the image to analyse.

        Returns:
            ``[left, top, right, bottom]`` of the single detected face, or
            None when no face / several faces are found, the head angle
            exceeds MAX_ROLL / MAX_PAN / MAX_TILT, sunglasses are labelled,
            or any error occurs (errors are printed, not raised).
        """
        try:
            # The with-block closes the file; no explicit close() is needed.
            with io.open(image_file, 'rb') as fd:
                image = fd.read()

            batch_request = [{
                'image': {
                    'content': base64.b64encode(image).decode('utf-8'),
                },
                'features': [
                    {
                        'type': 'FACE_DETECTION',
                        'maxResults': MAX_FACE,
                    },
                    {
                        'type': 'LABEL_DETECTION',
                        'maxResults': MAX_LABEL,
                    },
                ],
            }]

            request = self.service.images().annotate(body={
                'requests': batch_request, })
            response = request.execute()
            result = response['responses'][0]

            face = result.get('faceAnnotations')
            if not face:
                print('[Error] %s: Cannot find face ' % image_file)
                return None

            # A response without labels must not discard an otherwise valid face.
            label = result.get('labelAnnotations', [])

            if len(face) > 1:
                print('[Error] %s: It has more than 1 face in a file' % image_file)
                return None

            roll_angle = face[0]['rollAngle']
            pan_angle = face[0]['panAngle']
            tilt_angle = face[0]['tiltAngle']
            angle = [roll_angle, pan_angle, tilt_angle]

            # Skip the image when any head angle exceeds its configured maximum.
            if (abs(roll_angle) > MAX_ROLL or abs(pan_angle) > MAX_PAN
                    or abs(tilt_angle) > MAX_TILT):
                print('[Error] %s: face skew angle is big' % image_file)
                return None

            # Compare case-insensitively so a capitalised label is also caught.
            for l in label:
                if 'sunglasses' in l.get('description', '').lower():
                    print('[Error] %s: sunglass is detected' % image_file)
                    return None

            # A vertex may lack 'x' or 'y' (treated as 0 here), so read the
            # coordinates defensively and take the enclosing box.
            box = face[0]['fdBoundingPoly']['vertices']
            xs = [vertex.get('x', 0) for vertex in box]
            ys = [vertex.get('y', 0) for vertex in box]
            rect = [min(xs), min(ys), max(xs), max(ys)]

            print("[Info] %s: Find face from in position %s and skew angle %s" % (image_file, rect, angle))
            return rect
        except Exception as e:
            # Best effort: one unreadable image must not stop the whole batch.
            print('[Error] %s: cannot process file : %s' % (image_file, str(e)))
            return None

    def rect_face(self, image_file, rect, outputfile):
        """Draw a green rectangle ``rect`` on the image and save it.

        Args:
            image_file: Path of the source image.
            rect: ``[left, top, right, bottom]`` to outline.
            outputfile: Path the marked image is written to.
        """
        try:
            # The with-block guarantees the handle is released on errors too.
            with io.open(image_file, 'rb') as fd:
                image = Image.open(fd)
                draw = ImageDraw.Draw(image)
                draw.rectangle(rect, fill=None, outline="green")
                image.save(outputfile)
            print('[Info] %s: Mark face with Rect %s and write it to file : %s' % (image_file, rect, outputfile))
        except Exception as e:
            print('[Error] %s: Rect image writing error : %s' % (image_file, str(e)))

    def crop_face(self, image_file, rect, outputfile):
        """Crop ``rect`` out of the image, resize it and save it as JPEG.

        Args:
            image_file: Path of the source image.
            rect: ``[left, top, right, bottom]`` of the face.
            outputfile: Path the cropped JPEG is written to.

        Returns:
            True on success; None when the image duplicates one that was
            already processed or when an error occurs (errors are printed).
        """
        global global_image_hash
        try:
            with io.open(image_file, 'rb') as fd:
                image = Image.open(fd)

                # Hash the re-encoded pixels to detect duplicated images.
                m = hashlib.md5()
                with io.BytesIO() as memf:
                    image.save(memf, 'PNG')
                    m.update(memf.getvalue())
                image_hash = m.hexdigest()

                if image_hash in global_image_hash:
                    print('[Error] %s: Duplicated image' % (image_file))
                    return None

                crop = image.crop(rect)
                # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the
                # same filter under its current name.
                im = crop.resize(IMAGE_SIZE, Image.LANCZOS)
                # JPEG cannot store alpha/palette modes; convert those first.
                if im.mode not in ('RGB', 'L', 'CMYK'):
                    im = im.convert('RGB')
                im.save(outputfile, "JPEG")

            # Record the hash only after the file was written, so a failed
            # save does not mark the image as already processed.
            global_image_hash.append(image_hash)
            print('[Info]  %s: Crop face %s and write it to file : %s' % (image_file, rect, outputfile))
            return True
        except Exception as e:
            print('[Error] %s: Crop image writing error : %s' % (image_file, str(e)))
            return None

    def getfiles(self, src_dir):
        """Return the paths of all non-hidden regular files in ``src_dir``."""
        return [
            os.path.join(src_dir, name)
            for name in os.listdir(src_dir)
            if os.path.isfile(os.path.join(src_dir, name))
            and not name.startswith('.')
        ]

    def rect_faces_dir(self, src_dir, des_dir):
        """Mark the detected face in every image of ``src_dir``.

        For each file a copy with the face rectangle drawn on it is written
        to ``des_dir`` under the same file name. Images without a usable
        face are skipped.
        """
        os.makedirs(des_dir, exist_ok=True)

        for f in self.getfiles(src_dir):
            des_file = os.path.join(des_dir, os.path.basename(f))
            rect = self.detect_face(f)
            if rect is not None:
                self.rect_face(f, rect, des_file)

    def crop_faces_dir(self, src_dir, des_dir, maxnum):
        """Crop the faces of one label folder into training/validation sets.

        The folder name of ``src_dir`` is the label. Cropped faces go to
        ``des_dir/training`` and ``des_dir/validate``; one line
        ``path,label,label_index`` per image is appended to
        ``training_file.txt`` / ``validate_file.txt`` in ``des_dir``.

        Args:
            src_dir: Folder holding the images of a single label.
            des_dir: Root output folder.
            maxnum: Stop after this many training images were written.
        """
        global global_label_index

        des_dir_training = os.path.join(des_dir, 'training')
        des_dir_validate = os.path.join(des_dir, 'validate')
        for folder in (des_dir, des_dir_training, des_dir_validate):
            os.makedirs(folder, exist_ok=True)

        # normpath drops a trailing separator that would yield an empty label.
        label = os.path.basename(os.path.normpath(src_dir))

        # The counter list is pre-sized; grow it when more labels show up
        # instead of failing with an IndexError.
        while len(global_label_number) <= global_label_index:
            global_label_number.append(0)

        files = self.getfiles(src_dir)
        cnt = 0  # position inside the current group of 10 accepted images
        num = 0  # number of training images written for this label

        # The label list files record each output path with its label.
        with open(os.path.join(des_dir, 'training_file.txt'), 'a') as training_file, \
                open(os.path.join(des_dir, 'validate_file.txt'), 'a') as validate_file:
            for f in files:
                rect = self.detect_face(f)
                if rect is None:
                    continue

                # ',' separates the fields of the label files, so replace it
                # with '_' in the output file name.
                des_file_name = os.path.basename(f).replace(',', '_')

                if cnt < 8:
                    # The first 8 of every 10 accepted images are training data.
                    des_file = os.path.join(des_dir_training, des_file_name)
                    # crop_face returns None for duplicates and failures.
                    if self.crop_face(f, rect, des_file) is not None:
                        training_file.write("%s,%s,%d\n" % (des_file, label, global_label_index))
                        num = num + 1
                        global_label_number[global_label_index] = num
                        cnt = cnt + 1

                    if num >= maxnum:
                        break
                else:
                    # The remaining 2 of every 10 are validation data.
                    des_file = os.path.join(des_dir_validate, des_file_name)
                    if self.crop_face(f, rect, des_file) is not None:
                        validate_file.write("%s,%s,%d\n" % (des_file, label, global_label_index))
                        cnt = cnt + 1

                if cnt > 9:
                    cnt = 0

        # Move on to the next label index for the next folder.
        global_label_index = global_label_index + 1
        print('## label %s has %s of training data' % (global_label_index, num))

    def getdirs(self, dir):
        """Return the paths of all non-hidden sub-directories of ``dir``."""
        dirs = []
        for name in os.listdir(dir):
            # Test the bare name: the joined path starts with the parent
            # directory, so checking it for a leading '.' is meaningless.
            if name.startswith('.'):
                continue
            path = os.path.join(dir, name)
            if os.path.isdir(path):
                dirs.append(path)

        return dirs

    def crop_faces_rootdir(self, src_dir, des_dir, maxnum):
        """Run ``crop_faces_dir`` for every sub-directory of ``src_dir``.

        Args:
            src_dir: Root folder whose sub-folders each hold one label.
            des_dir: Root output folder.
            maxnum: Maximum number of training images per label.
        """
        for d in self.getdirs(src_dir):
            print('[INFO] : ### Starting cropping in directory %s ###' % d)
            self.crop_faces_dir(d, des_dir, maxnum)

        print("number of datas per label ", global_label_number)
        
def main(argv):
    """Crop the faces of every label folder below the source directory.

    ``argv`` is accepted for the script entry point but is not used.
    """
    # Folder that holds the original images
    source_root = 'C:\\Users\\성수현\\face_image_align\\원본\\'
    # Folder that receives the processed images
    target_root = 'C:\\Users\\성수현\\face_image_align\\수정본\\'
    # Limit handed to crop_faces_rootdir as maxnum
    limit = 36000

    FaceDetector().crop_faces_rootdir(source_root, target_root, limit)
    
# Run the cropping pipeline only when this code is executed directly
if __name__ == "__main__":
    main(sys.argv)

## 1.2. 이미지 수치화
> 여자부분

In [2]:
import face_recognition as fr
from PIL import Image
import glob
import os
import numpy as np
import pandas as pd

In [2]:
# Load the paths of the aligned (frontal) woman face images
# NOTE(review): later cells pair this list index-by-index with os.listdir() of
# the same folder; glob's '*.*' pattern and listdir may not yield identical entries — verify
woman_image_files = glob.glob('./face_aligned_images/woman/'+'*.*')
woman_image_files

['./face_aligned_images/woman\\2NE1 박봄13.jpg',
 './face_aligned_images/woman\\2NE1 박봄18.jpg',
 './face_aligned_images/woman\\2NE1 박봄2.jpg',
 './face_aligned_images/woman\\2NE1 박봄21.jpg',
 './face_aligned_images/woman\\2NE1 박봄25.jpg',
 './face_aligned_images/woman\\2NE1 박봄27.jpg',
 './face_aligned_images/woman\\2NE1 박봄28.jpg',
 './face_aligned_images/woman\\2NE1 박봄31.jpg',
 './face_aligned_images/woman\\2NE1 박봄33.jpg',
 './face_aligned_images/woman\\2NE1 박봄35.jpg',
 './face_aligned_images/woman\\2NE1 박봄36.jpg',
 './face_aligned_images/woman\\2NE1 박봄37.jpg',
 './face_aligned_images/woman\\2NE1 박봄4.jpg',
 './face_aligned_images/woman\\2NE1 박봄40.jpg',
 './face_aligned_images/woman\\2NE1 박봄7.jpg',
 './face_aligned_images/woman\\2NE1 씨엘16.jpg',
 './face_aligned_images/woman\\2NE1 씨엘2.jpg',
 './face_aligned_images/woman\\2NE1 씨엘20.jpg',
 './face_aligned_images/woman\\2NE1 씨엘23.jpg',
 './face_aligned_images/woman\\2NE1 씨엘35.jpg',
 './face_aligned_images/woman\\2NE1 씨엘42.jpg',
 './face_aligned_

In [4]:
# Turn every image into face-encoding vectors: one list of encodings per image
image = []
woman_encodings = []
for file in woman_image_files:
    pixels = fr.load_image_file(file)
    woman_encodings.append(fr.face_encodings(pixels))

In [3]:
# Display the encodings (the cell that defines woman_encodings must have run first)
woman_encodings

NameError: name 'woman_encodings' is not defined

In [4]:
# Bare file names of the woman image folder; used later as the DataFrame index
woman_filelist = os.listdir('./face_aligned_images/woman/')
woman_filelist

['2NE1 박봄13.jpg',
 '2NE1 박봄18.jpg',
 '2NE1 박봄2.jpg',
 '2NE1 박봄21.jpg',
 '2NE1 박봄25.jpg',
 '2NE1 박봄27.jpg',
 '2NE1 박봄28.jpg',
 '2NE1 박봄31.jpg',
 '2NE1 박봄33.jpg',
 '2NE1 박봄35.jpg',
 '2NE1 박봄36.jpg',
 '2NE1 박봄37.jpg',
 '2NE1 박봄4.jpg',
 '2NE1 박봄40.jpg',
 '2NE1 박봄7.jpg',
 '2NE1 씨엘16.jpg',
 '2NE1 씨엘2.jpg',
 '2NE1 씨엘20.jpg',
 '2NE1 씨엘23.jpg',
 '2NE1 씨엘35.jpg',
 '2NE1 씨엘42.jpg',
 '2NE1 씨엘46.jpg',
 '2NE1 씨엘8.jpg',
 '2NE1 씨엘9.jpg',
 '7공주 오인영19.jpg',
 '7공주 오인영33.jpg',
 '7공주 오인영46.jpg',
 'AOA 민아11.jpg',
 'AOA 민아12.jpg',
 'AOA 민아18.jpg',
 'AOA 민아19.jpg',
 'AOA 민아2.jpg',
 'AOA 민아20.jpg',
 'AOA 민아34.jpg',
 'AOA 민아37.jpg',
 'AOA 민아40.jpg',
 'AOA 민아43.jpg',
 'AOA 민아45.jpg',
 'AOA 민아5.jpg',
 'AOA 민아7.jpg',
 'AOA 설현13.jpg',
 'AOA 설현14.jpg',
 'AOA 설현16.jpg',
 'AOA 설현18.jpg',
 'AOA 설현19.jpg',
 'AOA 설현20.jpg',
 'AOA 설현21.jpg',
 'AOA 설현23.jpg',
 'AOA 설현29.jpg',
 'AOA 설현30.jpg',
 'AOA 설현33.jpg',
 'AOA 설현34.jpg',
 'AOA 설현37.jpg',
 'AOA 설현38.jpg',
 'AOA 설현4.jpg',
 'AOA 설현42.jpg',
 'AOA 설현43.jpg',
 'AOA 설현5.jpg'

In [5]:
import pickle

In [9]:
# Persist the encodings so they can be reloaded later
# (the file holds binary pickle data despite its .txt suffix)
with open('./woman_encodings.txt', 'wb') as sink:
    pickle.dump(woman_encodings, sink)

In [6]:
# Restore the previously saved encodings
# (pickle must only be used on files you created yourself)
with open('./woman_encodings.txt', 'rb') as source:
    woman_encodings = pickle.load(source)

In [11]:
# Check that the number of encodings matches the number of image paths and file names
print(len(woman_encodings),len(woman_image_files),len(woman_filelist))

7919 7919 7919


In [12]:
# Display the loaded encodings (one list of arrays per image)
woman_encodings

[[array([-8.49732086e-02,  3.68976295e-02,  6.59210011e-02, -8.94702226e-02,
         -6.80861399e-02, -6.67010546e-02, -1.47499964e-01, -9.78505090e-02,
          1.43436626e-01, -1.62566483e-01,  1.80360287e-01, -8.87456164e-02,
         -2.12289631e-01,  6.12437949e-02, -1.13828667e-01,  2.36507192e-01,
         -1.42689124e-01, -1.15179941e-01, -1.07934967e-01,  2.17520818e-02,
          5.26863448e-02,  5.64964637e-02,  3.77974880e-04,  2.79830173e-02,
         -3.68853286e-02, -3.33254158e-01, -9.29427221e-02, -3.51666734e-02,
         -4.15065959e-02, -3.78764831e-02,  2.08890121e-02,  8.67884681e-02,
         -1.38760179e-01,  1.90076306e-02,  1.78764611e-02,  8.57186094e-02,
          1.28213409e-02, -9.29267555e-02,  6.78155944e-02,  5.65703353e-03,
         -2.95771241e-01,  2.17426382e-03,  5.42406514e-02,  2.35465571e-01,
          1.57534197e-01, -1.52335484e-02,  6.40406013e-02, -1.57622308e-01,
          5.36463559e-02, -1.85582891e-01, -2.56794654e-02,  4.41327356e-02,

In [13]:
# Keep the file lists aligned with the encodings: some images yield no
# encoding at all (an empty list), so their entries are removed from both lists
empty_slots = [pos for pos, enc in enumerate(woman_encodings) if not enc]
# Delete from the back so the remaining positions stay valid
for pos in reversed(empty_slots):
    del woman_filelist[pos]
    del woman_image_files[pos]
cnt = len(empty_slots)

In [14]:
# Flatten the per-image encoding lists into one list of vectors, one row per
# detected face; this is the samples-by-features input passed to KMeans.fit below.
# Images without a detected face contribute an empty list and so vanish here.
# NOTE(review): an image with more than one detected face contributes several
# vectors, which leaves this list longer than the file lists — verify the lengths.
t = woman_encodings.copy()  # kept under this name because later cells read `t`
# A nested comprehension is linear; sum(t, []) copies the growing list each step.
woman_conv_encodings = [vector for per_image in t for vector in per_image]

In [127]:
# Remove the entry at index 3964 from the flattened encodings.
# NOTE(review): hard-coded index — presumably the extra vector of an image in
# which two faces were detected; it is only valid for this exact data set.
woman_conv_encodings = woman_conv_encodings[:3964] + woman_conv_encodings[3965:]

In [30]:
# Inspect the encoding list of the first image
woman_encodings[0]

[array([-8.49732086e-02,  3.68976295e-02,  6.59210011e-02, -8.94702226e-02,
        -6.80861399e-02, -6.67010546e-02, -1.47499964e-01, -9.78505090e-02,
         1.43436626e-01, -1.62566483e-01,  1.80360287e-01, -8.87456164e-02,
        -2.12289631e-01,  6.12437949e-02, -1.13828667e-01,  2.36507192e-01,
        -1.42689124e-01, -1.15179941e-01, -1.07934967e-01,  2.17520818e-02,
         5.26863448e-02,  5.64964637e-02,  3.77974880e-04,  2.79830173e-02,
        -3.68853286e-02, -3.33254158e-01, -9.29427221e-02, -3.51666734e-02,
        -4.15065959e-02, -3.78764831e-02,  2.08890121e-02,  8.67884681e-02,
        -1.38760179e-01,  1.90076306e-02,  1.78764611e-02,  8.57186094e-02,
         1.28213409e-02, -9.29267555e-02,  6.78155944e-02,  5.65703353e-03,
        -2.95771241e-01,  2.17426382e-03,  5.42406514e-02,  2.35465571e-01,
         1.57534197e-01, -1.52335484e-02,  6.40406013e-02, -1.57622308e-01,
         5.36463559e-02, -1.85582891e-01, -2.56794654e-02,  4.41327356e-02,
         7.9

In [48]:
# Inspect the first component of the first flattened vector
woman_conv_encodings[0][0]

-0.08497320860624313

In [125]:
# Inspect the flattened vector at index 3964 (the one removed by the slicing cell)
woman_conv_encodings[3964]

array([-1.25570446e-01,  1.61814503e-02,  1.29801601e-01, -1.45984322e-01,
       -1.16389632e-01, -7.45504946e-02,  1.93859637e-02, -1.26741290e-01,
        1.47430107e-01, -8.67568851e-02,  1.26684919e-01, -9.16166455e-02,
       -2.40907863e-01, -7.61273801e-02,  1.38378246e-02,  1.65757120e-01,
       -8.02736953e-02, -1.32743731e-01, -1.00170001e-01, -1.02907084e-01,
        6.16558082e-03,  1.01942927e-01, -2.72529852e-02,  2.97584515e-02,
       -4.97760065e-02, -2.38457650e-01, -5.62373549e-03, -6.75372332e-02,
        1.23853870e-02, -5.79327196e-02,  3.64173912e-02,  1.30970076e-01,
       -1.93327501e-01, -4.02737334e-02, -2.94295233e-03,  7.63394833e-02,
       -9.18719321e-02, -1.45545602e-01,  1.64333388e-01, -1.73483044e-02,
       -2.24101648e-01, -4.54497710e-02,  6.30222484e-02,  1.52501285e-01,
        2.17877388e-01, -5.65493666e-02,  2.40070745e-03, -3.18050645e-02,
        1.24296896e-01, -2.40915000e-01, -6.04266301e-02,  1.44279391e-01,
        1.04131676e-01, -

In [74]:
# Per-image encoding lists with the empty ones removed
temp = [entry for entry in t if len(entry) > 0]

In [128]:
# Report every position where the flattened list disagrees with the first
# encoding of the matching non-empty per-image list
for idx, val in enumerate(temp):
    expected_head = val[0][0]
    if woman_conv_encodings[idx][0] != expected_head:
        print(idx, expected_head)

In [15]:
import numpy as np

In [129]:
# Check that the number of flattened encodings matches the number of image paths and file names
print(len(woman_conv_encodings),len(woman_image_files),len(woman_filelist))

7700 7700 7700


> 남자부분

In [3]:
# Load the paths of the aligned (frontal) man face images
# NOTE(review): later cells pair this list index-by-index with os.listdir() of
# the same folder; glob's '*.*' pattern and listdir may not yield identical entries — verify
man_image_files = glob.glob('./face_aligned_images/man/'+'*.*')
man_image_files

['./face_aligned_images/man\\2AM 임슬옹11.jpg',
 './face_aligned_images/man\\2AM 임슬옹18.jpg',
 './face_aligned_images/man\\2AM 임슬옹19.jpg',
 './face_aligned_images/man\\2AM 임슬옹20.jpg',
 './face_aligned_images/man\\2AM 임슬옹26.jpg',
 './face_aligned_images/man\\2AM 임슬옹28.jpg',
 './face_aligned_images/man\\2AM 임슬옹3.jpg',
 './face_aligned_images/man\\2AM 임슬옹33.jpg',
 './face_aligned_images/man\\2AM 임슬옹38.jpg',
 './face_aligned_images/man\\2AM 임슬옹39.jpg',
 './face_aligned_images/man\\2AM 임슬옹40.jpg',
 './face_aligned_images/man\\2AM 임슬옹42.jpg',
 './face_aligned_images/man\\2AM 임슬옹43.jpg',
 './face_aligned_images/man\\2AM 임슬옹44.jpg',
 './face_aligned_images/man\\2AM 임슬옹46.jpg',
 './face_aligned_images/man\\2AM 임슬옹5.jpg',
 './face_aligned_images/man\\2AM 임슬옹7.jpg',
 './face_aligned_images/man\\2AM 임슬옹8.jpg',
 './face_aligned_images/man\\2AM 임슬옹9.jpg',
 './face_aligned_images/man\\2PM 장우영11.jpg',
 './face_aligned_images/man\\2PM 장우영15.jpg',
 './face_aligned_images/man\\2PM 장우영17.jpg',
 './face_aligne

In [4]:
# Turn every image into face-encoding vectors: one list of encodings per image
image = []
man_encodings = []
for file in man_image_files:
    pixels = fr.load_image_file(file)
    man_encodings.append(fr.face_encodings(pixels))

KeyboardInterrupt: 

In [5]:
# Bare file names of the man image folder; used later as the DataFrame index
man_filelist = os.listdir('./face_aligned_images/man/')
man_filelist

['2AM 임슬옹11.jpg',
 '2AM 임슬옹18.jpg',
 '2AM 임슬옹19.jpg',
 '2AM 임슬옹20.jpg',
 '2AM 임슬옹26.jpg',
 '2AM 임슬옹28.jpg',
 '2AM 임슬옹3.jpg',
 '2AM 임슬옹33.jpg',
 '2AM 임슬옹38.jpg',
 '2AM 임슬옹39.jpg',
 '2AM 임슬옹40.jpg',
 '2AM 임슬옹42.jpg',
 '2AM 임슬옹43.jpg',
 '2AM 임슬옹44.jpg',
 '2AM 임슬옹46.jpg',
 '2AM 임슬옹5.jpg',
 '2AM 임슬옹7.jpg',
 '2AM 임슬옹8.jpg',
 '2AM 임슬옹9.jpg',
 '2PM 장우영11.jpg',
 '2PM 장우영15.jpg',
 '2PM 장우영17.jpg',
 '2PM 장우영19.jpg',
 '2PM 장우영21.jpg',
 '2PM 장우영25.jpg',
 '2PM 장우영27.jpg',
 '2PM 장우영33.jpg',
 '2PM 장우영7.jpg',
 '2PM 찬성15.jpg',
 '2PM 찬성16.jpg',
 '2PM 찬성2.jpg',
 '2PM 찬성21.jpg',
 '2PM 찬성23.jpg',
 '2PM 찬성24.jpg',
 '2PM 찬성28.jpg',
 '2PM 찬성29.jpg',
 '2PM 찬성3.jpg',
 '2PM 찬성31.jpg',
 '2PM 찬성35.jpg',
 '2PM 찬성38.jpg',
 '2PM 찬성4.jpg',
 '2PM 찬성41.jpg',
 '2PM 찬성42.jpg',
 '2PM 찬성45.jpg',
 '2PM 찬성46.jpg',
 '2PM 찬성47.jpg',
 '2PM 찬성7.jpg',
 '2PM 찬성8.jpg',
 'B1A4 공찬1.jpg',
 'B1A4 공찬11.jpg',
 'B1A4 공찬12.jpg',
 'B1A4 공찬13.jpg',
 'B1A4 공찬16.jpg',
 'B1A4 공찬21.jpg',
 'B1A4 공찬26.jpg',
 'B1A4 공찬27.jpg',
 'B1A4 공찬3.jpg',
 'B1A4 

In [6]:
import pickle

In [7]:
# Restore the previously saved man encodings
# (pickle must only be used on files you created yourself)
with open('./man_encodings.txt', 'rb') as source:
    man_encodings = pickle.load(source)

In [8]:
# Check that the number of encodings matches the number of image paths and file names
print(len(man_encodings),len(man_image_files),len(man_filelist))

7830 7830 7830


In [9]:
# Keep the file lists aligned with the encodings: some images yield no
# encoding at all (an empty list), so their entries are removed from both lists
empty_slots = [pos for pos, enc in enumerate(man_encodings) if not enc]
# Delete from the back so the remaining positions stay valid
for pos in reversed(empty_slots):
    del man_filelist[pos]
    del man_image_files[pos]
cnt = len(empty_slots)

In [10]:
# Flatten the per-image encoding lists into one list of vectors, one row per
# detected face; this is the samples-by-features input passed to KMeans.fit below.
# Images without a detected face contribute an empty list and so vanish here.
# NOTE(review): an image with more than one detected face contributes several
# vectors, which leaves this list longer than the file lists — verify the lengths.
t = man_encodings.copy()  # kept under this name in case later cells read `t`
# A nested comprehension is linear; sum(t, []) copies the growing list each step.
man_conv_encodings = [vector for per_image in t for vector in per_image]

In [11]:
# Number of flattened man encodings
len(man_conv_encodings)

7590

In [12]:
# Check that the number of flattened encodings matches the number of image paths and file names
print(len(man_conv_encodings),len(man_image_files),len(man_filelist))

7590 7590 7590


# 2.2 KMeans

In [146]:
from sklearn.cluster import KMeans

In [131]:
# Fit KMeans with 6 clusters on the woman encodings
# NOTE(review): no random_state is passed, so cluster numbering can differ between runs
clf = KMeans(n_clusters =6)
clf.fit(woman_conv_encodings)

KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,
    n_clusters=6, n_init=10, n_jobs=None, precompute_distances='auto',
    random_state=None, tol=0.0001, verbose=0)

In [147]:
# Fit KMeans with 6 clusters on the man encodings
# NOTE(review): this rebinds clf, replacing the model fitted on the woman data;
# no random_state is passed, so cluster numbering can differ between runs
clf = KMeans(n_clusters =6)
clf.fit(man_conv_encodings)

KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,
    n_clusters=6, n_init=10, n_jobs=None, precompute_distances='auto',
    random_state=None, tol=0.0001, verbose=0)

In [148]:
# Check which cluster labels were produced (the model is built with n_clusters=6, not 13)
np.unique(clf.labels_)

array([0, 1, 2, 3, 4, 5])

# 2.5 데이터 클래스 예측

In [133]:
# Assign every woman encoding to a cluster (the same data the woman model was fitted on)
# NOTE(review): clf is also refitted on the man data in another cell; make sure
# the woman fit was the last one executed before running this cell
result = clf.predict(woman_conv_encodings)

In [134]:
# ImageDataGenerator.flow_from_dataframe needs the class as a string, so convert the labels
cluster_names = [str(cluster_id) for cluster_id in result]
# One row per image, indexed by file name
df_val_face = pd.DataFrame(index=woman_filelist, columns=['class', 'imagepath'])
df_val_face['class'] = cluster_names
df_val_face['imagepath'] = woman_image_files

In [135]:
# Write the woman cluster table as CSV (the file names in the index become the first column)
df_val_face.to_csv('./woman_cluster.txt')

In [150]:
# Assign every man encoding to a cluster (the same data the man model was fitted on)
# NOTE(review): clf is shared with the woman cells; make sure the man fit was
# the last one executed before running this cell
result = clf.predict(man_conv_encodings)

In [151]:
# ImageDataGenerator.flow_from_dataframe needs the class as a string, so convert the labels
cluster_names = [str(cluster_id) for cluster_id in result]
# One row per image, indexed by file name
df_val_face = pd.DataFrame(index=man_filelist, columns=['class', 'imagepath'])
df_val_face['class'] = cluster_names
df_val_face['imagepath'] = man_image_files

In [152]:
# Write the man cluster table as CSV (the file names in the index become the first column)
df_val_face.to_csv('./man_cluster.txt')

In [136]:
# Display the cluster table built last
df_val_face

Unnamed: 0,class,imagepath
2NE1 박봄13.jpg,1,./face_aligned_images/woman\2NE1 박봄13.jpg
2NE1 박봄18.jpg,1,./face_aligned_images/woman\2NE1 박봄18.jpg
2NE1 박봄2.jpg,1,./face_aligned_images/woman\2NE1 박봄2.jpg
2NE1 박봄21.jpg,1,./face_aligned_images/woman\2NE1 박봄21.jpg
2NE1 박봄25.jpg,2,./face_aligned_images/woman\2NE1 박봄25.jpg
...,...,...
힌복 디자이너 박술녀22.jpg,4,./face_aligned_images/woman\힌복 디자이너 박술녀22.jpg
힌복 디자이너 박술녀33.jpg,4,./face_aligned_images/woman\힌복 디자이너 박술녀33.jpg
힌복 디자이너 박술녀36.jpg,4,./face_aligned_images/woman\힌복 디자이너 박술녀36.jpg
힌복 디자이너 박술녀4.jpg,4,./face_aligned_images/woman\힌복 디자이너 박술녀4.jpg


In [14]:
# Reload the man cluster table; the unnamed first CSV column (presumably the
# file names written as the index) is addressed as 'Unnamed: 0' below
df = pd.read_csv('./man_cluster.txt')

In [43]:
# Print the image name, without its extension, of every row assigned to cluster 3.
# os.path.splitext strips only the final extension, so names that contain
# further dots are no longer cut off at the first one.
for name in df.loc[df['class'] == 3, 'Unnamed: 0']:
    print(os.path.splitext(name)[0])

BGF그룹 회장 홍석조1
BGF그룹 회장 홍석조16
BGF그룹 회장 홍석조28
BGF그룹 회장 홍석조37
BGF그룹 회장 홍석조38
BGF그룹 회장 홍석조44
BGF그룹 회장 홍석조45
BGF그룹 회장 홍석조48
CJ그룹회장 이재현12
CJ그룹회장 이재현26
CJ그룹회장 이재현33
CJ그룹회장 이재현5
DB그룹 회장 김준기1
DB그룹 회장 김준기16
DB그룹 회장 김준기18
DB그룹 회장 김준기21
DB그룹 회장 김준기29
DB그룹 회장 김준기33
DB그룹 회장 김준기34
DB그룹 회장 김준기38
DB그룹 회장 김준기41
DB그룹 회장 김준기5
DB그룹 회장 김준기6
DB그룹 회장 김준기7
FNC엔터테이먼트 대표 한성호48
FNC엔터테이먼트 대표 한성호5
LG그룹 회장 구광모2
LG그룹 회장 구광모8
NCSOFT 대표이사 김택진_v26
NCSOFT 대표이사 김택진_v37
NCSOFT 대표이사 김택진_v43
SG워너비 이석훈13
SG워너비 이석훈37
SK그룹 회장 최태원45
SM 이수만47
XL게임즈대표 송재경18
XL게임즈대표 송재경21
XL게임즈대표 송재경24
XL게임즈대표 송재경37
XL게임즈대표 송재경39
XL게임즈대표 송재경4
XL게임즈대표 송재경42
XL게임즈대표 송재경50
XL게임즈대표 송재경51
XL게임즈대표 송재경52
XL게임즈대표 송재경57
가수 김건모35
가수 김건모7
가수 김태우29
가수 김호중13
가수 성시경25
가수 신승훈1
가수 신승훈12
가수 신승훈14
가수 신승훈20
가수 신승훈34
가수 신승훈38
가수 신승훈42
가수 신승훈45
가수 은지원20
가수 은지원21
가수 조영남16
가수 주영훈1
가수 주영훈13
가수 주영훈34
가수 황치열2
개그맨 김경진22
개그맨 김경진8
개그맨 김영민28
개그맨 김준호24
개그맨 김준호26
개그맨 김준호38
개그맨 김지호10
개그맨 김지호28
개그맨 김태현1
개그맨 김태현19
개그맨 김태현34
개그맨 김태현38
개그맨 김태현47
개그맨 김태현8
개그맨 박명수27
개그맨 박명수7
개그맨 박성광37
개