## 1. 라이브러리

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import cv2
from tqdm import tqdm
from PIL import Image

## 2. 메타데이터 불러오기

In [4]:
dpath = 'data/' # 데이터 경로 설정
data = pd.read_csv(dpath + 'metadata.csv')
data

Unnamed: 0,album_id,album_name,artist,album_release_date,album_total_tracks,emotion,playlist_name
0,0025Y5D7IOZVpEfQtpvHIi,Into the Surf,Foals,2019-09-25,3,relaxed,calm winter morning ☆☆
1,002hMu6uJDan1XKVp3pmnb,Becoming,Feather,2021-01-13,5,sad,Sad Beats
2,002mOl8os2F1mDYRz8qU2a,Антигерой,ELMAN,2019-06-06,1,sad,슬픔
3,004EYz2DQttcGvyTQGDmLp,Merry Christmas Baby / White Christmas,Otis Redding,1968,2,happy,Happy Holidays
4,005KGm28jIUE3hW019Itx6,Too Much Love,Mujo,2021-04-15,1,relaxed,"Chill Beats 💙 groove, relax"
...,...,...,...,...,...,...,...
63309,7zw73YMhMg7lnMrkUAzE8t,Just Another Memory,JustSidekicks,2020-07-10,1,happy,Happy Vibes ?��
63310,7zwLfr8xSz9rWPU5KNBHkQ,cherry,Oatmello,2019-05-24,1,relaxed,Relax Lofi 😴
63311,7zxLuEY7mKLKdyWjnCfP35,Colorblind (feat. Dayseeker),Adventure Club,2021-08-13,1,sad,Sad Beats
63312,7zxfQk44mX0eW4eWtdDlKI,Mind Right,TK N Cash,2014-10-14,1,angry,Pissed Off Workout


**YOLO 객체인식 정보를 반환하는 함수**

출처: https://hanryang1125.tistory.com/9

In [13]:
def yolo_trans(frame, size, score_threshold, nms_threshold):
    # YOLO 네트워크 불러오기
    net = cv2.dnn.readNet(f"yolov4.weights", "yolov4.cfg")
    layer_names = net.getLayerNames()
    output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]

    # 이미지의 높이, 너비, 채널 받아오기
    height, width, channels = frame.shape

    # 네트워크에 넣기 위한 전처리
    blob = cv2.dnn.blobFromImage(frame, 0.00392, (size, size), (0, 0, 0), True, crop=False)

    # 전처리된 blob 네트워크에 입력
    net.setInput(blob)

    # 결과 받아오기
    try:
        outs = net.forward(output_layers)
    except:
        return []

    # 각각의 데이터를 저장할 빈 리스트
    class_ids = []
    confidences = []
    boxes = []

    for out in outs:
        for detection in out:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]

            if confidence > 0.1:
                # 탐지된 객체의 너비, 높이 및 중앙 좌표값 찾기
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)

                # 객체의 사각형 테두리 중 좌상단 좌표값 찾기
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)

                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)

    # Non Maximum Suppression (겹쳐있는 박스 중 confidence 가 가장 높은 박스를 선택)
    indexes = cv2.dnn.NMSBoxes(boxes, confidences, score_threshold=score_threshold, nms_threshold=nms_threshold)

    result = []

    for i in range(len(boxes)):
        if i in indexes:
            x, y, w, h = boxes[i]
            
            # 인식된 객체 수만큼 [객체아이디, 신뢰도, x, y, width, height] 추가
            result.append([class_ids[i], confidences[i], x, y, w, h])  

    return result

In [14]:
SENTIMENT = 'angry'    # happy, sad, relaxed, angry 에 대해 모두 실행 

In [15]:
image_list = []
image_array_list = []
na_id = []
album_ids = data[data['emotion'] == SENTIMENT]['album_id']

for album_id in tqdm(album_ids):
    image_list.append(Image.open(dpath + "emotion_album_images/{}/{}.jpg".format(SENTIMENT, album_id)).resize((300,300)))
    image_array_list.append(np.array(image_list[-1]))

100%|████████████████████████████████████████████████████████████████████████████| 13206/13206 [03:00<00:00, 73.12it/s]


In [16]:
# 클래스 리스트
classes = ["person", "bicycle", "car", "motorcycle",
           "airplane", "bus", "train", "truck", "boat", "traffic light", "fire hydrant",
           "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse",
           "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack",
           "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis",
           "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard",
           "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork", "knife",
           "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog",
           "pizza", "donut", "cake", "chair", "couch", "potted plant", "bed", "dining table",
           "toilet", "tv", "laptop", "mouse", "remote", "keyboard",
           "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator",
           "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"]

# 입력 사이즈 리스트 (Yolo 에서 사용되는 네크워크 입력 이미지 사이즈)
size_list = [320, 416, 608]
obj_list = []

for image in tqdm(image_array_list):
    
    # shape이 2차원일 경우 3차원으로 변경
    if len(image.shape) == 2:
        image = np.expand_dims(image, -1)
        
    temp = yolo_trans(frame=image, size=size_list[2], score_threshold=0.4, nms_threshold=0.4)
    
    # 객체가 없는 경우, [[-1, 0, 0, 0, 0, 0]] 저장 
    # (객체아이디는 0부터 시작하므로 -1, 나머지는 0으로 설정)
    if temp:
        obj_list.append(temp)
    else:
        obj_list.append([[-1, 0, 0, 0, 0, 0]])

100%|██████████████████████████████████████████████████████████████████████████| 13206/13206 [7:14:59<00:00,  1.98s/it]


In [17]:
import pickle

with open('{}_obj_list.pickle'.format(SENTIMENT), 'wb') as f:
    pickle.dump(obj_list, f)

In [18]:
with open('{}_obj_list.pickle'.format(SENTIMENT), 'rb') as f:
    temp = pickle.load(f)

In [19]:
obj_list

[[[0, 0.7302692532539368, 22, 76, 226, 217]],
 [[0, 0.996549665927887, -5, -4, 309, 307]],
 [[0, 0.4492538571357727, 57, 107, 184, 192]],
 [[0, 0.9017158150672913, 75, 96, 218, 200]],
 [[-1, 0, 0, 0, 0, 0]],
 [[0, 0.8260605335235596, -6, -2, 310, 300]],
 [[-1, 0, 0, 0, 0, 0]],
 [[-1, 0, 0, 0, 0, 0]],
 [[-1, 0, 0, 0, 0, 0]],
 [[0, 0.7335050702095032, 9, 123, 116, 173]],
 [[-1, 0, 0, 0, 0, 0]],
 [[-1, 0, 0, 0, 0, 0]],
 [[0, 0.8695269227027893, 264, 128, 6, 8],
  [56, 0.9273641705513, 32, 118, 57, 105],
  [56, 0.5251302719116211, 64, 115, 34, 78],
  [56, 0.9499431252479553, 206, 113, 63, 98],
  [56, 0.9780985713005066, 155, 113, 80, 128]],
 [[-1, 0, 0, 0, 0, 0]],
 [[0, 0.49141213297843933, 0, 11, 146, 170],
  [0, 0.860316812992096, 131, 222, 170, 77]],
 [[0, 0.986515462398529, 1, 66, 288, 235]],
 [[0, 0.985859751701355, 2, -6, 232, 313]],
 [[-1, 0, 0, 0, 0, 0]],
 [[-1, 0, 0, 0, 0, 0]],
 [[0, 0.9325319528579712, 20, 74, 107, 216]],
 [[-1, 0, 0, 0, 0, 0]],
 [[-1, 0, 0, 0, 0, 0]],
 [[2, 0.68

In [20]:
temp

[[[0, 0.7302692532539368, 22, 76, 226, 217]],
 [[0, 0.996549665927887, -5, -4, 309, 307]],
 [[0, 0.4492538571357727, 57, 107, 184, 192]],
 [[0, 0.9017158150672913, 75, 96, 218, 200]],
 [[-1, 0, 0, 0, 0, 0]],
 [[0, 0.8260605335235596, -6, -2, 310, 300]],
 [[-1, 0, 0, 0, 0, 0]],
 [[-1, 0, 0, 0, 0, 0]],
 [[-1, 0, 0, 0, 0, 0]],
 [[0, 0.7335050702095032, 9, 123, 116, 173]],
 [[-1, 0, 0, 0, 0, 0]],
 [[-1, 0, 0, 0, 0, 0]],
 [[0, 0.8695269227027893, 264, 128, 6, 8],
  [56, 0.9273641705513, 32, 118, 57, 105],
  [56, 0.5251302719116211, 64, 115, 34, 78],
  [56, 0.9499431252479553, 206, 113, 63, 98],
  [56, 0.9780985713005066, 155, 113, 80, 128]],
 [[-1, 0, 0, 0, 0, 0]],
 [[0, 0.49141213297843933, 0, 11, 146, 170],
  [0, 0.860316812992096, 131, 222, 170, 77]],
 [[0, 0.986515462398529, 1, 66, 288, 235]],
 [[0, 0.985859751701355, 2, -6, 232, 313]],
 [[-1, 0, 0, 0, 0, 0]],
 [[-1, 0, 0, 0, 0, 0]],
 [[0, 0.9325319528579712, 20, 74, 107, 216]],
 [[-1, 0, 0, 0, 0, 0]],
 [[-1, 0, 0, 0, 0, 0]],
 [[2, 0.68