## feature 추출
: 1 **score** → n **resized staves →** n **feature →** 다 이어서 1 **csv file**
- n **resized stave images, csv file은** `data/processed-feature/{title}/` **에 저장됨.**

In [1]:
import pandas as pd
import numpy as np
import glob
import os
import re
import cv2
import csv
import json
import xml.etree.ElementTree as ET
from PIL import Image, ImageOps
from datetime import datetime

constant

In [2]:
DATA_PATH=f"../data"
MODEL_PATH=f"../model"
IMAGE_PATH=f"../image/"

DATA_FEATURE_PATH=f"{DATA_PATH}/processed-feature"
DATA_RAW_PATH=f"{DATA_PATH}/raw"
DATA_TEST_PATH=f"{DATA_PATH}/test"

OSMD="osmd-dataset-v1.0.0"

STAVE = "stave"
PAD_STAVE = "pad-stave"
FEATURE = "feature"
LABELED_FEATURE = "labeled-feature"
CURSOR="cursor"

OMR="omr-seq2seq"

STAVE_HEIGHT = 180
STAVE_WIDTH = 1850

# -- width:40 중에 | 좌우 15pad * 2 -> 10px
NOTE_PAD = 15

pitch

![image.png](attachment:image.png)

In [3]:
REST_QUARTER="REST_QUARTER"
REST_EIGHTH="REST_EIGHTH"  
REST_HALF="REST_HALF"
REST_WHOLE="REST_WHOLE"

NOTHING ="NOTHING" # 아무것도 없음.

PITCH_NOTES = [NOTHING, 'D4', 'F4', 'A4', 'C5', 'D5', 'E5', 'F5', 'G5', 'A5', REST_QUARTER, REST_EIGHTH, REST_HALF, REST_WHOLE]
PTICH_HEIGHT = len(PITCH_NOTES)

# -- 각 0 ~ 16 로 매핑 -> 실제 pitch는 -4 해주면 됨.
# {0: 'A3', 1: 'B3', 2: 'C4', 3: 'D4', 4: 'E4', 5: 'F4', 6: 'G4', 7: 'A4', 8: 'B4', 9: 'C5', 10: 'D5', 11: 'E5', 12: 'F5', 13: 'G5', 14: 'A5', 15: 'B5', 16: 'C6'}
CODE2PITCH_NOTE = {index: note for index, note in enumerate(PITCH_NOTES)}
# {'A3': 0, 'B3': 1, 'C4': 2, 'D4': 3, 'E4': 4, 'F4': 5, 'G4': 6, 'A4': 7, 'B4': 8, 'C5': 9, 'D5': 10, 'E5': 11, 'F5': 12, 'G5': 13, 'A5': 14, 'B5': 15, 'C6': 16}
PITCH_NOTE2CODE = {note: index for index, note in enumerate(PITCH_NOTES)}

In [4]:
from itertools import combinations
# 가능한 모든 노트 조합 생성
all_combinations = []
for r in range(1, len(PITCH_NOTES) + 1):
    all_combinations.extend(combinations(PITCH_NOTES, r))

# 클래스 라벨 생성
# label_list:class
# {('D4',): 'Class_0', ('F4',): 'Class_1',
# 아무것도 안 친 거  
class_labels = {():'Class_0'}
for idx, combo in enumerate(all_combinations):
    class_labels[combo] = f"Class_{idx+1}"
print(class_labels)
# class:label_list
# {Class_0:('D4',),,,
labels_class = {v: k for k, v in class_labels.items()}

# 데이터에 대한 라벨 생성
def generate_labels(data):
    labels = []
    for item in data:
        label = class_labels[tuple(item)]
        labels.append(label)
    return labels

# # 예시 데이터
# example_data = [['D4', 'F4', 'REST_QUARTER'],
#                 ['A4', 'C5', 'REST_EIGHTH'],
#                 ['D5', 'E5', 'F5'],
#                 ['G5', 'A5', 'REST_HALF'],
#                 ['REST_WHOLE']]
# 라벨 생성
# example_labels = generate_labels(example_data)

{(): 'Class_0', ('D4',): 'Class_1', ('F4',): 'Class_2', ('A4',): 'Class_3', ('C5',): 'Class_4', ('D5',): 'Class_5', ('E5',): 'Class_6', ('F5',): 'Class_7', ('G5',): 'Class_8', ('A5',): 'Class_9', ('REST_QUARTER',): 'Class_10', ('REST_EIGHTH',): 'Class_11', ('REST_HALF',): 'Class_12', ('REST_WHOLE',): 'Class_13', ('D4', 'F4'): 'Class_14', ('D4', 'A4'): 'Class_15', ('D4', 'C5'): 'Class_16', ('D4', 'D5'): 'Class_17', ('D4', 'E5'): 'Class_18', ('D4', 'F5'): 'Class_19', ('D4', 'G5'): 'Class_20', ('D4', 'A5'): 'Class_21', ('D4', 'REST_QUARTER'): 'Class_22', ('D4', 'REST_EIGHTH'): 'Class_23', ('D4', 'REST_HALF'): 'Class_24', ('D4', 'REST_WHOLE'): 'Class_25', ('F4', 'A4'): 'Class_26', ('F4', 'C5'): 'Class_27', ('F4', 'D5'): 'Class_28', ('F4', 'E5'): 'Class_29', ('F4', 'F5'): 'Class_30', ('F4', 'G5'): 'Class_31', ('F4', 'A5'): 'Class_32', ('F4', 'REST_QUARTER'): 'Class_33', ('F4', 'REST_EIGHTH'): 'Class_34', ('F4', 'REST_HALF'): 'Class_35', ('F4', 'REST_WHOLE'): 'Class_36', ('A4', 'C5'): 'Class

In [5]:
def get_datetime():
    return datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
def get_title(score_path):
    return os.path.basename(os.path.dirname(score_path))

In [6]:
def get_all_files(parent_folder_path, exp):
    all_file_list = glob.glob(f"{parent_folder_path}/*")
    file_list = [file for file in all_file_list if file.endswith(f".{exp}")]
    return file_list

## 이미지 전처리

In [7]:
def transform_img2binaryImg(img):
    """
    param : rgb image
    return : binary image
    """
    img = cv2.imread(img)
    
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # -- 설정 임곗값(retval), 결과 이미지(biImg)
    ret, biImg = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV) # -- 임곗값을 초과할 경우 0, 아닐 경우 maxval
    return biImg

In [8]:
def get_score_data(score_path):
    """
    score의 binary img, size 얻기
    return: binary img, height, width
    """
    biImg = transform_img2binaryImg(score_path)
    height, width = biImg.shape
    return biImg, height, width

In [9]:
def extract_segment_from_score(biImg):
    """
    score에서 각 segment 추출
    객체 정보를 함께 반환하는 레이블링 함수
    cnt : 객체 수 + 1 (배경 포함)
    labels : 객체에 번호가 지정된 레이블 맵
    stats : N x 5, N은 객체 수 + 1이며 각각의 행은 번호가 지정된 객체를 의미, 5열에는 x, y, width, height, area 순으로 정보가 담겨 있습니다. x,y 는 좌측 상단 좌표를 의미하며 area는 면적, 픽셀의 수를 의미합니다.
    centroids : N x 2, 2열에는 x,y 무게 중심 좌표가 입력되어 있습니다. 무게 중심 좌표는 픽셀의 x 좌표를 다 더해서 갯수로 나눈 값입니다. y좌표도 동일합니다.
    """
    cnt, labels, stats, centroids = cv2.connectedComponentsWithStats(biImg)
    return cnt, labels, stats, centroids

In [10]:
def extract_stave_from_score(biImg, score_width, cnt, stats):
    """
    stave 추출
    """
    stave_list=[]
    # -- idx 0은 배경이라 제외
    for i in range(1, cnt):
        x, y, w, h, _ = stats[i]
        # -- stave 인식
        if w > score_width * 0.3: # -- stave width가 score width와 같지 않은 경우가 있을 수도 있음
            stave = biImg[y:y+h, x:x+w]
            stave_list.append(stave)

    return stave_list

In [11]:
def save_stave(title, stave_list, state):
    """
    save stave list
    """
    os.makedirs(f"{DATA_FEATURE_PATH}/{title}", exist_ok=True)
    for idx, stave in enumerate(stave_list):
        date_time = get_datetime()
        cv2.imwrite(f"{DATA_FEATURE_PATH}/{title}/{title}_{state}_{idx}_{date_time}.png", stave)
        print(state, idx, "--shape: ", stave.shape)

In [12]:
def transform_score2stave(score_path):
    """
    score로부터 stave image추출
    """

    biImg, _, width = get_score_data(score_path)

    cnt, _, stats, _ = extract_segment_from_score(biImg)

    stave_list = extract_stave_from_score(biImg, width, cnt, stats)

    return stave_list

# score로부터 stave image추출!!!!

In [13]:
score_path = f"{DATA_RAW_PATH}/{OSMD}/Rock-ver/Rock-ver.png" # -- stave를 추출할 악보
title = get_title(score_path)

stave_list = transform_score2stave(score_path)
save_stave(title, stave_list, STAVE)

stave 0 --shape:  (169, 1850)
stave 1 --shape:  (175, 1850)
stave 2 --shape:  (171, 1850)
stave 3 --shape:  (171, 1850)
stave 4 --shape:  (175, 1850)
stave 5 --shape:  (176, 1850)
stave 6 --shape:  (175, 1850)
stave 7 --shape:  (171, 1850)
stave 8 --shape:  (171, 1850)
stave 9 --shape:  (171, 1850)
stave 10 --shape:  (171, 1850)
stave 11 --shape:  (171, 1850)
stave 12 --shape:  (171, 1850)
stave 13 --shape:  (171, 1850)
stave 14 --shape:  (171, 1850)
stave 15 --shape:  (171, 1850)
stave 16 --shape:  (171, 1850)
stave 17 --shape:  (171, 1850)
stave 18 --shape:  (172, 1850)
stave 19 --shape:  (173, 1850)
stave 20 --shape:  (173, 1850)
stave 21 --shape:  (171, 944)


-----------------------------------------------

In [14]:
def transform_stave2padStave(stave_list):
    pad_stave = []
    for stave in stave_list:
        pad_image = np.zeros((STAVE_HEIGHT, STAVE_WIDTH))

        # 새로운 이미지에 주어진 이미지 삽입
        pad_image[:stave.shape[0], :stave.shape[1]] = stave

        # 부족한 부분 0으로 채우기
        pad_image[stave.shape[0]:, :] = 0  # 높이 부족 부분
        pad_image[:, stave.shape[1]:] = 0  # 너비 부족 부분
        pad_stave.append(pad_image)
    return pad_stave

# Padding Stave Image

In [15]:
pad_stave_list = transform_stave2padStave(stave_list)
save_stave(title, pad_stave_list, PAD_STAVE)

pad-stave 0 --shape:  (180, 1850)
pad-stave 1 --shape:  (180, 1850)
pad-stave 2 --shape:  (180, 1850)
pad-stave 3 --shape:  (180, 1850)
pad-stave 4 --shape:  (180, 1850)
pad-stave 5 --shape:  (180, 1850)
pad-stave 6 --shape:  (180, 1850)
pad-stave 7 --shape:  (180, 1850)
pad-stave 8 --shape:  (180, 1850)
pad-stave 9 --shape:  (180, 1850)
pad-stave 10 --shape:  (180, 1850)
pad-stave 11 --shape:  (180, 1850)
pad-stave 12 --shape:  (180, 1850)
pad-stave 13 --shape:  (180, 1850)
pad-stave 14 --shape:  (180, 1850)
pad-stave 15 --shape:  (180, 1850)
pad-stave 16 --shape:  (180, 1850)
pad-stave 17 --shape:  (180, 1850)
pad-stave 18 --shape:  (180, 1850)
pad-stave 19 --shape:  (180, 1850)
pad-stave 20 --shape:  (180, 1850)
pad-stave 21 --shape:  (180, 1850)


---

In [16]:
def transform_staveImg2feature(img_list):
    """
    (선택)
    pad image file로부터 feature 추출
    단, padding 된 png만 (pad-stave 이름 붙은 것들만)
    """
    feature_list=[]
    for idx, img in enumerate(img_list):
        if PAD_STAVE in img:
            biImg = transform_img2binaryImg(img)
            feature_list.append(biImg)
            print(idx, "--shape: ", biImg.shape)
    return feature_list

In [17]:
def save_feature_csv(title, features, state):
    """
    state : LABELED_FEATURE | FEATURE 
    """
    date_time = get_datetime()
    save_path = f"{DATA_FEATURE_PATH}/{title}/{title}_{state}_{date_time}.csv"
    df = pd.DataFrame(features)
    df.to_csv(save_path, index=False, header=False)
    print("features shape: ", df.shape)

## csv 파일로 저장

In [18]:
# -- 1. image file로부터 feature 저장하려면 아래 코드
# img_list = get_all_files(f"{DATA_FEATURE_PATH}/{title}", "png")
# feature_list = transform_staveImg2feature(img_list)
# -- 2. 위 feature 그대로 쓰려면 아래 코드
feature_list = pad_stave_list

# 데이터 이어붙이기
merged_data = np.concatenate(feature_list, axis=1)

# 전치
transposed_data = np.transpose(merged_data)

save_feature_csv(title, transposed_data, FEATURE)

features shape:  (40700, 180)


---

## XML 라벨링

In [19]:
def load_xml_data(file_path: str):
    """
    xml data 불러오기
    """
    try:
        tree = ET.parse(file_path)  # XML 파일을 파싱
        root = tree.getroot()
        return root
    except ET.ParseError as e:
        print(f"XML 파일을 파싱하는 동안 오류가 발생했습니다: {e}")
        return None

In [20]:
def extract_pitch(xml_file):
    """
    1. multiple pitch 추출
    <chord/> <-  얘 있으면 동시에 친 거임
    <unpitched>
        <display-step>A</display-step>
        <display-octave>5</display-octave>
    </unpitched>

    2. !!!!!!!!!!!!!예외!!!!!!!!!!!!!
    - grace note 제외

    3. 쉼표 추출
    <note>
        <rest/>
        <duration>48</duration>
        <type>quarter</type>
    </note>

    output : [['G5'], ['G5'], ['G5'], ['C5'], ['C5'], ['F4', 'A5'], ...]
    """
    def extract_step_octave(pitch_element):
        """
        step, octave 추출
        <unpitched>
            <step>C</step>
            <octave>5</octave>
        </unpitched>
        """
        step = pitch_element.find('display-step').text
        octave = pitch_element.find('display-octave').text
        return step, octave
    
    # XML 파일 파싱
    root = load_xml_data(xml_file)
    
    pitch_list = []
    chord_list = []

    # 모든 <note> 엘리먼트를 찾습니다.
    for note in root.iter('note'):
        # <grace> 엘리먼트를 가진 <note> 엘리먼트인지 확인
        is_grace = note.find('grace') is not None
        if is_grace:
            print("grace!")
            continue

        # <rest> 엘리먼트를 가진 <note> 엘리먼트인지 확인
        # REST_QUARTER="REST_QUARTER"
        # REST_EIGHTH="REST_EIGHTH"  
        # REST_HALF="REST_HALF"
        # REST_WHOLE="REST_WHOLE"
        is_rest = note.find('rest') is not None
        if is_rest:
            rest_element = note.find('type').text
            if rest_element == 'quarter':
                pitch_list.append([REST_QUARTER])
            elif rest_element == 'eighth':
                pitch_list.append([REST_EIGHTH])
            elif rest_element == 'half':
                pitch_list.append([REST_HALF])
            elif rest_element == 'whole':
                pitch_list.append([REST_WHOLE])
            continue

        pitch_elements = note.findall('./unpitched')
        # <chord> 엘리먼트를 가진 <note> 엘리먼트인지 확인
        is_chord = note.find('chord') is not None
        # 만약 <chord> 엘리먼트를 가진 <note> 엘리먼트라면, 계속 추가
        if is_chord:
            for pitch_element in pitch_elements:
                step, octave = extract_step_octave(pitch_element)
                chord_list.append(step + octave)
        else:
            for pitch_element in pitch_elements:
                step, octave = extract_step_octave(pitch_element)
                chord_list = [] # -- 초기화
                chord_list.append(step + octave)
                pitch_list.append(chord_list)
    
    return pitch_list

In [21]:
# XML 파일 경로
xml_file_path = f'{DATA_RAW_PATH}/{OSMD}/{title}/Rock-ver.xml'
pitch_list = extract_pitch(xml_file_path)

# 결과를 출력합니다.
for i, pitches in enumerate(pitch_list):
    print(f"Note {i}: ", pitches)

grace!
grace!
grace!
grace!
Note 0:  ['G5']
Note 1:  ['G5']
Note 2:  ['G5']
Note 3:  ['C5']
Note 4:  ['F4', 'A5']
Note 5:  ['C5', 'G5']
Note 6:  ['G5']
Note 7:  ['F4', 'G5']
Note 8:  ['F4', 'G5']
Note 9:  ['C5', 'G5']
Note 10:  ['G5']
Note 11:  ['F4', 'G5']
Note 12:  ['G5']
Note 13:  ['C5', 'G5']
Note 14:  ['G5']
Note 15:  ['F4', 'G5']
Note 16:  ['F4', 'G5']
Note 17:  ['C5', 'G5']
Note 18:  ['G5']
Note 19:  ['F4', 'G5']
Note 20:  ['G5']
Note 21:  ['C5', 'G5']
Note 22:  ['G5']
Note 23:  ['F4', 'G5']
Note 24:  ['F4', 'G5']
Note 25:  ['C5', 'G5']
Note 26:  ['F4', 'A5']
Note 27:  ['REST_EIGHTH']
Note 28:  ['F4']
Note 29:  ['C5', 'G5']
Note 30:  ['G5']
Note 31:  ['F4', 'G5']
Note 32:  ['F4', 'G5']
Note 33:  ['C5', 'A5']
Note 34:  ['F4', 'A5']
Note 35:  ['G5']
Note 36:  ['C5', 'G5']
Note 37:  ['G5']
Note 38:  ['F4', 'G5']
Note 39:  ['F4', 'G5']
Note 40:  ['C5', 'G5']
Note 41:  ['G5']
Note 42:  ['F4', 'G5']
Note 43:  ['G5']
Note 44:  ['C5', 'G5']
Note 45:  ['G5']
Note 46:  ['F4', 'G5']
Note 4

In [22]:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

pitch_labels = generate_labels(pitch_list)
print(pitch_labels)


# 데이터 전처리: One-hot Encoding
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(pitch_labels)
print(encoded_labels)
print("-----------------------------------------")
print(encoded_labels.shape)


onehot_encoder = OneHotEncoder()
onehot_encoded = onehot_encoder.fit_transform(encoded_labels.reshape(-1,1))

encoded_labels=onehot_encoded.toarray()

print(encoded_labels)
print("-----------------------------------------")
print(encoded_labels.shape)

['Class_8', 'Class_8', 'Class_8', 'Class_4', 'Class_32', 'Class_50', 'Class_8', 'Class_31', 'Class_31', 'Class_50', 'Class_8', 'Class_31', 'Class_8', 'Class_50', 'Class_8', 'Class_31', 'Class_31', 'Class_50', 'Class_8', 'Class_31', 'Class_8', 'Class_50', 'Class_8', 'Class_31', 'Class_31', 'Class_50', 'Class_32', 'Class_11', 'Class_2', 'Class_50', 'Class_8', 'Class_31', 'Class_31', 'Class_51', 'Class_32', 'Class_8', 'Class_50', 'Class_8', 'Class_31', 'Class_31', 'Class_50', 'Class_8', 'Class_31', 'Class_8', 'Class_50', 'Class_8', 'Class_31', 'Class_31', 'Class_50', 'Class_8', 'Class_31', 'Class_8', 'Class_50', 'Class_8', 'Class_31', 'Class_31', 'Class_50', 'Class_32', 'Class_10', 'Class_10', 'Class_11', 'Class_2', 'Class_4', 'Class_32', 'Class_50', 'Class_8', 'Class_31', 'Class_31', 'Class_50', 'Class_8', 'Class_31', 'Class_8', 'Class_50', 'Class_31', 'Class_8', 'Class_31', 'Class_50', 'Class_8', 'Class_31', 'Class_8', 'Class_50', 'Class_8', 'Class_31', 'Class_31', 'Class_50', 'Class_8'

## cursor 정보를 이용해 라벨링

### cursor 확인

In [23]:
def get_cursor_data(point):
    # cursor 정보는 1024 기준이라서 x2
    top = int(point["top"]) * 2
    left = int(point["left"]) * 2
    height = int(point["height"]) * 2
    width = int(point["width"]) * 2

    return top, left, height, width

In [24]:
def draw_cursor_on_score(image_path, json_path):
    """
    OSMD로 추출한 cursor 위치값을 score에 그려보기
    """
    # 이미지 읽어오기
    image = cv2.imread(image_path)

    # JSON 파일 읽어오기
    with open(json_path, "r") as json_file:
        data = json.load(json_file)

    # 빨간색 네모 그리기
    for cursor in data["cursorList"]:
        for point in cursor:
            top, left, height, width = get_cursor_data(point)
            cv2.rectangle(image, (left+NOTE_PAD, top), (left + width-NOTE_PAD, top + height), (0, 0, 255), 2)
    date_time = get_datetime()
    cv2.imwrite(f"{DATA_FEATURE_PATH}/{title}/{title}-{CURSOR}-{date_time}.png", image)

In [25]:
json_path=f"{DATA_RAW_PATH}/{OSMD}/{title}/{title}.json"
draw_cursor_on_score(score_path, json_path)

### 라벨링

In [26]:
def load_feature_from_csv(csv_file_path):
    df = pd.read_csv(csv_file_path)
    print("csv shape: ", df.shape)
    return df

In [27]:
csv_file_path=get_all_files(f"{DATA_FEATURE_PATH}/{title}", "csv")

# label_feature 없애기
filtered_list = [s for s in csv_file_path if LABELED_FEATURE not in s]
feature_df = load_feature_from_csv(filtered_list[-1])

csv shape:  (40699, 180)


In [28]:
PTICH_COMB_HEIGHT=encoded_labels.shape[1]

In [29]:
"""
1. 먼저 가로로 label df 생성 후
2. feature df + label df.T -> new csv
"""
with open(json_path, "r") as json_file:
    data = json.load(json_file)

# shape: PTICH_HEIGHT x feature concat width(40700)
# label_df = pd.DataFrame(0, index=range(PTICH_COMB_HEIGHT), columns=range(feature_df.shape[0]))

# PTICH_COMB_HEIGHT만큼의 길이를 가지며 끝 번째 원소는 1이고 나머지는 0인 리스트 생성
first_row = [0] * (PTICH_COMB_HEIGHT) + [1]
# DataFrame 생성 및 초기화
label_df = pd.DataFrame([first_row] * feature_df.shape[0])
print(label_df)

label_list=[]
score_leftpad = data["measureList"][0][0]["left"] * 2 # -- stave는 score의 양옆 padding을 자르게 되니, 실제 cursor size와 달라짐. -> 맨 처음 마디의 x 만큼 sliding

cursor_list=0
# cursorList-2d: row 마디 x col 노트 
for i, cursor in enumerate(data["cursorList"]):
    print("len: ", len(cursor))

    for j, point in enumerate(cursor):
        print("row: ", i,", col: ",j)

        top, left, height, width = get_cursor_data(point)
        left += i * STAVE_WIDTH - score_leftpad
        print(i, " : ", left)

        # # -- 노트 인식된 곳에, xml에서 뽑아온 걸 매핑
        # pitch_code = [0]*PTICH_HEIGHT
        # for pitch in pitch_list[cursor_list]:
        #     pitch_idx=PITCH_NOTE2CODE[pitch]
        #     pitch_code[pitch_idx]=1

        
        # [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.] + [0]
        # pitch_code=encoded_labels[cursor_list].append(0)
        pitch_code=np.append(encoded_labels[cursor_list],0)
        print(pitch_code)
        
        right_idx = min(left + width, label_df.shape[1]) # -- shape을 넘길 수 있어서
        
        tmp_width = right_idx - left - 2*NOTE_PAD
        pitch_code_df = [pitch_code.copy() for _ in range(tmp_width)]
        transpose_data = np.transpose(pitch_code_df)
        
        label_df.loc[:, left+NOTE_PAD: left+width-1-NOTE_PAD ] = transpose_data
        cursor_list+=1
        # print(label_df.loc[:, left: left + tmp_width-1])
    print("----------------------------")
    
print("pitch_list len: ", len(pitch_list), "cursor len:", cursor_list)

label_df = np.transpose(label_df)

# 메모리 이슈로 잠시,,, 주석처리..
# merged_df = pd.concat([feature_df, label_df], axis=1)
# save_feature_csv(title, merged_df, LABELED_FEATURE)

# print(label_df)

       0   1   2   3   4   5   6   7   8   9   10  11  12  13  14  15  16  17
0       0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1
1       0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1
2       0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1
3       0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1
4       0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1
...    ..  ..  ..  ..  ..  ..  ..  ..  ..  ..  ..  ..  ..  ..  ..  ..  ..  ..
40694   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1
40695   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1
40696   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1
40697   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1
40698   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1

[40699 rows x 18 columns]
len:  27
row:  0 , col:  0
0  :  134


In [30]:
def draw_label_on_cursor(image_path, json_path, label_list):
    """
    OSMD로 추출한 label을 cursor에 그려보기
    """
    # 이미지 읽어오기
    image = cv2.imread(image_path)

    # JSON 파일 읽어오기
    with open(json_path, "r") as json_file:
        data = json.load(json_file)

    cursor_list=0
    # cursorList-2d: row 마디 x col 노트 
    for i, cursor in enumerate(data["cursorList"]):
        for j, point in enumerate(cursor):
            top, left, height, width = get_cursor_data(point)

            # -- 노트 인식된 곳에, xml에서 뽑아온 걸 매핑
            for idx, pitch in enumerate(pitch_list[cursor_list]):
                cv2.putText(image, pitch, (left+NOTE_PAD, top+NOTE_PAD+idx*24), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 1, cv2.LINE_AA)
            
            cursor_list+=1

    date_time = get_datetime()
    cv2.imwrite(f"{DATA_FEATURE_PATH}/{title}/{title}-label-{date_time}.png", image)

In [31]:
cursored_score_path=f"{DATA_FEATURE_PATH}/Rock-ver/Rock-ver-cursor-2024-03-24_02-01-44.png" 
draw_label_on_cursor(score_path, json_path, label_list)

# Train

In [32]:
def split_x_data(data, chunk_size):
    num_samples, num_features = data.shape
    num_chunks = num_samples // chunk_size

    # 나머지 부분을 제외한 데이터만 사용
    data = data[: num_chunks * chunk_size, :]

    # reshape을 통해 3D 배열로 변환
    return data.reshape((num_chunks, chunk_size, num_features, 1))

def split_data(data, chunk_size):
    num_samples, num_features = data.shape
    num_chunks = num_samples // chunk_size

    # 나머지 부분을 제외한 데이터만 사용
    data = data[: num_chunks * chunk_size, :]

    # reshape을 통해 3D 배열로 변환
    return data.reshape((num_chunks, chunk_size, num_features))
    # 데이터 분할을 위한 함수 정의

In [33]:
from sklearn.model_selection import train_test_split

print(label_df)

X = split_x_data(feature_df.to_numpy(), STAVE_WIDTH)
y = split_data(label_df.to_numpy(), STAVE_WIDTH)

# -- split train, val, test
x_train_temp, x_test, y_train_temp, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
# del X
# del y

x_train_final, x_val_final, y_train_final, y_val_final = train_test_split(
    x_train_temp,
    y_train_temp,
    test_size=0.2,
    random_state=42,
)
del x_train_temp
del y_train_temp

x_train = x_train_final
x_val = x_val_final
x_test = x_test
y_train = y_train_final
y_val = y_val_final
y_test = y_test

print("x_train : ", x_train.shape)
print("y_train : ", y_train.shape)
print("x_val : ", x_val.shape)
print("y_val : ", y_val.shape)
print("x_test : ", x_test.shape)
print("y_test : ", y_test.shape)


    0      1      2      3      4      5      6      7      8      9      ...  \
0       0      0      0      0      0      0      0      0      0      0  ...   
1       0      0      0      0      0      0      0      0      0      0  ...   
2       0      0      0      0      0      0      0      0      0      0  ...   
3       0      0      0      0      0      0      0      0      0      0  ...   
4       0      0      0      0      0      0      0      0      0      0  ...   
5       0      0      0      0      0      0      0      0      0      0  ...   
6       0      0      0      0      0      0      0      0      0      0  ...   
7       0      0      0      0      0      0      0      0      0      0  ...   
8       0      0      0      0      0      0      0      0      0      0  ...   
9       0      0      0      0      0      0      0      0      0      0  ...   
10      0      0      0      0      0      0      0      0      0      0  ...   
11      0      0      0     

ValueError: Found input variables with inconsistent numbers of samples: [21, 0]

In [None]:
from keras.models import Model

import tensorflow as tf
from tensorflow.keras.layers import Dense, LSTM, Conv1D, Input, Bidirectional
from tensorflow.keras.optimizers import RMSprop

n_rows = STAVE_WIDTH
n_columns = STAVE_HEIGHT
n_classes = onehot_encoded.shape[1]
opt_learning_rate= 0.01

input_layer = Input(shape=(n_rows, n_columns))
conv1 = Conv1D(
    filters=32, kernel_size=8, strides=1, activation="relu", padding="same"
)(input_layer)
conv2 = Conv1D(
    filters=32, kernel_size=8, strides=1, activation="relu", padding="same"
)(conv1)
conv3 = Conv1D(
    filters=32, kernel_size=8, strides=1, activation="relu", padding="same"
)(conv2)
lstm1 = Bidirectional(LSTM(32, return_sequences=True))(conv3)
lstm2 = Bidirectional(LSTM(32, return_sequences=True))(lstm1)
lstm3 = Bidirectional(LSTM(32, return_sequences=True))(lstm2)

output_layer = Dense(n_classes, activation="softmax")(lstm3)
model = Model(inputs=input_layer, outputs=output_layer)


# def build_encoder(input_shape):
#     input_layer = Input(shape=input_shape)
#     conv1 = Conv1D(filters=32, kernel_size=8, strides=1, activation="tanh", padding="same")(input_layer)
#     conv2 = Conv1D(filters=32, kernel_size=8, strides=1, activation="tanh", padding="same")(conv1)
#     conv3 = Conv1D(filters=32, kernel_size=8, strides=1, activation="tanh", padding="same")(conv2)
#     lstm1 = Bidirectional(LSTM(32, return_sequences=True))(conv3)
#     lstm2 = Bidirectional(LSTM(32, return_sequences=True))(lstm1)
#     lstm3 = Bidirectional(LSTM(32, return_sequences=True))(lstm2)
#     return Model(inputs=input_layer, outputs=lstm3)

# def build_decoder(encoder_output, n_classes, opt_learning_rate):
#     output_layer = Dense(n_classes, activation="sigmoid")(encoder_output)
#     model = Model(inputs=encoder_output, outputs=output_layer)
#     opt = RMSprop(learning_rate=opt_learning_rate)
#     model.compile(loss="binary_crossentropy", optimizer=opt, metrics=["binary_accuracy"])
#     return model

# # Encoder 모델을 먼저 구축합니다.
# encoder_input_shape = (n_rows, n_columns)
# encoder = build_encoder(encoder_input_shape)
# encoder.summary()

# # Decoder 모델을 구축하기 위해 Encoder의 출력을 사용합니다.
# encoder_output = encoder.output
# decoder = build_decoder(encoder_output, n_classes, opt_learning_rate)
# decoder.summary()

# model = Model(inputs=encoder.input, outputs=decoder(encoder.output))


opt = RMSprop(learning_rate=opt_learning_rate)
model.compile(
    loss="categorical_crossentropy", optimizer=opt, metrics=["binary_accuracy"]
)
model.summary()

In [None]:
from tensorflow.keras.callbacks import EarlyStopping

batch_size=32
training_epochs=40

# Implement model train logic
early_stopping = EarlyStopping(
    monitor="val_loss", patience=3, restore_best_weights=True, mode="auto"
)

history = model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    validation_data=(x_val, y_val),
    epochs=training_epochs,
    callbacks=[early_stopping],
)

stopped_epoch = early_stopping.stopped_epoch
print("--! finish train : stopped_epoch >> ", stopped_epoch, " !--")

# Implement model evaluation logic
print("\n# Evaluate on test data")

results = model.evaluate(
    x_test, y_test, batch_size=batch_size
)
print("test loss:", results[0])
print("test accuracy:", results[1])

"""
-- 학습한 모델 저장하기
"""

# 현재 날짜와 시간 가져오기
date_time = get_datetime()
# 모델 저장
model_path = f"{MODEL_PATH}/{OMR}_{date_time}.h5"
model.save(model_path)
print("--! save model: ", model_path)

y_pred = model.predict(x_test)
print(y_pred)

In [None]:
from typing import List
import matplotlib.pyplot as plt

def show_label_dict_plot(label: dict[str, List[float]], start=0, end=None):
    """
    -- label 그래프
    {
        "HH": [1, 0, 0, ...],
        "ST": [0, 0, 0, ...],
        ...
    }
    """
    if end is None:  # end가 none이라면 y_true 끝까지
        end = len(label[list(label.keys())[0]])  # 첫 번째 value의 길이

    leng = len(PITCH_NOTE2CODE)
    for key, label_arr in label.items():
        data = np.array(label_arr)
        plt.subplot(leng, 1, PITCH_NOTE2CODE[key] + 1)
        plt.plot(data)
        plt.axis([start, end, 0, 0.1])
        plt.title(f"{key}")

    os.makedirs(IMAGE_PATH, exist_ok=True)  # 이미지 폴더 생성
    date_time=get_datetime()
    plt.savefig(f"{IMAGE_PATH}/predict-{date_time}.png")
    plt.show()

In [None]:
def transform_arr2dict(arr_data):
    print("shape:", arr_data.shape)

    result_dict = {}
    for code, drum in CODE2PITCH_NOTE.items():
        # print("--", code, drum)

        result_dict[drum] = [row[code] for row in arr_data]
        print(result_dict[drum])
    return result_dict

def load_models(model_file=None):
    """
    -- method_type과 feature type에 맞는 가장 최근 모델 불러오기
    """
    model_files = glob.glob(f"{MODEL_PATH}/*.h5")
    if model_files is None or len(model_files) == 0:
        print("-- ! No pre-trained model ! --")
        return

    model_files.sort(reverse=True)  # 최신 순으로 정렬
    load_model_file = model_files[0]  # 가장 최근 모델

    if model_file is not None:  # 불러오고자 하는 특정 모델 파일이 있다면
        load_model_file = model_file

    print("-- ! load model: ", load_model_file)
    model = tf.keras.models.load_model(load_model_file)
    return model

def predict_score(stave_path):
    # -- resized image -> binary image
    biImg = transform_img2binaryImg(stave_path)
    biImg = 255-biImg
    biImg = np.transpose(biImg)

    # save_feature_csv("asdf", biImg, FEATURE) # -- 잘 됐는지 눈으로 확인하기 위함..
    feature = split_x_data(biImg, STAVE_WIDTH)

    predict_data = model.predict(feature)
    predict_data = predict_data.reshape((-1, n_classes))
    
    predict_data=onehot_encoder.inverse_transform(predict_data)
    print("onehot후: ", predict_data.shape)

    predict_data=label_encoder.inverse_transform(predict_data)
    print("label_enco후: ", predict_data.shape)

    result_pros=[]
    for pred in predict_data:
        result_pros.append(labels_class[pred])
        print(labels_class[pred])
    
    # result_dict = transform_arr2dict(predict_data)
    show_label_dict_plot(result_dict)

model = load_models()
# predict_score(f"{DATA_TEST_PATH}/Rock-ver_pad-stave_19_2024-03-22_03-03-59.png")
predict_score(f"{DATA_TEST_PATH}/Rock-ver_pad-stave_9_2024-03-24_02-01-38.png")

In [None]:
# image = cv2.imread(src)

# # Contour
# # RETR_EXTERNAL: 그림의 외곽 부분에만 컨투어를 그림
# contours, hierarchy = cv2.findContours(masked, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)

# masked_copy=255-masked

# # i=1
# # for contour in contours:
# #     x, y, w, h = cv2.boundingRect(contour)
# #     # -- 여기서 음표만 추출할 수 있도록 길이 설정... 하드코딩이라 방법 생각해내야 함.
# #     if (w>10 and w<=30) and (h>10): 
# #         roi = masked_copy[y:y+h, x:x+w]
# #         # cv2.imwrite('../data/segment-img/segments/save%s.jpg' %i, roi)
# #     i+=1

# for contour in contours:
#     x, y, w, h = cv2.boundingRect(contour)
#     aspect_ratio = w / h
    
#     # # 꼬리가 있는 음표는 가로 세로 비율이 작은 경우로 가정
#     # # if aspect_ratio < 0.5:
#     # # 꼬리의 방향에 따라 stem 결정
#     # if w < h:  # 세로로 긴 꼬리
#     #     stem_x = x + w // 2
#     #     stem_y1 = y + h // 2
#     #     stem_y2 = y + h
#     # else:     # 가로로 긴 꼬리
#     #     stem_x = x + w
#     #     stem_y1 = y + h // 2
#     #     stem_y2 = y + h
#     # cv2.rectangle(image, (stem_x, stem_y1), (stem_x, stem_y2), (0, 0, 255), 2)
#     # if (w>10 and w<=30) and (h>10):
#     cv2.rectangle(image, (x,y,w,h), (0, 0, 255), 2)

# # masked_copy=cv2.drawContours(masked_copy, contours, -1, (255, 0, 0), 3)
# date_time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
# cv2.imwrite(f"{SAVE_IMAGE_PATH}/scores/{score_title}-score-{date_time}.png", image)

In [None]:
# img = cv2.imread(src)
# img2 = img.copy()
# # 바이너리 이미지로 변환
# imgray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# ret, imthres = cv2.threshold(imgray, 127, 255, cv2.THRESH_BINARY_INV)

# # 가장 바깥 컨투어만 수집   --- ①
# contour, hierarchy = cv2.findContours(imthres, cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_NONE)
# cv2.drawContours(img, contour, -1, (0,255,0), 3)

# date_time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
# cv2.imwrite(f"{SAVE_IMAGE_PATH}/segments/{score_title}-segment-{date_time}.png", img)

In [None]:
cv2.waitKey(0)
cv2.destroyAllWindows()