## Music note detection and cropping from sheet music scores

In [None]:
import cv2
import os
import numpy as np

def threshold(image):
    """Return an inverted binary version of a BGR image.

    The image is converted to grayscale and then binarized with the
    ``THRESH_BINARY_INV | THRESH_OTSU`` flags of ``cv2.threshold``.

    Args:
        image: Color image in BGR channel order.

    Returns:
        The single-channel binarized image.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    flags = cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU
    _, binary = cv2.threshold(gray, 127, 255, flags)
    return binary

def remove_noise(image):
    """Keep only the regions covered by wide connected components.

    The image is binarized, its connected components are labelled, and every
    component wider than half the image width has its bounding box painted
    into a mask. The binarized image is then AND-ed with that mask, so
    anything outside those bounding boxes becomes 0.

    Args:
        image: Color image in BGR channel order.

    Returns:
        The masked binary image.
    """
    binary = threshold(image)
    # Mask that will hold the bounding boxes of the staff regions.
    mask = np.zeros(binary.shape, np.uint8)
    count, _labels, stats, _centroids = cv2.connectedComponentsWithStats(binary)
    min_width = binary.shape[1] * 0.5
    # Label 0 is skipped, as in the usual connected-components convention
    # where it denotes the background.
    for x, y, w, h, _area in stats[1:count]:
        if w > min_width:
            cv2.rectangle(mask, (x, y, w, h), (255, 0, 0), -1)
    return cv2.bitwise_and(binary, mask)

def remove_staves(image):
    """Detect horizontal staff lines and erase them from a binary image.

    A row counts as part of a staff line when at least half of its pixels
    are 255. Consecutive such rows are merged into one line. For each line,
    a column is cleared only when the pixels directly above and below the
    line are both 0, so symbols crossing the line are left intact.

    Rows outside the image are treated as empty, so a staff line on the
    first or last row no longer wraps around or raises ``IndexError``.

    Args:
        image: 2-D NumPy array holding 0/255 values; modified in place.

    Returns:
        Tuple ``(image, staves)`` where ``staves`` is the list of top
        y-coordinates of the detected staff lines.
    """
    height, width = image.shape
    staves = []  # Entries are [top y-coordinate, extra rows below the top row].

    # Number of white pixels in every row, computed in one vectorized pass.
    white_per_row = np.count_nonzero(image == 255, axis=1)
    for row in np.flatnonzero(white_per_row >= width * 0.5):
        row = int(row)
        if not staves or abs(staves[-1][0] + staves[-1][1] - row) > 1:
            # First line, or a row not adjacent to the previous line.
            staves.append([row, 0])
        else:
            # Adjacent to the previous line: the same line, one row thicker.
            staves[-1][1] += 1

    outside = np.ones(width, dtype=bool)  # Stand-in for a row beyond the image.
    for top_pixel, extra_rows in staves:
        bot_pixel = top_pixel + extra_rows
        above_empty = image[top_pixel - 1] == 0 if top_pixel > 0 else outside
        below_empty = image[bot_pixel + 1] == 0 if bot_pixel + 1 < height else outside
        # Clear only the columns where nothing touches the line from either side.
        image[top_pixel:bot_pixel + 1, above_empty & below_empty] = 0
    return image, [staff[0] for staff in staves]

def normalization(image, staves, standard):
    """Rescale the image so the average staff-line spacing equals ``standard``.

    The staff lines are taken in groups of five. The four gaps inside each
    complete group are averaged, and the image and the staff coordinates are
    scaled by ``standard / average_gap``. Lines left over after the last
    complete group are ignored, both in the sum and in the divisor.

    Args:
        image: 2-D binary image.
        staves: y-coordinates of the detected staff lines, top to bottom.
        standard: Target distance between neighbouring staff lines.

    Returns:
        Tuple ``(image, staves)`` with the resized, re-binarized image and
        the scaled staff coordinates.

    Raises:
        ValueError: If fewer than five staff lines are given or the measured
            spacing is zero, since no scale factor can be derived.
    """
    lines = len(staves) // 5  # Number of complete five-line groups.
    if lines == 0:
        raise ValueError("normalization needs at least five staff lines")

    total_distance = 0
    for line in range(lines):
        group = staves[line * 5:(line + 1) * 5]
        for staff_above, staff_below in zip(group, group[1:]):
            total_distance += abs(staff_above - staff_below)
    # Each complete group contributes exactly four gaps.
    avg_distance = total_distance / (lines * 4)
    if avg_distance == 0:
        raise ValueError("staff line spacing is zero; cannot normalize")

    height, width = image.shape
    weight = standard / avg_distance
    new_width = int(width * weight)
    new_height = int(height * weight)

    image = cv2.resize(image, (new_width, new_height))
    # Binarize again after resizing.
    ret, image = cv2.threshold(image, 127, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
    staves = [x * weight for x in staves]
    return image, staves

def weighted(value):
    """Scale a pixel measurement by ``standard / 10`` and truncate to int.

    ``standard`` is fixed at 10 here, so the scale factor is 1.0 and the
    result is ``value`` truncated toward zero.
    """
    standard = 10
    scale = standard / 10
    return int(value * scale)

def closing(image):
    """Apply a morphological closing with a square kernel of side ``weighted(5)``."""
    side = weighted(5)
    square_kernel = np.ones((side, side), np.uint8)
    return cv2.morphologyEx(image, cv2.MORPH_CLOSE, square_kernel)

def put_text(image, text, loc):
    """Draw ``str(text)`` onto ``image`` at position ``loc``.

    The text uses the Hershey Simplex font at scale 0.6, with color
    ``(255, 0, 0)`` and thickness 2. The image is modified in place.
    """
    label = str(text)
    scale = 0.6
    color = (255, 0, 0)
    thickness = 2
    cv2.putText(image, label, loc, cv2.FONT_HERSHEY_SIMPLEX, scale, color, thickness)

def get_center(y, h):
    """Return the midpoint of the span that starts at ``y`` and is ``h`` long."""
    doubled_center = 2 * y + h
    return doubled_center / 2


def object_detection(image, staves):
    """Find score symbols, save a crop of each, and assign them to staves.

    Connected components are extracted from a morphologically closed copy of
    the image. Every component at least ``weighted(5)`` wide and tall is
    cropped with a one-pixel border, inverted, and written to
    ``/content/dataset``. A component is then recorded for each five-line
    staff whose vertical band, extended by ``weighted(20)`` on both sides,
    contains the component's vertical center.

    The crop window is clamped to the image, so components touching the top
    or left edge no longer produce an empty crop from a negative slice start.

    Args:
        image: 2-D binary image with staff lines removed.
        staves: y-coordinates of the staff lines, five per staff.

    Returns:
        Tuple ``(image, objects)`` where each object is
        ``[staff_index, (x, y, w, h, area)]``, sorted ascending.
    """
    lines = len(staves) // 5  # Number of five-line staves.
    objects = []
    min_size = weighted(5)
    band_margin = weighted(20)

    closing_image = closing(image)
    cnt, labels, stats, centroids = cv2.connectedComponentsWithStats(closing_image)
    for i in range(1, cnt):
        (x, y, w, h, area) = stats[i]
        if w < min_size or h < min_size:
            continue  # Too small to be kept as a score symbol.

        center = get_center(y, h)
        # Clamp so that y == 0 or x == 0 does not turn into a -1 slice start.
        crop_top = max(y - 1, 0)
        crop_left = max(x - 1, 0)
        crop = 255 - image[crop_top:y + h + 1, crop_left:x + w + 1]
        title = '/content/dataset/Note94_c{0}{1}{2}{3}.jpg'.format(x, y, w, h)
        cv2.imwrite(title, crop)

        for line in range(lines):
            area_top = staves[line * 5] - band_margin  # Upper limit of the staff band.
            area_bot = staves[(line + 1) * 5 - 1] + band_margin  # Lower limit.
            if area_top <= center <= area_bot:
                objects.append([line, (x, y, w, h, area)])
    objects.sort()  # Orders by staff index, then by the stats tuple (x first).
    return image, objects

# Scan-direction flags passed as the `axis` argument of get_line().
VERTICAL = True  # Walk down a column: fixed x, varying y.
HORIZONTAL = False  # Walk along a row: fixed y, varying x.

def get_line(image, axis, axis_value, start, end, length):
    """Search a row or column for a run of at least ``weighted(length)`` white pixels.

    The scan walks from ``start`` (inclusive) to ``end`` (exclusive) along a
    column when ``axis`` is truthy, or along a row otherwise. ``axis_value``
    is the fixed coordinate. White pixels are counted until the next pixel
    is 0 or the range ends. If the count reaches the required length the
    scan stops there, otherwise the count is reset and the scan goes on.

    Fixes over the previous version:
      * the counter is now actually reset (it was assigned to a misspelled
        name), so short runs no longer add up into a false line;
      * the pixel after the last scanned point is no longer read, so scans
        ending at the image border do not raise ``IndexError``;
      * an empty range returns ``(start, 0)`` instead of failing on an
        unbound variable.

    Args:
        image: 2-D binary image indexed as ``image[y][x]``.
        axis: ``VERTICAL`` (True) or ``HORIZONTAL`` (False).
        axis_value: The fixed coordinate (x for vertical, y for horizontal).
        start: First coordinate along the scan direction.
        end: One past the last coordinate along the scan direction.
        length: Minimum run length, before ``weighted`` scaling.

    Returns:
        Tuple ``(position, pixels)``: the coordinate where the scan stopped
        and the count for the accepted run, or 0 when no run qualified.
    """
    if axis:
        points = [(i, axis_value) for i in range(start, end)]
    else:
        points = [(axis_value, i) for i in range(start, end)]
    if not points:
        return start, 0

    min_run = weighted(length)
    last_index = len(points) - 1
    pixels = 0
    for i, (y, x) in enumerate(points):
        pixels += (image[y][x] == 255)
        # Test the range end first so the out-of-range neighbour is never read.
        run_ends = i == last_index or (image[y + 1][x] if axis else image[y][x + 1]) == 0
        if run_ends:
            if pixels >= min_run:
                break
            pixels = 0
    return y if axis else x, pixels


def stem_detection(image, stats, length):
    """Find vertical line segments inside an object's bounding box.

    Every column of the bounding box is scanned over the full box height
    with ``get_line``. Adjacent columns that each contain a line are merged
    into one segment by widening it.

    The previous version unpacked each new segment into ``x, y, w, h``,
    which overwrote the bounding box's ``y`` and ``h``. Later columns were
    then scanned over the previous segment's extent instead of the box.

    Args:
        image: 2-D binary image.
        stats: ``(x, y, w, h, area)`` of the object to analyse.
        length: Minimum segment length, before ``weighted`` scaling.

    Returns:
        List of ``[x, y, w, h]`` segments in left-to-right order.
    """
    (x, y, w, h, area) = stats
    stems = []
    for col in range(x, x + w):
        end, pixels = get_line(image, VERTICAL, col, y, y + h, length)
        if not pixels:
            continue
        if not stems or abs(stems[-1][0] + stems[-1][2] - col) >= 1:
            # Not adjacent to the previous segment: start a new one-pixel-wide one.
            stems.append([col, end - pixels + 1, 1, pixels])
        else:
            # Directly right of the previous segment: widen it.
            stems[-1][2] += 1
    return stems

def object_analysis(image, objects):
    """Append stem information and a direction flag to every object.

    For each object, the segments found by ``stem_detection`` (minimum
    length 30) and a direction flag are appended in place. The flag is True
    when the first segment starts at least ``weighted(5)`` pixels right of
    the object's left edge, False when it starts closer, and None when the
    object has no segment.

    Returns:
        Tuple ``(image, objects)`` with the extended object entries.
    """
    for entry in objects:
        bbox = entry[1]
        found = stem_detection(image, bbox, 30)
        if found:
            offset_from_left = found[0][0] - bbox[0]
            direction = bool(offset_from_left >= weighted(5))
        else:
            direction = None
        entry.append(found)
        entry.append(direction)
    return image, objects

def recognize_key(image, staves, stats):
    """Decide whether an object is the time signature or part of the key signature.

    The object is treated as the time signature when its top, bottom and
    center line up with the first, fifth and third staff line (within
    ``weighted(5)``) and its width and height are in the expected ranges.
    Otherwise its vertical segments are counted to produce a key value.

    Args:
        image: 2-D binary image.
        staves: The five staff-line y-coordinates of the object's staff.
        stats: ``(x, y, w, h, area)`` of the object.

    Returns:
        ``(True, 0)`` for a time signature. Otherwise ``(False, key)``, where
        ``key`` is ``int(10 * n / 2)`` when the first segment starts at
        least ``weighted(3)`` right of the object's left edge, ``100 * n``
        when it starts earlier (``n`` being the segment count), and 0 when
        no segment is found. The 0 case previously raised ``IndexError``.
    """
    (x, y, w, h, area) = stats
    tolerance = weighted(5)
    ts_conditions = (
        staves[0] + tolerance >= y >= staves[0] - tolerance and
        staves[4] + tolerance >= y + h >= staves[4] - tolerance and
        staves[2] + tolerance >= get_center(y, h) >= staves[2] - tolerance and
        weighted(18) >= w >= weighted(10) and
        weighted(45) >= h >= weighted(35)
    )
    if ts_conditions:
        return True, 0

    stems = stem_detection(image, stats, 20)
    if not stems:
        # Nothing to count, so this object adds nothing to the key.
        return False, 0
    if stems[0][0] - x >= weighted(3):
        # The first vertical line appears late (presumably a sharp; confirm).
        key = int(10 * len(stems) / 2)
    else:
        # The first vertical line appears early (presumably a flat; confirm).
        key = 100 * len(stems)
    return False, key

def recognition(image, staves, objects):
    """Walk the analysed objects and collect key, beats and pitches.

    The first object (index 0) is skipped. Until ``recognize_key`` reports a
    time signature, each object adds to the key value. After that, objects
    go to ``recognize_note`` and their results are collected. Every visited
    object is outlined and labelled with its index on ``image``.

    Args:
        image: 2-D binary image; drawn on in place.
        staves: y-coordinates of all staff lines, five per staff.
        objects: Entries of the form ``[line, stats, stems, direction]``.

    Returns:
        Tuple ``(image, key, beats, pitches)``.
    """
    key = 0
    time_signature = False
    beats = []  # Collected beat values.
    pitches = []  # Collected pitch values.

    for i, obj in enumerate(objects[1:], start=1):
        line, stats, stems, direction = obj[0], obj[1], obj[2], obj[3]
        x, y, w, h, area = stats
        staff = staves[line * 5:(line + 1) * 5]
        if time_signature:
            # Key signature finished: everything from here on is note material.
            notes = recognize_note(image, staff, stats, stems, direction)
            if len(notes[0]):
                beats.extend(notes[0])
                pitches.extend(notes[1])
        else:
            # Still inside the key signature region.
            found_ts, partial_key = recognize_key(image, staff, stats)
            time_signature = found_ts
            key += partial_key
        cv2.rectangle(image, (x, y, w, h), (255, 0, 0), 1)
        put_text(image, i, (x, y - weighted(20)))
    return image, key, beats, pitches

def count_rect_pixels(image, rect):
    """Count the pixels equal to 255 inside ``rect``.

    Args:
        image: 2-D image indexed as ``image[row][col]``.
        rect: ``(x, y, w, h)`` of the rectangle to inspect.

    Returns:
        Number of pixels with value 255 in the rectangle.
    """
    left, top, box_w, box_h = rect
    return sum(
        1
        for r in range(top, top + box_h)
        for c in range(left, left + box_w)
        if image[r][c] == 255
    )


def recognize_note_head(image, stem, direction):
    """Look for a note head next to one end of a stem.

    When ``direction`` is truthy, the search window lies left of the stem
    around its bottom end. Otherwise it lies right of the stem around its
    top end. Each row of the window is scanned for a horizontal run, and the
    number of qualifying rows, the longest run and the white-pixel count
    decide whether a head exists and whether it is filled.

    Args:
        image: 2-D binary image.
        stem: ``(x, y, w, h)`` of the stem.
        direction: Direction flag as produced by ``object_analysis``.

    Returns:
        Tuple ``(head_exist, head_fill, head_center)``. ``head_center`` is
        the mean y of the qualifying rows, or 0 when no row qualifies (this
        case previously raised ``ZeroDivisionError``).
    """
    (x, y, w, h) = stem
    half_height = weighted(7)
    head_width = weighted(14)
    if direction:
        area_top = y + h - half_height
        area_bot = y + h + half_height
        area_left = x - head_width
        area_right = x
    else:
        area_top = y - half_height
        area_bot = y + half_height
        area_left = x + w
        area_right = x + w + head_width

    cnt = 0  # Rows containing a long enough horizontal run.
    cnt_max = 0  # Longest run seen in a qualifying row.
    head_center = 0  # Sum of qualifying row indices, averaged at the end.
    min_run = weighted(5)
    pixel_cnt = count_rect_pixels(
        image, (area_left, area_top, area_right - area_left, area_bot - area_top)
    )
    for row in range(area_top, area_bot):
        col, pixels = get_line(image, HORIZONTAL, row, area_left, area_right, 5)
        pixels += 1
        if pixels >= min_run:
            cnt += 1
            cnt_max = max(cnt_max, pixels)
            head_center += row
    head_exist = (cnt >= 3 and pixel_cnt >= 50)
    head_fill = (cnt >= 8 and cnt_max >= 9 and pixel_cnt >= 80)
    if cnt:
        head_center = head_center / cnt
    return head_exist, head_fill, head_center

def count_pixels_part(image, area_top, area_bot, area_col):
    """Count the separate white runs in one column between two rows.

    A run starts at a pixel equal to 255 and ends at the next pixel equal
    to 0. Rows ``area_top`` (inclusive) to ``area_bot`` (exclusive) of
    column ``area_col`` are examined.

    Returns:
        Number of runs that start inside the examined range.
    """
    runs = 0
    inside_run = False
    for row in range(area_top, area_bot):
        value = image[row][area_col]
        if inside_run:
            if value == 0:
                inside_run = False
        elif value == 255:
            inside_run = True
            runs += 1
    return runs


def recognize_note_tail(image, index, stem, direction):
    """Count the white runs in a column beside a stem.

    The examined rows cover the stem minus ``weighted(15)`` rows at one end:
    the bottom end when ``direction`` is truthy, the top end otherwise. The
    column lies ``weighted(4)`` pixels left of the stem when ``index`` is
    truthy, or the same distance right of it otherwise.

    Returns:
        The run count reported by ``count_pixels_part``.
    """
    x, y, w, h = stem
    skipped_rows = weighted(15)
    if direction:
        row_from, row_to = y, y + h - skipped_rows
    else:
        row_from, row_to = y + skipped_rows, y + h
    side_gap = weighted(4)
    probe_col = x - side_gap if index else x + w + side_gap
    return count_pixels_part(image, row_from, row_to, probe_col)

def recognize_note_dot(image, stem, direction, tail_cnt, stems_cnt):
    """Decide whether a dot follows the note attached to ``stem``.

    White pixels are counted in a window right of the stem and compared with
    a threshold. For a single-stem object with a truthy ``direction`` the
    threshold rises with ``tail_cnt``; in every other case the base
    threshold is used.

    ``tail_cnt`` values above the last table entry use the highest
    threshold instead of raising ``IndexError``.

    Args:
        image: 2-D binary image.
        stem: ``(x, y, w, h)`` of the stem.
        direction: Direction flag as produced by ``object_analysis``.
        tail_cnt: Tail count as returned by ``recognize_note_tail``.
        stems_cnt: Number of stems in the object.

    Returns:
        Truthy when the window holds enough white pixels for a dot.
    """
    (x, y, w, h) = stem
    if direction:
        area_top = y + h - weighted(10)
        area_bot = y + h + weighted(5)
        area_left = x + w + weighted(2)
        area_right = x + w + weighted(12)
    else:
        area_top = y - weighted(10)
        area_bot = y + weighted(5)
        area_left = x + w + weighted(14)
        area_right = x + w + weighted(24)
    dot_rect = (
        area_left,
        area_top,
        area_right - area_left,
        area_bot - area_top,
    )
    pixels = count_rect_pixels(image, dot_rect)
    # Named `thresholds` so it does not shadow the module-level threshold().
    thresholds = (10, 15, 20, 30)
    if direction and stems_cnt == 1:
        required = thresholds[min(tail_cnt, len(thresholds) - 1)]
    else:
        required = thresholds[0]
    return pixels >= weighted(required)


def recognize_note(image, staff, stats, stems, direction):
    """Classify the notes of one object from its head, tails and dot.

    An object is examined only when it has stems and meets the width,
    height and area minimums. For each stem with a detected head, the
    combination of head fill, tail count and dot presence selects a value
    from the table below. Dotted variants are negative. Each recognized
    value is also drawn onto ``image``.

    ``recognize_note_dot`` is now called with ``tail_cnt`` and the stem
    count in the order its signature declares; they were swapped before.

    Args:
        image: 2-D binary image; annotated in place.
        staff: The five staff-line y-coordinates of the object's staff
            (not used by this function).
        stats: ``(x, y, w, h, area)`` of the object.
        stems: Stem segments of the object as ``[x, y, w, h]`` lists.
        direction: Direction flag as produced by ``object_analysis``.

    Returns:
        Tuple ``(notes, pitches)``. ``pitches`` is always empty because this
        function never fills it.
    """
    (x, y, w, h, area) = stats
    notes = []
    pitches = []
    note_condition = (
        len(stems) and
        w >= weighted(10) and  # Width requirement.
        h >= weighted(35) and  # Height requirement.
        area >= weighted(95)  # Pixel-count requirement.
    )
    if note_condition:
        for i, stem in enumerate(stems):
            head_exist, head_fill, head_center = recognize_note_head(image, stem, direction)
            if not head_exist:
                continue
            tail_cnt = recognize_note_tail(image, i, stem, direction)
            dot_exist = recognize_note_dot(image, stem, direction, tail_cnt, len(stems))
            note_classification = (
                ((not head_fill and tail_cnt == 0 and not dot_exist), 2),
                ((not head_fill and tail_cnt == 0 and dot_exist), -2),
                ((head_fill and tail_cnt == 0 and not dot_exist), 4),
                ((head_fill and tail_cnt == 0 and dot_exist), -4),
                ((head_fill and tail_cnt == 1 and not dot_exist), 8),
                ((head_fill and tail_cnt == 1 and dot_exist), -8),
                ((head_fill and tail_cnt == 2 and not dot_exist), 16),
                ((head_fill and tail_cnt == 2 and dot_exist), -16),
                ((head_fill and tail_cnt == 3 and not dot_exist), 32),
                ((head_fill and tail_cnt == 3 and dot_exist), -32),
            )
            for matched, note in note_classification:
                if matched:
                    notes.append(note)
                    put_text(image, note, (stem[0] - weighted(10), stem[1] + stem[3] + weighted(30)))
                    break

    return notes, pitches






img = cv2.imread('Note22.png', cv2.IMREAD_COLOR)
if img is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise FileNotFoundError("Could not read image 'Note22.png'")

# 1. Extract the staff regions and remove everything else as noise.
image_1 = remove_noise(img)

# 2. Remove the staff lines.
image_2, staves = remove_staves(image_1)

# 3. Normalize the score image to a staff-line spacing of 10.
image_3, staves = normalization(image_2, staves, 10)

# 4. Detect objects (this also writes a crop of each object to disk).
image_4, objects = object_detection(image_3, staves)

# 5. Object analysis (currently disabled).
#image_5, objects = object_analysis(image_4, objects)

# 6. Recognition (currently disabled).
#image_6, key, beats, pitches = recognition(image_5, staves, objects)

cv2.imwrite("remove22.png",image_4)



True

## Add margin to image

In [None]:
from PIL import Image
  
# Paste the processed score onto a larger white canvas to add a margin.
right = 50
left = 50
top = 50
bottom = 50

# The context manager closes the source file once the pixels are copied.
with Image.open("remove22.png") as image:
    width, height = image.size

    new_width = width + right + left
    new_height = height + top + bottom

    result = Image.new("RGB", (new_width, new_height), (255, 255, 255))
    result.paste(image, (left, top))

result.save('mNote_output2.png')

TypeError: ignored

## Add margins to multiple images

In [None]:
#%cd /content/pytorch-CycleGAN-and-pix2pix/datasets
!unzip -qq "/content/dataset/nntrain.zip"

In [None]:
import os 
import numpy as np
from PIL import Image


#%cd /content/dataset/
image_path = '/content/nntrain/'
img_list = os.listdir(image_path)
img_list_jpg = [img for img in img_list if img.endswith(".jpg")]
os.chdir('/content/nntrain/')
# Make sure the destination exists before any image is saved into it.
os.makedirs('/content/nntest/', exist_ok=True)
if len(img_list) > 1:
    print(img_list[1])

# A margin of 25 pixels is added on every side of every image.
right = 25
left = 25
top = 25
bottom = 25

for i in img_list_jpg:
    # Open each file in a context manager so its handle is released right away.
    with Image.open(i) as image:
        width, height = image.size
        new_width = width + right + left
        new_height = height + top + bottom

        result = Image.new("RGB", (new_width, new_height), (255, 255, 255))
        result.paste(image, (left, top))
    result.save('/content/nntest/nn{0}'.format(i))

Note3_c9096621340.jpg


## Zip the margin-added images

In [None]:
!zip -r /content/nntest.zip /content/nntest/

  adding: content/nntest/ (stored 0%)
  adding: content/nntest/nnc1416761441.jpg (deflated 25%)
  adding: content/nntest/nnNote10_c10133291341.jpg (deflated 27%)
  adding: content/nntest/nnNote11_c9774415748.jpg (deflated 25%)
  adding: content/nntest/nnNote23_c1881721440.jpg (deflated 25%)
  adding: content/nntest/nnNote10_c4843104749_2.jpg (deflated 20%)
  adding: content/nntest/nnNote8_c4191011541.jpg (deflated 20%)
  adding: content/nntest/nnNote10_c10886941441.jpg (deflated 25%)
  adding: content/nntest/nnNote11_c10143321441.jpg (deflated 27%)
  adding: content/nntest/nnNote1_c1616821972.jpg (deflated 21%)
  adding: content/nntest/nnNote22_c8148741242.jpg (deflated 26%)
  adding: content/nntest/nnc6132311340.jpg (deflated 28%)
  adding: content/nntest/nnNote11_c9988241440.jpg (deflated 25%)
  adding: content/nntest/nnNote4_c5905373343_1.jpg (deflated 24%)
  adding: content/nntest/nnNote11_c2969185452.jpg (deflated 24%)
  adding: content/nntest/nnNote10_c4769287547.jpg (deflated 22