# 어텐션 LSTM 모델에서 데이터로 사용할 pickle 파일을 생성하는 과정

# 기본 경로 설정

In [1]:
from google.colab import drive

drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
%cd /content/drive/MyDrive/Project_Music_KW/preprocessing

/content/drive/MyDrive/Project_Music_KW/preprocessing


# 패키지 임포트

In [3]:
import glob
import pickle
import numpy as np
from music21 import *

# 테스트

In [4]:
# 악보 중 피아노 파트를 출력하는 테스트
file = '/content/drive/MyDrive/Project_Music_KW/preprocessing/data/midi_songs/electric_de_chocobo.mid'

score = converter.parse(file)
s = instrument.partitionByInstrument(score)
s.parts[0].show('text')

{0.0} <music21.instrument.Piano Piano>
{0.0} <music21.tempo.MetronomeMark vivace Quarter=160.0>
{0.0} <music21.note.Rest rest>
{0.0} <music21.note.Note C#>
{0.25} <music21.note.Note C>
{0.5} <music21.note.Note C>
{0.75} <music21.note.Note B>
{1.0} <music21.note.Note B>
{1.25} <music21.note.Note B->
{1.5} <music21.note.Note B->
{1.75} <music21.note.Note A>
{2.0} <music21.note.Note A>
{2.25} <music21.note.Note G#>
{2.5} <music21.note.Note G#>
{2.75} <music21.note.Note G>
{3.0} <music21.note.Note G>
{3.25} <music21.note.Note F#>
{3.5} <music21.note.Note F#>
{3.75} <music21.note.Note F>
{4.0} <music21.note.Note E>
{4.5} <music21.note.Note B>
{5.0} <music21.note.Note G>
{5.5} <music21.note.Note B>
{6.0} <music21.note.Note E>
{6.5} <music21.note.Note B>
{7.0} <music21.note.Note G>
{7.5} <music21.note.Note B>
{8.0} <music21.note.Note E>
{8.5} <music21.note.Note B>
{9.0} <music21.note.Note G>
{9.5} <music21.note.Note B>
{10.0} <music21.note.Note E>
{10.5} <music21.note.Note B>
{11.0} <music21.

# mid 파일을 pickle 파일로 변환

In [5]:
# 데이터 이름
data_name = 'Rachmaninov'
pickle_name = 'Rachmaninov_pickle'

In [15]:
notes = []
durations = []
seq_len = 100

for file in glob.glob("./data/{}/*.mid".format(data_name)):
  
  midi = converter.parse(file)      
  print("Parsing %s" % file)

  notes_to_parse = None

  try: # file has instrument parts
    s2 = instrument.partitionByInstrument(midi)
    notes_to_parse = s2.parts[0].recurse() 
  except: # file has notes in a flat structure
    notes_to_parse = midi.flat.notes

  notes.extend(['START'] * seq_len)
  durations.extend([0]* seq_len)

  for element in notes_to_parse:            
    if isinstance(element, note.Note):
      if element.isRest:
        notes.append(str(element.name))
        durations.append(element.duration.quarterLength)
      else:
        notes.append(str(element.nameWithOctave))
        durations.append(element.duration.quarterLength)
    if isinstance(element, chord.Chord):
      notes.append('.'.join(n.nameWithOctave for n in element.pitches))
      durations.append(element.duration.quarterLength)

with open('./data/{}_notes'.format(pickle_name), 'wb') as f:
  pickle.dump(notes, f) #['G2', 'D3', 'B3', 'A3', 'B3', 'D3', 'B3', 'D3', 'G2',...]
with open('./data/{}_durations'.format(pickle_name), 'wb') as f:
  pickle.dump(durations, f) 

Parsing ./data/Rachmaninov/rac_op3_2_format0.mid
Parsing ./data/Rachmaninov/rac_op23_3_format0.mid
Parsing ./data/Rachmaninov/rac_op23_2_format0.mid
Parsing ./data/Rachmaninov/rac_op23_5_format0.mid
Parsing ./data/Rachmaninov/rac_op32_1_format0.mid
Parsing ./data/Rachmaninov/rac_op32_13_format0.mid
Parsing ./data/Rachmaninov/rac_op33_5_format0.mid
Parsing ./data/Rachmaninov/rac_op33_8_format0.mid
Parsing ./data/Rachmaninov/rac_op33_6_format0.mid


In [16]:
def get_distinct(elements):
    # Get all pitch names
    element_names = sorted(set(elements))
    n_elements = len(element_names)
    return (element_names, n_elements)

def create_lookups(element_names):
    # create dictionary to map notes and durations to integers
    element_to_int = dict((element, number) for number, element in enumerate(element_names))
    int_to_element = dict((number, element) for number, element in enumerate(element_names))

    return (element_to_int, int_to_element)

In [17]:
# 고유한 음표와 박자 얻어오기
note_names, n_notes = get_distinct(notes)
duration_names, n_durations = get_distinct(durations)
distincts = [note_names, n_notes, duration_names, n_durations]

with open('./data/{}_distincts'.format(pickle_name), 'wb') as f:
    pickle.dump(distincts, f)

# 음표와 박자 룩업 딕셔너리 만들고 저장하기
note_to_int, int_to_note = create_lookups(note_names)
duration_to_int, int_to_duration = create_lookups(duration_names)
lookups = [note_to_int, int_to_note, duration_to_int, int_to_duration]

with open('./data/{}_lookups'.format(pickle_name), 'wb') as f:
    pickle.dump(lookups, f)

In [18]:
notes

['START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
 'START',
