# Get flatten tokens

In [1]:
import pickle
import random
import json
from tqdm import tqdm
import pandas as pd

In [2]:
with open('../hierarchy/data/tree_tokens.pkl', 'rb') as f:
    tree_tokens_dict = pickle.load(f)
    
len(tree_tokens_dict)

1714

In [3]:
def event2df(event):
    keys = list(event[0].keys())

    items_dict = dict()
    for k in keys:
        items_dict[k] = [e[k] for e in event]

    return pd.DataFrame(data=items_dict)

In [4]:
flatten_tokens_dict = dict()

for sid, tree_tokens in tqdm(tree_tokens_dict.items()):
    flatten_tokens = []
    for t in tree_tokens:
        if type(t) != list:
            flatten_tokens.append(t)
            continue
        
        for tt in t:
            if type(tt) != list :
                flatten_tokens.append(tt)
                continue
            flatten_tokens.extend(tt)
            
    flatten_tokens_dict[sid] = flatten_tokens

100%|██████████| 1714/1714 [00:00<00:00, 11936.98it/s]


In [5]:
with open('./data/Base/flatten_tokens_dict.pkl', 'wb') as f:
    pickle.dump(flatten_tokens_dict, f)

# Statistics

In [6]:
import pickle
import random
import json
from tqdm import tqdm
import pandas as pd

In [7]:
with open('./data/Base/flatten_tokens_dict.pkl', 'rb') as f:
    tokens_dict = pickle.load(f)
    
len(tokens_dict)

1714

In [8]:
def event2df(event):
    keys = list(event[0].keys())

    items_dict = dict()
    for k in keys:
        items_dict[k] = [e[k] for e in event]

    return pd.DataFrame(data=items_dict)

In [9]:
sids = list(tokens_dict.keys())

In [10]:
sid = random.sample(sids, 1)[0]
df = event2df(tokens_dict[sid])
df

Unnamed: 0,type,bar,pos,time,tempo,structure,chord,track,pitch,duration
0,Boundary,<SOS>,0,0.00000,0,0,0,0,0,0
1,Tempo,1,Beat_0,0.00000,100,0,0,0,0,0
2,Structure,<CONTI>,Beat_7,0.00349,0,i,0,0,0,0
3,Chord,<CONTI>,Beat_7,0.00000,<CONTI>,<CONTI>,Eb:min,0,0,0
4,Note,<CONTI>,Beat_11,0.00000,<CONTI>,<CONTI>,<CONTI>,PIANO,51,1680
...,...,...,...,...,...,...,...,...,...,...
4031,Note,<CONTI>,Beat_15,0.00000,<CONTI>,<CONTI>,<CONTI>,PIANO,56,240
4032,Note,<CONTI>,Beat_15,0.00000,<CONTI>,<CONTI>,<CONTI>,PIANO,68,480
4033,Note,<CONTI>,Beat_15,0.00000,<CONTI>,<CONTI>,<CONTI>,PIANO,80,480
4034,Note,1,Beat_1,0.00000,<CONTI>,<CONTI>,<CONTI>,PIANO,58,480


In [11]:
song_ids = sids
seq_num = [len(tokens_dict[sid]) for sid in sids]
df = pd.DataFrame({'sid': song_ids, 'seq_num': seq_num})
df

Unnamed: 0,sid,seq_num
0,1_1,1725
1,1_2,1728
2,2_1,1532
3,2_2,1535
4,3_1,1998
...,...,...
1709,907_2,1323
1710,908_1,1683
1711,908_2,1683
1712,909_1,1077


In [12]:
df.describe()

Unnamed: 0,seq_num
count,1714.0
mean,1843.954492
std,444.731872
min,200.0
25%,1549.5
50%,1803.5
75%,2090.0
max,4450.0


In [13]:
MAX_SEQ_LEN = 2560 + 1
selected_df = df[(df['seq_num'] <= MAX_SEQ_LEN)]
len(selected_df), len(df), len(selected_df) / len(df)

(1620, 1714, 0.9451575262543758)

In [14]:
selected_sids = selected_df['sid'].tolist()
# selected_sids.sort()

sid = random.sample(selected_sids, 1)[0]
print(sid)
df = event2df(tokens_dict[sid])
df.head(30)

636_1


Unnamed: 0,type,bar,pos,time,tempo,structure,chord,track,pitch,duration
0,Boundary,<SOS>,0,0.0,0,0,0,0,0,0
1,Tempo,1,Beat_0,0.0,82,0,0,0,0,0
2,Structure,<CONTI>,Beat_3,0.002113,0,i,0,0,0,0
3,Chord,<CONTI>,Beat_3,-0.058333,<CONTI>,<CONTI>,F#:min,0,0,0
4,Note,<CONTI>,Beat_3,-0.058333,<CONTI>,<CONTI>,<CONTI>,BRIDGE,73,840
5,Note,<CONTI>,Beat_3,-0.058333,<CONTI>,<CONTI>,<CONTI>,PIANO,42,960
6,Note,<CONTI>,Beat_3,-0.058333,<CONTI>,<CONTI>,<CONTI>,PIANO,54,360
7,Note,<CONTI>,Beat_3,-0.058333,<CONTI>,<CONTI>,<CONTI>,PIANO,57,360
8,Note,<CONTI>,Beat_3,-0.058333,<CONTI>,<CONTI>,<CONTI>,PIANO,61,360
9,Note,<CONTI>,Beat_5,-0.058333,<CONTI>,<CONTI>,<CONTI>,PIANO,49,360


In [15]:
len(selected_sids), selected_sids[:5]

(1620, ['1_1', '1_2', '2_1', '2_2', '3_1'])

In [16]:
with open('./data/Base/valid_song_ids.pkl', 'wb') as f:
    pickle.dump(selected_sids, f)

# Tokens 2 Vector

In [1]:
from tqdm import tqdm
import random
import pretty_midi
import muspy
import mido
import pandas as pd
import os
import numpy as np
import collections
import pickle
import json

In [2]:
with open('./data/Base/valid_song_ids.pkl', 'rb') as f:
    selected_sids = pickle.load(f)

with open('./data/Base/flatten_tokens_dict.pkl', 'rb') as f:
    tokens_dict = pickle.load(f)

print(len(selected_sids), len(tokens_dict),
      len(selected_sids) / len(tokens_dict))

tokens_dict = {k: tokens_dict[k] for k in selected_sids}
len(tokens_dict)

1620 1714 0.9451575262543758


1620

In [3]:
pad_event = {'type': 0, 'bar': 0, 'pos': 0, 'time': 0, 'tempo': 0,
             'structure': 0, 'chord': 0, 'track': 0, 'pitch': 0, 'duration': 0}


def create_pad_event(t):
    event = pad_event.copy()
    event['type'] = t
    return event

In [4]:
MAX_SEQ_LEN = 2560 + 1

pad_event = create_pad_event('PAD')

for k in tqdm(selected_sids):
    assert len(tokens_dict[k]) <= MAX_SEQ_LEN
    tokens_dict[k] += [pad_event for _ in range(MAX_SEQ_LEN - len(tokens_dict[k]))]

100%|██████████| 1620/1620 [00:00<00:00, 18154.59it/s]


In [5]:
flatten_tokens_dict = tokens_dict

for v in flatten_tokens_dict.values():
    assert len(v) == MAX_SEQ_LEN

len(flatten_tokens_dict)

1620

In [6]:
def event2df(event):
    keys = list(event[0].keys())

    items_dict = dict()
    for k in keys:
        items_dict[k] = [e[k] for e in event]

    return pd.DataFrame(data=items_dict)

In [7]:
df = event2df(flatten_tokens_dict['100_2'])
df

Unnamed: 0,type,bar,pos,time,tempo,structure,chord,track,pitch,duration
0,Boundary,<SOS>,0,0.000000,0,0,0,0,0,0
1,Tempo,1,Beat_0,0.000000,61,0,0,0,0,0
2,Structure,<CONTI>,Beat_2,0.001668,0,i,0,0,0,0
3,Chord,<CONTI>,Beat_2,-0.158333,<CONTI>,<CONTI>,B:maj,0,0,0
4,Note,<CONTI>,Beat_2,-0.158333,<CONTI>,<CONTI>,<CONTI>,PIANO,47,960
...,...,...,...,...,...,...,...,...,...,...
2556,PAD,0,0,0.000000,0,0,0,0,0,0
2557,PAD,0,0,0.000000,0,0,0,0,0,0
2558,PAD,0,0,0.000000,0,0,0,0,0,0
2559,PAD,0,0,0.000000,0,0,0,0,0,0


In [8]:
df[df['type']!='PAD']

Unnamed: 0,type,bar,pos,time,tempo,structure,chord,track,pitch,duration
0,Boundary,<SOS>,0,0.000000,0,0,0,0,0,0
1,Tempo,1,Beat_0,0.000000,61,0,0,0,0,0
2,Structure,<CONTI>,Beat_2,0.001668,0,i,0,0,0,0
3,Chord,<CONTI>,Beat_2,-0.158333,<CONTI>,<CONTI>,B:maj,0,0,0
4,Note,<CONTI>,Beat_2,-0.158333,<CONTI>,<CONTI>,<CONTI>,PIANO,47,960
...,...,...,...,...,...,...,...,...,...,...
1944,Note,<CONTI>,Beat_1,-0.091667,<CONTI>,<CONTI>,<CONTI>,PIANO,71,0
1945,Note,<CONTI>,Beat_1,0.233333,<CONTI>,<CONTI>,<CONTI>,PIANO,59,1800
1946,Note,<CONTI>,Beat_2,-0.441667,<CONTI>,<CONTI>,<CONTI>,PIANO,71,120
1947,Note,<CONTI>,Beat_3,-0.283333,<CONTI>,<CONTI>,<CONTI>,PIANO,71,1680


In [24]:
class_keys = list(flatten_tokens_dict['1_1'][0].keys())
if 'time' in class_keys:
    class_keys.remove('time')

class_keys, len(class_keys)

(['type',
  'bar',
  'pos',
  'tempo',
  'structure',
  'chord',
  'track',
  'pitch',
  'duration'],
 9)

In [25]:
# define dictionary
event2word = {}
word2event = {}

corpus_kv = collections.defaultdict(list)
for k in tqdm(flatten_tokens_dict):
    for event in flatten_tokens_dict[k]:
        for key in class_keys:
            corpus_kv[key].append(event[key])

for ckey in class_keys:
    class_unique_vals = sorted(
        set(corpus_kv[ckey]), key=lambda x: (not isinstance(x, int), x))
    event2word[ckey] = {key: i for i, key in enumerate(class_unique_vals)}
    word2event[ckey] = {i: key for i, key in enumerate(class_unique_vals)}

# print
print('[class size]')
for key in class_keys:
    print(' > {:10s}: {}'.format(key, len(event2word[key])))

100%|██████████| 1620/1620 [00:05<00:00, 293.04it/s]


[class size]
 > type      : 6
 > bar       : 8
 > pos       : 17
 > tempo     : 94
 > structure : 15
 > chord     : 322
 > track     : 4
 > pitch     : 83
 > duration  : 81


In [26]:
event2word

{'type': {'Boundary': 0,
  'Chord': 1,
  'Note': 2,
  'PAD': 3,
  'Structure': 4,
  'Tempo': 5},
 'bar': {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, '<CONTI>': 5, '<EOS>': 6, '<SOS>': 7},
 'pos': {0: 0,
  'Beat_0': 1,
  'Beat_1': 2,
  'Beat_10': 3,
  'Beat_11': 4,
  'Beat_12': 5,
  'Beat_13': 6,
  'Beat_14': 7,
  'Beat_15': 8,
  'Beat_2': 9,
  'Beat_3': 10,
  'Beat_4': 11,
  'Beat_5': 12,
  'Beat_6': 13,
  'Beat_7': 14,
  'Beat_8': 15,
  'Beat_9': 16},
 'tempo': {0: 0,
  42: 1,
  45: 2,
  46: 3,
  50: 4,
  51: 5,
  52: 6,
  53: 7,
  54: 8,
  55: 9,
  56: 10,
  57: 11,
  58: 12,
  59: 13,
  60: 14,
  61: 15,
  62: 16,
  63: 17,
  64: 18,
  65: 19,
  66: 20,
  67: 21,
  68: 22,
  69: 23,
  70: 24,
  71: 25,
  72: 26,
  73: 27,
  74: 28,
  75: 29,
  76: 30,
  77: 31,
  78: 32,
  79: 33,
  80: 34,
  81: 35,
  82: 36,
  83: 37,
  84: 38,
  85: 39,
  86: 40,
  87: 41,
  88: 42,
  89: 43,
  90: 44,
  91: 45,
  92: 46,
  93: 47,
  94: 48,
  95: 49,
  96: 50,
  97: 51,
  98: 52,
  100: 53,
  101: 54,
  102:

In [27]:
word2event

{'type': {0: 'Boundary',
  1: 'Chord',
  2: 'Note',
  3: 'PAD',
  4: 'Structure',
  5: 'Tempo'},
 'bar': {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: '<CONTI>', 6: '<EOS>', 7: '<SOS>'},
 'pos': {0: 0,
  1: 'Beat_0',
  2: 'Beat_1',
  3: 'Beat_10',
  4: 'Beat_11',
  5: 'Beat_12',
  6: 'Beat_13',
  7: 'Beat_14',
  8: 'Beat_15',
  9: 'Beat_2',
  10: 'Beat_3',
  11: 'Beat_4',
  12: 'Beat_5',
  13: 'Beat_6',
  14: 'Beat_7',
  15: 'Beat_8',
  16: 'Beat_9'},
 'tempo': {0: 0,
  1: 42,
  2: 45,
  3: 46,
  4: 50,
  5: 51,
  6: 52,
  7: 53,
  8: 54,
  9: 55,
  10: 56,
  11: 57,
  12: 58,
  13: 59,
  14: 60,
  15: 61,
  16: 62,
  17: 63,
  18: 64,
  19: 65,
  20: 66,
  21: 67,
  22: 68,
  23: 69,
  24: 70,
  25: 71,
  26: 72,
  27: 73,
  28: 74,
  29: 75,
  30: 76,
  31: 77,
  32: 78,
  33: 79,
  34: 80,
  35: 81,
  36: 82,
  37: 83,
  38: 84,
  39: 85,
  40: 86,
  41: 87,
  42: 88,
  43: 89,
  44: 90,
  45: 91,
  46: 92,
  47: 93,
  48: 94,
  49: 95,
  50: 96,
  51: 97,
  52: 98,
  53: 100,
  54: 101,
  55: 

In [28]:
with open('./data/Base/dictionary.pkl', 'wb') as f:
    pickle.dump((event2word, word2event), f)

In [29]:
class_keys

['type',
 'bar',
 'pos',
 'tempo',
 'structure',
 'chord',
 'track',
 'pitch',
 'duration']

In [30]:
vector = dict()
mask = dict()

for sid in tqdm(tokens_dict):
    arr = np.zeros((MAX_SEQ_LEN, len(class_keys)))
    m = np.ones(MAX_SEQ_LEN)
    
    for i, token in enumerate(tokens_dict[sid]):
        # padded
        if token['type'] == 'PAD':
            m[i] = 0
      
        for k, key in enumerate(class_keys):
            arr[i, k] = event2word[key][token[key]]
    
    vector[sid] = arr
    mask[sid] = m

100%|██████████| 1620/1620 [00:10<00:00, 151.85it/s]


In [31]:
with open('./data/Base/vector.pkl', 'wb') as f:
    pickle.dump(vector, f)
    
with open('./data/Base/mask.pkl', 'wb') as f:
    pickle.dump(mask, f)

In [32]:
vector['1_1']

array([[0., 7., 0., ..., 0., 0., 0.],
       [5., 1., 1., ..., 0., 0., 0.],
       [4., 5., 1., ..., 0., 0., 0.],
       ...,
       [3., 0., 0., ..., 0., 0., 0.],
       [3., 0., 0., ..., 0., 0., 0.],
       [3., 0., 0., ..., 0., 0., 0.]])

In [33]:
mask['1_1']

array([1., 1., 1., ..., 0., 0., 0.])

# Compile

## 划分训练/测试数据

In [1]:
import os
import json
import pickle
import numpy as np
import random 
from tqdm import tqdm

In [2]:
with open('./data/Base/vector.pkl', 'rb') as f:
    vector = pickle.load(f)
    
with open('./data/Base/mask.pkl', 'rb') as f:
    mask = pickle.load(f)
    
len(vector), len(mask)

(1620, 1620)

In [3]:
with open('./data/Base/valid_song_ids.pkl', 'rb') as f:
    song_ids = pickle.load(f)

# split train/test
random.seed(1)
validation_songs = random.sample(song_ids, 5)

len(song_ids), validation_songs

(1620, ['156_1', '649_2', '881_1', '73_2', '295_1'])

In [4]:
vector[validation_songs[0]].shape, mask[validation_songs[0]].shape

((2561, 9), (2561,))

In [5]:
# init
x_list = []
y_list = []
mask_list = []

# process
for sid, words in tqdm(vector.items()):
    x = words[:-1].copy()
    y = words[1:].copy()
    m = mask[sid][:-1].copy()

    # collect
    x_list.append(x)
    y_list.append(y)
    mask_list.append(m)

100%|██████████| 1620/1620 [00:00<00:00, 5093.63it/s]


In [6]:
x_final = np.array(x_list)
y_final = np.array(y_list)
mask_final = np.array(mask_list)

x_final.shape, y_final.shape, mask_final.shape

((1620, 2560, 9), (1620, 2560, 9), (1620, 2560))

In [7]:
test_idx = [song_ids.index(sid) for sid in validation_songs]
train_idx = [song_ids.index(
    sid) for sid in song_ids if sid not in validation_songs]

test_idx = np.array(test_idx)
train_idx = np.array(train_idx)

test_idx.shape, train_idx.shape

((5,), (1615,))

In [8]:
test_idx

array([ 275, 1165, 1564,  129,  522])

In [9]:
with open('./data/Base/experimental_song_ids.pkl', 'wb') as f:
    pickle.dump({'song_ids': song_ids,
                 'val_song_ids': validation_songs}, f)

In [10]:
# save train
path_train = os.path.join('./data/Base/', 'train_data.npz')
np.savez(
    path_train, 
    x=x_final[train_idx],
    y=y_final[train_idx],
    mask=mask_final[train_idx]
)

# save test
path_test = os.path.join('./data/Base/', 'test_data.npz')
np.savez(
    path_test, 
    x=x_final[test_idx],
    y=y_final[test_idx],
    mask=mask_final[test_idx]
)

print('---')
print(' > train x:', x_final[train_idx].shape)
print(' >  test x:', x_final[test_idx].shape)

---
 > train x: (1615, 2560, 9)
 >  test x: (5, 2560, 9)


## 查看输入是否存在问题

In [1]:
import pickle
import numpy as np
import sys
sys.path.append('../../model/TextureAndForm/')
from main import write2midi

In [2]:
dictionary = pickle.load(
    open('data/Base/dictionary.pkl', 'rb'))
event2word, word2event = dictionary

In [3]:
train_data = np.load('data/Base/train_data.npz')

In [4]:
train_x = train_data['x']
train_y = train_data['y']
train_mask = train_data['mask']

train_x.shape, train_y.shape, train_mask.shape

((1615, 2560, 9), (1615, 2560, 9), (1615, 2560))

In [5]:
idx = np.arange(len(train_x))
np.random.shuffle(idx)

for i in idx[:5]:
    arr = train_x[i]
    write2midi(arr=arr, word2event=word2event,
               path_outfile='../../model/TextureAndForm/ckpt/debug/{}.mid'.format(i))

In [3]:
t = np.load('../../model/TextureAndForm/ckpt/Base/loss_9/loss_9_0_(2560, 9).npy')
t.shape

(2560, 9)

In [4]:
write2midi(t, word2event, path_outfile='../../model/TextureAndForm/ckpt/Base/loss_9/loss_9_mid/loss_9_0.mid')

{'type': 'Note', 'bar': '<CONTI>', 'pos': 'Beat_3', 'tempo': '<CONTI>', 'structure': '<CONTI>', 'chord': 'C#:maj', 'track': 0, 'pitch': 0, 'duration': 0}
{'type': 'Note', 'bar': '<CONTI>', 'pos': 'Beat_3', 'tempo': '<CONTI>', 'structure': '<CONTI>', 'chord': 'F#:maj', 'track': 0, 'pitch': 0, 'duration': 0}
{'type': 'Note', 'bar': '<CONTI>', 'pos': 'Beat_12', 'tempo': '<CONTI>', 'structure': '<CONTI>', 'chord': 'F#:maj', 'track': 0, 'pitch': 0, 'duration': 0}
{'type': 'Note', 'bar': '<CONTI>', 'pos': 'Beat_3', 'tempo': '<CONTI>', 'structure': '<CONTI>', 'chord': 'C#:maj', 'track': 0, 'pitch': 0, 'duration': 0}
{'type': 'Note', 'bar': '<CONTI>', 'pos': 'Beat_4', 'tempo': '<CONTI>', 'structure': '<CONTI>', 'chord': 'C#:maj', 'track': 0, 'pitch': 0, 'duration': 0}
{'type': 'Note', 'bar': '<CONTI>', 'pos': 'Beat_3', 'tempo': '<CONTI>', 'structure': '<CONTI>', 'chord': 'F#:maj', 'track': 0, 'pitch': 0, 'duration': 0}
{'type': 'Note', 'bar': '<CONTI>', 'pos': 'Beat_3', 'tempo': '<CONTI>', 'st

# 和原始的CP-transformer的对比

In [1]:
import pickle

In [3]:
with open('../../../CP-Transformer/data/raw/compile/dictionary.pkl', 'rb') as f:
    event2word, word2event = pickle.load(f)

for k in event2word:
    print('{}: {}'.format(k, len(event2word[k])))

tempo: 56
chord: 135
bar-beat: 18
type: 3
pitch: 87
duration: 18
velocity: 25


In [4]:
with open('./data/Base/dictionary.pkl', 'rb') as f:
    event2word, word2event = pickle.load(f)

for k in event2word:
    print('{}: {}'.format(k, len(event2word[k])))

type: 6
bar: 8
pos: 17
tempo: 94
structure: 15
chord: 322
track: 4
pitch: 83
duration: 81


In [5]:
event2word

{'type': {'Boundary': 0,
  'Chord': 1,
  'Note': 2,
  'PAD': 3,
  'Structure': 4,
  'Tempo': 5},
 'bar': {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, '<CONTI>': 5, '<EOS>': 6, '<SOS>': 7},
 'pos': {0: 0,
  'Beat_0': 1,
  'Beat_1': 2,
  'Beat_10': 3,
  'Beat_11': 4,
  'Beat_12': 5,
  'Beat_13': 6,
  'Beat_14': 7,
  'Beat_15': 8,
  'Beat_2': 9,
  'Beat_3': 10,
  'Beat_4': 11,
  'Beat_5': 12,
  'Beat_6': 13,
  'Beat_7': 14,
  'Beat_8': 15,
  'Beat_9': 16},
 'tempo': {0: 0,
  42: 1,
  45: 2,
  46: 3,
  50: 4,
  51: 5,
  52: 6,
  53: 7,
  54: 8,
  55: 9,
  56: 10,
  57: 11,
  58: 12,
  59: 13,
  60: 14,
  61: 15,
  62: 16,
  63: 17,
  64: 18,
  65: 19,
  66: 20,
  67: 21,
  68: 22,
  69: 23,
  70: 24,
  71: 25,
  72: 26,
  73: 27,
  74: 28,
  75: 29,
  76: 30,
  77: 31,
  78: 32,
  79: 33,
  80: 34,
  81: 35,
  82: 36,
  83: 37,
  84: 38,
  85: 39,
  86: 40,
  87: 41,
  88: 42,
  89: 43,
  90: 44,
  91: 45,
  92: 46,
  93: 47,
  94: 48,
  95: 49,
  96: 50,
  97: 51,
  98: 52,
  100: 53,
  101: 54,
  102:

# Prompt

## 找到索引

In [1]:
import pickle
import numpy as np
import sys
sys.path.append('../../model/TextureAndForm/')
from main import write2midi

In [2]:
with open('./data/Base/experimental_song_ids.pkl', 'rb') as f:
    song_ids = pickle.load(f)

In [3]:
song_ids

{'song_ids': ['1_1',
  '1_2',
  '2_1',
  '2_2',
  '3_1',
  '3_2',
  '4_1',
  '4_2',
  '5_1',
  '5_2',
  '7_1',
  '7_2',
  '8_1',
  '8_2',
  '9_1',
  '9_2',
  '10_1',
  '10_2',
  '11_1',
  '11_2',
  '12_1',
  '12_2',
  '13_1',
  '13_2',
  '15_1',
  '15_2',
  '16_1',
  '16_2',
  '17_1',
  '17_2',
  '18_1',
  '18_2',
  '19_1',
  '19_2',
  '20_1',
  '20_2',
  '21_1',
  '21_2',
  '22_1',
  '22_2',
  '23_1',
  '23_2',
  '24_1',
  '24_2',
  '25_1',
  '25_2',
  '26_1',
  '26_2',
  '27_1',
  '27_2',
  '28_1',
  '28_2',
  '29_1',
  '29_2',
  '30_1',
  '30_2',
  '31_1',
  '31_2',
  '32_1',
  '32_2',
  '33_1',
  '33_2',
  '35_1',
  '35_2',
  '36_1',
  '36_2',
  '37_1',
  '37_2',
  '38_1',
  '38_2',
  '39_1',
  '39_2',
  '40_1',
  '40_2',
  '41_1',
  '41_2',
  '42_1',
  '42_2',
  '44_1',
  '44_2',
  '45_1',
  '45_2',
  '46_1',
  '46_2',
  '47_1',
  '47_2',
  '48_1',
  '48_2',
  '49_1',
  '49_2',
  '50_1',
  '50_2',
  '51_1',
  '51_2',
  '52_1',
  '52_2',
  '53_1',
  '53_2',
  '54_1',
  '54_2',
  '5

In [4]:
INDEX = '17_1'
INDEX = '295_1'

In [5]:
dictionary = pickle.load(
    open('data/Base/dictionary.pkl', 'rb'))
event2word, word2event = dictionary

In [6]:
train_data = np.load('data/Base/train_data.npz')

In [7]:
train_x = train_data['x']
train_y = train_data['y']
train_mask = train_data['mask']

train_x.shape, train_y.shape, train_mask.shape

((1615, 2560, 9), (1615, 2560, 9), (1615, 2560))

In [8]:
idx = song_ids['song_ids'].index(INDEX)
arr = train_x[idx]
write2midi(arr=arr, word2event=word2event,
           path_outfile='../../model/TextureAndForm/ckpt/debug/{}.mid'.format(idx))

In [9]:
arr

array([[0., 7., 0., ..., 0., 0., 0.],
       [5., 1., 1., ..., 0., 0., 0.],
       [4., 5., 9., ..., 0., 0., 0.],
       ...,
       [3., 0., 0., ..., 0., 0., 0.],
       [3., 0., 0., ..., 0., 0., 0.],
       [3., 0., 0., ..., 0., 0., 0.]])

## 确定长度

In [10]:
from tqdm import tqdm
import random
import pretty_midi
import muspy
import mido
import pandas as pd
import os
import numpy as np
import collections
import pickle
import json

In [11]:
with open('./data/Base/valid_song_ids.pkl', 'rb') as f:
    selected_sids = pickle.load(f)

with open('./data/Base/flatten_tokens_dict.pkl', 'rb') as f:
    tokens_dict = pickle.load(f)

print(len(selected_sids), len(tokens_dict),
      len(selected_sids) / len(tokens_dict))

tokens_dict = {k: tokens_dict[k] for k in selected_sids}
len(tokens_dict)

1620 1714 0.9451575262543758


1620

In [12]:
def event2df(event):
    keys = list(event[0].keys())

    items_dict = dict()
    for k in keys:
        items_dict[k] = [e[k] for e in event]

    return pd.DataFrame(data=items_dict)

In [13]:
df = event2df(tokens_dict[INDEX])
df

Unnamed: 0,type,bar,pos,time,tempo,structure,chord,track,pitch,duration
0,Boundary,<SOS>,0,0.000000,0,0,0,0,0,0
1,Tempo,1,Beat_0,0.000000,70,0,0,0,0,0
2,Structure,<CONTI>,Beat_3,0.002465,0,i,0,0,0,0
3,Note,<CONTI>,Beat_3,0.391667,<CONTI>,<CONTI>,<UNK>,PIANO,57,1680
4,Chord,<CONTI>,Beat_4,-0.158333,<CONTI>,<CONTI>,D:min,0,0,0
...,...,...,...,...,...,...,...,...,...,...
2263,Note,<CONTI>,Beat_8,-0.216667,<CONTI>,<CONTI>,<CONTI>,PIANO,53,120
2264,Note,<CONTI>,Beat_9,-0.475000,<CONTI>,<CONTI>,<CONTI>,PIANO,57,120
2265,Note,<CONTI>,Beat_14,-0.433333,<CONTI>,<CONTI>,<CONTI>,PIANO,57,480
2266,Note,<CONTI>,Beat_14,-0.041667,<CONTI>,<CONTI>,<CONTI>,PIANO,62,360


In [14]:
df[~df['type'].isin(['Note'])][:20]

Unnamed: 0,type,bar,pos,time,tempo,structure,chord,track,pitch,duration
0,Boundary,<SOS>,0,0.0,0,0,0,0,0,0
1,Tempo,1,Beat_0,0.0,70,0,0,0,0,0
2,Structure,<CONTI>,Beat_3,0.002465,0,i,0,0,0,0
4,Chord,<CONTI>,Beat_4,-0.158333,<CONTI>,<CONTI>,D:min,0,0,0
17,Chord,1,Beat_0,-0.158333,<CONTI>,<CONTI>,Bb:maj,0,0,0
31,Chord,1,Beat_0,-0.158333,<CONTI>,<CONTI>,Bb:maj,0,0,0
32,Chord,<CONTI>,Beat_4,-0.158333,<CONTI>,<CONTI>,D:min,0,0,0
46,Chord,1,Beat_0,-0.158333,<CONTI>,<CONTI>,A:maj,0,0,0
58,Chord,1,Beat_0,-0.158333,<CONTI>,<CONTI>,A:maj,0,0,0
62,Chord,<CONTI>,Beat_4,-0.158333,<CONTI>,<CONTI>,D:min,0,0,0


In [15]:
prompt_sz = 20
df[:prompt_sz]

Unnamed: 0,type,bar,pos,time,tempo,structure,chord,track,pitch,duration
0,Boundary,<SOS>,0,0.0,0,0,0,0,0,0
1,Tempo,1,Beat_0,0.0,70,0,0,0,0,0
2,Structure,<CONTI>,Beat_3,0.002465,0,i,0,0,0,0
3,Note,<CONTI>,Beat_3,0.391667,<CONTI>,<CONTI>,<UNK>,PIANO,57,1680
4,Chord,<CONTI>,Beat_4,-0.158333,<CONTI>,<CONTI>,D:min,0,0,0
5,Note,<CONTI>,Beat_4,-0.366667,<CONTI>,<CONTI>,<CONTI>,PIANO,62,1560
6,Note,<CONTI>,Beat_4,-0.308333,<CONTI>,<CONTI>,<CONTI>,PIANO,50,1680
7,Note,<CONTI>,Beat_4,-0.158333,<CONTI>,<CONTI>,<CONTI>,BRIDGE,81,120
8,Note,<CONTI>,Beat_4,0.008333,<CONTI>,<CONTI>,<CONTI>,PIANO,65,1560
9,Note,<CONTI>,Beat_5,-0.158333,<CONTI>,<CONTI>,<CONTI>,BRIDGE,69,120


In [16]:
arr = arr[:prompt_sz]
write2midi(arr=arr, word2event=word2event,
           path_outfile='../../model/TextureAndForm/ckpt/debug/Yijianmei_{}.mid'.format(prompt_sz))

In [17]:
arr.shape

(20, 9)

In [18]:
np.save('../../model/TextureAndForm/ckpt/Prompt/{}.npy'.format(INDEX), arr)