In [0]:
# First we download the JSB Chorales dataset (Bach chorales); this dataset is assumed to be the Inspiration Set.
!wget -O Jsb16thSeparated.npz https://github.com/czhuang/JSB-Chorales-dataset/raw/master/Jsb16thSeparated.npz

--2020-05-15 09:33:36--  https://github.com/czhuang/JSB-Chorales-dataset/raw/master/Jsb16thSeparated.npz
Resolving github.com (github.com)... 140.82.112.4
Connecting to github.com (github.com)|140.82.112.4|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/czhuang/JSB-Chorales-dataset/master/Jsb16thSeparated.npz [following]
--2020-05-15 09:33:37--  https://raw.githubusercontent.com/czhuang/JSB-Chorales-dataset/master/Jsb16thSeparated.npz
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.48.133
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.48.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 102808 (100K) [application/octet-stream]
Saving to: ‘Jsb16thSeparated.npz’


2020-05-15 09:33:37 (923 KB/s) - ‘Jsb16thSeparated.npz’ saved [102808/102808]



In [0]:
# We import required packages
import numpy as np
from magenta.models.coconet import lib_pianoroll
from magenta.models.coconet import lib_util
import os

In [0]:
# We now read the file contents.
# np.load on an .npz archive returns a lazy NpzFile that keeps the underlying file
# open, so load it inside a `with` block and copy every split into a plain dict
# before the archive is closed (the original left the file handle open).
# NOTE(review): allow_pickle=True unpickles arbitrary objects; this is only
# acceptable for an archive from a trusted source - do not reuse on untrusted files.
with np.load("Jsb16thSeparated.npz", allow_pickle=True, encoding="latin1") as npz_archive:
  dataset_bach_chorales = {split: npz_archive[split] for split in npz_archive.files}

In [3]:
# Shape of the validation split (first dimension = number of entries in the split)
dataset_bach_chorales['valid'].shape

(76,)

In [4]:
# Shape of the training split (first dimension = number of entries in the split)
dataset_bach_chorales['train'].shape

(229,)

In [5]:
# Shape of the test split (first dimension = number of entries in the split)
dataset_bach_chorales['test'].shape

(77,)

In [0]:
# We define the hyperparameters used to build the pianoroll encoder/decoder

# Dataset identifier; matches the base name of the downloaded .npz archive
key = "Jsb16thSeparated"

# Instrument layout
separate_instruments = True
num_instruments = 4

# Pitch range (presumably MIDI note numbers - TODO confirm against lib_pianoroll)
min_pitch, max_pitch = 36, 81

# Time resolution: both settings share the same value
shortest_duration = quantization_level = 0.125

# Tempo setting (presumably quarter notes per minute - TODO confirm)
qpm = 60

In [0]:
# Build the pianoroll encoder/decoder from the hyperparameters defined earlier.
# The options are gathered in one dict so they can be inspected before use.
encoder_decoder_options = dict(
  quantization_level=quantization_level,
  shortest_duration=shortest_duration,
  min_pitch=min_pitch,
  max_pitch=max_pitch,
  num_instruments=num_instruments,
  separate_instruments=separate_instruments,
)
pianoroll_encoder_decoder = lib_pianoroll.PianorollEncoderDecoder(**encoder_decoder_options)

In [0]:
# Encode every entry of the training split into a pianoroll
train_pianorolls = [
  pianoroll_encoder_decoder.encode(chorale)
  for chorale in dataset_bach_chorales['train']
]

In [0]:
# Decode each training pianoroll back into a MIDI object, keeping the same order
midi_datas = [
  pianoroll_encoder_decoder.decode_to_midi(pianoroll)
  for pianoroll in train_pianorolls
]

In [74]:
# We save the midis, one file per training chorale, named TrainMidi_<index>.midi
midi_dir = "./TrainMidis"
# Create the output folder up front so a fresh "Restart & Run All" does not depend
# on the directory having been made by hand; exist_ok keeps re-runs harmless.
os.makedirs(midi_dir, exist_ok=True)
for i, midi_data in enumerate(midi_datas):
  midi_fpath = os.path.join(midi_dir, "%s_%i.midi" % ("TrainMidi", i))
  # atomic_file yields a file object to write into; a dedicated name is used so the
  # handle does not reuse `p`, which another cell binds to the dataset file.
  with lib_util.atomic_file(midi_fpath) as midi_file:
    midi_data.write(midi_file)





In [0]:
# Count how often each distinct timestep, and each whole pianoroll, occurs in the
# training set. Every value is cast to int and written as digits; the digits are
# concatenated (no separator) into a string that serves as the dictionary key.
inspiration_bag = {}  # timestep string -> number of occurrences over all pianorolls
inspiration_set = {}  # whole-pianoroll string -> number of occurrences
for pianoroll in train_pianorolls:
  timestep_strings = []
  for timestep in pianoroll:
    # Flatten the timestep row by row into one digit string
    timestep_string = "".join(
        str(int(value)) for pitchstep in timestep for value in pitchstep.tolist())
    timestep_strings.append(timestep_string)
    inspiration_bag[timestep_string] = inspiration_bag.get(timestep_string, 0) + 1
  # The pianoroll key is the concatenation of its timestep strings, in order
  pianoroll_string = "".join(timestep_strings)
  inspiration_set[pianoroll_string] = inspiration_set.get(pianoroll_string, 0) + 1

In [0]:
# We obtain the frequencies: the occurrence count of each distinct pianoroll,
# in the insertion order of inspiration_set.
# The counts are read straight from the dict values; this avoids a loop variable
# named `key`, which would overwrite the `key` hyperparameter defined earlier.
repeated_values = np.array(list(inspiration_set.values()))