In [1]:
# import fluidsynth
import glob
import numpy as np
import pandas as pd
import pretty_midi
import collections
from pathlib import Path
from IPython import display

import torch
from torch.utils.data import Dataset, DataLoader
from torch import nn

%matplotlib inline
from matplotlib import pyplot as plt

# scrape time

In [2]:
import requests
from bs4 import BeautifulSoup
_datadir = Path('./data/classical')
def get_artist_link(tag):
    if tag.name != 'a':
        return False
    if tag.parent.name != 'td':
        return False
    if not tag.parent.has_attr('class'):
        return False
    if tag.parent.attrs['class'][0] == 'midi':
        return True
    return False

def get_files(tag):
    if tag.name != 'a':
        return False
    if tag.parent.name != 'td':
        return False
    if not tag.parent.has_attr('class'):
        return False
    if tag.parent.attrs['class'][0] != 'midi':
        return False
    if tag.has_attr('class'):
        return False
    href = tag.attrs['href']
    if href.split('/')[0] != 'midis':
        return False
    return True

root_url = 'http://www.piano-midi.de/'
base_url = f'{root_url}/midi_files.htm'
r = requests.get(base_url)
soup = BeautifulSoup(r.content, 'html.parser')
composer_page_link = soup.find_all(get_artist_link)
for composer_link in composer_page_link:
    composer_url = f'{root_url}/{composer_link.attrs["href"]}'
    r = requests.get(composer_url)
    soup = BeautifulSoup(r.content, 'html.parser') 
    file_tags = soup.find_all(get_files)
    for file_tag in file_tags:
        href = file_tag.attrs['href']
        filename = href.split('/')[-1]
        file_url = f'{root_url}/{href}'
        r = requests.get(file_url)
        with open(_datadir / filename, 'wb') as f:
            f.write(r.content)
        print(f'{filename} downloaded')

ModuleNotFoundError: No module named 'bs4'

# load metadata

In [7]:
_datadir = Path('./data/classical_C')
_metadata_file = _datadir / 'metadata.csv'

if not _metadata_file.exists():
    files = collections.defaultdict(list)
    for filepath in _datadir.glob('*.mid'):
        files['file'].append(str(filepath))
        composer = filepath.stem.split('_')[0]
        files['composer'].append(composer)

        pm = pretty_midi.PrettyMIDI(str(filepath))
        files['end_time'].append(pm.get_end_time())
        
        
        tempos, probabilities = pm.estimate_tempi()
        assert np.isclose(sum(probabilities), 1)
        tempo_bpm = np.dot(tempos, probabilities) # expected tempo in beats/min
        seconds_per_beat = (1/tempo_bpm)*60
        time_sig_denom = pm.time_signature_changes[0].denominator
        note_dist = seconds_per_beat / (16 / time_sig_denom)
        
        files['expected_tempo'].append(tempo_bpm)
        files['16th_note_duration'].append(note_dist)
        roll = midi_to_pianoroll(str(filepath), sample_dist=note_dist)
        files['roll_length'].append(roll.shape[1])

    df_meta = pd.DataFrame({ key: np.asarray(val) for key, val in files.items() })
    df_meta.to_csv(_metadata_file, index=False)
else:
    df_meta = pd.read_csv(_metadata_file)
    
    
df_meta.head()

Unnamed: 0,file,composer,end_time,expected_tempo,16th_note_duration,roll_length
0,data/classical_C/beethoven_les_adieux_3_C_.mid,beethoven,300.601217,172.65097,0.173761,1729
1,data/classical_C/clementi_opus36_6_2_C_.mid,clementi,136.598831,230.29044,0.13027,1048
2,data/classical_C/chpn_op10_e12_C_.mid,chpn,138.571344,207.866337,0.072162,1920
3,data/classical_C/chpn-p2_C_.mid,chpn-p2,121.57748,107.696155,0.139281,829
4,data/classical_C/chpn_op35_1_C_.mid,chpn,392.316231,197.124089,0.076094,5155


In [8]:
df_meta['16th_note_duration'].describe()

count    337.000000
mean       0.100930
std        0.048265
min        0.032209
25%        0.071719
50%        0.084659
75%        0.119674
max        0.380261
Name: 16th_note_duration, dtype: float64

# converting midi to audio

In [36]:
# Sampling rate for audio playback
_SAMPLING_RATE = 16000

def display_audio(pm: pretty_midi.PrettyMIDI, seconds=30):
  waveform = pm.fluidsynth(fs=_SAMPLING_RATE)
  # Take a sample of the generated waveform to mitigate kernel resets
  waveform_short = waveform[:seconds*_SAMPLING_RATE]
  return display.Audio(waveform_short, rate=_SAMPLING_RATE)

# converting midi to pianoroll

In [4]:
def midi_to_pianoroll(file, sample_dist=0.02):
    pm = pretty_midi.PrettyMIDI(file)
    sampling_rate = 1/sample_dist
    piano_roll = pm.get_piano_roll(fs=sampling_rate)
    return piano_roll

In [16]:
sample_file = df_meta['file'][5]
pm = pretty_midi.PrettyMIDI(sample_file)
rolls = midi_to_pianoroll(df_meta['file'][5])

In [31]:
file_idx = 2

row = df_meta.loc[file_idx]

sample_file = row['file']


note_dist = row['16th_note_duration']

rolls = midi_to_pianoroll(sample_file, sample_dist=0.02)
elem_eq = np.equal(rolls['left'][:, 1:], rolls['left'][:, :-1])
frame_eq = np.all(elem_eq, axis=0)
print(np.sum(frame_eq)/len(frame_eq))

rolls = midi_to_pianoroll(sample_file, sample_dist=note_dist)
elem_eq = np.equal(rolls['left'][:, 1:], rolls['left'][:, :-1])
frame_eq = np.all(elem_eq, axis=0)
print(np.sum(frame_eq)/len(frame_eq))

0.9173166279455692
0.6724732949876746


In [28]:
sample_file = df_meta['file'][5]


note_dist = df_meta.loc[5]['16th_note_duration']

rolls = midi_to_pianoroll(sample_file, sample_dist=0.02)
elem_eq = np.equal(rolls['right'][:, 1:], rolls['right'][:, :-1])
frame_eq = np.all(elem_eq, axis=0)
print(np.sum(frame_eq)/len(frame_eq))

rolls = midi_to_pianoroll(sample_file, sample_dist=note_dist)
elem_eq = np.equal(rolls['right'][:, 1:], rolls['right'][:, :-1])
frame_eq = np.all(elem_eq, axis=0)
print(np.sum(frame_eq)/len(frame_eq))

0.9210454669207732
0.6741019698725377


# converting midi to non-quantized representation

In [105]:
def midi_to_notes(midi_file: str) -> pd.DataFrame:
  pm = pretty_midi.PrettyMIDI(midi_file)
  instrument = pm.instruments[0]
  notes = collections.defaultdict(list)

  # Sort the notes by start time
  sorted_notes = sorted(instrument.notes, key=lambda note: note.start)
  prev_start = sorted_notes[0].start

  for note in sorted_notes:
    start = note.start
    end = note.end
    notes['pitch'].append(note.pitch)
    notes['start'].append(start)
    notes['end'].append(end)
    notes['step'].append(start - prev_start)
    notes['duration'].append(end - start)
    notes['velocity'].append(note.velocity)
    prev_start = start

  return pd.DataFrame({name: np.array(value) for name, value in notes.items()})

def notes_to_midi(notes: pd.DataFrame, instrument_name: str) -> pretty_midi.PrettyMIDI:

  pm = pretty_midi.PrettyMIDI()
  instrument = pretty_midi.Instrument(
      program=pretty_midi.instrument_name_to_program(
          instrument_name))

  prev_start = 0
  for i, note in notes.iterrows():
    start = float(prev_start + note['step'])
    end = float(start + note['duration'])
    note = pretty_midi.Note(
        velocity=note['velocity'],
        pitch=int(note['pitch']),
        start=start,
        end=end,
    )
    instrument.notes.append(note)
    prev_start = start

  pm.instruments.append(instrument)
  return pm

In [106]:
sample_file = df_meta['file'][0]
pm = pretty_midi.PrettyMIDI(sample_file)

instrument = pm.instruments[0]
instrument_name = pretty_midi.program_to_instrument_name(instrument.program)

df_notes = midi_to_notes(sample_file)
df_notes.head()

example_pm = notes_to_midi(df_notes, instrument_name=instrument_name)

# define dataset and dataloader

In [70]:
sample_file = df_meta['file'][5]
pm = pretty_midi.PrettyMIDI(sample_file)
pm.time_signature_changes[0].denominator

4

In [20]:
class PianoRoll(Dataset):
    
    def __init__(self, df_meta: pd.DataFrame, seq_length: int = 25, 
                 batch_size: int = 20, batch_per_file: int = 1):
        self.df_meta = df_meta
        
        self.seq_length = seq_length
        self.batch_size = batch_size
        self.batch_per_file = batch_per_file
        self.idx_per_file = self.batch_size*self.batch_per_file
        
        self.roll_cache = {}
        
    def __len__(self):
        return self.df_meta.shape[0]*self.idx_per_file
    
    def __getitem__(self, idx):
        file_idx = idx // self.idx_per_file
        window_idx = idx % self.idx_per_file
        
        
        seq, label = self.get_rolls(file_idx, window_idx)
        
        seq = torch.from_numpy(seq).float()
        label = torch.from_numpy(label).float()
        return seq, label
    
    def midi_to_pianoroll(self, file, sample_dist=0.02):
        pm = pretty_midi.PrettyMIDI(file)
        
        sampling_rate = 1/sample_dist
        piano_roll = pm.get_piano_roll(fs=sampling_rate)
        return piano_roll
    
    def get_rolls(self, file_idx, window_idx):
        file_path = self.df_meta.iloc[file_idx]['file']
        
        if file_idx in self.roll_cache:
            roll = self.roll_cache[file_idx]
        else:
            note_dist = df_meta.iloc[file_idx]['16th_note_duration']
            roll = self.midi_to_pianoroll(file_path, sample_dist=note_dist)
            self.roll_cache[file_idx] = roll
            
        roll[roll != 0] = 1
        roll = roll.T
        
        seq = roll[:-1]
        label = roll[-1]
        return seq, label

In [27]:
batch_size = 20
md = CombinedHands(df_meta=df_meta.iloc[:3], batch_size=batch_size, batch_per_file=2)
dataloader = DataLoader(md, batch_size=batch_size, shuffle=False)
features, labels = next(iter(dataloader))

In [28]:
features.shape

torch.Size([20, 25, 128])

# define NN

In [5]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'Using {device} device')

Using cpu device


In [7]:
class PianoRollLSTM(nn.Module):
    def __init__(self, separate=True):
        super(PianoRollLSTM, self).__init__()
        
        self.separate = separate
        
        if separate:
            input_size = 256
        else:
            input_size = 128
            
        hidden_size = input_size // 2
            
        self.lstm = nn.LSTM(input_size=input_size, batch_first=True, num_layers=1, hidden_size=hidden_size)
        
        self.left_pitch_layer = nn.Sequential(
            nn.Linear(hidden_size, input_size),
            nn.Sigmoid()
        )
        
        if self.separate:
            self.right_pitch_layer = nn.Sequential(
                nn.Linear(hidden_size, input_size),
                nn.Sigmoid()
            )
    def forward(self, x):
        output, (h_n, c_n) = self.lstm(x)
         
        left_output = self.left_pitch_layer(h_n)
        
        if not self.separate:
            return left_output
        else:
            right_output = self.right_pitch_layer(h_n)
            return left_output, right_output

# training time

In [64]:
batch_per_file = 2 #625
seq_length = 100
learning_rate = 1e-3
batch_size = 8
num_workers = 0

df_chpn = df_meta[df_meta['composer'] == 'chpn']
rng = np.random.default_rng(12345)
idx = np.arange(df_chpn.shape[0])
n_train = int(0.8*idx.shape[0])
train_idx = rng.choice(idx, size=n_train, replace=False)
test_idx = idx[~np.in1d(idx, train_idx)]
df_train = df_chpn.iloc[train_idx]
df_test = df_chpn.iloc[test_idx]

dset_train = PianoRoll(df_meta=df_train.iloc[1:2], 
                    batch_size=batch_size, 
                    batch_per_file=batch_per_file,
                    seq_length=seq_length)


dset_test = PianoRoll(df_meta=df_test, 
                    batch_size=batch_size, 
                    batch_per_file=batch_per_file,
                    seq_length=seq_length)


train_dataloader = DataLoader(dset_train, batch_size=batch_size, shuffle=False, num_workers=0)
test_dataloader = DataLoader(dset_test, batch_size=batch_size, shuffle=False, num_workers=0)

model = PianoRollLSTM(separate=False)
loss_fn = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)


metrics = collections.defaultdict(list)

iter_idx = -1
train_iterator = iter(train_dataloader)

train_losses = []

while iter_idx < 20:
    iter_idx += 1
    start = time.time()

    try:
        features, labels = next(train_iterator)
    except StopIteration:
        train_iterator = iter(train_dataloader)
        features, labels = next(train_iterator)


    # compute prediction and loss
    pred = model(features)[0, :, :]
    loss = loss_fn(pred, labels)

    # backprop
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    train_losses.append(loss.item())


    # compute metrics every 10 iterations
    if iter_idx % 3 == 0:

        metrics['iter'].append(iter_idx)

        # compute train loss
        train_loss = np.mean(np.asarray(train_losses))
        metrics['train_loss'].append(train_loss)
        train_losses = []


        # test loop
        test_loss_fn = nn.BCELoss()
        test_loss = 0
        frac_notes_correct = 0
        frac_frames_correct = 0
        num_batches = len(test_dataloader)
        for features, labels in test_dataloader:
            pred = model(features)[0, :, :]
            test_loss += test_loss_fn(pred, labels).item()

            notes = (pred > 0.5).type(torch.float)
            equal = torch.eq(notes, labels)
            frac_notes_correct += torch.mean(torch.sum(equal, axis=1) / 128)
            frac_frames_correct += torch.sum(torch.all(equal, axis=1))

        frac_notes_correct /= num_batches
        frac_frames_correct = frac_frames_correct / num_batches
        test_loss /= num_batches

        metrics['test_loss'].append(test_loss)
        metrics['frac_notes_correct'].append(frac_notes_correct)
        metrics['frac_frames_correct'].append(frac_frames_correct)

        df_metrics = pd.DataFrame({ key: np.asarray(val) for key, val in metrics.items() })
        
    end = time.time()
    print(f'iter_idx = {iter_idx}, iter_time = {end-start:.03f}')

iter_idx = 0, iter_time = 6.527
iter_idx = 1, iter_time = 1.408
iter_idx = 2, iter_time = 1.231
iter_idx = 3, iter_time = 6.134
iter_idx = 4, iter_time = 1.256
iter_idx = 5, iter_time = 1.232
iter_idx = 6, iter_time = 5.397
iter_idx = 7, iter_time = 1.257
iter_idx = 8, iter_time = 1.265
iter_idx = 9, iter_time = 5.278
iter_idx = 10, iter_time = 1.270
iter_idx = 11, iter_time = 1.295
iter_idx = 12, iter_time = 5.467
iter_idx = 13, iter_time = 1.301
iter_idx = 14, iter_time = 1.353
iter_idx = 15, iter_time = 5.680
iter_idx = 16, iter_time = 1.287
iter_idx = 17, iter_time = 1.264
iter_idx = 18, iter_time = 5.361
iter_idx = 19, iter_time = 1.325
iter_idx = 20, iter_time = 1.381


In [67]:
# def process_path(row):
#     filestr = row['file']
#     Path(filestr)


filestr = df_meta.iloc[0]['file']


In [73]:
df_meta['pred_roll_length'] = df_meta['end_time']/df_meta['16th_note_duration']
df_meta['pred_roll_length'] = df_meta['pred_roll_length'].astype(int)
df_meta['n_batches'] = df_meta['pred_roll_length']*0.8 // batch_size
# self.df_meta['n_batches'] = self.df_meta['n_batches'].astype(int)
file_idx_ends = []
n_batches = df_meta['n_batches'].values
file_idx_ends = [n_batches[0]*batch_size - 1]
for batches_in_file in n_batches[1:]:
    file_idx_ends.append(batches_in_file*batch_size + file_idx_ends[-1] )

df_meta['file_idx_ends'] = file_idx_ends

df_meta

Unnamed: 0,file,composer,end_time,expected_tempo,16th_note_duration,pred_roll_length,n_batches,file_idx_ends
0,data/classical/beethoven_opus22_1.mid,beethoven,399.624004,164.101873,0.091407,4371,437.0,3495.0
1,data/classical/schub_d760_4.mid,schub,206.069698,240.133084,0.062465,3298,329.0,6127.0
2,data/classical/mz_330_3.mid,mz,482.101321,189.367179,0.079211,6086,608.0,10991.0
3,data/classical/beethoven_les_adieux_1.mid,beethoven,341.950341,214.732513,0.069854,4895,489.0,14903.0
4,data/classical/burg_spinnerlied.mid,burg,102.871024,277.682898,0.054018,1904,190.0,16423.0
...,...,...,...,...,...,...,...,...
332,data/classical/mz_311_1.mid,mz,700.884007,265.431386,0.056512,12402,1240.0,764399.0
333,data/classical/scn16_3.mid,scn16,253.717954,191.453476,0.078348,3238,323.0,766983.0
334,data/classical/haydn_33_3.mid,haydn,236.772844,215.481556,0.069612,3401,340.0,769703.0
335,data/classical/muss_3.mid,muss,89.440365,192.066679,0.078098,1145,114.0,770615.0
