In [None]:
!pip install muspy
!sudo apt install -y fluidsynth
!pip install --upgrade pyfluidsynth

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting muspy
  Downloading muspy-0.5.0-py3-none-any.whl (119 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m119.1/119.1 KB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
Collecting miditoolkit>=0.1
  Downloading miditoolkit-0.1.16-py3-none-any.whl (20 kB)
Collecting music21>=6.0
  Downloading music21-8.1.0-py3-none-any.whl (22.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m22.8/22.8 MB[0m [31m24.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pypianoroll>=1.0
  Downloading pypianoroll-1.0.4-py3-none-any.whl (26 kB)
Collecting mido>=1.0
  Downloading mido-1.2.10-py2.py3-none-any.whl (51 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.1/51.1 KB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting bidict>=0.21
  Downloading bidict-0.22.1-py3-none-any.whl (35 kB)
Collecting pretty-midi>=0.2
  Downloading pretty_midi-0

In [None]:
import torch
from torch import nn
import torch.optim as optim
import pandas as pd
import pathlib
import glob
import music21
import muspy as mp
import random
import pretty_midi
import fluidsynth
import numpy as np
from tqdm import tqdm

from IPython import display
from matplotlib import pyplot as plt
from typing import Dict, List, Optional, Sequence, Tuple

In [None]:
# Mount Google Drive at /content/drive; the dataset paths used below live under it.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Dataset folders on the mounted Drive.
# NOTE(review): the names suggest pieces split by major / minor key — confirm.
major="/content/drive/MyDrive/Colab Notebooks/ChefOeuvre/major_parts/content/major_parts"
minor="/content/drive/MyDrive/Colab Notebooks/ChefOeuvre/minor_parts/content/minor_parts"

In [None]:
seed = 42
# Seed NumPy, Python's `random` and PyTorch. Without the PyTorch seed the
# model weights are initialised differently on every run.
np.random.seed(seed)
random.seed(seed)
torch.manual_seed(seed)

# Sampling rate for audio playback
_SAMPLING_RATE = 16000

In [None]:
data_dir = pathlib.Path(major)
# glob returns matches in arbitrary order; sort them so that the file picked
# by index below is the same on every run.
filenames = sorted(glob.glob(str(data_dir/'*.mid*')))
print('Number of files:', len(filenames))

# Arbitrary example file used for the playback / inspection cells.
sample_file = filenames[2]
print(sample_file)

Number of files: 317
/content/drive/MyDrive/Colab Notebooks/ChefOeuvre/major_parts/content/major_parts/Takin It To The Streets_transpose.mid


In [None]:
# Parse the sample MIDI file into a PrettyMIDI object.
pm = pretty_midi.PrettyMIDI(sample_file)

In [None]:
def display_audio(pm: pretty_midi.PrettyMIDI, seconds=30):
  """Synthesize `pm` with FluidSynth and return an audio player for its beginning.

  Args:
    pm: the MIDI object to render.
    seconds: length of the excerpt to keep, in seconds (may be fractional).

  Returns:
    An `IPython.display.Audio` object holding at most the first `seconds`
    seconds of the waveform, sampled at `_SAMPLING_RATE`.
  """
  waveform = pm.fluidsynth(fs=_SAMPLING_RATE)
  # Take a sample of the generated waveform to mitigate kernel resets.
  # The slice bound must be an integer, so a fractional `seconds` is truncated.
  n_samples = int(seconds * _SAMPLING_RATE)
  waveform_short = waveform[:n_samples]
  return display.Audio(waveform_short, rate=_SAMPLING_RATE)

In [None]:
# Play the sample file (the default excerpt length of display_audio is 30 s).
display_audio(pm)

In [None]:
def midi_to_notes(file: str):
  """Read a MIDI file and return column 1 of its note table, modulo 12.

  The file is loaded with muspy and converted with `to_note_representation`.
  Column 1 is presumably the MIDI pitch, which would make the result a list
  of pitch classes (0-11) — confirm against the muspy documentation.
  """
  note_table = pd.DataFrame(mp.to_note_representation(mp.read_midi(file)))
  # Fold the values of column 1 into one octave.
  folded = note_table[1] % 12 # + 60
  return list(folded)

In [None]:
def data_to_notes(dir: str):
  """Convert every MIDI file of a folder into a list of notes.

  Args:
    dir: path of the folder to scan for `*.mid*` files. The path of a single
      file is accepted as well, in which case the folder containing that
      file is scanned.

  Returns:
    A list with one entry per file (files taken in sorted path order); each
    entry is the list returned by `midi_to_notes` for that file.
  """
  data_dir = pathlib.Path(dir)
  # A file path means "the dataset folder this file belongs to".
  if data_dir.is_file():
    data_dir = data_dir.parent
  # glob order is arbitrary; sorting keeps the order of the result reproducible.
  filenames = sorted(glob.glob(str(data_dir/'*.mid*')))
  return [midi_to_notes(name) for name in tqdm(filenames)]

In [None]:
# Load the whole `major` dataset folder: one list of notes per MIDI file.
# (data_to_notes expects a folder, not the path of a single sample file.)
X = data_to_notes(major)

100%|██████████| 317/317 [00:24<00:00, 12.78it/s]


In [None]:
class Model(nn.Module):
  """Stacked LSTM followed by a linear layer that outputs one score per class.

  Args:
    input_dim: number of features of each LSTM input step.
    hidden_dim: size of the LSTM hidden state.
    num_layers: number of stacked LSTM layers (default 10).
    num_classes: size of the output layer (default 12).
  """

  def __init__(self, input_dim, hidden_dim, num_layers=10, num_classes=12):
    super().__init__()

    self.num_layers = num_layers
    self.hidden_dim = hidden_dim

    # NOTE(review): this embedding is never used by forward(). It is kept so
    # that the parameter set (and therefore state_dict keys and the random
    # initialisation order) stays the same; remove it or wire it in.
    self.embedding = nn.Embedding(hidden_dim, 3)

    self.lstm = nn.LSTM(
            input_size = input_dim,
            hidden_size = self.hidden_dim,
            num_layers = self.num_layers,
        )

    self.fc = nn.Linear(hidden_dim, num_classes)

  def forward(self, x):
    """Return the class scores computed from the LSTM output for `x`.

    `x` is passed to the LSTM unchanged; tanh is applied to the LSTM output
    before the final linear layer.
    """
    output, _ = self.lstm(x)

    output = torch.tanh(output)

    return self.fc(output)

In [None]:
# Smoke test on a freshly initialised (untrained) model.
RNN_LSTM = Model(10, 12)

# Wrapping the slice in a list gives a 2-D tensor: one row holding the first
# (up to) 10 notes of the first piece.
t = torch.FloatTensor([X[0][:10]])

out = RNN_LSTM(t)

print(out)

# torch.argmax without `dim` returns the index of the largest value of the
# flattened output.
print(torch.argmax(out))

tensor([[-0.2327,  0.0950, -0.0623, -0.1263, -0.2886,  0.1482,  0.2961, -0.2144,
         -0.1908, -0.1915,  0.2556,  0.0405]], grad_fn=<AddmmBackward0>)
tensor(6)


In [None]:
# Show the scores computed in the previous cell again.
print(out)

tensor([[-0.2327,  0.0950, -0.0623, -0.1263, -0.2886,  0.1482,  0.2961, -0.2144,
         -0.1908, -0.1915,  0.2556,  0.0405]], grad_fn=<AddmmBackward0>)


In [None]:
# model_test = nn.LSTM(10, 12)

# out, (a, b) = model_test(t)

#print(out)

In [None]:
def train(model: nn.Module, data: list, fen_size: int, num_epochs=10, optimizer="adam"):
  """Train `model` to predict the note that follows a window of `fen_size` notes.

  Each piece of `data` is cut into sliding windows `music[i:i+fen_size]`; the
  target of a window is `music[i+fen_size]`, i.e. the very next note. The
  model output is interpreted as one score (logit) per class and optimised
  with cross-entropy, one window at a time.

  Args:
    model: network called as `model(window)` with a float tensor of shape
      `(1, fen_size)`; its output must contain one row of class scores.
    data: list of pieces, each a sequence of integer class indices.
    fen_size: window length (number of notes given to the model).
    num_epochs: number of passes over `data`.
    optimizer: `"adam"` (case-insensitive) to build Adam with lr=0.001, or an
      already constructed `torch.optim.Optimizer`.

  Returns:
    The total number of windows processed over all epochs.

  Raises:
    ValueError: if `optimizer` is neither `"adam"` nor an Optimizer instance.
  """
  fen = fen_size

  model.train()

  # Cross-entropy on the raw scores is differentiable. (An L1 loss on the
  # argmax is not: the argmax cuts the graph, so the weights never change.)
  criterion = nn.CrossEntropyLoss()

  # optimizer
  if isinstance(optimizer, optim.Optimizer):
    opt = optimizer
  elif str(optimizer).lower() == "adam":
    opt = optim.Adam(model.parameters(),
                     lr=0.001,
                     weight_decay=0.0)
  else:
    raise ValueError("unsupported optimizer: {!r}".format(optimizer))

  # training
  total_windows = 0
  for epoch in range(num_epochs):
    print("epoch", epoch+1, "/", num_epochs)
    epoch_loss, epoch_windows = 0.0, 0
    for id_music in tqdm(range(len(data))):

      music = data[id_music]
      # One window per position that still has a following note; pieces
      # shorter than fen + 1 notes yield no window.
      for i in range(len(music) - fen):
        window = torch.FloatTensor([music[i:(i+fen)]])
        target = torch.tensor([int(music[i+fen])], dtype=torch.long)

        out = model(window)
        # Flatten to (rows, classes) so 2-D and 3-D outputs are both accepted.
        logits = out.reshape(-1, out.shape[-1])

        loss = criterion(logits, target)

        # Gradients accumulate by default: reset them before every step.
        opt.zero_grad()
        loss.backward()
        opt.step()

        # .item() keeps a plain float instead of a tensor tied to the graph.
        epoch_loss += loss.item()
        epoch_windows += 1
    total_windows += epoch_windows
    # Mean loss per window over the whole epoch (guarded against empty data).
    print("Epoch: {}. Loss: {}".format(epoch+1, epoch_loss/max(epoch_windows, 1)))
  return total_windows

In [None]:
# Start again from a freshly initialised model before training.
RNN_LSTM = Model(10, 12)

In [None]:
# Train on the first 50 pieces with windows of 10 notes.
# As the last expression of the cell, the value returned by train() is displayed.
train(RNN_LSTM, X[0:50], 10)

epoch 1 / 10


  y_pred = torch.tensor(y_pred, requires_grad=True)
100%|██████████| 50/50 [02:17<00:00,  2.75s/it]


Epoch: 1. Loss: 254.79432678222656
epoch 2 / 10


100%|██████████| 50/50 [02:40<00:00,  3.21s/it]


Epoch: 2. Loss: 254.79432678222656
epoch 3 / 10


100%|██████████| 50/50 [02:33<00:00,  3.08s/it]


Epoch: 3. Loss: 254.79432678222656
epoch 4 / 10


100%|██████████| 50/50 [02:33<00:00,  3.07s/it]


Epoch: 4. Loss: 254.79432678222656
epoch 5 / 10


100%|██████████| 50/50 [02:35<00:00,  3.11s/it]


Epoch: 5. Loss: 254.79432678222656
epoch 6 / 10


100%|██████████| 50/50 [02:35<00:00,  3.12s/it]


Epoch: 6. Loss: 254.79432678222656
epoch 7 / 10


100%|██████████| 50/50 [02:38<00:00,  3.18s/it]


Epoch: 7. Loss: 254.79432678222656
epoch 8 / 10


100%|██████████| 50/50 [02:36<00:00,  3.13s/it]


Epoch: 8. Loss: 254.79432678222656
epoch 9 / 10


100%|██████████| 50/50 [02:36<00:00,  3.12s/it]


Epoch: 9. Loss: 254.79432678222656
epoch 10 / 10


100%|██████████| 50/50 [02:35<00:00,  3.11s/it]


Epoch: 10. Loss: 254.79432678222656


849230

In [None]:
def predict(model: nn.Module, music_size: int, track, window: int = 10):
  """Extend a seed sequence note by note with the model's predictions.

  Args:
    model: network called with a float tensor of shape `(1, window)`; the
      index of its largest output value is taken as the next note.
    music_size: `music_size - window` notes are appended to the seed.
    track: 2-D tensor whose first row is the seed sequence.
    window: number of most recent notes fed to the model at each step.

  Returns:
    The seed notes (as returned by `track.tolist()`) followed by the
    predicted notes (ints).
  """
  t = window
  #note_predicted = [random.randint(0, 11) for x in range(t)]
  note_predicted = track.tolist()[0]

  # Inference only: use eval mode and skip autograd bookkeeping, then put the
  # model back into the mode it was in.
  was_training = model.training
  model.eval()
  try:
    with torch.no_grad():
      for _ in range(music_size-t):
        out = model(torch.FloatTensor([note_predicted[-t:]]))
        note_predicted.append(int(torch.argmax(out)))
  finally:
    model.train(was_training)

  return note_predicted

In [None]:
def prediction_to_notes(pred: list):
  """Turn a list of predicted notes into a note table.

  Every value of `pred` is shifted up by 60 to give the pitch. Notes start
  0.3 apart (the first one at 0.3) and each lasts 1.

  Returns:
    A DataFrame with the columns pitch, step, duration, start and end, one
    row per element of `pred`.
  """
  duration = 1
  step = 0.3

  rows = []
  onset = 0
  for value in pred:
    # Each note starts one `step` after the previous one.
    onset = onset + step
    rows.append((value + 60, step, duration, onset, onset + duration))

  return pd.DataFrame(rows, columns=('pitch','step','duration', 'start', 'end'))

In [None]:
def notes_to_midi(
  notes: pd.DataFrame,
  out_file: str, 
  instrument_name: str,
  velocity: int = 100,  # note loudness
) -> pretty_midi.PrettyMIDI:
  """Write a note table to a MIDI file and return the PrettyMIDI object.

  Only the `pitch`, `step` and `duration` columns of `notes` are read: each
  note starts `step` after the start of the previous one (the first one
  `step` after 0) and ends `duration` later. All notes go to a single
  instrument named `instrument_name` and share the same `velocity`.
  """
  midi = pretty_midi.PrettyMIDI()
  program = pretty_midi.instrument_name_to_program(instrument_name)
  track = pretty_midi.Instrument(program=program)

  onset = 0
  for _, row in notes.iterrows():
    # Start times are rebuilt from the cumulative steps.
    onset = float(onset + row['step'])
    track.notes.append(
        pretty_midi.Note(
            velocity=velocity,
            pitch=int(row['pitch']),
            start=onset,
            end=float(onset + row['duration']),
        )
    )

  midi.instruments.append(track)
  midi.write(out_file)
  return midi

In [None]:
# NOTE(review): `t` is not defined in this cell; it is whatever an earlier
# cell last assigned to it (hidden notebook state).
print(t)
# Extend the seed `t` with model predictions (music_size=120).
list_predict = predict(RNN_LSTM, 120, t)

df_pred = prediction_to_notes(list_predict)

# Reload the sample file only to reuse the instrument of its first track.
pm = pretty_midi.PrettyMIDI(sample_file)

print('Number of instruments:', len(pm.instruments))
instrument = pm.instruments[0]
instrument_name = pretty_midi.program_to_instrument_name(instrument.program)
print('Instrument name:', instrument_name)

# Write the generated notes to a MIDI file and play them.
out_file = 'output.mid'
out_pm = notes_to_midi(df_pred, out_file=out_file, instrument_name=instrument_name)
display_audio(out_pm)


tensor([[ 4.,  7., 11.,  2.,  4.,  7., 11.,  2.,  7.,  0.]])
Number of instruments: 4
Instrument name: Bright Acoustic Piano


In [None]:
# Inspect the first 30 rows of the generated note table.
df_pred.head(30)

Unnamed: 0,pitch,step,duration,start,end
0,64.0,0.3,1,0.3,1.3
1,67.0,0.3,1,0.6,1.6
2,71.0,0.3,1,0.9,1.9
3,62.0,0.3,1,1.2,2.2
4,64.0,0.3,1,1.5,2.5
5,67.0,0.3,1,1.8,2.8
6,71.0,0.3,1,2.1,3.1
7,62.0,0.3,1,2.4,3.4
8,67.0,0.3,1,2.7,3.7
9,60.0,0.3,1,3.0,4.0


In [None]:
# The LSTM parameters are floating point, so the window has to be a float
# tensor (an integer tensor is a dtype mismatch for the LSTM).
t = torch.FloatTensor([X[0][20:30]])
# NOTE(review): the note directly after this window is X[0][30]; confirm
# that index 31 is the intended reference value.
print(X[0][31])

out = RNN_LSTM(t)

print(out)
# Take the argmax over all scores. For a (1, 12) output, out[0][0] is a single
# scalar, whose argmax is always 0.
print(torch.argmax(out))

4
tensor([[[-0.2492,  0.1250,  0.1058,  0.0847,  0.0530, -0.0240,  0.0457,
           0.1697,  0.1918,  0.0379],
         [-0.2492,  0.1250,  0.1058,  0.0847,  0.0530, -0.0240,  0.0457,
           0.1697,  0.1918,  0.0379],
         [-0.2492,  0.1250,  0.1058,  0.0847,  0.0530, -0.0240,  0.0457,
           0.1697,  0.1918,  0.0379],
         [-0.2492,  0.1250,  0.1058,  0.0847,  0.0530, -0.0240,  0.0457,
           0.1697,  0.1918,  0.0379],
         [-0.2492,  0.1250,  0.1058,  0.0847,  0.0530, -0.0240,  0.0457,
           0.1697,  0.1918,  0.0379],
         [-0.2492,  0.1250,  0.1058,  0.0847,  0.0530, -0.0240,  0.0457,
           0.1697,  0.1918,  0.0379],
         [-0.2492,  0.1250,  0.1058,  0.0847,  0.0530, -0.0240,  0.0457,
           0.1697,  0.1918,  0.0379],
         [-0.2492,  0.1250,  0.1058,  0.0847,  0.0530, -0.0240,  0.0457,
           0.1697,  0.1918,  0.0379],
         [-0.2492,  0.1250,  0.1058,  0.0847,  0.0530, -0.0240,  0.0457,
           0.1697,  0.1918,  0.0379],