Referal https://www.tensorflow.org/tutorials/audio/music_generation

In [17]:
from IPython import display
import collections
import datetime
import fluidsynth
import glob
import numpy as np
import pathlib
import pandas as pd
import pretty_midi
import seaborn as sns
import tensorflow as tf

from matplotlib import pyplot as plt
from typing import Dict, List, Optional, Sequence, Tuple

In [18]:
seed = 42
tf.random.set_seed(seed)
np.random.seed(seed)

# Sampling rate for audio playback
_SAMPLING_RATE = 16000

# Download the maestro dataset of 1282 files
data_dir = pathlib.Path('data/maestro-v2.0.0')
if not data_dir.exists():
  tf.keras.utils.get_file(
      'maestro-v2.0.0-midi.zip',
      origin='https://storage.googleapis.com/magentadata/datasets/maestro/v2.0.0/maestro-v2.0.0-midi.zip',
      extract=True,
      cache_dir='.', cache_subdir='data',
  )

# these lines from tensor documentation above
filenames = glob.glob(str(data_dir/'**/*.mid*'))
print('Number of files:', len(filenames))
sample_file = filenames[1]
print(sample_file)

# generate a prettyMIDI object for the sample MIDI file
pm = pretty_midi.PrettyMIDI(sample_file)

Number of files: 1282
data/maestro-v2.0.0/2008/MIDI-Unprocessed_03_R1_2008_01-04_ORIG_MID--AUDIO_03_R1_2008_wav--1.midi


In [19]:
def display_audio(pm: pretty_midi.PrettyMIDI, seconds=40):
    waveform = pm.fluidsynth(fs=float(_SAMPLING_RATE))
    # Take a sample of the generated waveform to mitigate kernel resets
    waveform_short = waveform[:seconds*_SAMPLING_RATE]
    return display.Audio(waveform_short, rate=_SAMPLING_RATE)

In [20]:
display_audio(pm)

In [21]:
print('Number of instruments:', len(pm.instruments))
for instrument in pm.instruments:
    # instrument = pm.instruments[instrument]
    instrument_name = pretty_midi.program_to_instrument_name(instrument.program)
    print('Instrument name:', instrument_name)
    for i, note in enumerate(instrument.notes[:10]):
        note_name = pretty_midi.note_number_to_name(note.pitch)
        duration = note.end - note.start
        print(f'{i}: pitch={note.pitch}, note_name={note_name},'
                f' duration={duration:.4f}')

Number of instruments: 1
Instrument name: Acoustic Grand Piano
0: pitch=73, note_name=C#5, duration=0.3021
1: pitch=72, note_name=C5, duration=0.2174
2: pitch=70, note_name=A#4, duration=0.1680
3: pitch=69, note_name=A4, duration=0.1471
4: pitch=46, note_name=A#2, duration=0.7578
5: pitch=70, note_name=A#4, duration=0.3372
6: pitch=65, note_name=F4, duration=0.2852
7: pitch=66, note_name=F#4, duration=0.3099
8: pitch=63, note_name=D#4, duration=0.2826
9: pitch=58, note_name=A#3, duration=1.6302


In [22]:
for i, note in enumerate(instrument.notes[:10]):
  note_name = pretty_midi.note_number_to_name(note.pitch)
  duration = note.end - note.start
  print(f'{i}: pitch={note.pitch}, note_name={note_name},'
        f' duration={duration:.4f}')

0: pitch=73, note_name=C#5, duration=0.3021
1: pitch=72, note_name=C5, duration=0.2174
2: pitch=70, note_name=A#4, duration=0.1680
3: pitch=69, note_name=A4, duration=0.1471
4: pitch=46, note_name=A#2, duration=0.7578
5: pitch=70, note_name=A#4, duration=0.3372
6: pitch=65, note_name=F4, duration=0.2852
7: pitch=66, note_name=F#4, duration=0.3099
8: pitch=63, note_name=D#4, duration=0.2826
9: pitch=58, note_name=A#3, duration=1.6302


In [23]:
# def midi_to_notes(midi_file: str) -> pd.DataFrame:
#   pm = pretty_midi.PrettyMIDI(midi_file)
#   notes = collections.defaultdict(list)

#   instrument_names = []
#   for instrument in pm.instruments:
#     instrument_name = pretty_midi.program_to_instrument_name(instrument.program)
#     instrument_names.append(instrument_name)
#     # print(instrument_name)
#     # Sort the notes by start time
#     sorted_notes = sorted(instrument.notes, key=lambda note: note.start)
#     prev_start = sorted_notes[0].start
#     # print(sorted_notes)

#     for note in sorted_notes:
#         start = note.start
#         end = note.end
#         notes['pitch'].append(note.pitch)
#         notes['start'].append(start)
#         notes['end'].append(end)
#         notes['step'].append(start - prev_start)
#         notes['duration'].append(end - start)
#         prev_start = start
  
#   return pd.DataFrame({name: np.array(value) for name, value in notes.items()}), instrument_names

In [24]:
def midi_to_notes(midi_file: str) -> pd.DataFrame:
  pm = pretty_midi.PrettyMIDI(midi_file)
  instrument = pm.instruments[0]
  notes = collections.defaultdict(list)

  # Sort the notes by start time
  sorted_notes = sorted(instrument.notes, key=lambda note: note.start)
  prev_start = sorted_notes[0].start

  for note in sorted_notes:
    start = note.start
    end = note.end
    notes['pitch'].append(note.pitch)
    notes['start'].append(start)
    notes['end'].append(end)
    notes['step'].append(start - prev_start)
    notes['duration'].append(end - start)
    prev_start = start

  return pd.DataFrame({name: np.array(value) for name, value in notes.items()})

In [16]:
raw_notes = midi_to_notes(sample_file)
raw_notes.head()

Unnamed: 0,pitch,start,end,step,duration
0,73,1.070312,1.372396,0.0,0.302083
1,46,1.088542,1.846354,0.018229,0.757812
2,72,1.277344,1.494792,0.188802,0.217448
3,70,1.484375,1.652344,0.207031,0.167969
4,69,1.673177,1.820312,0.188802,0.147135


In [26]:
get_note_names = np.vectorize(pretty_midi.note_number_to_name)
sample_note_names = get_note_names(raw_notes['pitch'])
sample_note_names[:10]

array(['C#5', 'A#2', 'C5', 'A#4', 'A4', 'A#4', 'A#3', 'F4', 'F#4', 'D#4'],
      dtype='<U3')

### Ceate a MIDI file

In [27]:
def notes_to_midi(
  notes: pd.DataFrame,
  out_file: str, 
  instrument_name: str,
  velocity: int = 100,  # note loudness
) -> pretty_midi.PrettyMIDI:

  pm = pretty_midi.PrettyMIDI()
  instrument = pretty_midi.Instrument(
      program=pretty_midi.instrument_name_to_program(
          instrument_name))

  prev_start = 0
  for i, note in notes.iterrows():
    start = float(prev_start + note['step'])
    end = float(start + note['duration'])
    note = pretty_midi.Note(
        velocity=velocity,
        pitch=int(note['pitch']),
        start=start,
        end=end,
    )
    instrument.notes.append(note)
    prev_start = start

  pm.instruments.append(instrument)
  pm.write(out_file)
  return pm

In [28]:
# Assuming you already have the `notes_df` DataFrame and `instrument_name` list

# Specify the output MIDI file path
example_file = 'example.midi'

# Call the `notes_to_midi` function
example_pm = notes_to_midi(raw_notes, out_file=example_file, instrument_name=instrument_name)

# Play the generated MIDI file in ipynb
display_audio(example_pm)


### Create the training dataset

In [32]:
num_files = 5
all_notes = []
for f in filenames[:num_files]:
    notes = midi_to_notes(f)
    all_notes.append(notes)

all_notes = pd.concat(all_notes)
n_notes = len(all_notes)
print("Number of notes parsed:", n_notes)

Number of notes parsed: 12982


#### Create a tf.data.Dataset from the parsed notes.

In [None]:
key_order = ['pitch', 'step', 'duration']
train_notes = np.stack([all_notes[key] for key in key_order], axis=1)

#### Below, I think they are the code which I asked openAI to be useful for toto.mid file

In [None]:
# seed = 42
# tf.random.set_seed(seed)
# np.random.seed(seed)

# # Sampling rate for audio playback
# _SAMPLING_RATE = 16000


# data_dir = pathlib.Path('/Volumes/MAGIC1/CS50/myMusicGen/data/toto.mid')
# if not data_dir.exists():
#   tf.keras.utils.get_file(
#       'toto.mid',
#       origin='https://www.midiworld.com/download/5024',
#       extract=True,
#       cache_dir='.', cache_subdir='data',
#   )

# #The data above contains 1200+ Midi files
# filenames = str(data_dir)

# #Process a MIDI file
# sample_file = filenames
# print(sample_file)
# print(sample_file)

# # generate a prettyMIDI object for the sample MIDI file
# pm = pretty_midi.PrettyMIDI(sample_file)

In [25]:
# raw_notes, instrument_names_out = midi_to_notes(sample_file)
# print(instrument_names_out)
# raw_notes.head()


In [30]:
# def notes_to_midi(notes_df: pd.DataFrame, out_file: str, instrument_name: List[str]) -> pretty_midi.PrettyMIDI:
#     # Create a new PrettyMIDI object
#     pm = pretty_midi.PrettyMIDI()
#     print(instrument_name)
#     # Create a dictionary to store the instruments
#     instruments = {}

#     # Add notes to the corresponding instruments
#     for index, row in notes_df.iterrows():
#         pitch = int(row['pitch'])
#         start = float(row['start'])
#         end = float(row['end'])
#         duration = float(row['duration'])
#         step = float(row['step'])

#         # Find the instrument index based on the step value
#         instrument_index = int(index % len(instrument_name))

#         if instrument_index in instruments:
#             # If the instrument already exists, use it
#             instrument = instruments[instrument_index]
#         else:
#             # Create a new instrument
#             if instrument_index == 9:
#                 # If it's a bass drum instrument, create a new drum instrument
#                 instrument = pretty_midi.Instrument(program=0, is_drum=True)
#             elif instrument_index == 10:
#                 # If it's a snare drum instrument, create a new drum instrument
#                 instrument = pretty_midi.Instrument(program=0, is_drum=True)
#             elif instrument_index == 11:
#                 # If it's a snare drum instrument, create a new drum instrument
#                 instrument = pretty_midi.Instrument(program=0, is_drum=True)
#             else:
#                 # For non-drum instruments, create a new melody instrument
#                 program = pretty_midi.instrument_name_to_program(instrument_name[instrument_index])
#                 instrument = pretty_midi.Instrument(program=program)
            
#             # Store the instrument in the dictionary
#             instruments[instrument_index] = instrument

#         # Create a new note
#         note = pretty_midi.Note(
#             velocity=100, pitch=pitch, start=start, end=end
#         )

#         # Add the note to the instrument
#         instrument.notes.append(note)

#     # Add the instruments to the MIDI object
#     for instrument in instruments.values():
#         pm.instruments.append(instrument)

#     # Write the MIDI object to a file
#     pm.write(out_file)

#     # Return the created PrettyMIDI object
#     return pm
