In [10]:
from IPython import display
import collections
import datetime
import fluidsynth
import glob
import numpy as np
import pathlib
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
import pretty_midi
import seaborn as sns
import tensorflow as tf

from matplotlib import pyplot as plt
from typing import Dict, List, Optional, Sequence, Tuple

# Tomb added
import random

In [11]:
#Download Chorales
data_dir = pathlib.Path('/Volumes/MAGIC1/CS50/myMusicGen/data/chorales')
if not data_dir.exists():
  tf.keras.utils.get_file(
      'midi',
      origin='https://github.com/jamesrobertlloyd/infinite-bach/tree/master/data/chorales/midi',
      extract=True,
      cache_dir='.', cache_subdir='data',
  )
filenames = glob.glob(str(data_dir/'**/*.mid*'))
print(filenames)
print('Number of files:', len(filenames))

['/Volumes/MAGIC1/CS50/myMusicGen/data/chorales/midi/000101b_.mid', '/Volumes/MAGIC1/CS50/myMusicGen/data/chorales/midi/000106b_.mid', '/Volumes/MAGIC1/CS50/myMusicGen/data/chorales/midi/000106trio.mid', '/Volumes/MAGIC1/CS50/myMusicGen/data/chorales/midi/000206b_.mid', '/Volumes/MAGIC1/CS50/myMusicGen/data/chorales/midi/000306b_.mid', '/Volumes/MAGIC1/CS50/myMusicGen/data/chorales/midi/000408b_.mid', '/Volumes/MAGIC1/CS50/myMusicGen/data/chorales/midi/000504b_.mid', '/Volumes/MAGIC1/CS50/myMusicGen/data/chorales/midi/000507b_.mid', '/Volumes/MAGIC1/CS50/myMusicGen/data/chorales/midi/000603b_.mid', '/Volumes/MAGIC1/CS50/myMusicGen/data/chorales/midi/000606b_.mid', '/Volumes/MAGIC1/CS50/myMusicGen/data/chorales/midi/000707b_.mid', '/Volumes/MAGIC1/CS50/myMusicGen/data/chorales/midi/000806b_.mid', '/Volumes/MAGIC1/CS50/myMusicGen/data/chorales/midi/000907b_.mid', '/Volumes/MAGIC1/CS50/myMusicGen/data/chorales/midi/001007b_.mid', '/Volumes/MAGIC1/CS50/myMusicGen/data/chorales/midi/001106b

In [12]:
class UnsupportedMidiFileException(Exception):
  "Unsupported MIDI File"

In [13]:
"""Tomb made a pretty useful function"""
def check_pianoroll_dim(pianoroll):
        rows = len(pianoroll)  # This gives the number of rows
        columns = len(pianoroll[0])  # This assumes all rows have the same length

        print("Number of rows:", rows) # represents sequence length
        print("Number of columns:", columns) # represents the 4 octave range in midi 36-83
        print("Total dimesions of pianoroll is", rows*columns)

In [14]:
def get_pianoroll(midi, nn_from, nn_thru, seqlen, tempo):
    pianoroll = midi.get_piano_roll(fs=2*tempo/60) # This is the core line which makes this matrix based on 8th note

    # print(f"piano_roll.shape[1] a.k.a song length!{pianoroll.shape[1]}")

    if pianoroll.shape[1] < seqlen:
        raise UnsupportedMidiFileException

    pianoroll = pianoroll[nn_from:nn_thru, 0:seqlen] # (48, 64) Pinoroll's value still NOT binary since it has velocity
    binary_pianoroll = np.heaviside(pianoroll, 0) # converting as a binary matrix
    transposed_pianoroll = np.transpose(binary_pianoroll) #(64, 48)
     
    # return binary_pianoroll
    return transposed_pianoroll # type numpy.ndarray

In [15]:
def read_midi(filename, sop_alto, seqlen):
  
  def add_rest_nodes(pianoroll):  # If all the elemets are zero, the rest node says 1, else 0
    rests = 1 - np.sum(pianoroll, axis=1)
    rests = np.expand_dims(rests, 1)
    return np.concatenate([pianoroll, rests], axis=1)
  
  
  # read midi file
  midi = pretty_midi.PrettyMIDI(filename)

  # An Exception error is thrown if there is a modulation(key change)
  if len(midi.key_signature_changes) !=1:
    raise UnsupportedMidiFileException

  # Modulate the given key to C major or C minor
  key_number = midi.key_signature_changes[0].key_number
  # transpose_to_c(midi, key_number)

  # Get Major key(keynode=0) or Minor key(keynode=1)
  keymode = np.array([int(key_number / 12)])

  # The Exception error thrown when tempo changes
  tempo_time, tempo = midi.get_tempo_changes()
  if len(tempo) != 1:
    raise UnsupportedMidiFileException
  if sop_alto:
    # The exception thrown if there are less than 2 parts
    if len(midi.instruments) < 2:
      raise UnsupportedMidiFileException
    # Get pianoRoll returns numpy.ndarray
    pr_s = get_pianoroll(midi.instruments[0], 36, 84, seqlen, tempo[0])
    pr_a = get_pianoroll(midi.instruments[1], 36, 84, seqlen, tempo[0])
    pr_b = get_pianoroll(midi.instruments[2], 36, 84, seqlen, tempo[0])
    
    
    sop_w_rest = add_rest_nodes(pr_s) 
    alt_w_rest = add_rest_nodes(pr_a)
    bass_w_rest = add_rest_nodes(pr_b)
    
    # return pr_s, pr_a, pr_b, keymode
    return sop_w_rest, alt_w_rest, bass_w_rest, keymode # All numpy.ndarray including keymode  

  else:
    #Get a pianoroll which gathered all the parts
    pr = get_pianoroll(midi, 36, 84, seqlen, tempo[0])
    return pr, keymode

In [16]:
"""Get the ingredients. 
Make the data(i.e manipulate the model as you tell it what you want) here for predict the 3rd note with given (x1,x2). 
Make list1 that has (xn, xn+1) pair elements 
and list2 which has (xn+2) elements"""

np.set_printoptions(threshold=np.inf) # Show the entire print, esp Matrix

x_all = [] 
y_all = [] 
keymodes = [] 
files = []

# repeat the process with all the midi files
for f in glob.glob(str(data_dir/"**/*.mid*")):
  # print(f)
  try:
    # sop, alt, bass, keymode = read_midi(f, True, 64)
    sop, alt, bass, keymode = read_midi(f, True, 64)
    for p in sop:
      input_ingredients = p[:2]
      target_ingredients = p[3:4]
      x_all.append(input_ingredients)
      y_all.append(target_ingredients)

    # add pianorolls which have been added rest elements to the lists
    
  # throw exception for midi data which can not be used
  except UnsupportedMidiFileException:
    print("nah")
n_notes = len(x_all) # Not sure if it is correct
print("n_notes!",n_notes)

# convert x_all and y_all into NumPy array to make them more useful later

# Tomb added. These three lines of code is for getting the number of pitch range
# The pitch range is 49 at this point as rest_node dim is added, refer to prof's docu
x_all = np.array(x_all) # shape(31680, 2)  495 * 64=31680
y_all = np.array(y_all) # shape(31680, 1)

# print(x_all.shape[2])
# pitch_range = x_all.shape[2] # not used yet

# x_all = np.array(x_all)
# y_all = np.array(y_all)

# You get an error with this  code but can see the shape
# x = pd.DataFrame(y_all)
# print(x)

# print("x_all shape",x_all.shape)
# print("y_all shape",y_all.shape)
# print(x_all[:11])
# print(y_all[:11])

nah
nah
nah
n_notes! 31680


In [35]:
"""get inputs (Xn, Xn+1) and target (Xn+2) """
def make_sequences(
        dataset: tf.data.Dataset,
        seq_length:int,
) -> tf.data.Dataset:
    
    seq_length = seq_length+1

    windows = dataset.window(seq_length, shift=1, stride=1, drop_remainder=True)
    # for w in windows:
    #    print(f"window!! {list(w.as_numpy_iterator())}")
    
    flatten = lambda x: x.batch(seq_length, drop_remainder=True) # Assing lambda function to the variable "flatten"
    sequences = windows.flat_map(flatten) # Flat_map falltens the "dataset of datasets" into a dataset of tensors

    def split_labels(sequences):
        inputs = sequences[:-1] # Could not see the inside as this func got in through map_func
        output_dense = sequences[-1]
        return inputs, output_dense
    
    return sequences.map(split_labels, num_parallel_calls=tf.data.AUTOTUNE)

In [36]:
sop_tf_data = [] #  A list which contains (64 ,49) tf.daset elements
sop, alt, bass, keymode = read_midi(f, True, 64)
# sop = sop[0]
# sop_tf = tf.data.Dataset.from_tensor_slices(sop)
# print(list(sop_tf.as_numpy_iterator()))

for i in sop:
    sop_tf = tf.data.Dataset.from_tensor_slices(i)
    sop_tf_data.append(sop_tf)

# for t in sop_tf_data:
    # print(list(t.as_numpy_iterator()))

single_sop_tf = sop_tf_data[0]

In [111]:
seq_length = 2
batch_size = 64
buffer_size = n_notes - seq_length

# print(make_sequences(single_sop_tf, seq_length))

seq_ds = make_sequences(single_sop_tf, seq_length) # 47 elements each input n output
# type <'tensorflow _ParallelMapDataset'>
 #(TensorSpec(shape=(50,), dtype=tf.float64, name=None), TensorSpec(shape=(), dtype=tf.float64, name=None)


seqList = []
targetList = []

#<Check the dataset elements>
for seq, target in seq_ds:
   seqList.append(seq)
   targetList.append(target)

  # print('Input sequence shape:', seq.shape) # (2,)
  # print('Input sequence elements:', seq[0: 10]) 
  # print('target:', target) # shape ()
  # print()

seqList = np.expand_dims(seqList, axis=0)
targetList = np.expand_dims(targetList, axis=0)

print(seq_ds.element_spec)
# print(list(seq_ds.as_numpy_iterator())) # [(array([0., 0.]), 0.0), (array([0., 0.]), 0.0)...


47
(TensorSpec(shape=(2,), dtype=tf.float64, name=None), TensorSpec(shape=(), dtype=tf.float64, name=None))


In [112]:
print(seqList.shape)
print(targetList.shape)

input_shape = len(seqList[0])

model = tf.keras.Sequential()
model.add(tf.keras.layers.LSTM(
   128, input_shape=(input_shape,2),use_bias=True, activation="tanh", return_sequences=False
))
model.add(tf.keras.layers.Dense(
    47, use_bias=True, activation="linear"))

model.summary()

model.compile(optimizer="adam", loss="categorical_crossentropy",
 metrics="categorical_accuracy")

model.fit(seqList, targetList, epochs=10)


(1, 47, 2)
(1, 47)
Model: "sequential_30"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_17 (LSTM)              (None, 128)               67072     
                                                                 
 dense_55 (Dense)            (None, 47)                6063      
                                                                 
Total params: 73135 (285.68 KB)
Trainable params: 73135 (285.68 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x29a6a6fa0>

In [80]:
"""
This went through but sucks
seq_ds is a list of tuples where each tuple contains 
a pair consisting of an array of shape (2,) and a target value."""

# # Split the dataset into input sequences and target values
# input_sequences = np.array([data[0] for data in seq_ds])
# targets = np.array([data[1] for data in seq_ds])

# # Define the model using the functional API
# input_1 = tf.keras.layers.Input(shape=(2,))
# input_2 = tf.keras.layers.Input(shape=(2,))


# concatenated = tf.keras.layers.Concatenate()([input_1, input_2])# Concatenate the inputs
# hidden_layer = tf.keras.layers.Dense(4, activation='relu')(concatenated)# Dense layers for processing the concatenated inputs
# output_layer = tf.keras.layers.Dense(1, activation='linear')(hidden_layer)

# model = tf.keras.Model(inputs=[input_1, input_2], outputs=output_layer)# Define the model with multiple inputs and a single output

# model.compile(optimizer='adam', loss='mean_squared_error') # Compile the model

# model.summary()

# model.evaluate([input_sequences, input_sequences], return_dict=True) 

# # Train the model on the provided data
# model.fit([input_sequences, input_sequences], targets, batch_size=32, epochs=100) 


Model: "model_5"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_11 (InputLayer)       [(None, 2)]                  0         []                            
                                                                                                  
 input_12 (InputLayer)       [(None, 2)]                  0         []                            
                                                                                                  
 concatenate_5 (Concatenate  (None, 4)                    0         ['input_11[0][0]',            
 )                                                                   'input_12[0][0]']            
                                                                                                  
 dense_38 (Dense)            (None, 4)                    20        ['concatenate_5[0][0]'] 

<keras.src.callbacks.History at 0x2994d5dc0>