<a href="https://colab.research.google.com/github/PhillDev-coder256/MelodyNet-AI-Music-Composer/blob/main/MelodyNet_AI_Music_Composer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [10]:
import zipfile
from pathlib import Path

# Define the path to the zip file and the destination folder
zip_file_path = '/content/drive/MyDrive/Lo-Fi Hip Hop MIDIs.zip'
extract_to_folder = '/content/drive/MyDrive/Songs'

# Create the destination folder if it doesn't exist
Path(extract_to_folder).mkdir(parents=True, exist_ok=True)

# Extract the zip file
with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    zip_ref.extractall(extract_to_folder)

print(f'Files extracted to: {extract_to_folder}')


Files extracted to: /content/drive/MyDrive/Songs


In [11]:
from pathlib import Path

songs = []
folder = Path('/content/drive/MyDrive/Songs')
for file in folder.rglob('*.mid'):
  songs.append(file)
  print(file)
  print(len(songs))

/content/drive/MyDrive/Songs/1.mid
1
/content/drive/MyDrive/Songs/10.mid
2
/content/drive/MyDrive/Songs/11.mid
3
/content/drive/MyDrive/Songs/12.mid
4
/content/drive/MyDrive/Songs/13.mid
5
/content/drive/MyDrive/Songs/14.mid
6
/content/drive/MyDrive/Songs/15.mid
7
/content/drive/MyDrive/Songs/16.mid
8
/content/drive/MyDrive/Songs/17.mid
9
/content/drive/MyDrive/Songs/18.mid
10
/content/drive/MyDrive/Songs/19.mid
11
/content/drive/MyDrive/Songs/2.mid
12
/content/drive/MyDrive/Songs/20.mid
13
/content/drive/MyDrive/Songs/3.mid
14
/content/drive/MyDrive/Songs/4.mid
15
/content/drive/MyDrive/Songs/5.mid
16
/content/drive/MyDrive/Songs/6.mid
17
/content/drive/MyDrive/Songs/7.mid
18
/content/drive/MyDrive/Songs/8.mid
19
/content/drive/MyDrive/Songs/9.mid
20
/content/drive/MyDrive/Songs/Cymatics - Eternity MIDI 1 - C Maj.mid
21
/content/drive/MyDrive/Songs/Cymatics - Eternity MIDI 10 - F Min.mid
22
/content/drive/MyDrive/Songs/Cymatics - Eternity MIDI 11 - A Maj.mid
23
/content/drive/MyDrive/

In [14]:
import random
# Get a subset of 1000 songs
result =  random.sample([x for x in songs], 90)

In [15]:
from music21 import converter, instrument, note, chord
notes = []
for i,file in enumerate(result):
    print(f'{i+1}: {file}')
    try:
      midi = converter.parse(file)
      notes_to_parse = None
      parts = instrument.partitionByInstrument(midi)
      if parts: # file has instrument parts
          notes_to_parse = parts.parts[0].recurse()
      else: # file has notes in a flat structure
          notes_to_parse = midi.flat.notes
      for element in notes_to_parse:
          if isinstance(element, note.Note):
              notes.append(str(element.pitch))
          elif isinstance(element, chord.Chord):
              notes.append('.'.join(str(n) for n in element.normalOrder))
    except:
      print(f'FAILED: {i+1}: {file}')

1: /content/drive/MyDrive/Songs/Cymatics - Lofi MIDI 3 - C Min.mid
2: /content/drive/MyDrive/Songs/Piano Chords MIDI.mid
3: /content/drive/MyDrive/Songs/5.mid
4: /content/drive/MyDrive/Songs/Cymatics - Eternity MIDI 16 - A Maj.mid
5: /content/drive/MyDrive/Songs/Cymatics - Lofi MIDI 19 - G Maj.mid
6: /content/drive/MyDrive/Songs/8.mid
7: /content/drive/MyDrive/Songs/Rhodes MIDI (6).mid
8: /content/drive/MyDrive/Songs/Rhodes MIDI (3).mid
9: /content/drive/MyDrive/Songs/Cymatics - Eternity MIDI 20 - A Min.mid
10: /content/drive/MyDrive/Songs/13.mid
11: /content/drive/MyDrive/Songs/Cymatics - Eternity MIDI 9 - F Min.mid
12: /content/drive/MyDrive/Songs/Cymatics - Eternity MIDI 22 - B Min.mid
13: /content/drive/MyDrive/Songs/Cymatics - Lofi MIDI 6 - D Min.mid
14: /content/drive/MyDrive/Songs/Rhodes MIDI (2).mid
15: /content/drive/MyDrive/Songs/Piano 1 MIDI.mid
16: /content/drive/MyDrive/Songs/15.mid
17: /content/drive/MyDrive/Songs/Cymatics - Eternity MIDI 11 - A Maj.mid
18: /content/drive

In [31]:
import pickle
import numpy as np
# from keras.utils import np_utils # Remove this line as np_utils is no longer available
# from keras.utils import to_categorical # Use to_categorical for one-hot encoding if needed
from tensorflow.keras.utils import to_categorical  # Correct import for to_categorical

# Save the notes to a file
with open('notes', 'wb') as filepath:
  pickle.dump(notes, filepath)

In [47]:
def prepare_sequences(notes, n_vocab):
    """ Prepare the sequences used by the Neural Network """
    sequence_length = 32

    # Get all unique pitchnames
    pitchnames = sorted(set(item for item in notes))
    numPitches = len(pitchnames)

    # Create a dictionary to map pitches to integers
    note_to_int = dict((note, number) for number, note in enumerate(pitchnames))

    network_input = []
    network_output = []

    # Create input sequences and the corresponding outputs
    for i in range(0, len(notes) - sequence_length, 1):
        sequence_in = notes[i:i + sequence_length]
        sequence_out = notes[i + sequence_length]
        network_input.append([note_to_int[char] for char in sequence_in])
        network_output.append(note_to_int[sequence_out])

    n_patterns = len(network_input)

    # Reshape the input into a format compatible with LSTM layers
    network_input = np.reshape(network_input, (n_patterns, sequence_length, 1))
    # Normalize input
    network_input = network_input / float(n_vocab)

    # One-hot encode the output
    network_output = to_categorical(network_output, num_classes=n_vocab)

    return network_input, network_output

# Example usage
# Replace 'notes' with your actual list of notes
n_vocab = len(set(notes))
network_input, network_output = prepare_sequences(notes, n_vocab)

In [48]:
import numpy as np
import pandas as pd
from tensorflow.keras.utils import to_categorical

def oversample(network_input, network_output, sequence_length=15):
    n_patterns = len(network_input)

    # Flatten network_input from (n_patterns, sequence_length, 1) to (n_patterns, sequence_length)
    network_input_flattened = network_input.reshape(n_patterns, sequence_length)

    # Create a DataFrame from the two matrices
    new_df = pd.concat([pd.DataFrame(network_input_flattened), pd.DataFrame(network_output)], axis=1)

    # Rename the columns to numbers and Notes
    new_df.columns = [x for x in range(sequence_length)] + ['Notes']

    print(new_df.tail(20))
    print('###################################################')
    print(f'Distribution of notes in the pre-oversampled DataFrame: {new_df["Notes"].value_counts()}')

    # Oversampling
    oversampled_df = new_df.copy()
    max_class_size = 700
    print('Size of biggest class: ', max_class_size)

    class_subsets = [oversampled_df.query('Notes == ' + str(i)) for i in range(len(new_df["Notes"].unique()))]

    for i in range(len(new_df['Notes'].unique())):
        try:
            class_subsets[i] = class_subsets[i].sample(max_class_size, random_state=42, replace=True)
        except:
            print(i)

    oversampled_df = pd.concat(class_subsets, axis=0).sample(frac=1.0, random_state=42).reset_index(drop=True)

    print('###################################################')
    print(f'Distribution of notes in the oversampled DataFrame: {oversampled_df["Notes"].value_counts()}')

    # Get a sample from the oversampled DataFrame
    sampled_df = oversampled_df.sample(n_patterns, replace=True)

    print('###################################################')
    print(f'Distribution of notes in the oversampled post-sampled DataFrame: {sampled_df["Notes"].value_counts()}')

    # Convert the training columns back to a 3D array
    network_in = sampled_df[[x for x in range(sequence_length)]]
    network_in = np.array(network_in)
    network_in = np.reshape(network_in, (n_patterns, sequence_length, 1))
    network_in = network_in / len(set(new_df['Notes']))  # Normalization
    print(network_in.shape)

    # Converts the target column into a OneHot encoded matrix
    network_out = pd.get_dummies(sampled_df['Notes'])
    print(network_out.shape)

    return network_in, network_out

# Example variables
# Make sure these are defined correctly in your actual environment
networkInput = np.random.randint(0, 10, size=(100, 15, 1))  # Example input
networkOutput = np.random.randint(0, 10, size=(100, 1))     # Example output
seqLength = 15

# Call the oversample function
networkInputShaped, networkOutputShaped = oversample(networkInput, networkOutput, sequence_length=seqLength)
networkOutputShaped = to_categorical(networkOutputShaped)
print(networkInputShaped.shape)
print(networkOutputShaped.shape)
print(networkOutputShaped)


    0  1  2  3  4  5  6  7  8  9  10  11  12  13  14  Notes
80  9  0  2  0  3  1  2  4  2  6   9   3   9   3   7      7
81  4  4  9  1  1  0  3  9  6  7   0   3   4   3   9      9
82  5  5  3  3  8  7  8  6  8  6   5   7   4   3   0      5
83  8  2  2  8  0  0  2  8  9  2   5   0   4   6   5      1
84  6  4  4  9  3  2  2  3  8  7   1   3   2   8   5      6
85  3  6  7  7  6  9  7  8  3  2   6   9   9   9   1      3
86  3  9  3  2  5  2  7  9  2  5   4   8   9   1   0      2
87  6  5  2  2  7  8  3  1  8  2   6   6   6   5   3      1
88  0  7  1  5  7  5  8  8  5  4   5   2   7   1   5      3
89  4  7  2  1  6  2  2  3  4  3   8   4   2   7   4      3
90  4  7  1  3  4  7  7  9  7  6   0   3   1   6   3      0
91  3  1  0  2  1  5  4  0  4  7   6   3   8   8   5      1
92  0  6  5  3  1  5  9  7  7  8   5   6   2   9   7      4
93  0  9  6  2  5  2  3  0  3  6   3   1   6   5   8      2
94  6  6  9  4  5  8  1  1  3  7   3   9   6   0   5      3
95  1  2  2  5  6  4  7  9  9  8   9   0

Now that we are done processing our songs, we now move on to training our model. But first, to recap what we have done so far, we have:

Collected our MIDI files
Loaded the MIDI files into memory
Transformed the MIDI files into a list of sequenced notes/chords
Transformed the list into a (n, m, 1) matrix and (n, 1) vector (n = 99968, m = 32)
For our model, we will be using an LSTM network to predict the 33rd note/chord taking into account our previous 32 notes/chords. We will be using LSTMs because of it’s feedback connections. They are very useful when dealing with sequenced data.

LSTMs are a type of recurrent neural network, but are different from other networks. Other networks repeat the module each time the entry receives new information. However, the LSTM will remember the problem longer and has a string-like structure to repeat the module.

LSTM are basically units as depicted:


Image taken from https://en.wikipedia.org/wiki/Long_short-term_memory
An LSTM unit is composed of a cell, an input gate, an output gate and a forget gate. Let’s take a look at what this means, and why LSTMs are good for sequential data.

The job of the forget gate is to decide whether to keep or forget the information. Only the information that comes from previously hidden layers and the current input is kept with the sigmoid function. Any value closer to one will remain, and any value closer to zero will disappear.

The input gate helps to update the status of the cells. The current input and previous state information is passed through the sigmoid function, which will update the value by multiplying it by 0 and 1. Similarly, to regulate the network, the data also goes through the tanh function. Now, the output of the sigmoid is multiplied by the output of tanh. The output of the sigmoid will identify valuable information to avoid the output of tanh.

The output gate determines the value of the next hidden state. To find the hidden state information, we need to multiply the sigmoid output by the tanh output. Now the new hidden state and the new cell state will travel to the next step.

When training an LSTM network it is requieres to use a GPU. In my case, I used Google Colab Pro when training the neural network. Google Colab has a set limit of compute units we can use when training with GPUs. You can use the free GPU for a couple of dozen of epochs.

In [49]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Activation
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint
import tensorflow as tf

In [50]:
# pitchnames = sorted(set(notes))  # Assuming notes is defined
# numPitches = len(pitchnames)

In [56]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Activation
from tensorflow.keras.utils import to_categorical

# Define the sequence length and number of features
sequence_length = 32  # Update as needed
num_features = networkInputShaped.shape[2]  # Ensure this matches your input features

# Define numPitches based on your data
pitchnames = sorted(set(notes))  # Replace notes with your actual notes list
numPitches = len(pitchnames)  # Ensure this matches the number of classes

# Ensure networkOutputShaped is one-hot encoded correctly
networkOutputShaped = to_categorical(networkOutput, num_classes=numPitches)

# Define the model
model = Sequential()
model.add(Dropout(0.2))
model.add(LSTM(512, input_shape=(sequence_length, num_features), return_sequences=True))
model.add(Dense(256))
model.add(Dense(256))
model.add(LSTM(512, return_sequences=True))
model.add(Dense(256))
model.add(LSTM(512))
model.add(Dense(numPitches))  # Match this to the number of classes
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

# Print model summary to verify the shape and parameters
print(model.summary())

# Train the model
history = model.fit(networkInputShaped, networkOutputShaped, epochs=num_epochs, batch_size=64, callbacks=callbacks_list)


None
Epoch 1/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 718ms/step - accuracy: 0.0150 - loss: 5.4846 
Epoch 1: loss improved from inf to 5.47057, saving model to weights-improvement-01-5.4706-bigger_1.keras
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 1s/step - accuracy: 0.0200 - loss: 5.4799  
Epoch 2/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 677ms/step - accuracy: 0.1147 - loss: 4.5954
Epoch 2: loss improved from 5.47057 to 4.23961, saving model to weights-improvement-02-4.2396-bigger_1.keras
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 966ms/step - accuracy: 0.1165 - loss: 4.4768
Epoch 3/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.1609 - loss: 3.4789
Epoch 3: loss improved from 4.23961 to 3.39187, saving model to weights-improvement-03-3.3919-bigger_1.keras
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1s/step - accuracy: 0.1573 - loss: 3.4499

What do we do when we’re done training our network? We choose a random number from 0 to the length of the network input, this will be the index of the row in the training matrix which we will use to make our predictions. We take this sequence of 32 notes/chords as starting point to make a prediction of 1 note. After this, we do this (n — 1) more times (n being 500 in this case). In every prediction we move a window of 32 notes/chords one element to the right. In other words, in the second prediction, once we have predicted one note/chord, we eliminate the first note, and our first prediction becomes the last note/chord in the sequence of length 32. The following images show the previously explained code

In [59]:
!pip install pretty_midi

Collecting pretty_midi
  Downloading pretty_midi-0.2.10.tar.gz (5.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.6/5.6 MB[0m [31m46.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting mido>=1.1.16 (from pretty_midi)
  Downloading mido-1.3.2-py3-none-any.whl.metadata (6.4 kB)
Collecting packaging~=23.1 (from mido>=1.1.16->pretty_midi)
  Downloading packaging-23.2-py3-none-any.whl.metadata (3.2 kB)
Downloading mido-1.3.2-py3-none-any.whl (54 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.6/54.6 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading packaging-23.2-py3-none-any.whl (53 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m53.0/53.0 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: pretty_midi
  Building wheel for pretty_midi (setup.py) ... [?25l[?25hdone
  Created wheel for pretty_midi: filename=pretty_midi-0.2.

In [61]:
import pretty_midi
import os

def load_notes_from_midi(directory):
    all_notes = []
    for filename in os.listdir(directory):
        if filename.endswith('.mid'):
            midi_path = os.path.join(directory, filename)
            midi_data = pretty_midi.PrettyMIDI(midi_path)
            for instrument in midi_data.instruments:
                for note in instrument.notes:
                    all_notes.append(note.pitch)  # Use note.name if you prefer note names
    return all_notes

# Load all notes from the /Songs directory
directory = '/content/drive/MyDrive/Songs'  # Replace with the actual path to your Songs directory
allNotes = load_notes_from_midi(directory)



In [64]:
def generate_notes(model, network_input, pitchnames, n_vocab):
    """ Generate notes from the neural network based on a sequence of notes """
    # pick a random sequence from the input as a starting point for the prediction
    # Selects a random row from the network_input
    start = numpy.random.randint(0, len(network_input)-1)
    print(f'start: {start}')
    int_to_note = dict((number, note) for number, note in enumerate(pitchnames))

    # Random row from network_input
    pattern = network_input[start]
    prediction_output = []

    # generate 500 notes
    for note_index in range(500):
        # Reshapes pattern into a vector
        prediction_input = numpy.reshape(pattern, (1, len(pattern), 1))
        # Standarizes pattern
        prediction_input = prediction_input / float(n_vocab)

        # Predicts the next note
        prediction = model.predict(prediction_input, verbose=0)

        # Outputs a OneHot encoded vector, so this picks the columns
        # with the highest probability
        index = numpy.argmax(prediction)
        # Maps the note to its respective index
        result = int_to_note[index]
        # Appends the note to the prediction_output
        prediction_output.append(result)

        # Adds the predicted note to the pattern
        pattern = numpy.append(pattern,index)
        # Slices the array so that it contains the predicted note
        # eliminating the first from the array, so the model can
        # have a sequence
        pattern = pattern[1:len(pattern)]

    return prediction_output

n_vocab = len(set(allNotes))
pitchnames = sorted(set(item for item in allNotes))
prediction_output = generate_notes(model, networkInputShaped, pitchnames, n_vocab)

start: 33


With that, we’ve generated our output sequence of 500 notes, that looks like this

How do we turn this array of notes/chords back into a MIDI? That’s where music21 comes back into play! This library not only lets us transform MIDI into an array, it lets us transform an array back into a MIDI!

In [63]:
def create_midi(prediction_output):
    offset = 0
    output_notes = []
#
    # create note and chord objects based on the values generated by the model
    for pattern in prediction_output:
        # pattern is a chord
        if ('.' in pattern) or pattern.isdigit():
            notes_in_chord = pattern.split('.')
            notes = []
            for current_note in notes_in_chord:
                new_note = note.Note(int(current_note))
                new_note.storedInstrument = instrument.Piano()
                notes.append(new_note)
            new_chord = chord.Chord(notes)
            new_chord.offset = offset
            output_notes.append(new_chord)
        # pattern is a note
        else:
            new_note = note.Note(pattern)
            new_note.offset = offset
            new_note.storedInstrument = instrument.Piano()
            output_notes.append(new_note)

        # increase offset each iteration so that notes do not stack
        offset += 0.5
    midi_stream = stream.Stream(output_notes)
    midi_stream.write('midi', fp='output.mid')

And there we have our prediction output as a MIDI! Now you just have to import it to your DAW and see what it sounds like! I recommend we should fine tune the model and play around with it to see what we can produce. I added some ambient noises and drums to one of the predictions, here are the results:

In [72]:
from music21 import note, chord, stream, instrument

def create_midi(prediction_output, output_file='/content/drive/MyDrive/Songs/output'):
    offset = 0
    output_notes = []

    # Create note and chord objects based on the values generated by the model
    for pattern in prediction_output:
        pattern_str = str(pattern)  # Convert pattern to string

        # pattern is a chord
        if ('.' in pattern_str) or pattern_str.isdigit():
            notes_in_chord = pattern_str.split('.')
            notes = []
            for current_note in notes_in_chord:
                new_note = note.Note(int(current_note))
                new_note.storedInstrument = instrument.Piano()
                notes.append(new_note)
            new_chord = chord.Chord(notes)
            new_chord.offset = offset
            output_notes.append(new_chord)
        # pattern is a note
        else:
            new_note = note.Note(int(pattern))
            new_note.offset = offset
            new_note.storedInstrument = instrument.Piano()
            output_notes.append(new_note)

        # Increase offset each iteration so that notes do not stack
        offset += 0.5

    midi_stream = stream.Stream(output_notes)
    midi_stream.write('midi', fp=output_file)

# Define the path where you want to save the MIDI file
output_file_path = '/content/drive/MyDrive/Songs/output/output.mid'

# Call the function to create and save the MIDI file
create_midi(prediction_output, output_file_path)


In [66]:
!pwd

/content
