In [2]:
# Importing Libraries
import tensorflow
import numpy as np
import pandas as pd
from collections import Counter
import random
import IPython
from IPython.display import Image, Audio
import music21
from music21 import *
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adamax
import seaborn as sns
import matplotlib.patches as mpatches
%matplotlib inline
import sys
import warnings
from hmmlearn import hmm  # Import the HMM library
warnings.filterwarnings("ignore")
warnings.simplefilter("ignore")
np.random.seed(42)
import os

filepath = "PR_Dataset/chopin/"

# Getting midi files
# os.listdir() yields entries in arbitrary order, so the listing is sorted:
# the order of the parsed scores determines the order of the token corpus
# built from them, and therefore every training window derived from it.
all_midis = []
for midi_name in sorted(os.listdir(filepath)):
    if midi_name.endswith(".mid"):
        # os.path.join copes with filepath being written with or without
        # a trailing separator.
        midi = converter.parse(os.path.join(filepath, midi_name))
        all_midis.append(midi)

def extract_notes(file):
    """Flatten parsed scores into a single list of note/chord tokens.

    Args:
        file: Iterable of parsed scores (despite the singular name, the
            caller passes the whole list of parsed MIDI files).

    Returns:
        list[str]: One token per Note or Chord, in traversal order. A Note
        becomes ``str(element.pitch)``; a Chord becomes the integers of its
        ``normalOrder`` joined with ".". Other elements are ignored.
    """
    notes = []
    for score in file:
        partitioned = instrument.partitionByInstrument(score)
        # NOTE(review): partitionByInstrument may return None when it finds
        # no instrument to split on (seen in older music21 releases -
        # confirm for the installed version). Fall back to walking the
        # unpartitioned score instead of failing on ``None.parts``.
        containers = [score] if partitioned is None else partitioned.parts
        for container in containers:
            for element in container.recurse():
                if isinstance(element, note.Note):
                    notes.append(str(element.pitch))
                elif isinstance(element, chord.Chord):
                    notes.append(".".join(str(n) for n in element.normalOrder))
    return notes

def chords_n_notes(Snippet):
    """Turn a sequence of string tokens into a stream of notes and chords.

    A token that contains "." or consists only of digits is read as a chord:
    each "."-separated piece is converted with ``int`` and wrapped in a
    ``note.Note``, and the pieces are grouped into one ``chord.Chord``.
    Any other token is passed to ``note.Note`` as-is.

    Elements are placed at offsets 0, 1, 2, ... in input order so that they
    do not stack on top of each other.

    Args:
        Snippet: Iterable of string tokens.

    Returns:
        A ``stream.Stream`` built from the created elements.
    """
    Melody = []
    for position, token in enumerate(Snippet):
        if '.' in token or token.isdigit():
            # Chord token: build one Note per integer member
            members = [note.Note(int(piece)) for piece in token.split('.')]
            element = chord.Chord(members)
        else:
            # Single-note token
            element = note.Note(token)
        element.offset = position
        Melody.append(element)
    return stream.Stream(Melody)

# Getting the list of notes as Corpus
Corpus = extract_notes(all_midis)
print("Total notes in all the Chopin midis in the dataset:", len(Corpus))

# Removing rare chords
# Tokens seen fewer than MIN_TOKEN_COUNT times are dropped from the corpus.
MIN_TOKEN_COUNT = 100
count_num = Counter(Corpus)
rare_note = [key for key, value in count_num.items() if value < MIN_TOKEN_COUNT]
# Filter on the counter itself: a dict lookup per token instead of scanning
# the rare_note list once for every token in the corpus.
Corpus = [element for element in Corpus if count_num[element] >= MIN_TOKEN_COUNT]
print("Length of Corpus after elimination of rare notes:", len(Corpus))

# Building dictionary to access the vocabulary from indices and vice versa
symb = sorted(set(Corpus))
L_corpus = len(Corpus)
L_symb = len(symb)
mapping = {c: i for i, c in enumerate(symb)}
reverse_mapping = {i: c for i, c in enumerate(symb)}

print("Total number of characters:", L_corpus)
print("Number of unique characters:", L_symb)

# Prepare data for LSTM model
# Slide a window of `length` tokens over the corpus: each window (as vocabulary
# indices) is one input sequence, and the token right after it is the target.
length = 40
window_starts = range(L_corpus - length)
features = [
    [mapping[symbol] for symbol in Corpus[start:start + length]]
    for start in window_starts
]
targets = [mapping[Corpus[start + length]] for start in window_starts]

L_datapoints = len(targets)
print("Total number of sequences in the Corpus:", L_datapoints)

# Reshape X to (sequences, length, 1) and scale indices by the vocabulary size
X = np.reshape(features, (L_datapoints, length, 1))
X = X / float(L_symb)
# One hot encode the output variable
y = tensorflow.keras.utils.to_categorical(targets)
# 80/20 split; the held-out inputs are kept under the name X_seed
X_train, X_seed, y_train, y_seed = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Initializing the LSTM Model
# Two stacked LSTM layers followed by a dense layer and a softmax over the
# one-hot target width.
model = Sequential([
    LSTM(512, input_shape=X.shape[1:], return_sequences=True),
    Dropout(0.1),
    LSTM(256),
    Dense(256),
    Dropout(0.1),
    Dense(y.shape[1], activation='softmax'),
])
opt = Adamax(learning_rate=0.01)
model.compile(loss='categorical_crossentropy', optimizer=opt)
model.summary()
history = model.fit(
    X_train,
    y_train,
    batch_size=256,
    epochs=100,
)

# Integrating Gaussian HMM
def train_gaussian_hmm(notes, n_components=5, n_iter=1000, random_state=None):
    """Fit a Gaussian HMM on the vocabulary indices of a token sequence.

    Args:
        notes: Iterable of tokens; every token must be a key of the
            module-level ``mapping``.
        n_components: Number of hidden states passed to ``hmm.GaussianHMM``
            (default 5, the value that used to be hard-coded).
        n_iter: Iteration limit passed to ``hmm.GaussianHMM`` (default 1000,
            as before).
        random_state: Optional seed passed to ``hmm.GaussianHMM`` so a fit
            can be repeated. ``None`` (default) keeps the previous unseeded
            behaviour. NOTE(review): the seed is stored on the model, so it
            presumably also influences later ``sample()`` calls - confirm
            against the hmmlearn docs before relying on it.

    Returns:
        The fitted ``hmm.GaussianHMM`` instance.

    Raises:
        ValueError: If ``notes`` is empty.
        KeyError: If a token is not present in ``mapping``.
    """
    # Convert notes to numerical values. The loop variable is deliberately
    # not called ``note`` so it cannot be confused with the music21 module.
    indices = [mapping[symbol] for symbol in notes]
    if not indices:
        raise ValueError("Cannot train an HMM on an empty note sequence.")
    # Single-feature observations: one row per token
    note_indices = np.array(indices).reshape(-1, 1)

    # Train Gaussian HMM
    model_hmm = hmm.GaussianHMM(
        n_components=n_components,
        covariance_type="diag",
        n_iter=n_iter,
        random_state=random_state,
    )
    model_hmm.fit(note_indices)
    return model_hmm

# Fit the HMM on the filtered token corpus
hmm_model = train_gaussian_hmm(notes=Corpus)





Total notes in all the Chopin midis in the dataset: 57894
Length of Corpus after elimination of rare notes: 53110
Total number of characters: 53110
Number of unique characters: 127
Total number of sequences in the Corpus: 53070


Epoch 1/100
[1m166/166[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m435s[0m 3s/step - loss: 4.7525
Epoch 2/100
[1m166/166[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m529s[0m 3s/step - loss: 4.6168
Epoch 3/100
[1m166/166[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m520s[0m 3s/step - loss: 4.6074
Epoch 4/100
[1m166/166[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m564s[0m 3s/step - loss: 4.6084
Epoch 5/100
[1m166/166[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m529s[0m 3s/step - loss: 4.6085
Epoch 6/100
[1m166/166[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m518s[0m 3s/step - loss: 4.6036
Epoch 7/100
[1m166/166[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m563s[0m 3s/step - loss: 4.5954
Epoch 8/100
[1m166/166[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m516s[0m 3s/step - loss: 4.5292
Epoch 9/100
[1m166/166[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m423s[0m 2s/step - loss: 4.5082
Epoch 10/100
[1m166/166[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4

In [3]:
def Malody_Generator(Note_Count, save_as_midi=False, midi_filename='generated_melody.mid'):
    """Generate a melody of ``Note_Count`` tokens and optionally save it as MIDI.

    The LSTM ``model`` is rolled forward greedily from a randomly chosen
    window of ``X_seed``, and a sequence of the same length is sampled from
    ``hmm_model``. The tokens that are returned come from the HMM sample.

    NOTE(review): ``Notes_Generated`` (the LSTM roll-out) is computed but
    never used when building the result, so the output is currently the HMM
    sample alone. Earlier comments spoke of "combining" both sequences, but
    no combination takes place. Decide how the two should be blended, or
    drop the roll-out; the returned value is left as it was.

    Args:
        Note_Count: Number of tokens to generate.
        save_as_midi: When True, write the melody to ``midi_filename``.
        midi_filename: Target file name; ".mid" is appended if missing.

    Returns:
        tuple: ``(Music, Melody)`` where ``Music`` is the list of string
        tokens and ``Melody`` is the object returned by
        ``chords_n_notes(Music)``.
    """
    # randint's upper bound is exclusive, so len(X_seed) makes every held-out
    # window eligible. The former "len(X_seed) - 1" never picked the last
    # window and raised when only one window was available.
    seed = X_seed[np.random.randint(0, len(X_seed))]
    Notes_Generated = []

    # Sequence generated by the LSTM model (see NOTE(review) above)
    for _ in range(Note_Count):
        seed = seed.reshape(1, length, 1)
        prediction = model.predict(seed, verbose=0)[0]
        # Greedy pick. The previous log/exp re-normalisation used a
        # temperature of 1.0, which leaves the argmax unchanged.
        index = np.argmax(prediction)
        Notes_Generated.append(index)
        # Slide the window: append the scaled prediction, drop the oldest step
        index_N = index / float(L_symb)
        seed = np.insert(seed[0], len(seed[0]), index_N)[1:]

    # Sample the HMM and round its continuous observations to integer indices
    hmm_sequence = hmm_model.sample(Note_Count)[0]
    hmm_sequence = np.round(hmm_sequence).astype(int).flatten()

    # Ensure indices are within valid range (out-of-range values wrap around)
    hmm_sequence = np.mod(hmm_sequence, L_symb)

    # Translate the HMM indices back into tokens
    Music = [reverse_mapping[char] for char in hmm_sequence[:Note_Count]]

    # Create a music21 Stream object with notes and chords
    Melody = chords_n_notes(Music)

    # Save as MIDI file if specified
    if save_as_midi:
        if not midi_filename.endswith('.mid'):
            midi_filename = midi_filename + '.mid'
        Melody.write('midi', fp=midi_filename)
        print(f"Generated melody saved as {midi_filename}")

    return Music, Melody


In [4]:
# Generate a 100-token melody and write it to a MIDI file
generated_music, generated_midi = Malody_Generator(
    Note_Count=100,
    save_as_midi=True,
    midi_filename='my_generated_melody.mid',
)


Generated melody saved as my_generated_melody.mid


In [5]:
def pitch_class_histogram(midi_stream):
    """Return the relative frequency of each of the 12 pitch classes.

    Notes contribute their own pitch class; chords contribute one count per
    member pitch.

    Args:
        midi_stream: Object whose ``.flat.notes`` yields the notes and chords
            to count.

    Returns:
        numpy.ndarray: Length-12 array indexed by pitch class. The values sum
        to 1 when at least one pitch was counted; otherwise all entries are
        0 (instead of the NaNs a division by zero would produce).
    """
    histogram = np.zeros(12)
    for el in midi_stream.flat.notes:
        if isinstance(el, note.Note):
            histogram[el.pitch.pitchClass] += 1
        elif isinstance(el, chord.Chord):
            for p in el.pitches:
                histogram[p.pitchClass] += 1
    total = histogram.sum()
    if total == 0:
        # Nothing was counted: return the zero histogram as-is
        return histogram
    return histogram / total

def note_range(midi_stream):
    """Return the span between the highest and lowest MIDI pitch in the stream.

    Both single notes and every member of each chord are considered.

    Args:
        midi_stream: Object whose ``.flat.notes`` yields notes and chords.

    Returns:
        The difference ``max - min`` of the collected MIDI numbers, or 0 when
        no pitch was found.
    """
    midi_numbers = []
    for element in midi_stream.flat.notes:
        if isinstance(element, note.Note):
            midi_numbers.append(element.pitch.midi)
        elif isinstance(element, chord.Chord):
            midi_numbers += [member.midi for member in element.pitches]

    if not midi_numbers:
        return 0
    return max(midi_numbers) - min(midi_numbers)

def pitch_variety(midi_stream):
    """Count the distinct MIDI pitches used in the stream.

    Single notes and all chord members are pooled before counting.

    Args:
        midi_stream: Object whose ``.flat.notes`` yields notes and chords.

    Returns:
        int: Number of different MIDI numbers encountered.
    """
    distinct = set()
    for element in midi_stream.flat.notes:
        if isinstance(element, note.Note):
            distinct |= {element.pitch.midi}
        elif isinstance(element, chord.Chord):
            distinct |= {member.midi for member in element.pitches}
    return len(distinct)

def rhythmic_variety(midi_stream):
    """Count the distinct ``quarterLength`` values among the stream's notes.

    Args:
        midi_stream: Object whose ``.flat.notes`` yields elements that expose
            a ``quarterLength`` attribute.

    Returns:
        int: Number of different durations encountered.
    """
    return len({element.quarterLength for element in midi_stream.flat.notes})

def melodic_intervals(midi_stream):
    """List the signed semitone steps between consecutive single notes.

    Only ``note.Note`` elements take part; chords and anything else are
    skipped, so an interval may bridge over a chord.

    Args:
        midi_stream: Object whose ``.flat.notes`` yields notes and chords.

    Returns:
        list: ``later.midi - earlier.midi`` for each adjacent pair of notes,
        in order. Empty when fewer than two notes are present.
    """
    melody_pitches = [
        element.pitch.midi
        for element in midi_stream.flat.notes
        if isinstance(element, note.Note)
    ]
    return [
        later - earlier
        for earlier, later in zip(melody_pitches, melody_pitches[1:])
    ]


In [8]:
# Pitch-class distribution of the generated melody. The metric helpers read
# `.flat.notes`, so they are given the stream (generated_midi), not the list
# of string tokens (generated_music).
generated_pitch_histogram = pitch_class_histogram(generated_midi)
print("Pitch Class Histogram:", generated_pitch_histogram)

Pitch Class Histogram: [0.07878788 0.09090909 0.06060606 0.12121212 0.06060606 0.0969697
 0.09090909 0.05454545 0.07272727 0.07272727 0.12727273 0.07272727]


In [11]:
# Report the remaining melody metrics for the generated stream
for metric_label, metric_fn in (
    ("Note Range:", note_range),
    ("Pitch Variety:", pitch_variety),
    ("Rhythmic Variety:", rhythmic_variety),
    ("Melodic Intervals:", melodic_intervals),
):
    print(metric_label, metric_fn(generated_midi))

Note Range: 53
Pitch Variety: 35
Rhythmic Variety: 1
