In [1]:
import torch
import numpy as np
import pandas
import music21
from music21 import *
import os
import IPython
from PIL import Image
from collections import Counter
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import torch.nn as nn
import torch.nn.functional as F
import copy
import torch
import torch.nn as nn
import torch.nn.functional as F

Matplotlib is building the font cache; this may take a moment.


In [2]:
path = 'dataset/mozart'
all_midis = []

for i in os.listdir(path):
    if i.endswith('.mid'):
        tr = os.path.join(path, i)
        try:
            midi = music21.converter.parse(tr)
            all_midis.append(midi)
        except Exception as e:
            print(f"Error loading {tr}: {e}")

print(f"Loaded {len(all_midis)} MIDI files.")




Loaded 21 MIDI files.


In [3]:
def extract_notes(file):
    notes = []
    pick = None
    for f in file:
        song = instrument.partitionByInstrument(f)
        for part in song.parts:
            pick = part.recurse()
            for element in pick:
                if isinstance(element, note.Note):
                    notes.append(str(element.pitch))
                elif isinstance(element, chord.Chord):
                    notes.append('.'.join(str(n) for n in element.normalOrder))
    return notes
notes = extract_notes(all_midis)
print(len(notes))

55802


In [4]:
def show(music):
    music.write('midi', 'img/output.mid')

def chords_n_notes(notes):
    melody = []
    offset = 0
    for n in notes:
        if ('.' in n) or n.isdigit():
            chord_notes = n.split('.')
            note_objs = [note.Note(int(j)) for j in chord_notes]
            chord_snip = chord.Chord(note_objs)
            chord_snip.offset = offset
            melody.append(chord_snip)
        else:
            note_snip = note.Note(n)
            note_snip.offset = offset
            melody.append(note_snip)
        offset += 1

    melody_midi = stream.Stream(melody)
    return melody_midi

# melody_midi = chords_n_notes(notes)
# show(melody_midi)


In [5]:
count_num = Counter(notes)
print("Total unique notes in the Corpus:", len(count_num))
keys = list(count_num.keys())
values = list(count_num.values())

def Average(lst):
    return sum(lst) / len(lst)
print("Average recurrenc for a note in Corpus:", Average(values))
print("Most frequent note in Corpus appeared:", max(values), "times")
print("Least frequent note in Corpus appeared:", min(values), "time")

Total unique notes in the Corpus: 229
Average recurrenc for a note in Corpus: 243.6768558951965
Most frequent note in Corpus appeared: 2355 times
Least frequent note in Corpus appeared: 1 time


In [6]:
#Getting a list of rare chords
rare_note = []
for index, (key, value) in enumerate(count_num.items()):
    if value < 100:
        m =  key
        rare_note.append(m)

print("Total number of notes that occur less than 100 times:", len(rare_note))

Total number of notes that occur less than 100 times: 145


In [7]:
symb = sorted(list(set(notes)))

L_corpus = len(notes)
L_symb = len(symb)

mapping = dict((c, i) for i, c in enumerate(symb))
reverse_mapping = dict((i, c) for i, c in enumerate(symb))

print("Total number of characters:", L_corpus)
print("Number of unique characters:", L_symb)

Total number of characters: 55802
Number of unique characters: 229


In [8]:
length = 50
features = []
targets = []
for i in range(0, L_corpus - length, 1):
    feature = notes[i:i + length]
    target = notes[i + length]
    features.append([mapping[j] for j in feature])
    targets.append(mapping[target])


L_datapoints = len(targets)
print("Total number of sequences in the Corpus:", L_datapoints)

Total number of sequences in the Corpus: 55752


In [9]:
X = (np.reshape(features, (L_datapoints, length, 1)))/ float(L_symb)
y = torch.nn.functional.one_hot(torch.tensor(targets))

In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_test, y_test, test_size=0.5, random_state=42)

In [11]:
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train)
y_train_tensor = torch.argmax(y_train_tensor, dim=1)

X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val)
y_val_tensor = torch.argmax(y_val_tensor, dim=1)

X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test)
y_test_tensor = torch.argmax(y_test_tensor, dim=1)

  y_train_tensor = torch.tensor(y_train)
  y_val_tensor = torch.tensor(y_val)
  y_test_tensor = torch.tensor(y_test)


In [12]:
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size_1=512, hidden_size_2=256, dropout_rate=0.2, output_size=None):
        super(LSTMModel, self).__init__()

        self.lstm1 = nn.LSTM(input_size, hidden_size_1, num_layers=3, batch_first=True, bidirectional=True)
        self.norm1 = nn.LayerNorm(hidden_size_1 * 2)

        self.lstm2 = nn.LSTM(hidden_size_1 * 2, hidden_size_2, num_layers=3, batch_first=True, bidirectional=True)
        self.norm2 = nn.LayerNorm(hidden_size_2 * 2)

        self.lstm3 = nn.LSTM(hidden_size_2 * 2, hidden_size_2, num_layers=3, batch_first=True, bidirectional=True)
        self.norm3 = nn.LayerNorm(hidden_size_2*2)

        self.bn1 = nn.BatchNorm1d(hidden_size_2*2)
        self.fc1 = nn.Linear(hidden_size_2 * 2, hidden_size_2)
        self.mlp = nn.Sequential(
            nn.Linear(hidden_size_2, hidden_size_2),
            nn.GELU(),
            nn.Dropout(dropout_rate),
            nn.Linear(hidden_size_2, hidden_size_2),
            nn.GELU(),
            nn.Dropout(dropout_rate),
            nn.Linear(hidden_size_2, hidden_size_2),
        )
        self.dropout1 = nn.Dropout(dropout_rate)
        self.dropout2 = nn.Dropout(dropout_rate)

        # self.fc2 = nn.Linear(hidden_size_2, output_size)

    def forward(self, x):
        x, _ = self.lstm1(x)
        x = self.norm1(x)

        x, _ = self.lstm2(x)
        x = self.norm2(x)

        x, _ = self.lstm3(x)
        x = self.norm3(x)

        x = x[:, -1, :]
        x = self.bn1(x)
        x = F.gelu(self.fc1(x))
        x = self.dropout2(x)

        x = self.mlp(x)

        return x


In [13]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = LSTMModel(input_size=X_train.shape[2], output_size=y.shape[1]).to(device)
optimizer = torch.optim.Adamax(model.parameters(), lr=0.0001)
criterion = torch.nn.CrossEntropyLoss()

batch_size = 256
train_dataset = torch.utils.data.TensorDataset(X_train_tensor, y_train_tensor)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

val_dataset = torch.utils.data.TensorDataset(X_val_tensor, y_val_tensor)
val_loader = torch.utils.data.DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=True)


epochs = 200
best_val_loss = float('inf')
patience = 5
trigger_times = 0

train_loss_history = []
val_loss_history = []

for epoch in range(1, epochs + 1):
    print(f"Epoch {epoch}/{epochs}")
    model.train()
    epoch_train_loss = 0
    epoch_val_loss = 0
    for batch_X, batch_y in train_loader:
        batch_X = batch_X.to(device)
        batch_y = batch_y.to(device)
        optimizer.zero_grad()
        y_pred = model(batch_X)
        loss = criterion(y_pred, batch_y)
        loss.backward()
        optimizer.step()
        epoch_train_loss += loss.item()

    epoch_train_loss /= len(train_loader)
    train_loss_history.append(epoch_train_loss)

    model.eval()
    with torch.no_grad():
      for batch_X, batch_y in val_loader:
        batch_X = batch_X.to(device)
        batch_y = batch_y.to(device)
        val_pred = model(batch_X)
        val_loss = criterion(val_pred, batch_y)
        epoch_val_loss += val_loss.item()

      val_loss_history.append(epoch_val_loss / len(val_loader))

    if epoch % 10 == 0:
        plt.figure(figsize=(10, 5))
        plt.plot(train_loss_history, label='Train Loss')
        plt.plot(val_loss_history, label='Validation Loss')
        plt.title(f'Epoch {epoch} - Training and Validation Loss')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.legend()
        plt.grid(True)
        plt.show()
        plt.clf()
        plt.close()

Epoch 1/200


KeyboardInterrupt: 

In [None]:
def melody_generator(note_count, model, X_test, reverse_mapping, L_symb, temperature=1.0):
    model.eval()

    with torch.no_grad():
        music = []
        notes_generated = []

        for _ in range(note_count):
            seed = X_test[np.random.randint(0, len(X_test) - 1)]
            seed_tensor = torch.tensor(seed, dtype=torch.float32).unsqueeze(0).to(device)
            prediction = model(seed_tensor)
            prediction = prediction.squeeze(0)

            prediction = torch.log_softmax(prediction / temperature, dim=-1)
            prediction_probs = torch.exp(prediction)
            index = torch.argmax(prediction_probs).item()

            index_N = index / float(L_symb)
            notes_generated.append(index)
            music.append(reverse_mapping[index])

            seed = np.append(seed, [[index_N]], axis=0)
            seed = seed[1:]

        melody = chords_n_notes(music)
        melody_midi = stream.Stream(melody)

    return music, melody_midi

music_notes, melody = melody_generator(50, model, X_test, reverse_mapping, L_symb, temperature=1.0)
melody.write('midi', 'drive/MyDrive/Colab Notebooks/output.mid')
