In [2]:
import os
import re
import torch
import wandb
import numpy as np
import torch.nn as nn
from torch.optim import lr_scheduler
from music21 import converter, instrument, note, chord
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split

In [7]:
import os
import librosa
import vamp
import pretty_midi

def extract_melody_to_midi(source_folder, target_folder):
    for subdir, dirs, files in os.walk(source_folder):
        for file in files:
            filepath = os.path.join(subdir, file)
            if filepath.endswith(".mp3") or filepath.endswith(".wav"):
                print(f"Processing file: {filepath}")
                
                # Load audio file
                y, sr = librosa.load(filepath, sr=None)
                
                # Extract melody using Melodia
                params = {"voicing": 0.2}
                melody, timestamps = vamp.collect(y, sr, "mtg-melodia:melodia", parameters=params)
                pitch = melody['vector'][0]
                
                # Create a PrettyMIDI object
                midi = pretty_midi.PrettyMIDI()
                instrument = pretty_midi.Instrument(program=pretty_midi.instrument_name_to_program('Acoustic Grand Piano'))
                
                # Convert extracted pitch to notes
                notes = [pretty_midi.Note(velocity=100, pitch=int(p), start=t, end=t+0.1) for p, t in zip(pitch, timestamps) if p > 0]
                instrument.notes.extend(notes)
                midi.instruments.append(instrument)
                
                # Ensure target directory exists
                target_subdir = os.path.join(target_folder, os.path.basename(subdir) + "_midi")
                if not os.path.exists(target_subdir):
                    os.makedirs(target_subdir)
                
                # Save MIDI file
                midi_file_path = os.path.join(target_subdir, file.replace('.mp3', '.mid').replace('.wav', '.mid'))
                midi.write(midi_file_path)
                print(f"MIDI file saved to: {midi_file_path}")

extract_melody_to_midi('DATA', 'DATA_MIDI')

ModuleNotFoundError: No module named 'vamp'

In [2]:
def load_midi_files(midi_folder):
    notes = []
    for file in os.listdir(midi_folder):
        midi = converter.parse(os.path.join(midi_folder, file))
        notes_to_parse = None
        try:
            s2 = instrument.partitionByInstrument(midi)
            notes_to_parse = s2.parts[0].recurse() 
        except:
            notes_to_parse = midi.flat.notes

        for element in notes_to_parse:
            if isinstance(element, note.Note):
                if re.match(r"[A-G](#|-)?\d", str(element.pitch)):
                    notes.append(str(element.pitch))
            elif isinstance(element, chord.Chord):
                # Filter each note in the chord
                chord_notes = '.'.join(str(n) for n in element.normalOrder if re.match(r"[A-G](#|-)?\d", str(n)))
                if chord_notes:
                    notes.append(chord_notes)
    return notes

notes = load_midi_files('indian_classical')
# Extract the unique pitches in the dataset
pitchnames = sorted(set(item for item in notes))
# Create a dictionary to map pitches to integers
note_to_int = {note: num for num, note in enumerate(pitchnames)}

In [None]:
def load_midi_files(midi_folder):
    notes = []
    for file in os.listdir(midi_folder):
        midi = converter.parse(os.path.join(midi_folder, file))
        notes_to_parse = None
        try:
            s2 = instrument.partitionByInstrument(midi)
            notes_to_parse = s2.parts[0].recurse() 
        except:
            notes_to_parse = midi.flat.notes

        for element in notes_to_parse:
            if isinstance(element, note.Note):
                if re.match(r"[A-G](#|-)?\d", str(element.pitch)):
                    notes.append(str(element.pitch))
            elif isinstance(element, chord.Chord):
                # Filter each note in the chord
                chord_notes = '.'.join(str(n) for n in element.normalOrder if re.match(r"[A-G](#|-)?\d", str(n)))
                if chord_notes:
                    notes.append(chord_notes)
    return notes

notes = load_midi_files('indian_classical')
# Extract the unique pitches in the dataset
pitchnames = sorted(set(item for item in notes))
# Create a dictionary to map pitches to integers
note_to_int = {note: num for num, note in enumerate(pitchnames)}

In [3]:
print(len(note_to_int))

54


In [4]:
# Prepare sequences
sequence_length = 100
network_input = []
network_output = []

for i in range(len(notes) - sequence_length):
    sequence_in = notes[i:i + sequence_length]
    sequence_out = notes[i + sequence_length]
    network_input.append([note_to_int[char] for char in sequence_in])
    network_output.append(note_to_int[sequence_out])

network_input = np.reshape(network_input, (len(network_input), sequence_length, 1))
network_input = torch.tensor(network_input / float(len(pitchnames)), dtype=torch.float32)
network_output = torch.tensor(network_output, dtype=torch.long)

# Assuming network_input and network_output are already defined
train_input, val_input, train_output, val_output = train_test_split(network_input, network_output, test_size=0.2, random_state=42)

train_dataset = TensorDataset(train_input, train_output)
val_dataset = TensorDataset(val_input, val_output)

train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True)
validation_dataloader = DataLoader(val_dataset, batch_size=64, shuffle=False)


In [5]:
import torch
import torch.nn as nn

class AdvancedMusicLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(AdvancedMusicLSTM, self).__init__()
        self.lstm1 = nn.LSTM(input_size, hidden_size, batch_first=True, num_layers=2, dropout=0.3)
        self.batch_norm1 = nn.BatchNorm1d(hidden_size)
        self.lstm2 = nn.LSTM(hidden_size, hidden_size, batch_first=True, num_layers=2, dropout=0.3)
        self.batch_norm2 = nn.BatchNorm1d(hidden_size)
        self.fc1 = nn.Linear(hidden_size, hidden_size // 2)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.3)
        self.fc2 = nn.Linear(hidden_size // 2, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        x, _ = self.lstm1(x)
        x = self.batch_norm1(x[:, -1, :])  # Apply batch normalization to the output of the last time step
        x, _ = self.lstm2(x.unsqueeze(1))
        x = self.batch_norm2(x[:, -1, :])
        x = self.fc1(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        x = self.softmax(x)
        return x

model = AdvancedMusicLSTM(1, 512, len(pitchnames))

In [12]:
from torch.optim import lr_scheduler

def train_model_with_checkpoint(model, dataloader, validation_dataloader, epochs):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(device)
    model.to(device)
    criterion = nn.NLLLoss()  # Using NLLLoss which is suitable for LogSoftmax
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=5, factor=0.5, verbose=True)
    best_loss = float('inf')
    
    wandb.init(project="music-generation-3")

    for epoch in range(epochs):
        model.train()
        total_loss = 0
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        avg_train_loss = total_loss / len(dataloader)
        print(f'Epoch {epoch+1}/{epochs}, Loss: {avg_train_loss}')
        wandb.log({"epoch": epoch + 1, "loss": avg_train_loss})

        # Validation phase
        if epoch % 10 == 0:
            model.eval()
            val_loss = 0
            with torch.no_grad():
                for inputs, labels in validation_dataloader:
                    inputs, labels = inputs.to(device), labels.to(device)
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)
                    val_loss += loss.item()
            avg_val_loss = val_loss / len(validation_dataloader)
            print(f'Validation Phase, Loss: {avg_val_loss}')
            wandb.log({"Validation loss" : avg_val_loss})

        # Scheduler step
        scheduler.step(avg_val_loss)

        # Checkpoint model
        if avg_val_loss < best_loss:
            best_loss = avg_val_loss
            torch.save(model.state_dict(), 'three-best_model.pth')
            print(f'Best model saved with validation loss: {best_loss}')

train_model_with_checkpoint(model, train_dataloader, validation_dataloader, 200)

cuda




0,1
epoch,▁
loss,▁

0,1
epoch,1.0
loss,3.01493


Epoch 1/200, Loss: 2.991222266227968
Validation Phase, Loss: 3.2523501457706576
Best model saved with validation loss: 3.2523501457706576
Epoch 2/200, Loss: 2.9472225096917923
Epoch 3/200, Loss: 2.910635079106977
Epoch 4/200, Loss: 2.8864362605156435
Epoch 5/200, Loss: 2.860309354720577
Epoch 6/200, Loss: 2.8316703977123385
Epoch 7/200, Loss: 2.8175263385618887
Epoch 8/200, Loss: 2.758324344311991
Epoch 9/200, Loss: 2.7206716249066014
Epoch 10/200, Loss: 2.7075700356114294
Epoch 11/200, Loss: 2.6989801333796595
Validation Phase, Loss: 3.979385322140109
Epoch 12/200, Loss: 2.670434671063577
Epoch 13/200, Loss: 2.66147312425798
Epoch 14/200, Loss: 2.6148676122388532
Epoch 15/200, Loss: 2.619694244477057
Epoch 16/200, Loss: 2.5680073903452967
Epoch 17/200, Loss: 2.5584173510151524
Epoch 18/200, Loss: 2.5601494293059073
Epoch 19/200, Loss: 2.55109824672822
Epoch 20/200, Loss: 2.5274645109330454
Epoch 21/200, Loss: 2.4983017848383997
Validation Phase, Loss: 2.7504618398604856
Best model sav

In [13]:
import torch
import numpy as np

# Assuming 'AdvancedMusicLSTM' is the class of your model
model = AdvancedMusicLSTM(input_size=1, hidden_size=512, output_size=len(pitchnames))  # Adjust parameters as necessary

# Load the model state
model_path = 'best_model.pth'  # Replace with your actual model path
model.load_state_dict(torch.load(model_path))
model.eval()

def generate_music(model, network_input, pitchnames, note_to_int, num_generate=500):
    """ Generate music given a sequence of notes """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    
    # Pick a random sequence from the input as a starting point for the generation
    start = np.random.randint(0, len(network_input)-1)
    int_to_note = {num: note for note, num in note_to_int.items()}
    pattern = network_input[start].numpy().flatten().tolist()  # Ensure pattern is a list of integers

    prediction_output = []

    # Generate notes
    for note_index in range(num_generate):
        # Convert pattern to tensor and ensure it is in the correct shape
        prediction_input = torch.tensor([pattern], dtype=torch.float32).unsqueeze(-1).to(device)  # Add an extra dimension
        prediction = model(prediction_input)
        _, index = torch.max(prediction, 1)
        
        result = int_to_note[index.item()]
        prediction_output.append(result)
        
        # Update pattern by appending the new index and removing the first element
        pattern.append(index.item())
        pattern = pattern[1:]  # Ensure pattern remains a list of integers

    return prediction_output

# Generate a piece of music
generated_notes = generate_music(model, network_input, pitchnames, note_to_int)

In [14]:
from music21 import stream, note, chord, midi

def create_midi(prediction_output, output_path='output.mid'):
    """ Convert the output from the prediction to MIDI file """
    offset = 0
    output_notes = []

    # Create note and chord objects based on the values generated by the model
    for pattern in prediction_output:
        # Pattern is a chord
        if ('.' in pattern) or pattern.isdigit():
            notes_in_chord = pattern.split('.')
            notes = []
            for current_note in notes_in_chord:
                new_note = note.Note(int(current_note))
                new_note.storedInstrument = instrument.Piano()
                notes.append(new_note)
            new_chord = chord.Chord(notes)
            new_chord.offset = offset
            output_notes.append(new_chord)
        # Pattern is a note
        else:
            new_note = note.Note(pattern)
            new_note.offset = offset
            new_note.storedInstrument = instrument.Piano()
            output_notes.append(new_note)

        # Increase offset each iteration so that notes do not stack
        offset += 0.5

    midi_stream = stream.Stream(output_notes)
    midi_stream.write('midi', fp=output_path)

# Create a MIDI file from the generated notes
create_midi(generated_notes, '2-out.mid')