In [53]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
import pretty_midi
import glob
from tqdm import tqdm
import matplotlib.pyplot as plt

In [17]:
# Step 1: Load and Parse MIDI Files
def parse_midi(file_path):
    """
    Parse a MIDI file and extract note pitches.
    Each note is represented as (start_time, end_time, pitch).
    """
    midi_data = pretty_midi.PrettyMIDI(file_path)
    notes = []
    for instrument in midi_data.instruments:
        for note in instrument.notes:
            notes.append((note.start, note.end, note.pitch))
    return notes

def load_multiple_midi_files(folder_path):
    """
    Load and parse multiple MIDI files from a folder.
    """
    all_notes = []
    for file_path in tqdm(glob.glob(f"{folder_path}/*.midi"), desc="Processing MIDI files"):
        try:
            notes = parse_midi(file_path)
            all_notes.extend(notes)
        except Exception as e:
            print(f"Error processing {file_path}: {e}")
    return all_notes

In [47]:
# Step 2: Preprocess Data
def preprocess_notes(notes, sequence_length=50):
    pitches = [note[2] for note in notes]
    unique_pitches = sorted(set(pitches))
    pitch_to_idx = {pitch: i for i, pitch in enumerate(unique_pitches)}
    idx_to_pitch = {i: pitch for pitch, i in pitch_to_idx.items()}
    
    encoded = [pitch_to_idx[p] for p in pitches]
    sequences = []
    targets = []
    for i in range(len(encoded) - sequence_length):
        sequences.append(encoded[i:i+sequence_length])
        targets.append(encoded[i+sequence_length])
    
    return np.array(sequences), np.array(targets), pitch_to_idx, idx_to_pitch

In [49]:
# Step 3: Define the Vanilla LSTM Model
def create_model(vocab_size, embed_size, hidden_size):
    model = Sequential()
    model.add(Embedding(input_dim=vocab_size, output_dim=embed_size, input_length=50))
    model.add(LSTM(hidden_size))  # Single LSTM layer (Vanilla LSTM)
    model.add(Dense(vocab_size, activation='softmax'))
    return model

In [9]:
# Step 4: Train the Model
def train_model(model, sequences, targets, batch_size=64, epochs=10):
    model.compile(loss='sparse_categorical_crossentropy', optimizer=Adam(learning_rate=0.001), metrics=['accuracy'])
    history = model.fit(sequences, targets, batch_size=batch_size, epochs=epochs, shuffle=True)
    return history

In [23]:
# Step 5: Generate Music
def generate_sequence(model, start_sequence, idx_to_pitch, seq_length=100):
    generated = list(start_sequence)
    input_seq = np.array(start_sequence).reshape(1, -1)
    for _ in range(seq_length):
        output = model.predict(input_seq, verbose=0)
        predicted = np.argmax(output, axis=1)
        generated.append(predicted[0])
        input_seq = np.append(input_seq[:, 1:], [[predicted[0]]], axis=1)
    return [idx_to_pitch[idx] for idx in generated]

In [13]:
# Step 6: Save MIDI File
def save_to_midi(sequence, output_file):
    midi = pretty_midi.PrettyMIDI()
    instrument = pretty_midi.Instrument(program=0)
    start_time = 0
    for pitch in sequence:
        note = pretty_midi.Note(velocity=100, pitch=pitch, start=start_time, end=start_time + 0.5)
        instrument.notes.append(note)
        start_time += 0.5
    midi.instruments.append(instrument)
    midi.write(output_file)

In [25]:
if __name__ == "__main__":
    folder_path = 'C:/Users/HP/Downloads/train-20241205T124602Z-001/train/2011'  # Folder path containing MIDI files
    print("Loading MIDI files...")
    notes = load_multiple_midi_files(folder_path)  # Parse all files
    
    print("Preprocessing notes...")
    sequences, targets, pitch_to_idx, idx_to_pitch = preprocess_notes(notes)
    
    print(f"Total sequences: {len(sequences)}")
    
    vocab_size = len(pitch_to_idx)
    embed_size = 64
    hidden_size = 128
    num_layers = 2
    
    print("Creating model...")
    model = create_model(vocab_size, embed_size, hidden_size, num_layers)
    
    print("Starting training...")
    train_model(model, sequences, targets, batch_size=64, epochs=10)
    print("Training completed.")
    
    # Save the trained model (Optional)
    model.save('trained_music_lstm.h5')
    
    # Generate and Save MIDI
    start_sequence = sequences[0]  # You can choose a different starting sequence
    generated_sequence = generate_sequence(model, start_sequence, idx_to_pitch, seq_length=100)
    save_to_midi(generated_sequence, 'generated_music.midi')
    print("Generated MIDI saved as 'generated_music.midi'.")

Loading MIDI files...


Processing MIDI files: 100%|██████████| 163/163 [00:21<00:00,  7.44it/s]


Preprocessing notes...
Total sequences: 612014
Creating model...
Starting training...
Epoch 1/10
[1m9563/9563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m481s[0m 50ms/step - accuracy: 0.0807 - loss: 3.5518
Epoch 2/10
[1m9563/9563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m404s[0m 42ms/step - accuracy: 0.1537 - loss: 3.1250
Epoch 3/10
[1m9563/9563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m401s[0m 42ms/step - accuracy: 0.1996 - loss: 2.9676
Epoch 4/10
[1m9563/9563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m405s[0m 42ms/step - accuracy: 0.2266 - loss: 2.8655
Epoch 5/10
[1m9563/9563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m409s[0m 43ms/step - accuracy: 0.2460 - loss: 2.7899
Epoch 6/10
[1m9563/9563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m416s[0m 43ms/step - accuracy: 0.2622 - loss: 2.7290
Epoch 7/10
[1m9563/9563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m424s[0m 44ms/step - accuracy: 0.2720 - loss: 2.6881
Epoch 8/10
[1m9563/9563[0m [32m━━━━━



Training completed.
Generated MIDI saved as 'generated_music.midi'.


In [35]:
start_sequence = sequences[0]  # You can choose a different starting sequence
generated_sequence = generate_sequence(model, start_sequence, idx_to_pitch, seq_length=100)
save_to_midi(generated_sequence, 'generated_music.midi')
print("Generated MIDI saved as 'generated_music.midi'.")

Generated MIDI saved as 'generated_music.midi'.


In [37]:
start_sequence = sequences[5]  # You can choose a different starting sequence
generated_sequence = generate_sequence(model, start_sequence, idx_to_pitch, seq_length=100)
save_to_midi(generated_sequence, 'generated_music.midi')
print("Generated MIDI saved as 'generated_music.midi'.")

Generated MIDI saved as 'generated_music.midi'.


In [39]:
start_sequence = sequences[10]  # You can choose a different starting sequence
generated_sequence = generate_sequence(model, start_sequence, idx_to_pitch, seq_length=100)
save_to_midi(generated_sequence, 'generated_music.midi')
print("Generated MIDI saved as 'generated_music.midi'.")

Generated MIDI saved as 'generated_music.midi'.


In [41]:
start_sequence = sequences[15]  # You can choose a different starting sequence
generated_sequence = generate_sequence(model, start_sequence, idx_to_pitch, seq_length=100)
save_to_midi(generated_sequence, 'generated_music.midi')
print("Generated MIDI saved as 'generated_music.midi'.")

Generated MIDI saved as 'generated_music.midi'.


In [43]:
start_sequence = sequences[20]  # You can choose a different starting sequence
generated_sequence = generate_sequence(model, start_sequence, idx_to_pitch, seq_length=100)
save_to_midi(generated_sequence, 'generated_music.midi')
print("Generated MIDI saved as 'generated_music.midi'.")

Generated MIDI saved as 'generated_music.midi'.


In [45]:
model.summary()