<a href="https://colab.research.google.com/github/Pasindu-Madhuwantha/special_carnival/blob/main/MIDI_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# MIDI File Analysis


In [None]:
# Mount Google Drive at /content/drive; later cells read and write files
# under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os
from music21 import converter, note, chord
import json

def analyze_midi_files_in_folder(folder_path):
    """Analyze every MIDI file found under ``folder_path``.

    The folder is walked recursively. Files are matched case-insensitively on
    the ``.mid`` / ``.midi`` extensions and visited in sorted order, so the
    position of a result in the returned list is reproducible between runs.

    Args:
        folder_path: Root directory to search.

    Returns:
        A list holding one ``analyze_midi_file`` result per MIDI file, in
        traversal order. Empty when no MIDI file is found.
    """
    midi_extensions = (".mid", ".midi")
    analyzed_results = []

    for root, dirs, files in os.walk(folder_path):
        # os.walk yields entries in arbitrary, filesystem-dependent order;
        # sorting the directory list in place makes the descent deterministic.
        dirs.sort()
        for file in sorted(files):
            if not file.lower().endswith(midi_extensions):
                continue
            midi_file_path = os.path.join(root, file)
            print(f"Analyzing MIDI file: {midi_file_path}")
            analyzed_results.append(analyze_midi_file(midi_file_path))

    return analyzed_results

def analyze_midi_file(midi_file):
    """Extract notes, chords, tempo, key, mode and time signature from a MIDI file.

    Args:
        midi_file: Path of the MIDI file, handed to ``music21.converter.parse``.

    Returns:
        A dict with the keys:
            "notes": pitch strings (e.g. "F#4") of every Note in every part.
            "chords": one string per Chord, its normal-order pitch classes
                joined with ".".
            "tempo": quarter-note BPM of the first MetronomeMark found, or
                None when no part contains one.
            "key" / "mode": tonic name and mode reported by music21's key
                analysis for the first part that yields them, else None.
            "time_signature": ratio string (e.g. "4/4") of the first time
                signature of the first part that yields one, else None.
    """
    midi = converter.parse(midi_file)

    notes = []
    chords = []
    tempo = None
    key = None
    mode = None
    time_signature = None

    for part in midi.parts:
        # Flatten once per part and reuse the result; `.flat` is deprecated in
        # favour of `.flatten()`.
        flat_part = part.flatten()

        for element in flat_part.notesAndRests:
            if isinstance(element, note.Note):
                notes.append(str(element.pitch))
            elif isinstance(element, chord.Chord):
                chord_notes = '.'.join(str(n) for n in element.normalOrder)
                chords.append(chord_notes)

        if tempo is None:
            # getElementsByClass only inspects a stream's direct children, so
            # the mark is searched for on the flattened part; marks nested
            # inside measures would otherwise never be found.
            first_mark = next(iter(flat_part.getElementsByClass('MetronomeMark')), None)
            if first_mark is not None:
                tempo = first_mark.getQuarterBPM()

        if key is None:
            try:
                # Run the key analysis once and read both attributes from it.
                detected_key = part.analyze("key")
                key = detected_key.tonic.name
                mode = detected_key.mode
            except AttributeError:
                # Same guard as before: a result without tonic/mode is skipped
                # and the next part is tried.
                pass

        if time_signature is None:
            try:
                time_signature = part.getTimeSignatures()[0].ratioString
            except IndexError:
                pass

    return {
        "notes": notes,
        "chords": chords,
        "tempo": tempo,
        "key": key,
        "mode": mode,
        "time_signature": time_signature,
        # You can add more features here
    }

def main():
    """Analyze the MIDI test folder, print each result and dump them all to JSON.

    Results are written to ``analyzed_results.json`` in the current working
    directory.
    """
    folder_path = "/content/drive/MyDrive/midi_test"
    analyzed_results = analyze_midi_files_in_folder(folder_path)

    # (label, result key) pairs, in the order they are printed for each file.
    report_fields = (
        ("Notes:", "notes"),
        ("Chords:", "chords"),
        ("Tempo:", "tempo"),
        ("Key:", "key"),
        ("Mode:", "mode"),
        ("Time Signature:", "time_signature"),
    )

    for file_number, result in enumerate(analyzed_results, start=1):
        print(f"Analysis for MIDI file {file_number}:")
        for label, field in report_fields:
            print(label, result[field])
        print()

    # Persist the results; indent=4 keeps the file human-readable.
    with open('analyzed_results.json', 'w') as results_file:
        json.dump(analyzed_results, results_file, indent=4)

    print("Analysis complete!")


if __name__ == '__main__':
    main()





Analyzing MIDI file: /content/drive/MyDrive/midi_test/a/a-day-to-remember/downfall-of-us-all/pre-chorus-and-chorus_symbol_key.mid


  return self.iter().getElementsByClass(classFilterList)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Notes: ['G4', 'F#4', 'E4', 'F#4', 'E4', 'D4', 'E4', 'E4', 'E4', 'B3', 'G4', 'F#4', 'E4', 'F#4', 'E4', 'D4', 'E4', 'E4', 'E4', 'G4', 'F#4', 'F#4', 'E4', 'E4', 'D4', 'D4', 'D4', 'D4', 'D4', 'D4', 'D4', 'D4', 'D4', 'D4', 'D4', 'D4', 'D4', 'D4', 'D4', 'D4', 'D4', 'D4', 'D4', 'B4', 'A4', 'A4', 'A4', 'B4', 'G4', 'F#4', 'F#4', 'F#4', 'E4', 'D4', 'G4', 'G4', 'G4', 'G4', 'F#4', 'F#4', 'F#4', 'E4', 'E4', 'E4', 'E4', 'B4', 'A4', 'A4', 'A4', 'A4', 'A4', 'A4', 'A4', 'A4', 'A4', 'B4', 'A4', 'A4', 'A4', 'A4', 'A4', 'A4', 'A4', 'A4', 'A4', 'A4', 'B4']
Chords: []
Tempo: None
Key: D
Mode: major
Time Signature: 4/4

Analysis for MIDI file 641:
Notes: []
Chords: []
Tempo: None
Key: None
Mode: None
Time Signature: None

Analysis for MIDI file 642:
Notes: []
Chords: []
Tempo: None
Key: None
Mode: None
Time Signature: None

Analysis for MIDI file 643:
Notes: ['G4', 'C5', 'E-5', 'D5', 'D5', 'C5', 'E-5', 'C5', 'D5', 'C5', 'G#4', 'B-4', 'G4', 'G4'

# Data Preprocessing

In [None]:
import numpy as np
import json

# Load the per-file analysis results written by the analysis cell
# (read from the current working directory).
with open('analyzed_results.json', 'r') as f:
    analyzed_results = json.load(f)

# Symbol -> integer id lookup tables; they start empty and are filled by
# update_mappings below.
note_to_int = {}
chord_to_int = {}

# Helper function to update mappings
def update_mappings(data, mapping_dict):
    """Assign an integer id to every symbol in ``data`` that has none yet.

    Args:
        data: Iterable of iterables of hashable symbols (one inner iterable
            per song).
        mapping_dict: Symbol -> id dict, updated in place. A new symbol gets
            the dict's size at the moment it is first seen; existing entries
            are left untouched.

    Returns:
        None.
    """
    for symbols in data:
        for symbol in symbols:
            # setdefault only inserts when the symbol is missing, and the id
            # (current dict size) is computed before the insertion happens.
            mapping_dict.setdefault(symbol, len(mapping_dict))

# Build the lookup tables from every song's notes and chords; ids are handed
# out in first-seen order.
update_mappings((entry["notes"] for entry in analyzed_results), note_to_int)
update_mappings((entry["chords"] for entry in analyzed_results), chord_to_int)

# Maximum tempo value intended for normalization.
# NOTE(review): nothing in this cell reads it while the tempo handling in
# preprocess_features stays commented out.
max_tempo_value = 240

# Print every analysis result in full (tempo is deliberately left out).
for i, result in enumerate(analyzed_results):
    print(f"Analysis for MIDI file {i + 1}:")
    print("Notes:", result["notes"])
    print("Chords:", result["chords"])
    #print("Tempo:", result["tempo"])
    print("Key:", result["key"])
    print("Mode:", result["mode"])
    print("Time Signature:", result["time_signature"])
    print()

# Rest of your preprocess_features function
def preprocess_features(extracted_features, note_to_int, chord_to_int,
                        sequence_length=100, verbose=True):
    """Turn per-song analysis results into fixed-length next-note samples.

    Every song's notes are encoded with ``note_to_int`` and cut into sliding
    windows: ``sequence_length`` consecutive notes form the input and the note
    that follows them is the output. Songs with ``sequence_length`` notes or
    fewer contribute no samples. Tempo is not part of the samples.

    Only ``extracted_features`` is read; the function does not depend on any
    module-level variable.

    Args:
        extracted_features: Iterable of dicts with the keys "notes", "chords",
            "key", "mode" and "time_signature".
        note_to_int: Note string -> integer id mapping.
        chord_to_int: Chord string -> integer id mapping. The encoded chords
            are only shown in the verbose report; they are not stored in the
            samples.
        sequence_length: Number of notes per input window (default 100).
        verbose: When True (default), print the encoded notes/chords and the
            key, mode and time signature of every song.

    Returns:
        A list of dicts with the keys "input_sequence" (list of int),
        "output_sequence" (int), "key", "mode" and "time_signature".

    Raises:
        KeyError: If a note or chord is missing from its mapping.
    """
    preprocessed_data = []

    for song in extracted_features:
        key = song["key"]
        mode = song["mode"]
        time_signature = song["time_signature"]

        # Convert notes and chords to integers.
        notes_int = [note_to_int[note] for note in song["notes"]]
        chords_int = [chord_to_int[chord] for chord in song["chords"]]

        if verbose:
            print("Notes:", notes_int)
            print("Chords:", chords_int)
            print("Key:", key)
            print("Mode:", mode)
            print("Time Signature:", time_signature)
            print()

        # Create input-output pairs: each window of `sequence_length` notes
        # is paired with the single note that follows it.
        for i in range(len(notes_int) - sequence_length):
            preprocessed_data.append({
                "input_sequence": notes_int[i:i + sequence_length],
                "output_sequence": notes_int[i + sequence_length],
                "key": key,
                "mode": mode,
                "time_signature": time_signature,
            })

    return preprocessed_data

# Call preprocess_features with analyzed_results, note_to_int and chord_to_int
# (max_tempo_value is not passed).
preprocessed_data = preprocess_features(analyzed_results, note_to_int, chord_to_int)

# Save the preprocessed_data to a JSON file in the current working directory.
# NOTE(review): the training cell loads
# /content/drive/MyDrive/preprocessed_data.json instead - confirm how this
# file is meant to get there.
with open('preprocessed_data.json', 'w') as f:
    json.dump(preprocessed_data, f, indent=4)  # Indent for better readability

# The preprocessed data is now available both in memory and on disk.


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Time Signature: None

Notes: [15, 20, 22, 20, 22, 22, 34, 34, 34, 15, 20, 20, 15, 22, 21, 22, 25, 22, 22, 22, 22, 24, 25, 15, 15, 20, 22, 24, 15, 15, 20, 22, 21, 34, 34, 34, 15, 20, 20, 15, 21, 21, 20, 21, 22, 24, 24, 22, 22, 22, 24, 24, 25, 25, 11, 10, 14, 14, 9, 13, 20, 28, 10, 12, 14, 28, 10, 28, 18, 10]
Chords: []
Key: F#
Mode: major
Time Signature: 4/4

Notes: [12, 12, 4, 4, 12, 13, 3, 3, 15, 12, 4, 4, 12, 14, 12, 12, 14, 14, 3, 13, 15, 15, 3, 3, 12, 4, 4, 12, 13, 3, 3, 15, 12, 4, 4, 12, 14, 12, 12, 14, 14, 3, 13, 15, 15, 3, 3, 28, 10, 28, 10]
Chords: []
Key: C#
Mode: minor
Time Signature: 4/4

Notes: []
Chords: []
Key: None
Mode: None
Time Signature: None

Notes: [9, 14, 9, 14, 14, 15]
Chords: []
Key: C#
Mode: major
Time Signature: 4/4

Notes: [10, 4, 10, 19, 19, 10, 19, 4, 2, 9, 2, 9, 18, 19, 18, 11, 11, 18, 11, 18, 16, 19, 18, 19]
Chords: []
Key: C
Mode: minor
Time Signature: 4/4

Notes: [29, 16, 10, 14, 10, 10, 1

# Model Adaptation

In [None]:
import numpy as np
import json
import pickle
import os
from keras.models import Sequential, load_model, Model
from keras.layers import Input, Dense, Dropout, LSTM, Activation, BatchNormalization, Bidirectional
from keras.utils import to_categorical
from keras.callbacks import ModelCheckpoint, EarlyStopping, LearningRateScheduler
from tensorflow.keras.layers import LeakyReLU
from keras.regularizers import l2

def train_network():
    """Run the whole training pipeline on the module-level ``preprocessed_data``.

    Steps: collect the training symbols, derive the vocabularies, build the
    network input/output arrays, create the model and train it.
    """
    training_notes, training_chords = get_notes_and_chords()
    note_vocab, chord_vocab = extract_unique_notes_and_chords(preprocessed_data)

    # The model and the one-hot targets are both sized to the note and chord
    # vocabularies combined.
    vocab_size = len(note_vocab) + len(chord_vocab)

    model_input, model_target = prepare_sequences(
        training_notes, training_chords, note_vocab, chord_vocab
    )
    model = create_network(model_input, vocab_size)
    train(model, model_input, model_target, vocab_size)

def get_notes_and_chords():
    """Flatten the first half of the module-level ``preprocessed_data``.

    Each selected entry contributes its "input_sequence" followed by its
    "output_sequence" to one flat note list; an entry's "chords", when the
    key is present, are appended to the chord list.

    Returns:
        A ``(notes, chords)`` tuple of lists.
    """
    # Only the first half of the entries is used.
    half_count = len(preprocessed_data) // 2

    notes, chords = [], []
    for sample in preprocessed_data[:half_count]:
        notes += sample["input_sequence"]
        notes.append(sample["output_sequence"])
        chords.extend(sample.get("chords", ()))

    return notes, chords


def extract_unique_notes_and_chords(preprocessed_data):
    """Collect the distinct note and chord symbols used by the samples.

    Args:
        preprocessed_data: Iterable of dicts with "input_sequence" (iterable
            of symbols), "output_sequence" (a single symbol) and, optionally,
            "chords" (iterable of symbols).

    Returns:
        A ``(unique_notes, unique_chords)`` tuple of sorted lists.
    """
    note_pool, chord_pool = set(), set()

    for sample in preprocessed_data:
        # The target symbol is wrapped in a list so that one update call can
        # absorb both the window and its target.
        note_pool.update(sample["input_sequence"], [sample["output_sequence"]])
        chord_pool.update(sample.get("chords", ()))

    return sorted(note_pool), sorted(chord_pool)

def prepare_sequences(notes, chords, unique_notes, unique_chords, sequence_length=100):
    """ Prepare the sequences used by the Neural Network

    Slides a window of ``sequence_length`` symbols over ``notes``; every
    window becomes one input pattern and the symbol right after it becomes
    the pattern's target.

    Args:
        notes: Flat list of note symbols to window.
        chords: Accepted for interface compatibility; not read.
        unique_notes: Note vocabulary; a symbol's id is its position in it.
        unique_chords: Chord vocabulary; only its size is used, to widen the
            normalisation constant and the one-hot targets.
        sequence_length: Window length (default 100). Must be at least 1.

    Returns:
        ``(network_input, network_output)`` where ``network_input`` has shape
        ``(n_patterns, sequence_length, 1)`` with ids divided by the combined
        vocabulary size, and ``network_output`` is the ``to_categorical``
        encoding of the target ids over the combined vocabulary.

    Raises:
        ValueError: If ``sequence_length`` is smaller than 1.
        KeyError: If a windowed symbol is missing from ``unique_notes``.
    """
    if sequence_length < 1:
        raise ValueError("sequence_length must be at least 1")

    n_vocab = len(unique_notes) + len(unique_chords)
    note_to_int = {symbol: number for number, symbol in enumerate(unique_notes)}

    n_patterns = max(len(notes) - sequence_length, 0)

    # Encode every symbol once instead of once per window it appears in.
    # Nothing is encoded when there are no windows, so unknown symbols in a
    # too-short input still do not raise.
    encoded = [note_to_int[symbol] for symbol in notes] if n_patterns else []

    network_input = [encoded[i:i + sequence_length] for i in range(n_patterns)]
    # The target of window i is the symbol at i + sequence_length.
    network_output = encoded[sequence_length:]

    network_input = np.reshape(network_input, (n_patterns, sequence_length, 1))
    network_input = network_input / float(n_vocab)

    network_output = to_categorical(network_output, num_classes=n_vocab)

    return network_input, network_output

def create_network(network_input, n_vocab):
    """Build and compile the stacked bidirectional-LSTM classifier.

    Args:
        network_input: Array whose ``shape[1]`` and ``shape[2]`` give the
            model's input shape (timesteps, features).
        n_vocab: Number of units in the softmax output layer.

    Returns:
        The model, compiled with categorical cross-entropy and RMSprop.
    """
    timesteps, features = network_input.shape[1], network_input.shape[2]
    inputs = Input(shape=(timesteps, features))

    # Four sequence-returning BiLSTM layers, then one BiLSTM without
    # return_sequences that feeds the dense head.
    x = inputs
    for _ in range(4):
        x = Bidirectional(LSTM(512, recurrent_dropout=0.3, return_sequences=True))(x)
    x = Bidirectional(LSTM(512))(x)

    # Dense head: dropout -> L2-regularised dense -> LeakyReLU -> dropout.
    x = Dropout(0.2)(x)
    x = Dense(512, kernel_regularizer=l2(0.01))(x)
    x = LeakyReLU(alpha=0.1)(x)
    x = Dropout(0.2)(x)
    outputs = Dense(n_vocab, activation='softmax')(x)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    return model

def train(model, network_input, network_output, n_vocab):
    """Fit ``model``, resuming from a fixed checkpoint file when it exists.

    Args:
        model: Model to train; it is recompiled here before fitting.
        network_input: Training inputs passed to ``model.fit``.
        network_output: Training targets passed to ``model.fit``.
        n_vocab: Not read by this function.
    """
    # Where new checkpoints are written, and the checkpoint to resume from.
    save_filepath = "/content/drive/MyDrive/weights-improvement-{epoch:02d}-{loss:.4f}-bigger.hdf5"
    load_filepath = "/content/drive/MyDrive/weights-improvement-01-1.8387-bigger.hdf5"

    def learning_rate_scheduler(epoch, lr):
        # Keep the rate unchanged for the first 10 epochs, then shrink it by
        # 5% on every following epoch.
        return lr if epoch < 10 else lr * 0.95

    # Checkpoint on improved training loss, stop after 10 epochs without
    # improvement, and apply the learning-rate schedule.
    callbacks_list = [
        ModelCheckpoint(save_filepath, monitor='loss', verbose=0, save_best_only=True, mode='min'),
        EarlyStopping(monitor='loss', patience=10, restore_best_weights=True),
        LearningRateScheduler(learning_rate_scheduler),
    ]

    # Resume from the existing checkpoint if there is one; the epoch to
    # continue from is parsed out of its filename.
    last_epoch = 0
    if os.path.exists(load_filepath):
        last_epoch = get_last_epoch(load_filepath)
        print("Loaded weights from an existing checkpoint at epoch:", last_epoch)
        model.load_weights(load_filepath)

    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

    # Continue training up to epoch 10, starting at `last_epoch`.
    model.fit(
        network_input,
        network_output,
        initial_epoch=last_epoch,
        epochs=10,
        batch_size=128,
        callbacks=callbacks_list,
    )

def get_last_epoch(filepath):
    """Return the epoch number encoded in a checkpoint filename.

    The basename is split on "-" and its third field is read as the epoch,
    e.g. ``weights-improvement-01-1.8387-bigger.hdf5`` gives ``1``.

    Args:
        filepath: Path of the checkpoint file; only its basename is parsed.

    Returns:
        The epoch as an int.
    """
    epoch_field = os.path.basename(filepath).split("-")[2]
    # Zero-padding to two characters before conversion mirrors the "{epoch:02d}"
    # pattern used when the checkpoint was written.
    return int(epoch_field.zfill(2))

if __name__ == '__main__':
    # Load the preprocessed samples from Google Drive into the module-level
    # `preprocessed_data` that train_network and get_notes_and_chords read.
    # NOTE(review): the preprocessing cell saves 'preprocessed_data.json' to
    # the working directory, not to this Drive path - confirm the file here
    # is the intended, up-to-date one.
    with open('/content/drive/MyDrive/preprocessed_data.json', 'r') as f:
        preprocessed_data = json.load(f)

    train_network()




Loaded weights from an existing checkpoint at epoch: 1
Epoch 2/10

  saving_api.save_model(


Epoch 3/10
  75/1628 [>.............................] - ETA: 1:16:43 - loss: 0.6662

# Prediction

In [None]:
import numpy as np
from keras.models import load_model
from music21 import converter, stream, note, chord, instrument
from keras.utils import to_categorical

# Load the trained model from its checkpoint file
model = load_model("/content/drive/MyDrive/weights-improvement-10-0.2486-bigger.hdf5")

# Step 1: Extract Notes and Chords from User-Input MIDI
user_input_midi_file = "/content/drive/MyDrive/midi/n/naruto/sadness-and-sorrow/instrumental_symbol_key.mid"  # Replace with the user's input MIDI file
midi_stream = converter.parse(user_input_midi_file)

# Extract notes and chords from the MIDI stream
notes = []  # Store extracted notes and chords here

# `.flat` is deprecated in music21; `.flatten()` is its replacement.
for element in midi_stream.flatten():
    if isinstance(element, note.Note):
        notes.append(str(element.pitch))
    elif isinstance(element, chord.Chord):
        # Chords are stored as their normal-order pitch classes joined by '.'
        notes.append('.'.join(str(n) for n in element.normalOrder))

# Step 2: Generate Lo-fi Music based on Extracted Notes
def generate_lofi_music(model, input_notes, sequence_length=100, generated_length=1000):
    """Generate a note/chord sequence by repeatedly predicting the next symbol.

    A window of ``sequence_length`` consecutive symbols taken from
    ``input_notes`` seeds the generation. At every step the window is encoded,
    fed to ``model.predict`` and the arg-max class is decoded back to a
    symbol, which is recorded and pushed into the window (the oldest symbol
    is dropped).

    NOTE(review): the symbol <-> id mapping is rebuilt from ``input_notes``
    alone, so it only matches the model's classes if that vocabulary agrees
    with the one used for training - confirm.

    Args:
        model: Object exposing ``predict(batch, verbose=0)`` that returns
            class scores for a batch of shape ``(1, sequence_length, 1)``.
        input_notes: Sequence of note/chord strings to seed from. Not modified.
        sequence_length: Length of the model's input window.
        generated_length: Maximum number of symbols to generate.

    Returns:
        The list of generated symbols, in generation order, excluding the
        seed. It holds ``generated_length`` items unless the model predicts a
        class outside the seed vocabulary, in which case generation stops
        early.

    Raises:
        ValueError: If ``input_notes`` has fewer than ``sequence_length`` items.
    """
    if len(input_notes) < sequence_length:
        raise ValueError(
            f"input_notes has {len(input_notes)} symbols but a seed window of "
            f"{sequence_length} is required"
        )

    # Map unique notes and chords to integers; the sorted list doubles as the
    # reverse (integer -> symbol) lookup.
    unique_notes = sorted(set(input_notes))
    n_vocab = len(unique_notes)
    note_to_int = {symbol: number for number, symbol in enumerate(unique_notes)}

    # Pick a random seed window. Inputs too short to offer a choice of start
    # positions fall back to the window at index 0.
    max_start = len(input_notes) - sequence_length - 1
    start_index = np.random.randint(0, max_start) if max_start > 0 else 0
    window = list(input_notes[start_index:start_index + sequence_length])

    generated_notes = []
    for _ in range(generated_length):
        encoded = np.array([note_to_int[symbol] for symbol in window]) / float(n_vocab)
        encoded = np.reshape(encoded, (1, sequence_length, 1))

        # Predict the next symbol.
        prediction = model.predict(encoded, verbose=0)
        predicted_index = int(np.argmax(prediction))

        if predicted_index >= n_vocab:
            # The window does not change when a prediction is rejected, so the
            # same input would be predicted again on every remaining step;
            # stop instead of repeating it.
            break

        next_note = unique_notes[predicted_index]
        generated_notes.append(next_note)

        # Slide the window: drop the oldest symbol, add the new one.
        window = window[1:] + [next_note]

    return generated_notes

# Generate lo-fi music based on the extracted notes
generated_lofi_notes = generate_lofi_music(model, notes, sequence_length=100, generated_length=1000)

# Step 3: Save Generated Lo-fi Music as MIDI
generated_lofi_stream = stream.Stream()

for element in generated_lofi_notes:
    # Chords were encoded as '.'-joined pitch-class integers. A chord with a
    # single pitch class is a bare digit string without any '.', so it must be
    # recognised here as well instead of being parsed as a note name.
    if '.' in element or element.isdigit():
        notes_in_chord = element.split('.')
        chord_notes = []
        for current_note in notes_in_chord:
            new_note = note.Note(int(current_note))
            new_note.storedInstrument = instrument.Piano()
            chord_notes.append(new_note)
        new_chord = chord.Chord(chord_notes)
        generated_lofi_stream.append(new_chord)
    else:
        # Anything else is a pitch name such as "F#4".
        new_note = note.Note(element)
        new_note.storedInstrument = instrument.Piano()
        generated_lofi_stream.append(new_note)

generated_lofi_midi_filename = "/content/drive/MyDrive/generated_lofi_music.mid"
generated_lofi_stream.write("midi", fp=generated_lofi_midi_filename)

print(f"Generated lo-fi music saved as {generated_lofi_midi_filename}")




Generated lo-fi music saved as /content/drive/MyDrive/generated_lofi_music.mid


In [None]:
from difflib import SequenceMatcher

# Read the raw bytes of the source MIDI file and of the generated one
with open("/content/drive/MyDrive/midi/n/naruto/sadness-and-sorrow/instrumental_symbol_key.mid", 'rb') as f:
    content1 = f.read()

with open("/content/drive/MyDrive/generated_lofi_music.mid", 'rb') as f2:
    content2 = f2.read()

# Compare the two byte strings with SequenceMatcher.
# NOTE(review): this measures similarity of the files' bytes, not of the
# notes they encode - confirm that is the intended metric.
m = SequenceMatcher(None, content1, content2)
print(m.ratio())  # Similarity ratio between 0.0 and 1.0


0.060836501901140684
