# Prerun

In [2]:
!pip install mido
!pip install midiutil

Collecting mido
  Downloading mido-1.3.2-py3-none-any.whl (54 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/54.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.6/54.6 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting packaging~=23.1 (from mido)
  Downloading packaging-23.2-py3-none-any.whl (53 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/53.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m53.0/53.0 kB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: packaging, mido
  Attempting uninstall: packaging
    Found existing installation: packaging 24.1
    Uninstalling packaging-24.1:
      Successfully uninstalled packaging-24.1
Successfully installed mido-1.3.2 packaging-23.2
Collecting midiutil
  Downloading MIDIUtil-1.2.1.tar.gz (1.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━

In [6]:
import mido
import pandas as pd
import numpy as np
import os

from sklearn.preprocessing import MinMaxScaler
import joblib

# MIDI extraction

In [None]:
def midi_to_dataframe(file_path, include_note_off=True):
    """Extract the note messages of one MIDI file into a DataFrame.

    Every track is scanned in order and each matching message adds one row.

    Parameters
    ----------
    file_path : str or path-like
        Path of the MIDI file; it is opened with ``mido.MidiFile``.
    include_note_off : bool, default True
        When True (the original behaviour) both ``note_on`` and ``note_off``
        messages are recorded, so a note that is started and released shows
        up twice. When False only onsets are kept: ``note_off`` messages and
        ``note_on`` messages with velocity 0 (the MIDI shorthand for a
        release) are skipped.

    Returns
    -------
    pandas.DataFrame
        Columns ``note`` (the message's note number) and ``file_name`` (base
        name of ``file_path``). Both columns exist even when the file holds
        no note messages, so downstream ``groupby('file_name')`` and CSV
        round-trips keep working.
    """
    columns = ['note', 'file_name']
    mid = mido.MidiFile(file_path)
    file_name = os.path.basename(file_path)

    data = []
    for track in mid.tracks:
        for msg in track:
            if msg.type not in ('note_on', 'note_off'):
                continue
            is_release = msg.type == 'note_off' or msg.velocity == 0
            if is_release and not include_note_off:
                continue
            data.append({'note': msg.note, 'file_name': file_name})

    # Passing columns explicitly keeps the schema stable for an empty `data` list.
    return pd.DataFrame(data, columns=columns)

def process_midi_folder(folder_path):
    """Collect the note events of every MIDI file directly inside a folder.

    Entries are matched case-insensitively on the ``.mid`` / ``.midi``
    extensions and processed in sorted name order, so the row order of the
    result does not depend on the order ``os.listdir`` happens to return.

    Parameters
    ----------
    folder_path : str or path-like
        Directory to scan (not recursive).

    Returns
    -------
    pandas.DataFrame
        The per-file frames from ``midi_to_dataframe`` concatenated with a
        fresh index. When the folder holds no MIDI files an empty frame with
        the columns ``note`` and ``file_name`` is returned.
    """
    midi_names = sorted(
        name for name in os.listdir(folder_path)
        if name.lower().endswith(('.mid', '.midi'))
    )
    frames = [
        midi_to_dataframe(os.path.join(folder_path, name))
        for name in midi_names
    ]

    if not frames:
        # pd.concat raises ValueError when given an empty list.
        return pd.DataFrame(columns=['note', 'file_name'])

    return pd.concat(frames, ignore_index=True)


# Extract the note events of every MIDI file found in the local 'midis' folder.
combined_df = process_midi_folder('midis')

# Cache the extracted events as CSV; the dataset-building cell reloads this file.
combined_df.to_csv('midi.csv', index=False)


# Creating our dataset

In [None]:
def generate_sequences(df, sequence_length):
    """Build sliding-window training rows from a per-file note table.

    For each ``file_name`` group, every run of ``sequence_length`` consecutive
    notes becomes one ``sequence`` and the note that follows it becomes the
    ``prediction``. Windows never span two files, and files with no more than
    ``sequence_length`` notes contribute nothing.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``file_name`` and ``note``.
    sequence_length : int
        Number of notes in each input window.

    Returns
    -------
    pandas.DataFrame
        Columns ``file_name``, ``sequence`` (list of notes) and ``prediction``.
    """
    per_file_notes = (
        (name, frame['note'].tolist())
        for name, frame in df.groupby('file_name')
    )

    # range() is empty when a file has too few notes, so no explicit length
    # guard is needed to skip short files.
    windows = [
        (name, pitches[start:start + sequence_length], pitches[start + sequence_length])
        for name, pitches in per_file_notes
        for start in range(len(pitches) - sequence_length)
    ]

    return pd.DataFrame({
        'file_name': [name for name, _, _ in windows],
        'sequence': [window for _, window, _ in windows],
        'prediction': [target for _, _, target in windows],
    })


# Reload the cached note events so this section can run without re-parsing the MIDI files.
combined_df = pd.read_csv("midi.csv")
# Number of notes the model sees before predicting the next one.
sequence_length = 5
new_df = generate_sequences(combined_df, sequence_length)
# index=False matches the midi.csv write above and avoids a stray
# "Unnamed: 0" column when the dataset is read back.
new_df.to_csv("midi_dataset.csv", index=False)

# Preprocessing

In [4]:
# Load the sliding-window dataset written by the dataset-building section.
# normalize_df parses any 'sequence' cells that come back from the CSV as strings.
dataset = pd.read_csv("midi_dataset.csv")

In [44]:
def normalize_df(df, scaler_filename='scaler.pkl'):
    """Min-max scale the notes of a window dataset and persist the scaler.

    A single ``MinMaxScaler`` is fitted on every note from the ``sequence``
    windows and the ``prediction`` column together, so inputs and targets
    share one scale. The fitted scaler is written to ``scaler_filename`` with
    ``joblib`` for later use by ``normalize_sequence`` /
    ``denormalize_sequence``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide ``sequence`` (a list of notes, or the text repr of one as
        produced by a CSV round-trip) and ``prediction``.
    scaler_filename : str, default 'scaler.pkl'
        Where the fitted scaler is dumped.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` whose ``sequence`` and ``prediction`` columns hold
        the scaled values; ``df`` itself is not modified.
    """
    import ast  # local import: the notebook's import cell does not provide ast

    def _as_list(cell):
        # SECURITY: these strings come from a CSV file, so they are parsed with
        # ast.literal_eval (Python literals only) instead of eval, which would
        # execute arbitrary expressions embedded in the file.
        return ast.literal_eval(cell) if isinstance(cell, str) else cell

    sequences = df['sequence'].apply(_as_list)
    predictions = df['prediction'].tolist()

    # Layout of the combined column: all window notes first, then all targets.
    flattened_sequences = [note for window in sequences for note in window]
    combined_data = np.array(flattened_sequences + predictions).reshape(-1, 1)

    scaler = MinMaxScaler()
    scaler.fit(combined_data)
    normalized = scaler.transform(combined_data).reshape(-1)

    joblib.dump(scaler, scaler_filename)

    # Cut the flat scaled array back into one list per original window.
    normalized_sequences = []
    cursor = 0
    for window in sequences:
        stop = cursor + len(window)
        normalized_sequences.append(normalized[cursor:stop].tolist())
        cursor = stop

    normalized_df = df.copy()
    normalized_df['sequence'] = normalized_sequences
    # Whatever follows the window notes is the scaled prediction column.
    normalized_df['prediction'] = normalized[cursor:cursor + len(predictions)]

    return normalized_df

def denormalize_sequence(normalized_list, scaler_filename='scaler.pkl'):
    """Map scaled values back to integer notes using the saved scaler.

    Parameters
    ----------
    normalized_list : array-like or scalar
        Scaled value(s); reshaped to a single column before inversion.
    scaler_filename : str, default 'scaler.pkl'
        File the scaler is loaded from with ``joblib``.

    Returns
    -------
    list of int
        The inverse-transformed values, rounded to the nearest integer.
    """
    fitted_scaler = joblib.load(scaler_filename)

    as_column = np.array(normalized_list).reshape(-1, 1)
    restored = fitted_scaler.inverse_transform(as_column).reshape(-1)

    # Rounding undoes the float error of the scale/unscale round trip before
    # the values are cast to int.
    return np.round(restored).astype(int).tolist()


def normalize_sequence(sequence, scaler_filename='scaler.pkl'):
    """Scale raw note values with the scaler saved by ``normalize_df``.

    Parameters
    ----------
    sequence : array-like
        Note values; reshaped to a single column before scaling.
    scaler_filename : str, default 'scaler.pkl'
        File the scaler is loaded from with ``joblib``.

    Returns
    -------
    list of float
        The scaled values, in input order.
    """
    fitted_scaler = joblib.load(scaler_filename)

    as_column = np.array(sequence).reshape(-1, 1)
    scaled = fitted_scaler.transform(as_column)

    return scaled.reshape(-1).tolist()

In [14]:
# Scale every note of the dataset; with the default scaler_filename this also
# writes the fitted scaler to 'scaler.pkl', which the helper functions reload.
normalized_df = normalize_df(dataset)

# Model

In [28]:
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

In [51]:
def preprocess_data(df):
    """Turn a window dataset into one-hot tensors for training.

    A ``LabelEncoder`` is fitted on every value found in the ``sequence``
    windows and the ``prediction`` column, so each distinct note value becomes
    one class.

    Note: ``df`` is modified in place; the columns ``encoded_sequence`` and
    ``encoded_prediction`` are added to it (or overwritten).

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide ``sequence`` (list-like of notes per row) and
        ``prediction``.

    Returns
    -------
    tuple
        ``(sequences, predictions, encoder)``: the one-hot encoded windows,
        the one-hot encoded targets, and the fitted encoder needed to map
        class indices back to note values.
    """
    note_encoder = LabelEncoder()
    vocabulary = df['sequence'].explode().tolist() + df['prediction'].tolist()
    note_encoder.fit(vocabulary)

    def _encode_window(window):
        return note_encoder.transform(window)

    df['encoded_sequence'] = df['sequence'].apply(_encode_window)
    df['encoded_prediction'] = note_encoder.transform(df['prediction'])

    class_count = len(note_encoder.classes_)
    window_ids = np.array(df['encoded_sequence'].tolist())
    target_ids = np.array(df['encoded_prediction'].tolist())

    one_hot_windows = to_categorical(window_ids, num_classes=class_count)
    one_hot_targets = to_categorical(target_ids, num_classes=class_count)

    return one_hot_windows, one_hot_targets, note_encoder

def create_model_demo(input_shape, num_classes):
    """Build and compile a two-layer LSTM classifier without dropout.

    Parameters
    ----------
    input_shape : tuple
        Per-sample input shape handed to the first LSTM layer.
    num_classes : int
        Width of the softmax output layer.

    Returns
    -------
    Sequential
        Model compiled with categorical cross-entropy, the Adam optimizer and
        an accuracy metric.
    """
    network = Sequential([
        # The first LSTM returns the full sequence so the second one can consume it.
        LSTM(128, input_shape=input_shape, return_sequences=True),
        LSTM(128),
        Dense(num_classes, activation='softmax'),
    ])
    network.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return network


def create_model_demo2(input_shape, num_classes):
    """Build and compile a deeper LSTM classifier with dropout.

    The stack is three LSTM layers (256, 128, 128 units) followed by a
    64-unit ReLU dense layer and a softmax output, with dropout after the
    first two LSTM layers and after the dense layer.

    Parameters
    ----------
    input_shape : tuple
        Per-sample input shape handed to the first LSTM layer.
    num_classes : int
        Width of the softmax output layer.

    Returns
    -------
    Sequential
        Model compiled with categorical cross-entropy, the Adam optimizer and
        an accuracy metric.
    """
    network = Sequential([
        LSTM(256, input_shape=input_shape, return_sequences=True),
        Dropout(0.3),
        LSTM(128, return_sequences=True),
        Dropout(0.3),
        LSTM(128),
        Dense(64, activation='relu'),
        Dropout(0.5),
        Dense(num_classes, activation='softmax'),
    ])

    network.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    # Early stopping is not configured here; an earlier draft sketched
    # EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True),
    # which would be passed to fit() as a callback rather than set on the model.
    return network


def create_model(input_shape, num_classes):
    """Build and compile the two-layer LSTM classifier used for training.

    Each of the two 128-unit LSTM layers is followed by 20% dropout, and a
    softmax dense layer produces the class scores.

    Parameters
    ----------
    input_shape : tuple
        Per-sample input shape handed to the first LSTM layer.
    num_classes : int
        Width of the softmax output layer.

    Returns
    -------
    Sequential
        Model compiled with categorical cross-entropy and the Adam optimizer;
        no extra metrics are registered.
    """
    network = Sequential([
        LSTM(128, input_shape=input_shape, return_sequences=True),
        Dropout(0.2),
        LSTM(128),
        Dropout(0.2),
        Dense(num_classes, activation='softmax'),
    ])
    network.compile(loss='categorical_crossentropy', optimizer='adam')
    return network

def predict_next_note(model, input_sequence, encoder):
    """Predict the next note from a window of class indices.

    ``input_sequence`` is passed straight to ``to_categorical`` without going
    through ``encoder.transform``, so it must already hold the encoder's
    integer class indices, not raw or scaled note values.

    Parameters
    ----------
    model : object
        Trained model exposing ``predict``.
    input_sequence : array-like of int
        Class indices of the input window.
    encoder : object
        Fitted label encoder; supplies ``classes_`` and ``inverse_transform``.

    Returns
    -------
    int
        The predicted note after decoding the most probable class and
        undoing the scaling with ``denormalize_sequence``.
    """
    class_count = len(encoder.classes_)
    one_hot_window = to_categorical(input_sequence, num_classes=class_count)
    # The model expects a batch dimension in front of (timesteps, classes).
    batch = np.expand_dims(one_hot_window, axis=0)

    probabilities = model.predict(batch)
    best_class = np.argmax(probabilities)
    scaled_note = encoder.inverse_transform([best_class])[0]

    return denormalize_sequence(scaled_note)[0]

def predict_next_note1(model, input_sequence, encoder):
    """Predict the next note from a window of scaled note values.

    Parameters
    ----------
    model : object
        Trained model exposing ``predict``.
    input_sequence : array-like
        Scaled note values of the input window; every value must be known to
        ``encoder``.
    encoder : object
        Fitted label encoder mapping scaled note values to class indices.

    Returns
    -------
    int
        The predicted note with the scaling undone by
        ``denormalize_sequence``.
    """
    # predict_next_note2 performs the same encode -> one-hot -> predict ->
    # decode steps this function used to duplicate line for line; the only
    # difference here is the final denormalisation.
    next_note = predict_next_note2(model, input_sequence, encoder)

    return denormalize_sequence([next_note])[0]



def predict_next_note2(model, input_sequence, encoder):
    """Predict the next note and return it in the encoder's (scaled) units.

    Parameters
    ----------
    model : object
        Trained model exposing ``predict``.
    input_sequence : array-like
        Scaled note values of the input window; every value must be known to
        ``encoder``.
    encoder : object
        Fitted label encoder mapping scaled note values to class indices.

    Returns
    -------
    The encoder class value with the highest predicted probability; no
    denormalisation is applied.
    """
    class_count = len(encoder.classes_)

    # Shape (1, timesteps): a batch holding the single encoded window.
    class_ids = np.array(encoder.transform(input_sequence)).reshape(1, -1)
    one_hot_batch = to_categorical(class_ids, num_classes=class_count)

    best_class = np.argmax(model.predict(one_hot_batch))
    return encoder.inverse_transform([best_class])[0]

def generate_notes(model, initial_sequence, num_notes, encoder, sequence_length=5):
    """Extend a seed melody note by note until it has ``num_notes`` notes.

    The seed is scaled with ``normalize_sequence``; then the last
    ``sequence_length`` notes are repeatedly fed to ``predict_next_note2``
    and the prediction is appended. The finished melody is converted back
    with ``denormalize_sequence``.

    Parameters
    ----------
    model : object
        Trained model passed through to ``predict_next_note2``.
    initial_sequence : list
        Seed notes in raw (unscaled) units. The caller's list is not
        modified.
    num_notes : int
        Total length of the returned melody, seed included. If the seed is
        already at least this long it is returned unchanged in content.
    encoder : object
        Fitted label encoder passed through to ``predict_next_note2``.
    sequence_length : int, default 5
        Number of trailing notes used as model input at each step. It should
        be positive and match the window length the model was trained with;
        the default keeps the previously hard-coded value.

    Returns
    -------
    list of int
        The seed followed by the generated notes.
    """
    # normalize_sequence returns a new list, so appending below does not
    # touch the caller's seed.
    notes = normalize_sequence(initial_sequence)

    while len(notes) < num_notes:
        window = notes[-sequence_length:]
        notes.append(predict_next_note2(model, window, encoder))

    return denormalize_sequence(notes)


In [31]:

# One-hot encode the windows and targets; `encoder` maps class indices back to
# scaled note values and is reused by the prediction cells below.
# NOTE(review): the encoder is only kept in this kernel's memory, while the model
# is saved to disk, so confirm whether inference in a fresh kernel needs it persisted.
sequences, predictions, encoder = preprocess_data(normalized_df)
# Per-sample input shape: the two trailing axes of the one-hot window tensor.
input_shape = (sequences.shape[1], sequences.shape[2])
num_classes = predictions.shape[1]

model = create_model(input_shape, num_classes)
# NOTE(review): no validation data is passed to fit() and no random seed is set
# in this cell, so confirm whether the run is meant to be reproducible.
model.fit(sequences, predictions, epochs=135, batch_size=64)
model.save('note_prediction_model.h5')


Epoch 1/135
Epoch 2/135
Epoch 3/135
Epoch 4/135
Epoch 5/135
Epoch 6/135
Epoch 7/135
Epoch 8/135
Epoch 9/135
Epoch 10/135
Epoch 11/135
Epoch 12/135
Epoch 13/135
Epoch 14/135
Epoch 15/135
Epoch 16/135
Epoch 17/135
Epoch 18/135
Epoch 19/135
Epoch 20/135
Epoch 21/135
Epoch 22/135
Epoch 23/135
Epoch 24/135
Epoch 25/135
Epoch 26/135
Epoch 27/135
Epoch 28/135
Epoch 29/135
Epoch 30/135
Epoch 31/135
Epoch 32/135
Epoch 33/135
Epoch 34/135
Epoch 35/135
Epoch 36/135
Epoch 37/135
Epoch 38/135
Epoch 39/135
Epoch 40/135
Epoch 41/135
Epoch 42/135
Epoch 43/135
Epoch 44/135
Epoch 45/135
Epoch 46/135
Epoch 47/135
Epoch 48/135
Epoch 49/135
Epoch 50/135
Epoch 51/135
Epoch 52/135
Epoch 53/135
Epoch 54/135
Epoch 55/135
Epoch 56/135
Epoch 57/135
Epoch 58/135
Epoch 59/135
Epoch 60/135
Epoch 61/135
Epoch 62/135
Epoch 63/135
Epoch 64/135
Epoch 65/135
Epoch 66/135
Epoch 67/135
Epoch 68/135
Epoch 69/135
Epoch 70/135
Epoch 71/135
Epoch 72/135
Epoch 73/135
Epoch 74/135
Epoch 75/135
Epoch 76/135
Epoch 77/135
Epoch 78

  saving_api.save_model(


# Usage

In [52]:
# Reload the model saved by the training cell.
loaded_model = load_model('note_prediction_model.h5')
# Seed window in raw note numbers; it is scaled with the saved scaler before prediction.
input_sequence = [46, 58, 62, 65, 69]
input_sequence = normalize_sequence(input_sequence)
# `encoder` is the one fitted in the training cell.
# NOTE(review): presumably every seed note must have occurred in the training data,
# otherwise encoder.transform cannot encode it. Verify.
predicted_note = predict_next_note1(loaded_model, input_sequence, encoder)
print(f"The predicted next note is: {predicted_note}")


The predicted next note is: 46


In [55]:
# Seed melody in raw note numbers (generate_notes scales it itself).
initial_sequence = [75, 77, 77, 80, 46]
# Total length of the generated melody, seed included.
num_notes_needed = 50


generated_notes = generate_notes(loaded_model, initial_sequence, num_notes_needed,encoder)
print(f"Generated sequence: {generated_notes}")

Generated sequence: [75, 77, 77, 80, 46, 80, 77, 49, 77, 49, 70, 73, 49, 73, 70, 80, 49, 80, 77, 51, 77, 75, 75, 77, 77, 75, 51, 75, 73, 51, 73, 72, 51, 72, 72, 56, 72, 68, 68, 70, 70, 72, 56, 72, 73, 56, 73, 75, 56, 75]


# Convert notes to MIDI

In [54]:
from midiutil.MidiFile import MIDIFile

In [56]:
def notes_to_midi(note_numbers, output_file='output.mid', tempo=120, duration=1, volume=100):
    """Write a list of note numbers to a single-track MIDI file.

    The notes are placed back to back on track 0, channel 0: note ``i``
    starts at ``i * duration`` and lasts ``duration``.

    Parameters
    ----------
    note_numbers : iterable of int
        Note numbers to write, in playing order.
    output_file : str, default 'output.mid'
        Path of the MIDI file to create (overwritten if it exists).
    tempo : int, default 120
        Tempo passed to ``MIDIFile.addTempo``.
    duration : int or float, default 1
        Length of every note, and the spacing between note starts, in the
        time units ``MIDIFile.addNote`` uses. The default keeps the
        previously hard-coded value.
    volume : int, default 100
        Volume passed to ``MIDIFile.addNote`` for every note. The default
        keeps the previously hard-coded value.

    Returns
    -------
    None
        The file is written and a confirmation line is printed.
    """
    track = 0
    channel = 0

    midi = MIDIFile(1)
    midi.addTrackName(track, 0, "Sample Track")
    midi.addTempo(track, 0, tempo)

    for position, note_number in enumerate(note_numbers):
        midi.addNote(track, channel, note_number, position * duration, duration, volume)

    with open(output_file, 'wb') as f:
        midi.writeFile(f)

    print(f"MIDI file generated: {output_file}")


In [57]:

# Write the melody produced by the generation cell to 'output.mid'.
notes_to_midi(generated_notes, output_file='output.mid')


MIDI file generated: output.mid


# To save the notebook

In [None]:
# Tooling needed to convert the notebook to PDF: nbconvert plus a LaTeX/pandoc toolchain.
!pip install nbconvert
!apt-get install texlive texlive-xetex texlive-latex-extra pandoc

# Colab-only: mount Google Drive so the notebook file is reachable under /content/drive.
from google.colab import drive
drive.mount("/content/drive")


# Convert the notebook stored on Drive to PDF (the path is specific to the author's Drive layout).
!jupyter nbconvert --to pdf "/content/drive/MyDrive/Colab Notebooks/ml_midi_maker.ipynb"
