In [None]:
# Import necessary libraries
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Conv1D, MaxPooling1D, Flatten, Dense
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Install pretty_midi
# !pip install pretty_midi

import pretty_midi

# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Data Pre-processing
def preprocess_data(midi_files):
    midi_data = []
    for file in midi_files:
        try:
            midi = pretty_midi.PrettyMIDI(file)
            midi_data.append(midi)
        except Exception as e:
            print(f"Skipping file {file} due to error: {e}")
    # Apply data augmentation techniques if necessary
    return midi_data

# Feature Extraction
def extract_features(midi_data, composer_label):
    features = []
    labels = []
    for midi in midi_data:
        # Extract features such as notes, chords, tempo
        notes = midi.instruments[0].notes
        pitch_sequence = [note.pitch for note in notes]  # Example feature: pitch of notes
        features.append(pitch_sequence)
        labels.append(composer_label)
    return features, labels

# Model Building
def build_lstm_model(input_shape):
    model = Sequential()
    model.add(LSTM(128, input_shape=input_shape, return_sequences=True))
    model.add(LSTM(128))
    model.add(Dense(4, activation='softmax'))
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

def build_cnn_model(input_shape):
    model = Sequential()
    model.add(Conv1D(64, kernel_size=3, activation='relu', input_shape=input_shape))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(4, activation='softmax'))
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Model Training
def train_model(model, X_train, y_train, X_val, y_val):
    history = model.fit(X_train, y_train, epochs=50, validation_data=(X_val, y_val))
    return history

# Model Evaluation
def evaluate_model(model, X_test, y_test):
    y_pred = model.predict(X_test)
    y_pred_classes = np.argmax(y_pred, axis=1)
    y_true = np.argmax(y_test, axis=1)
    accuracy = accuracy_score(y_true, y_pred_classes)
    precision = precision_score(y_true, y_pred_classes, average='macro')
    recall = recall_score(y_true, y_pred_classes, average='macro')
    return accuracy, precision, recall

# Function to get MIDI file paths from a directory
def get_midi_files_from_directory(directory_path):
    midi_files = []
    for root, _, files in os.walk(directory_path):
        for file in files:
            if file.endswith('.mid') or file.endswith('.midi'):
                midi_files.append(os.path.join(root, file))
    return midi_files

# Main
if __name__ == "__main__":
    # Base directory in Google Drive
    base_directory = '/content/drive/My Drive/Colab Notebooks/midiclassics'

    # Specify the subdirectories containing MIDI files
    composers = {
        'Bach': 0,
        'Beethoven': 1,
        'Chopin': 2,
        'Mozart': 3
    }

    midi_files = []
    labels = []

    # Get MIDI files from directories and assign labels
    for composer, label in composers.items():
        composer_directory = os.path.join(base_directory, composer)
        composer_files = get_midi_files_from_directory(composer_directory)
        composer_features, composer_labels = extract_features(preprocess_data(composer_files), label)
        midi_files.extend(composer_features)
        labels.extend(composer_labels)

    # Pad sequences to ensure they have the same length
    max_sequence_length = 1000  # You can adjust this value based on your data
    midi_files_padded = pad_sequences(midi_files, maxlen=max_sequence_length, padding='post', truncating='post')

    # Convert lists to numpy arrays
    midi_files = np.array(midi_files_padded)
    labels = np.array(labels)

    # Reshape data to fit model input requirements
    X = midi_files.reshape(midi_files.shape[0], midi_files.shape[1], 1)
    y = to_categorical(labels, num_classes=4)

    # Split data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Build and train models
    lstm_model = build_lstm_model((X_train.shape[1], 1))
    cnn_model = build_cnn_model((X_train.shape[1], 1))
    lstm_history = train_model(lstm_model, X_train, y_train, X_test, y_test)
    cnn_history = train_model(cnn_model, X_train, y_train, X_test, y_test)

    # Evaluate models
    lstm_accuracy, lstm_precision, lstm_recall = evaluate_model(lstm_model, X_test, y_test)
    cnn_accuracy, cnn_precision, cnn_recall = evaluate_model(cnn_model, X_test, y_test)

    # Print evaluation results
    print(f"LSTM Model - Accuracy: {lstm_accuracy}, Precision: {lstm_precision}, Recall: {lstm_recall}")
    print(f"CNN Model - Accuracy: {cnn_accuracy}, Precision: {cnn_precision}, Recall: {cnn_recall}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Skipping file /content/drive/My Drive/Colab Notebooks/midiclassics/Beethoven/Anhang 14-3.mid due to error: Could not decode key with 3 flats and mode 255
Skipping file /content/drive/My Drive/Colab Notebooks/midiclassics/Mozart/Piano Sonatas/Nueva carpeta/K281 Piano Sonata n03 3mov.mid due to error: Could not decode key with 2 flats and mode 2
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 

In [None]:
import os
import numpy as np
import pandas as pd
import pretty_midi
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, precision_score, recall_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.sequence import pad_sequences
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')

# Custom wrapper
class KerasClassifierCustom:
    def __init__(self, build_fn=None, **sk_params):
        self.build_fn = build_fn
        self.sk_params = sk_params
        self.model = None

    def fit(self, X, y, **fit_params):
        self.model = self.build_fn(**self.filter_sk_params(self.build_fn))
        return self.model.fit(X, y, **fit_params)

    def predict(self, X):
        return self.model.predict(X)

    def score(self, X, y):
        y_pred = self.predict(X)
        y_pred_classes = np.argmax(y_pred, axis=1)
        y_true = np.argmax(y, axis=1)
        return accuracy_score(y_true, y_pred_classes)

    def get_params(self, deep=True):
        params = self.sk_params.copy()
        params['build_fn'] = self.build_fn
        return params

    def set_params(self, **params):
        for key, value in params.items():
            if key == "build_fn":
                self.build_fn = value
            else:
                self.sk_params[key] = value
        return self

    def filter_sk_params(self, fn):
        res = {}
        fn_params = fn.__code__.co_varnames[:fn.__code__.co_argcount]
        for name, value in self.sk_params.items():
            if name in fn_params:
                res[name] = value
        return res

# Model Building
def create_lstm_model(optimizer='adam', units=128, dropout_rate=0.2):
    model = Sequential()
    model.add(LSTM(units, input_shape=(max_sequence_length, 1), return_sequences=True))
    model.add(LSTM(units))
    model.add(Dropout(dropout_rate))
    model.add(Dense(4, activation='softmax'))
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    return model

def create_cnn_model(optimizer='adam', filters=64, kernel_size=3, dropout_rate=0.2):
    model = Sequential()
    model.add(Conv1D(filters, kernel_size=kernel_size, activation='relu', input_shape=(max_sequence_length, 1)))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dropout(dropout_rate))
    model.add(Dense(4, activation='softmax'))
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Function to get MIDI file paths from a directory
def get_midi_files_from_directory(directory_path):
    midi_files = []
    for root, _, files in os.walk(directory_path):
        for file in files:
            if file.endswith('.mid') or file.endswith('.midi'):
                midi_files.append(os.path.join(root, file))
    return midi_files

# Data Pre-processing
def preprocess_data(midi_files):
    midi_data = []
    for file in midi_files:
        try:
            midi = pretty_midi.PrettyMIDI(file)
            midi_data.append(midi)
        except Exception as e:
            print(f"Skipping file {file} due to error: {e}")
    return midi_data

# Data Augmentation
def augment_data(sequence):
    shift = np.random.randint(-5, 6)
    return np.clip(np.array(sequence) + shift, 0, 127)

# Feature Extraction
def extract_features(midi_data, composer_label):
    features = []
    labels = []
    for midi in midi_data:
        if len(midi.instruments) > 0:
            notes = midi.instruments[0].notes
            pitch_sequence = [note.pitch for note in notes]
            features.append(pitch_sequence)
            labels.append(composer_label)
    return features, labels

# Model evaluation function
def evaluate_model(model, X_test, y_test):
    y_pred = model.predict(X_test)
    y_pred_classes = np.argmax(y_pred, axis=1)
    y_true = np.argmax(y_test, axis=1)
    accuracy = accuracy_score(y_true, y_pred_classes)
    precision = precision_score(y_true, y_pred_classes, average='weighted', zero_division=0)
    recall = recall_score(y_true, y_pred_classes, average='weighted', zero_division=0)
    return accuracy, precision, recall

# Main
if __name__ == "__main__":
    # Base directory in Google Drive
    base_directory = '/content/drive/My Drive/Colab Notebooks/midiclassics'

    # Specify the subdirectories containing MIDI files
    composers = {
        'Bach': 0,
        'Beethoven': 1,
        'Chopin': 2,
        'Mozart': 3
    }

    midi_files = []
    labels = []

    # Get MIDI files from directories and assign labels
    for composer, label in composers.items():
        composer_directory = os.path.join(base_directory, composer)
        composer_files = get_midi_files_from_directory(composer_directory)
        if not composer_files:
            print(f"No MIDI files found for {composer}")
        composer_features, composer_labels = extract_features(preprocess_data(composer_files), label)
        if not composer_features:
            print(f"No features extracted for {composer}")
        augmented_features = [augment_data(seq) for seq in composer_features]
        midi_files.extend(composer_features)
        midi_files.extend(augmented_features)
        labels.extend(composer_labels)
        labels.extend(composer_labels)  # Augmented data has the same labels

    # Pad sequences to ensure they have the same length
    max_sequence_length = 1000  # You can adjust this value based on your data
    midi_files_padded = pad_sequences(midi_files, maxlen=max_sequence_length, padding='post', truncating='post')

    # Convert lists to numpy arrays
    midi_files = np.array(midi_files_padded)
    labels = np.array(labels)

    # Check if dataset is empty
    if midi_files.shape[0] == 0:
        raise ValueError("No data available after preprocessing. Check your MIDI files and preprocessing steps.")

    # Reshape data to fit model input requirements
    X = midi_files.reshape(midi_files.shape[0], midi_files.shape[1], 1)
    y = to_categorical(labels, num_classes=4)

    # Split data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Example usage with LSTM model
    lstm_model_custom = KerasClassifierCustom(build_fn=create_lstm_model, epochs=10, batch_size=32, verbose=0)
    cnn_model_custom = KerasClassifierCustom(build_fn=create_cnn_model, epochs=10, batch_size=32, verbose=0)

    # Hyperparameter tuning for LSTM model
    lstm_param_grid = {
        'epochs': [50],
        'batch_size': [32],
        'optimizer': ['rmsprop'],
        'units': [64],
        'dropout_rate': [0.2]
    }
    lstm_grid = GridSearchCV(estimator=lstm_model_custom, param_grid=lstm_param_grid, n_jobs=-1, cv=3, error_score='raise')
    lstm_grid_result = lstm_grid.fit(X_train, y_train)

    # Hyperparameter tuning for CNN model
    cnn_param_grid = {
        'epochs': [100],
        'batch_size': [32],
        'optimizer': ['adam'],
        'filters': [32],
        'kernel_size': [3],
        'dropout_rate': [0.5]
    }
    cnn_grid = GridSearchCV(estimator=cnn_model_custom, param_grid=cnn_param_grid, n_jobs=-1, cv=3, error_score='raise')
    cnn_grid_result = cnn_grid.fit(X_train, y_train)

    # Print the best hyperparameters for both models
    print(f"Best LSTM Model: {lstm_grid_result.best_score_} using {lstm_grid_result.best_params_}")
    print(f"Best CNN Model: {cnn_grid_result.best_score_} using {cnn_grid_result.best_params_}")

    # Evaluate the best LSTM model
    best_lstm_model = lstm_grid_result.best_estimator_.model
    lstm_accuracy, lstm_precision, lstm_recall = evaluate_model(best_lstm_model, X_test, y_test)

    # Evaluate the best CNN model
    best_cnn_model = cnn_grid_result.best_estimator_.model
    cnn_accuracy, cnn_precision, cnn_recall = evaluate_model(best_cnn_model, X_test, y_test)

    # Print evaluation results
    print(f"LSTM Model - Accuracy: {lstm_accuracy}, Precision: {lstm_precision}, Recall: {lstm_recall}")
    print(f"CNN Model - Accuracy: {cnn_accuracy}, Precision: {cnn_precision}, Recall: {cnn_recall}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Skipping file /content/drive/My Drive/Colab Notebooks/midiclassics/Beethoven/Anhang 14-3.mid due to error: Could not decode key with 3 flats and mode 255
Skipping file /content/drive/My Drive/Colab Notebooks/midiclassics/Mozart/Piano Sonatas/Nueva carpeta/K281 Piano Sonata n03 3mov.mid due to error: Could not decode key with 2 flats and mode 2
Best LSTM Model: 0.6223356094521738 using {'batch_size': 32, 'dropout_rate': 0.2, 'epochs': 50, 'optimizer': 'rmsprop', 'units': 64}
Best CNN Model: 0.5716289072117293 using {'batch_size': 32, 'dropout_rate': 0.5, 'epochs': 100, 'filters': 32, 'kernel_size': 3, 'optimizer': 'adam'}
LSTM Model - Accuracy: 0.6225490196078431, Precision: 0.510877866238998, Recall: 0.6225490196078431
CNN Model - Accuracy: 0.49836601307189543, Precision: 0.6196943998305922, Recall: 0.49836601307189543


In [None]:
import os
import numpy as np
import pandas as pd
import pretty_midi
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, precision_score, recall_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.sequence import pad_sequences
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')

# Custom wrapper
class KerasClassifierCustom:
    def __init__(self, build_fn=None, **sk_params):
        self.build_fn = build_fn
        self.sk_params = sk_params
        self.model = None

    def fit(self, X, y, **fit_params):
        self.model = self.build_fn(**self.filter_sk_params(self.build_fn))
        return self.model.fit(X, y, **fit_params)

    def predict(self, X):
        return self.model.predict(X)

    def score(self, X, y):
        y_pred = self.predict(X)
        y_pred_classes = np.argmax(y_pred, axis=1)
        y_true = np.argmax(y, axis=1)
        return accuracy_score(y_true, y_pred_classes)

    def get_params(self, deep=True):
        params = self.sk_params.copy()
        params['build_fn'] = self.build_fn
        return params

    def set_params(self, **params):
        for key, value in params.items():
            if key == "build_fn":
                self.build_fn = value
            else:
                self.sk_params[key] = value
        return self

    def filter_sk_params(self, fn):
        res = {}
        fn_params = fn.__code__.co_varnames[:fn.__code__.co_argcount]
        for name, value in self.sk_params.items():
            if name in fn_params:
                res[name] = value
        return res

# Model Building
def create_lstm_model(optimizer='adam', units=128, dropout_rate=0.2):
    model = Sequential()
    model.add(LSTM(units, input_shape=(max_sequence_length, 1), return_sequences=True))
    model.add(LSTM(units))
    model.add(Dropout(dropout_rate))
    model.add(Dense(4, activation='softmax'))
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    return model

def create_cnn_model(optimizer='adam', filters=64, kernel_size=3, dropout_rate=0.2):
    model = Sequential()
    model.add(Conv1D(filters, kernel_size=kernel_size, activation='relu', input_shape=(max_sequence_length, 1)))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dropout(dropout_rate))
    model.add(Dense(4, activation='softmax'))
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Function to get MIDI file paths from a directory
def get_midi_files_from_directory(directory_path):
    midi_files = []
    for root, _, files in os.walk(directory_path):
        for file in files:
            if file.endswith('.mid') or file.endswith('.midi'):
                midi_files.append(os.path.join(root, file))
    return midi_files

# Data Pre-processing
def preprocess_data(midi_files):
    midi_data = []
    for file in midi_files:
        try:
            midi = pretty_midi.PrettyMIDI(file)
            midi_data.append(midi)
        except Exception as e:
            print(f"Skipping file {file} due to error: {e}")
    return midi_data

# Improved Data Augmentation
def augment_data(sequence):
    shift = np.random.randint(-5, 6)
    stretched_sequence = np.interp(np.linspace(0, len(sequence), len(sequence) * 2), np.arange(len(sequence)), sequence)
    return np.clip(np.array(sequence) + shift, 0, 127), np.clip(stretched_sequence, 0, 127)

# Feature Extraction
def extract_features(midi_data, composer_label):
    features = []
    labels = []
    for midi in midi_data:
        if len(midi.instruments) > 0:
            notes = midi.instruments[0].notes
            pitch_sequence = [note.pitch for note in notes]
            features.append(pitch_sequence)
            labels.append(composer_label)
    return features, labels

# Model evaluation function
def evaluate_model(model, X_test, y_test):
    y_pred = model.predict(X_test)
    y_pred_classes = np.argmax(y_pred, axis=1)
    y_true = np.argmax(y_test, axis=1)
    accuracy = accuracy_score(y_true, y_pred_classes)
    precision = precision_score(y_true, y_pred_classes, average='weighted', zero_division=0)
    recall = recall_score(y_true, y_pred_classes, average='weighted', zero_division=0)
    return accuracy, precision, recall

# Main
if __name__ == "__main__":
    # Base directory in Google Drive
    base_directory = '/content/drive/My Drive/Colab Notebooks/midiclassics'

    # Specify the subdirectories containing MIDI files
    composers = {
        'Bach': 0,
        'Beethoven': 1,
        'Chopin': 2,
        'Mozart': 3
    }

    midi_files = []
    labels = []

    # Get MIDI files from directories and assign labels
    for composer, label in composers.items():
        composer_directory = os.path.join(base_directory, composer)
        composer_files = get_midi_files_from_directory(composer_directory)
        if not composer_files:
            print(f"No MIDI files found for {composer}")
        composer_features, composer_labels = extract_features(preprocess_data(composer_files), label)
        if not composer_features:
            print(f"No features extracted for {composer}")
        for seq in composer_features:
            augmented_seq, stretched_seq = augment_data(seq)
            midi_files.append(seq)
            midi_files.append(augmented_seq)
            midi_files.append(stretched_seq)
            labels.extend([label] * 3)

    # Pad sequences to ensure they have the same length
    max_sequence_length = 1000  # You can adjust this value based on your data
    midi_files_padded = pad_sequences(midi_files, maxlen=max_sequence_length, padding='post', truncating='post')

    # Convert lists to numpy arrays
    midi_files = np.array(midi_files_padded)
    labels = np.array(labels)

    # Check if dataset is empty
    if midi_files.shape[0] == 0:
        raise ValueError("No data available after preprocessing. Check your MIDI files and preprocessing steps.")

    # Reshape data to fit model input requirements
    X = midi_files.reshape(midi_files.shape[0], midi_files.shape[1], 1)
    y = to_categorical(labels, num_classes=4)

    # Split data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Example usage with LSTM model
    lstm_model_custom = KerasClassifierCustom(build_fn=create_lstm_model, epochs=10, batch_size=32, verbose=0)
    cnn_model_custom = KerasClassifierCustom(build_fn=create_cnn_model, epochs=10, batch_size=32, verbose=0)

    # Hyperparameter tuning for LSTM model
    lstm_param_grid = {
        'epochs': [50],
        'batch_size': [32],
        'optimizer': ['rmsprop'],
        'units': [64],
        'dropout_rate': [0.2]
    }
    lstm_grid = GridSearchCV(estimator=lstm_model_custom, param_grid=lstm_param_grid, n_jobs=-1, cv=3, error_score='raise')
    lstm_grid_result = lstm_grid.fit(X_train, y_train)

    # Hyperparameter tuning for CNN model
    cnn_param_grid = {
        'epochs': [100],
        'batch_size': [32],
        'optimizer': ['adam'],
        'filters': [32, 64],
        'kernel_size': [3, 5],
        'dropout_rate': [0.5]
    }
    cnn_grid = GridSearchCV(estimator=cnn_model_custom, param_grid=cnn_param_grid, n_jobs=-1, cv=3, error_score='raise')
    cnn_grid_result = cnn_grid.fit(X_train, y_train)

    # Print the best hyperparameters for both models
    print(f"Best LSTM Model: {lstm_grid_result.best_score_} using {lstm_grid_result.best_params_}")
    print(f"Best CNN Model: {cnn_grid_result.best_score_} using {cnn_grid_result.best_params_}")

    # Evaluate the best LSTM model
    best_lstm_model = lstm_grid_result.best_estimator_.model
    lstm_accuracy, lstm_precision, lstm_recall = evaluate_model(best_lstm_model, X_test, y_test)

    # Evaluate the best CNN model
    best_cnn_model = cnn_grid_result.best_estimator_.model
    cnn_accuracy, cnn_precision, cnn_recall = evaluate_model(best_cnn_model, X_test, y_test)

    # Print evaluation results
    print(f"LSTM Model - Accuracy: {lstm_accuracy}, Precision: {lstm_precision}, Recall: {lstm_recall}")
    print(f"CNN Model - Accuracy: {cnn_accuracy}, Precision: {cnn_precision}, Recall: {cnn_recall}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).




Skipping file /content/drive/My Drive/Colab Notebooks/midiclassics/Beethoven/Anhang 14-3.mid due to error: Could not decode key with 3 flats and mode 255
Skipping file /content/drive/My Drive/Colab Notebooks/midiclassics/Mozart/Piano Sonatas/Nueva carpeta/K281 Piano Sonata n03 3mov.mid due to error: Could not decode key with 2 flats and mode 2


  pid = os.fork()


Best LSTM Model: 0.6029414847893998 using {'batch_size': 32, 'dropout_rate': 0.2, 'epochs': 50, 'optimizer': 'rmsprop', 'units': 64}
Best CNN Model: 0.6152119830900645 using {'batch_size': 32, 'dropout_rate': 0.5, 'epochs': 100, 'filters': 64, 'kernel_size': 5, 'optimizer': 'adam'}
LSTM Model - Accuracy: 0.6019629225736096, Precision: 0.5331888488829879, Recall: 0.6019629225736096
CNN Model - Accuracy: 0.6150490730643402, Precision: 0.5312179105085324, Recall: 0.6150490730643402


In [None]:
import os
import numpy as np
import pandas as pd
import pretty_midi
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, precision_score, recall_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.sequence import pad_sequences
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')

# Custom wrapper
class KerasClassifierCustom:
    def __init__(self, build_fn=None, **sk_params):
        self.build_fn = build_fn
        self.sk_params = sk_params
        self.model = None

    def fit(self, X, y, **fit_params):
        self.model = self.build_fn(**self.filter_sk_params(self.build_fn))
        return self.model.fit(X, y, **fit_params)

    def predict(self, X):
        return self.model.predict(X)

    def score(self, X, y):
        y_pred = self.predict(X)
        y_pred_classes = np.argmax(y_pred, axis=1)
        y_true = np.argmax(y, axis=1)
        return accuracy_score(y_true, y_pred_classes)

    def get_params(self, deep=True):
        params = self.sk_params.copy()
        params['build_fn'] = self.build_fn
        return params

    def set_params(self, **params):
        for key, value in params.items():
            if key == "build_fn":
                self.build_fn = value
            else:
                self.sk_params[key] = value
        return self

    def filter_sk_params(self, fn):
        res = {}
        fn_params = fn.__code__.co_varnames[:fn.__code__.co_argcount]
        for name, value in self.sk_params.items():
            if name in fn_params:
                res[name] = value
        return res

# Model Building
def create_lstm_model(optimizer='adam', units=128, dropout_rate=0.2):
    model = Sequential()
    model.add(LSTM(units, input_shape=(max_sequence_length, 1), return_sequences=True))
    model.add(LSTM(units))
    model.add(Dropout(dropout_rate))
    model.add(Dense(4, activation='softmax'))
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    return model

def create_cnn_model(optimizer='adam', filters=64, kernel_size=3, dropout_rate=0.2):
    model = Sequential()
    model.add(Conv1D(filters, kernel_size=kernel_size, activation='relu', input_shape=(max_sequence_length, 1)))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dropout(dropout_rate))
    model.add(Dense(4, activation='softmax'))
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Function to get MIDI file paths from a directory
def get_midi_files_from_directory(directory_path):
    midi_files = []
    for root, _, files in os.walk(directory_path):
        for file in files:
            if file.endswith('.mid') or file.endswith('.midi'):
                midi_files.append(os.path.join(root, file))
    return midi_files

# Data Pre-processing
def preprocess_data(midi_files):
    midi_data = []
    for file in midi_files:
        try:
            midi = pretty_midi.PrettyMIDI(file)
            midi_data.append(midi)
        except Exception as e:
            print(f"Skipping file {file} due to error: {e}")
    return midi_data

# Improved Data Augmentation
def augment_data(sequence):
    shift = np.random.randint(-5, 6)
    stretched_sequence = np.interp(np.linspace(0, len(sequence), len(sequence) * 2), np.arange(len(sequence)), sequence)
    return np.clip(np.array(sequence) + shift, 0, 127), np.clip(stretched_sequence, 0, 127)

# Feature Extraction
def extract_features(midi_data, composer_label):
    features = []
    labels = []
    for midi in midi_data:
        if len(midi.instruments) > 0:
            notes = midi.instruments[0].notes
            pitch_sequence = [note.pitch for note in notes]
            features.append(pitch_sequence)
            labels.append(composer_label)
    return features, labels

# Normalize sequences
def normalize_sequences(sequences):
    return (sequences - np.min(sequences)) / (np.max(sequences) - np.min(sequences))

# Model evaluation function
def evaluate_model(model, X_test, y_test):
    y_pred = model.predict(X_test)
    y_pred_classes = np.argmax(y_pred, axis=1)
    y_true = np.argmax(y_test, axis=1)
    accuracy = accuracy_score(y_true, y_pred_classes)
    precision = precision_score(y_true, y_pred_classes, average='weighted', zero_division=0)
    recall = recall_score(y_true, y_pred_classes, average='weighted', zero_division=0)
    return accuracy, precision, recall

# Main
if __name__ == "__main__":
    # Base directory in Google Drive
    base_directory = '/content/drive/My Drive/Colab Notebooks/midiclassics'

    # Specify the subdirectories containing MIDI files
    composers = {
        'Bach': 0,
        'Beethoven': 1,
        'Chopin': 2,
        'Mozart': 3
    }

    midi_files = []
    labels = []

    # Get MIDI files from directories and assign labels
    for composer, label in composers.items():
        composer_directory = os.path.join(base_directory, composer)
        composer_files = get_midi_files_from_directory(composer_directory)
        if not composer_files:
            print(f"No MIDI files found for {composer}")
        composer_features, composer_labels = extract_features(preprocess_data(composer_files), label)
        if not composer_features:
            print(f"No features extracted for {composer}")
        for seq in composer_features:
            augmented_seq, stretched_seq = augment_data(seq)
            midi_files.append(seq)
            midi_files.append(augmented_seq)
            midi_files.append(stretched_seq)
            labels.extend([label] * 3)

    # Pad sequences to ensure they have the same length
    max_sequence_length = 1000  # You can adjust this value based on your data
    midi_files_padded = pad_sequences(midi_files, maxlen=max_sequence_length, padding='post', truncating='post')

    # Normalize sequences
    midi_files_padded = normalize_sequences(midi_files_padded)

    # Convert lists to numpy arrays
    midi_files = np.array(midi_files_padded)
    labels = np.array(labels)

    # Check if dataset is empty
    if midi_files.shape[0] == 0:
        raise ValueError("No data available after preprocessing. Check your MIDI files and preprocessing steps.")

    # Reshape data to fit model input requirements
    X = midi_files.reshape(midi_files.shape[0], midi_files.shape[1], 1)
    y = to_categorical(labels, num_classes=4)

    # Split data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Example usage with LSTM model
    lstm_model_custom = KerasClassifierCustom(build_fn=create_lstm_model, epochs=10, batch_size=32, verbose=0)
    cnn_model_custom = KerasClassifierCustom(build_fn=create_cnn_model, epochs=10, batch_size=32, verbose=0)

    # Hyperparameter tuning for LSTM model
    lstm_param_grid = {
        'epochs': [50, 100],
        'batch_size': [32],
        'optimizer': ['rmsprop', 'adam'],
        'units': [64, 128],
        'dropout_rate': [0.2, 0.5]
    }
    lstm_grid = GridSearchCV(estimator=lstm_model_custom, param_grid=lstm_param_grid, n_jobs=-1, cv=3, error_score='raise')
    lstm_grid_result = lstm_grid.fit(X_train, y_train)

    # Hyperparameter tuning for CNN model
    cnn_param_grid = {
        'epochs': [50, 100],
        'batch_size': [32],
        'optimizer': ['adam'],
        'filters': [32, 64],
        'kernel_size': [3, 5],
        'dropout_rate': [0.5]
    }
    cnn_grid = GridSearchCV(estimator=cnn_model_custom, param_grid=cnn_param_grid, n_jobs=-1, cv=3, error_score='raise')
    cnn_grid_result = cnn_grid.fit(X_train, y_train)

    # Print the best hyperparameters for both models
    print(f"Best LSTM Model: {lstm_grid_result.best_score_} using {lstm_grid_result.best_params_}")
    print(f"Best CNN Model: {cnn_grid_result.best_score_} using {cnn_grid_result.best_params_}")

    # Evaluate the best LSTM model
    best_lstm_model = lstm_grid_result.best_estimator_.model
    lstm_accuracy, lstm_precision, lstm_recall = evaluate_model(best_lstm_model, X_test, y_test)

    # Evaluate the best CNN model
    best_cnn_model = cnn_grid_result.best_estimator_.model
    cnn_accuracy, cnn_precision, cnn_recall = evaluate_model(best_cnn_model, X_test, y_test)

    # Print evaluation results
    print(f"LSTM Model - Accuracy: {lstm_accuracy}, Precision: {lstm_precision}, Recall: {lstm_recall}")
    print(f"CNN Model - Accuracy: {cnn_accuracy}, Precision: {cnn_precision}, Recall: {cnn_recall}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).




Skipping file /content/drive/My Drive/Colab Notebooks/midiclassics/Beethoven/Anhang 14-3.mid due to error: Could not decode key with 3 flats and mode 255
Skipping file /content/drive/My Drive/Colab Notebooks/midiclassics/Mozart/Piano Sonatas/Nueva carpeta/K281 Piano Sonata n03 3mov.mid due to error: Could not decode key with 2 flats and mode 2


  pid = os.fork()


Best LSTM Model: 0.6233955121848513 using {'batch_size': 32, 'dropout_rate': 0.2, 'epochs': 50, 'optimizer': 'rmsprop', 'units': 64}
Best CNN Model: 0.6154825295671835 using {'batch_size': 32, 'dropout_rate': 0.5, 'epochs': 100, 'filters': 64, 'kernel_size': 3, 'optimizer': 'adam'}
LSTM Model - Accuracy: 0.6335877862595419, Precision: 0.4620305068066262, Recall: 0.6335877862595419
CNN Model - Accuracy: 0.6335877862595419, Precision: 0.4718486686719356, Recall: 0.6335877862595419


In [3]:
import os
import numpy as np
import pandas as pd
import pretty_midi
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, precision_score, recall_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.callbacks import EarlyStopping
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')

# Custom wrapper
class KerasClassifierCustom:
    def __init__(self, build_fn=None, **sk_params):
        self.build_fn = build_fn
        self.sk_params = sk_params
        self.model = None

    def fit(self, X, y, **fit_params):
        self.model = self.build_fn(**self.filter_sk_params(self.build_fn))
        return self.model.fit(X, y, **fit_params)

    def predict(self, X):
        return self.model.predict(X)

    def score(self, X, y):
        y_pred = self.predict(X)
        y_pred_classes = np.argmax(y_pred, axis=1)
        y_true = np.argmax(y, axis=1)
        return accuracy_score(y_true, y_pred_classes)

    def get_params(self, deep=True):
        params = self.sk_params.copy()
        params['build_fn'] = self.build_fn
        return params

    def set_params(self, **params):
        for key, value in params.items():
            if key == "build_fn":
                self.build_fn = value
            else:
                self.sk_params[key] = value
        return self

    def filter_sk_params(self, fn):
        res = {}
        fn_params = fn.__code__.co_varnames[:fn.__code__.co_argcount]
        for name, value in self.sk_params.items():
            if name in fn_params:
                res[name] = value
        return res

# Model Building
def create_lstm_model(optimizer='adam', units=128, dropout_rate=0.2):
    model = Sequential()
    model.add(LSTM(units, input_shape=(max_sequence_length, 1), return_sequences=True))
    model.add(LSTM(units))
    model.add(Dropout(dropout_rate))
    model.add(Dense(4, activation='softmax'))
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    return model

def create_cnn_model(optimizer='adam', filters=64, kernel_size=3, dropout_rate=0.2):
    model = Sequential()
    model.add(Conv1D(filters, kernel_size=kernel_size, activation='relu', input_shape=(max_sequence_length, 1)))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dropout(dropout_rate))
    model.add(Dense(4, activation='softmax'))
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Function to get MIDI file paths from a directory
def get_midi_files_from_directory(directory_path):
    midi_files = []
    for root, _, files in os.walk(directory_path):
        for file in files:
            if file.endswith('.mid') or file.endswith('.midi'):
                midi_files.append(os.path.join(root, file))
    return midi_files

# Data Pre-processing
def preprocess_data(midi_files):
    midi_data = []
    for file in midi_files:
        try:
            midi = pretty_midi.PrettyMIDI(file)
            midi_data.append(midi)
        except Exception as e:
            print(f"Skipping file {file} due to error: {e}")
    return midi_data

# Improved Data Augmentation
def augment_data(sequence):
    shift = np.random.randint(-5, 6)
    stretched_sequence = np.interp(np.linspace(0, len(sequence), len(sequence) * 2), np.arange(len(sequence)), sequence)
    return np.clip(np.array(sequence) + shift, 0, 127), np.clip(stretched_sequence, 0, 127)

# Feature Extraction
def extract_features(midi_data, composer_label):
    features = []
    labels = []
    for midi in midi_data:
        if len(midi.instruments) > 0:
            notes = midi.instruments[0].notes
            pitch_sequence = [note.pitch for note in notes]
            features.append(pitch_sequence)
            labels.append(composer_label)
    return features, labels

# Normalize sequences
def normalize_sequences(sequences):
    return (sequences - np.min(sequences)) / (np.max(sequences) - np.min(sequences))

# Model evaluation function
def evaluate_model(model, X_test, y_test):
    y_pred = model.predict(X_test)
    y_pred_classes = np.argmax(y_pred, axis=1)
    y_true = np.argmax(y_test, axis=1)
    accuracy = accuracy_score(y_true, y_pred_classes)
    precision = precision_score(y_true, y_pred_classes, average='weighted', zero_division=0)
    recall = recall_score(y_true, y_pred_classes, average='weighted', zero_division=0)
    return accuracy, precision, recall

# Main
if __name__ == "__main__":
    # Base directory in Google Drive
    base_directory = '/content/drive/My Drive/Colab Notebooks/midiclassics'

    # Specify the subdirectories containing MIDI files
    composers = {
        'Bach': 0,
        'Beethoven': 1,
        'Chopin': 2,
        'Mozart': 3
    }

    midi_files = []
    labels = []

    # Get MIDI files from directories and assign labels
    for composer, label in composers.items():
        composer_directory = os.path.join(base_directory, composer)
        composer_files = get_midi_files_from_directory(composer_directory)
        if not composer_files:
            print(f"No MIDI files found for {composer}")
        composer_features, composer_labels = extract_features(preprocess_data(composer_files), label)
        if not composer_features:
            print(f"No features extracted for {composer}")
        for seq in composer_features:
            augmented_seq, stretched_seq = augment_data(seq)
            midi_files.append(seq)
            midi_files.append(augmented_seq)
            midi_files.append(stretched_seq)
            labels.extend([label] * 3)

    # Pad sequences to ensure they have the same length
    max_sequence_length = 1000  # You can adjust this value based on your data
    midi_files_padded = pad_sequences(midi_files, maxlen=max_sequence_length, padding='post', truncating='post')

    # Normalize sequences
    midi_files_padded = normalize_sequences(midi_files_padded)

    # Convert lists to numpy arrays
    midi_files = np.array(midi_files_padded)
    labels = np.array(labels)

    # Check if dataset is empty
    if midi_files.shape[0] == 0:
        raise ValueError("No data available after preprocessing. Check your MIDI files and preprocessing steps.")

    # Reshape data to fit model input requirements
    X = midi_files.reshape(midi_files.shape[0], midi_files.shape[1], 1)
    y = to_categorical(labels, num_classes=4)

    # Split data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Example usage with LSTM model
    lstm_model_custom = KerasClassifierCustom(build_fn=create_lstm_model, epochs=10, batch_size=32, verbose=0)
    cnn_model_custom = KerasClassifierCustom(build_fn=create_cnn_model, epochs=10, batch_size=32, verbose=0)

    # Hyperparameter tuning for LSTM model
    lstm_param_grid = {
        'epochs': [50],
        'batch_size': [32],
        'optimizer': ['rmsprop'],
        'units': [64],
        'dropout_rate': [0.2]
    }
    lstm_grid = GridSearchCV(estimator=lstm_model_custom, param_grid=lstm_param_grid, n_jobs=-1, cv=3, error_score='raise')
    lstm_grid_result = lstm_grid.fit(X_train, y_train)

    # Hyperparameter tuning for CNN model
    cnn_param_grid = {
        'epochs': [100],
        'batch_size': [32],
        'optimizer': ['adam'],
        'filters': [32, 64],
        'kernel_size': [3, 5],
        'dropout_rate': [0.5]
    }
    cnn_grid = GridSearchCV(estimator=cnn_model_custom, param_grid=cnn_param_grid, n_jobs=-1, cv=3, error_score='raise')
    cnn_grid_result = cnn_grid.fit(X_train, y_train)

    # Print the best hyperparameters for both models
    print(f"Best LSTM Model: {lstm_grid_result.best_score_} using {lstm_grid_result.best_params_}")
    print(f"Best CNN Model: {cnn_grid_result.best_score_} using {cnn_grid_result.best_params_}")

    # Evaluate the best LSTM model
    best_lstm_model = lstm_grid_result.best_estimator_.model
    lstm_accuracy, lstm_precision, lstm_recall = evaluate_model(best_lstm_model, X_test, y_test)

    # Evaluate the best CNN model
    best_cnn_model = cnn_grid_result.best_estimator_.model
    cnn_accuracy, cnn_precision, cnn_recall = evaluate_model(best_cnn_model, X_test, y_test)

    # Print evaluation results
    print(f"LSTM Model - Accuracy: {lstm_accuracy}, Precision: {lstm_precision}, Recall: {lstm_recall}")
    print(f"CNN Model - Accuracy: {cnn_accuracy}, Precision: {cnn_precision}, Recall: {cnn_recall}")


Mounted at /content/drive




Skipping file /content/drive/My Drive/Colab Notebooks/midiclassics/Beethoven/Anhang 14-3.mid due to error: Could not decode key with 3 flats and mode 255
Skipping file /content/drive/My Drive/Colab Notebooks/midiclassics/Mozart/Piano Sonatas/Nueva carpeta/K281 Piano Sonata n03 3mov.mid due to error: Could not decode key with 2 flats and mode 2


  pid = os.fork()






Best LSTM Model: 0.599943838744486 using {'batch_size': 32, 'dropout_rate': 0.2, 'epochs': 50, 'optimizer': 'rmsprop', 'units': 64}
Best CNN Model: 0.603490830191827 using {'batch_size': 32, 'dropout_rate': 0.5, 'epochs': 100, 'filters': 32, 'kernel_size': 5, 'optimizer': 'adam'}
LSTM Model - Accuracy: 0.6248636859323882, Precision: 0.46579632483147193, Recall: 0.6248636859323882
CNN Model - Accuracy: 0.6030534351145038, Precision: 0.5011940719600039, Recall: 0.6030534351145038


In [2]:
!pip install pretty_midi

Collecting pretty_midi
  Downloading pretty_midi-0.2.10.tar.gz (5.6 MB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/5.6 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/5.6 MB[0m [31m75.1 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m5.6/5.6 MB[0m [31m109.8 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.6/5.6 MB[0m [31m69.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting mido>=1.1.16 (from pretty_midi)
  Downloading mido-1.3.2-py3-none-any.whl.metadata (6.4 kB)
Collecting packaging~=23.1 (from mido>=1.1.16->pretty_midi)
  Downloading packaging-23.2-py3-none-any.whl.metadata (3.2 kB)
Downloading mido-1.3.2-py3-none-any.whl (54 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.6/54.6 kB[0m [31m5.1 MB/s[0m