<a href="https://colab.research.google.com/github/Stalinosmj/BeetZart/blob/main/revision.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Importing Libraries😊😊

In [11]:
from music21 import converter, instrument, note, chord
from midi2audio import FluidSynth
import wave
import numpy as np
from scipy.io.wavfile import write
import librosa
import librosa.display
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

# Function to convert MIDI to Raw audio

In [12]:
# Load MIDI files and convert to raw audio
def load_midi_to_audio(midi_file, audio_file='output.wav'):
    """Render a MIDI file to a WAV file with FluidSynth and return its raw frames.

    Args:
        midi_file: Path of the MIDI file to render.
        audio_file: Path the rendered WAV is written to. Defaults to
            'output.wav' (relative to the current working directory). Pass a
            distinct path per call to keep more than one rendering on disk.

    Returns:
        bytes: All frames of the rendered WAV exactly as ``wave`` reads them.
        This is undecoded sample data, not a NumPy array.
    """
    # Create a synthesizer with midi2audio's default settings.
    synth = FluidSynth()

    # Convert the MIDI file to an audio file on disk.
    synth.midi_to_audio(midi_file, audio_file)

    # Read back every frame of the rendered file as raw bytes.
    with wave.open(audio_file, 'rb') as wave_file:
        return wave_file.readframes(wave_file.getnframes())

# Functions to create spectrograms

In [13]:
# Create spectrograms using short-time Fourier transform
def create_stft_spectrogram(audio, window_size, hop_length):
    """Return the magnitude STFT spectrogram of ``audio``.

    Args:
        audio: Audio samples in a form accepted by ``librosa.stft``.
        window_size: FFT frame length in samples (forwarded as ``n_fft``).
        hop_length: Number of samples between successive frames.

    Returns:
        The element-wise magnitude (``np.abs``) of the complex STFT.
    """
    # `window_size` is a frame length, so it belongs in `n_fft`. librosa's
    # `window` argument selects the window *function*; passing a bare number
    # there does not set the frame size.
    stft = librosa.stft(audio, n_fft=window_size, hop_length=hop_length)
    return np.abs(stft)


In [14]:
# Create spectrograms using constant-Q transform
def create_cqt_spectrogram(audio, hop_length):
    """Return the magnitude constant-Q spectrogram of ``audio``.

    Args:
        audio: Audio samples in a form accepted by ``librosa.cqt``.
        hop_length: Number of samples between successive frames.

    Returns:
        The element-wise magnitude (``np.abs``) of the complex CQT.
    """
    # Take the magnitude of the complex transform in a single expression.
    return np.abs(librosa.cqt(audio, hop_length=hop_length))


# Audio and MIDI splitting

In [15]:
# Split audio and MIDI into one-second windows
def split_into_one_second_windows(audio, midi, sample_rate):
    """Cut ``audio`` and ``midi`` into consecutive chunks of ``sample_rate`` items.

    The number of chunks is derived from ``len(audio)`` only; items of
    ``audio`` beyond the last whole chunk are not returned. ``midi`` is sliced
    at the same offsets as ``audio``.

    Args:
        audio: Sliceable sequence of audio samples.
        midi: Sliceable sequence cut at the same offsets as ``audio``.
        sample_rate: Chunk length, i.e. the number of items per second.

    Returns:
        Tuple ``(audio_windows, midi_windows)`` of two lists of slices.
    """
    window_count = int(len(audio) / sample_rate)
    # Start offset of every whole window.
    offsets = [index * sample_rate for index in range(window_count)]
    audio_windows = [audio[offset:offset + sample_rate] for offset in offsets]
    midi_windows = [midi[offset:offset + sample_rate] for offset in offsets]
    return audio_windows, midi_windows

In [16]:
# Split audio and MIDI into eighth-second windows
def split_into_eighth_second_windows(audio, midi, sample_rate):
    """Cut ``audio`` and ``midi`` into consecutive eighth-of-a-second chunks.

    Each chunk holds ``sample_rate // 8`` items. The chunk count is derived
    from ``len(audio)`` only; items of ``audio`` beyond the last whole chunk
    are not returned. ``midi`` is sliced at the same offsets as ``audio``.

    Args:
        audio: Sliceable sequence of audio samples.
        midi: Sliceable sequence cut at the same offsets as ``audio``.
        sample_rate: Number of items per second.

    Returns:
        Tuple ``(audio_windows, midi_windows)`` of two lists of slices.

    Raises:
        ValueError: If ``sample_rate`` is below 8, so a window would be empty.
    """
    # Slice bounds must be integers; `sample_rate / 8` is a float (22050 / 8
    # is 2756.25) and would make every slice raise TypeError.
    window_length = int(sample_rate) // 8
    if window_length <= 0:
        raise ValueError("sample_rate must be at least 8 to form eighth-second windows")

    window_count = len(audio) // window_length
    audio_windows = []
    midi_windows = []
    for index in range(window_count):
        start = index * window_length
        end = start + window_length
        audio_windows.append(audio[start:end])
        midi_windows.append(midi[start:end])
    return audio_windows, midi_windows

# Load training data

In [17]:
# Example usage
midi_file = '/content/maestro-v3.0.0_2004_MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_05_Track05_wav (1).midi'
sample_rate = 22050

# Render the MIDI file. load_midi_to_audio returns the undecoded WAV frame
# bytes and leaves the rendering on disk as 'output.wav'.
raw_audio = load_midi_to_audio(midi_file)

# The librosa transforms and the sample-based window splitters need a float
# sample array, not raw bytes, so decode the rendered file as mono audio
# resampled to `sample_rate`.
audio, _ = librosa.load('output.wav', sr=sample_rate, mono=True)

# Spectrogram Creation

In [None]:
# Create spectrograms using one-second windows
# NOTE(review): the second argument is `midi_file`, the path string assigned in
# the example-usage cell, so `midi_windows` receives slices of that string
# rather than note data. Confirm what the labels are meant to be.
audio_windows, midi_windows = split_into_one_second_windows(audio, midi_file, sample_rate)
spectrograms = []
for audio_window in audio_windows:
    # Compute both time-frequency representations with the same hop length.
    stft_spectrogram = create_stft_spectrogram(audio_window, window_size=2048, hop_length=512)
    cqt_spectrogram = create_cqt_spectrogram(audio_window, hop_length=512)
    # Combine the two representations along a new trailing axis.
    # NOTE(review): np.stack needs both arrays to have the same shape; the STFT
    # and CQT outputs presumably differ in their number of frequency bins.
    # TODO confirm this stacks without error.
    spectrogram = np.stack([stft_spectrogram, cqt_spectrogram], axis=-1)
    spectrograms.append(spectrogram)

In [None]:
# Create spectrograms using eighth-second windows
# This cell reassigns `audio_windows`, `midi_windows` and `spectrograms`,
# replacing whatever the one-second cell stored in them.
# NOTE(review): the second argument is `midi_file`, a path string, so
# `midi_windows` receives slices of that string rather than note data.
# Confirm what the labels are meant to be.
audio_windows, midi_windows = split_into_eighth_second_windows(audio, midi_file, sample_rate)
spectrograms = []
for audio_window in audio_windows:
    # Compute both time-frequency representations with the same hop length.
    stft_spectrogram = create_stft_spectrogram(audio_window, window_size=2048, hop_length=512)
    cqt_spectrogram = create_cqt_spectrogram(audio_window, hop_length=512)
    # NOTE(review): np.stack needs both arrays to have the same shape; the STFT
    # and CQT outputs presumably differ in their number of frequency bins.
    # TODO confirm this stacks without error.
    spectrogram = np.stack([stft_spectrogram, cqt_spectrogram], axis=-1)
    # Collect this window's features; without the append the list stays empty
    # and there is nothing to train on.
    spectrograms.append(spectrogram)

# CNN Model definition

In [None]:
# Train CNN on spectrograms
def train_cnn(spectrograms, labels, validation_data=None):
    """Build, compile and fit a small binary-classification CNN.

    Args:
        spectrograms: Array whose first axis indexes samples. The remaining
            axes (``spectrograms.shape[1:]``) are used as the Conv2D input
            shape.
        labels: Targets for the single sigmoid output unit.
        validation_data: Optional ``(val_spectrograms, val_labels)`` tuple
            forwarded to ``model.fit``. When omitted, the model is fitted
            without a validation set.

    Returns:
        The fitted ``Sequential`` model, so callers can evaluate or save it.
    """
    model = Sequential()
    model.add(Conv2D(32, (3, 3), activation='relu', input_shape=spectrograms.shape[1:]))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(Flatten())
    model.add(Dense(64, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    # Validation data is an explicit argument: the previous version read the
    # names `val_spectrograms` / `val_labels`, which this function never
    # defined, so the call failed with NameError.
    model.fit(spectrograms, labels, epochs=10, batch_size=32,
              validation_data=validation_data)

    return model


# Training the Model

In [None]:
# Train CNN on one-second window spectrograms
# NOTE(review): `spectrograms` and `midi_windows` are reassigned by both the
# one-second and the eighth-second spectrogram cells, so this call trains on
# whichever of those cells ran most recently. Confirm the one-second cell is
# the last one executed before this call.
train_cnn(np.array(spectrograms), np.array(midi_windows))

In [None]:

# Train CNN on eighth-second window spectrograms
# NOTE(review): this call is identical to the one-second training call; which
# data it sees depends on which spectrogram cell last assigned `spectrograms`
# and `midi_windows`. Confirm the eighth-second cell ran immediately before.
train_cnn(np.array(spectrograms), np.array(midi_windows))