In [7]:
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [25]:
def create_training_data_chat(file, n_mels, n_fft, hop_length, onset_list):
    """Build a per-frame onset-labelled table from the Mel spectrogram of one audio file.

    The audio is loaded with ``librosa.load`` (default arguments) and turned into
    a Mel spectrogram. Each row of the result describes one time slot of length
    ``hop_length / sr``:

    * when ``n_fft == hop_length`` there is one row per spectrogram frame and the
      feature is that frame's Mel column;
    * otherwise there is one row per frame starting at frame 1 and the feature is
      the difference ``previous_frame - current_frame``.

    Parameters
    ----------
    file : path-like
        Audio file handed to ``librosa.load``.
    n_mels, n_fft, hop_length : int
        Forwarded unchanged to ``librosa.feature.melspectrogram``.
    onset_list : iterable of float or None
        Onset positions, compared directly against the slot boundaries (which
        are expressed in ``hop_length / sr`` units, i.e. time, not sample
        indices). ``None`` is treated as "no onsets".

    Returns
    -------
    pandas.DataFrame
        Columns ``'Start sample'``, ``'End sample'``, ``'Spectogram'`` and
        ``'onset'``. Despite their names the first two columns hold times
        (frame index * ``hop_length / sr``). ``'onset'`` is 1 for every slot
        whose closed interval ``[start, end]`` contains at least one onset,
        else 0; an onset lying exactly on a shared boundary therefore marks
        both neighbouring slots.
    """
    # Load the audio file (the prints are kept so the notebook output is unchanged).
    print(file)
    y, sr = librosa.load(file)
    print(sr)

    # Compute the Mel spectrogram; axis 1 indexes frames.
    mel_spectrogram = librosa.feature.melspectrogram(
        y=y, sr=sr, n_fft=n_fft, hop_length=hop_length, n_mels=n_mels
    )
    num_frames = mel_spectrogram.shape[1]

    # Duration covered by one hop.
    # NOTE(review): librosa centres frames by default (center=True), so frame i
    # is presumably centred on i * hop_length rather than starting there —
    # confirm whether the slot boundaries should be shifted by half a window.
    frame_duration = hop_length / sr

    if n_fft == hop_length:
        # Non-overlapping windows: use every frame as-is.
        first_frame = 0
        features = [mel_spectrogram[:, i] for i in range(num_frames)]
    else:
        # Overlapping windows: use the change from the previous frame, so the
        # first usable row corresponds to frame 1.
        first_frame = 1
        features = [
            mel_spectrogram[:, i - 1] - mel_spectrogram[:, i]
            for i in range(1, num_frames)
        ]

    start_samples = np.arange(first_frame, num_frames) * frame_duration
    end_samples = start_samples + frame_duration

    # Mark every slot whose closed interval contains an onset.
    onset_flags = np.zeros(len(start_samples), dtype=np.int64)
    for onset in (onset_list if onset_list is not None else ()):
        onset_flags[(start_samples <= onset) & (onset <= end_samples)] = 1

    return pd.DataFrame({
        'Start sample': start_samples,
        'End sample': end_samples,
        'Spectogram': features,
        'onset': onset_flags,
    })


In [27]:
# Planned parameter sweep, to be repeated for every file:
#     n_fft: 128, hop_length: 64
#     n_fft: 128, hop_length: 128
#     n_fft: 256, hop_length: 128
#     n_fft: 256, hop_length: 256
#
# NOTE(review): the values used below (n_fft=64, hop_length=64) are not one of
# the planned combinations — confirm which setting this run is meant to cover.
# NOTE(review): n_mels=254 looks large next to n_fft=64; the recorded output of
# this cell shows a mel-filterbank warning and zero-valued leading bands —
# confirm these values are intended.

# Audio file under test.
file_path = '../Music_data/onsets_ISMIR_2012/onsets_ISMIR_2012/audio/ah_development_percussion_castagnet1.flac'

# Spectrogram settings for this run.
n_mels = 254
n_fft = 64
hop_length = 64

# Annotated onsets: first column of the header-less annotation file, as a plain list.
onset = pd.read_csv(
    '../Music_data/onsets_ISMIR_2012/onsets_ISMIR_2012/annotations/onsets/ah_development_percussion_castagnet1.onsets',
    header=None,
)[0].values.tolist()

# Build the labelled frame table and display it as the cell output.
test1 = create_training_data_chat(
    file=file_path,
    n_mels=n_mels,
    n_fft=n_fft,
    hop_length=hop_length,
    onset_list=onset,
)
test1

../Music_data/onsets_ISMIR_2012/onsets_ISMIR_2012/audio/ah_development_percussion_castagnet1.flac
22050


  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)


Unnamed: 0,Start sample,End sample,Spectogram,onset
0,0.000000,0.002902,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0
1,0.002902,0.005805,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0
2,0.005805,0.008707,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0
3,0.008707,0.011610,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0
4,0.011610,0.014512,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0
...,...,...,...,...
3096,8.986122,8.989025,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0
3097,8.989025,8.991927,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0
3098,8.991927,8.994830,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0
3099,8.994830,8.997732,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",0
