<a href="https://colab.research.google.com/github/JamesBolt22/Supervised_Contrastive_learning_for_onset_detection/blob/main/ProcessData.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

This code was taken from https://github.com/rohitma38/cnn-onset-detection with small adaptations to produce inputs for an onset detection CNN.

In [None]:
import numpy as np
import os
import librosa
import tensorflow as tf
import h5py

  _resample_loop_p(x, t_out, interp_win, interp_delta, num_table, scale, y)


In [None]:
#function to zero pad ends of spectrogram
def zeropad2d(x,n_frames):
    """Pad a 2-D array with ``n_frames`` zero columns on both sides.

    Args:
        x: 2-D array; rows are kept as they are, columns are extended.
        n_frames: number of zero columns added before the first and after
            the last column of ``x``.

    Returns:
        Array with ``x.shape[0]`` rows and ``x.shape[1] + 2*n_frames``
        columns. The padding is float64, so the result is promoted
        accordingly.
    """
    #the same block of zeros is used for the leading and trailing edge
    edge=np.zeros((x.shape[0],n_frames))
    return np.concatenate((edge,x,edge),axis=1)

#function to create N-frame overlapping chunks of the full audio spectrogram  
def makechunks(x,duration):
    """Cut a spectrogram into overlapping windows of ``duration`` frames.

    Window ``i`` holds columns ``i`` to ``i+duration-1`` of ``x``; consecutive
    windows are shifted by one column.

    Args:
        x: 2-D array whose second axis is the one being windowed.
        duration: number of columns per window.

    Returns:
        float64 array of shape ``(x.shape[1], x.shape[0], duration)``. The
        first axis always has one entry per input column; entries for which no
        complete window exists (the trailing ``duration-1`` ones, or all of
        them when ``duration > x.shape[1]``) are left as zeros.
    """
    n_cols=x.shape[1]
    y=np.zeros([n_cols,x.shape[0],duration])
    #the last complete window starts at n_cols-duration, so that start index
    #must be included; clamp so short inputs or duration<1 never index past y
    n_windows=min(n_cols,max(n_cols-duration+1,0))
    for i_frame in range(n_windows):
        y[i_frame]=x[:,i_frame:i_frame+duration]
    return y

#data dirs
audio_dir='/content/drive/MyDrive/onsets_audio/onsets/audio'
onset_dir='/content/drive/MyDrive/onsets_annotations/onsets/annotations/onsets'
save_dir='/content/drive/MyDrive/Processed_data_2'

#data stats for normalization
#row 0 holds the means and row 1 the standard deviations; each is indexed
#again per spectrogram resolution when the songs are normalised
stats=np.load('/content/drive/MyDrive/means_stds.npy')
means=stats[0]
stds=stats[1]

#context parameters
contextlen=7 #+- frames
duration=2*contextlen+1

#load a list of song names for processing
#np.loadtxt returns a 0-d array when the file has a single entry, which cannot
#be iterated; atleast_1d keeps the one-song case working
songlist=np.atleast_1d(np.loadtxt('/content/drive/MyDrive/SongNameList.txt',dtype=str))

#audio format type
audio_format='.wav'

#initialisation for specific lists
labels_master={}
weights_master={}
filelist=[]

#all_data contains all the created inputs, the number of data points is set up for the synthesized data set
#NOTE: np.empty does not initialise memory, so rows beyond the last one
#written (tracked by idx) hold arbitrary values
number_of_data_points = 51788 
all_data = np.empty((number_of_data_points, 80,15,3))
#maps each chunk's save path to its row in all_data
all_data_dict = {}
idx = 0

#runs through each song: builds normalised log-mel chunks at three FFT sizes,
#stores them in all_data, saves each chunk to disk and records its label/weight
for item in songlist:
    print(item)

    #skip songs without annotations before paying for the audio load
    onset_path=os.path.join(onset_dir,item+'.onsets')
    if not os.path.exists(onset_path): continue

    #load audio and onsets
    x,fs=librosa.load(os.path.join(audio_dir,item+audio_format), sr=44100)
    onsets=np.loadtxt(onset_path)

    #one chunked spectrogram per FFT size; all share hop length and mel bands
    chunks_per_res=[]
    for i_res,n_fft in enumerate((1024,2048,4096)):
        #get mel spectrogram (audio passed by keyword: newer librosa releases
        #no longer accept it positionally)
        melgram=librosa.feature.melspectrogram(y=x,sr=fs,n_fft=n_fft, hop_length=441,n_mels=80, fmin=27.5, fmax=16000)

        #log scaling (the small offset avoids log10(0))
        melgram=10*np.log10(1e-10+melgram)

        #normalize each mel band with the statistics of this resolution
        melgram=(melgram-np.atleast_2d(means[i_res]).T)/np.atleast_2d(stds[i_res]).T

        #zero pad ends so every frame has contextlen frames on both sides
        melgram=zeropad2d(melgram,contextlen)

        #make chunks
        chunks_per_res.append(makechunks(melgram,duration))

    n_chunks=chunks_per_res[0].shape[0]

    #fail with a clear message, before any file of this song is written,
    #instead of an IndexError half-way through the chunk loop
    if idx+n_chunks>number_of_data_points:
        raise ValueError('all_data holds %d chunks but %s needs rows %d-%d; increase number_of_data_points'
                         % (number_of_data_points,item,idx,idx+n_chunks-1))

    #generate song labels
    hop_dur=10e-3
    labels=np.zeros(n_chunks)
    weights=np.ones(n_chunks)
    #atleast_1d: loadtxt yields a 0-d array for a file with a single onset
    idxs=np.atleast_1d(np.round(onsets/hop_dur)).astype(int)
    idxs=idxs[(idxs>=0)&(idxs<n_chunks)]

    #target smearing: frames next to an onset are positives with reduced weight
    #neighbours outside the song are dropped; index -1 would otherwise wrap
    #around and mark the last frame
    neighbours=np.concatenate((idxs-1,idxs+1))
    neighbours=neighbours[(neighbours>=0)&(neighbours<n_chunks)]
    labels[neighbours]=1
    weights[neighbours]=0.25

    #annotated frames are written last so that an onset adjacent to another
    #onset keeps its full weight
    labels[idxs]=1
    weights[idxs]=1

    #save
    savedir=os.path.join(save_dir,item)
    os.makedirs(savedir,exist_ok=True)

    #write the three resolutions into the last axis of this song's rows of
    #all_data (song_data is a view, so this fills all_data directly)
    song_data=all_data[idx:idx+n_chunks]
    for i_res,chunks in enumerate(chunks_per_res):
        song_data[...,i_res]=chunks

    #runs through each mel spectrum segment for the current song
    for i_chunk in range(n_chunks):

        #creates the savepath
        #NOTE: np.save appends '.npy' to names not ending in it, so the file on
        #disk is '<i_chunk>.pt.npy' while the dictionaries are keyed by '<i_chunk>.pt'
        savepath=os.path.join(savedir,str(i_chunk)+'.pt')

        #saves the chunk and remembers its row in all_data
        all_data_dict[savepath] = idx
        np.save(savepath, song_data[i_chunk])

        #records the chunk name with its label and weight
        filelist.append(savepath)
        labels_master[savepath]=labels[i_chunk]
        weights_master[savepath]=weights[i_chunk]
        idx += 1

#saves the final files
#the dictionaries are stored by np.save as pickled object arrays, so reading
#them back needs np.load(..., allow_pickle=True).item()
#NOTE(review): labels_master is written to 'Final_data' while the other
#outputs go to 'Sythn_data' - confirm this is intended
np.save('/content/drive/MyDrive/Final_data/labels_master',labels_master)
np.save('/content/drive/MyDrive/Sythn_data/weights_master',weights_master)
np.save('/content/drive/MyDrive/Sythn_data/all_data_dict',all_data_dict)

#saves the total data list
#only the first idx rows were filled; the rest of the np.empty buffer is
#uninitialised and is not referenced by all_data_dict, so it is left out
#the with-statement closes the file, no explicit close is needed
with h5py.File('/content/drive/MyDrive/Sythn_data/all_data.h5', 'w') as hf:
    hf.create_dataset("input_data",  data=all_data[:idx])

ah_development_guitar_2684_TexasMusicForge_Dandelion_pt1
ah_development_oud_Diverse_-_01_-_Taksim_pt1
ah_test_cello_03-Cello_Sonata_3__I_Allegro_ma_non_tanto_pt1
ah_test_cello_14_VioloncelloTaksim_pt1
ah_test_guitar_guitar2
ah_test_guitar_Guitar_Licks_51-10
ah_test_kemence_08_-_HicazTaksim_cut
ah_test_kemence_10_huseyni_taksim_ve_cecenkizi_cut
ah_test_kemence_11_RastTaksim_Kemence
ah_test_ney_ne_se01
ah_test_oud_ud_taksimleri_-_17_-_ussak_taksim
ah_test_sax_Tubby_Hayes_-_The_Eighth_Wonder_-_11_-_Unidentified_12_Bar_Theme_pt1
ah_test_trumpet_waldhorn33_-_Paloseco_pt1
al_Albums-AnaBelen_Veneo-13(1.8-11.8)
sb_Albums-Chrisanne3-07(3.0-13.0)
sb_Albums-I_Like_It2-01(13.1-23.1)
al_Albums-Latin_Jam2-03(6.1-16.1)
al_Albums-Latino_Latino-09(8.8-18.8)
sb_Albums-Step_By_Step-09(2.0-12.0)
api_3-you_think_too_muchb
api_RM-C003
ff123_ItCouldBeSweet
ff123_kraftwerk
jpb_Jaillet70
jpb_Jaillet75
jpb_metheny
jpb_wilco
lame_t1
lame_vbrtest
al_Media-103917(13.5-23.5)
al_Media-104218(9.3-19.3)
al_Media-10540