These functions were used in the composition of https://ronaldbustamante.bandcamp.com/album/convexidad-concavidad.

This notebook contains the functions defined in the notebook https://github.com/Atsvb/Audio-pattern-extractor-generator/blob/master/Audio%20Beat%20Clustering%20and%20Simple%20Sequence%20Generator.ipynb and additional functions to stretch or repitch the audio segments.

For the feature extraction we will use pyAudioAnalysis by Theodoros Giannakopoulos (https://github.com/tyiannak/pyAudioAnalysis)

In [1]:
import numpy as np
import librosa
import sys
from pyAudioAnalysis import audioBasicIO
from pyAudioAnalysis import audioFeatureExtraction
from sklearn.preprocessing import normalize



The next function is a slight modification of the mono PaulStretch function (https://github.com/paulnasca/paulstretch_python)

In [3]:
def paulstretch(samplerate,data,stretch,windowsize_seconds):
    """Time-stretch a mono signal by randomizing the phase of windowed spectra.

    Args:
        samplerate: Sampling rate of ``data`` in Hz.
        data: 1-D sequence of samples. The output is clipped to [-1, 1]
            before being scaled to int16, so the input is presumably expected
            to be normalized to that range -- TODO confirm against callers.
        stretch: Stretch factor; the read position advances by
            ``(windowsize / 2) / stretch`` samples per output chunk.
        windowsize_seconds: Analysis window length in seconds (the window is
            forced to an even number of samples, at least 16).

    Returns:
        1-D ``np.int16`` array holding ``windowsize / 2`` samples for every
        processed hop.

    Side effects:
        Writes a progress percentage to ``sys.stdout`` and consumes numbers
        from NumPy's global random generator.
    """
    # Work on a private float copy.  The fade-out below is applied in place;
    # without the copy it would modify the caller's array (callers pass slice
    # views of their source audio) and would fail outright for integer input.
    data = np.array(data, dtype=float)

    windowsize = max(int(windowsize_seconds*samplerate), 16)
    windowsize = int(windowsize/2)*2  # force an even window length
    half_windowsize = int(windowsize/2)

    # Fade out the last 50 ms (at least 16 samples) of the signal; inputs
    # shorter than the fade are zero-padded up to the fade length first.
    end_size = max(int(samplerate*0.05), 16)
    if len(data) <= end_size:
        data = np.pad(data, (0, end_size-len(data)), 'constant', constant_values=0)
    data[len(data)-end_size:] *= np.linspace(1, 0, end_size)

    start_pos = 0.0
    displace_pos = (windowsize*0.5)/stretch

    # Hann window applied both before the FFT and after the inverse FFT.
    window = 0.5-np.cos(np.arange(windowsize, dtype='float')*2.0*np.pi/(windowsize-1))*0.5

    old_windowed_buf = np.zeros(windowsize)
    # Per-sample gain applied to every overlapped chunk; presumably compensates
    # the amplitude modulation of the overlapping windows, as in the upstream
    # paulstretch script -- TODO confirm.
    hinv_sqrt2 = (1+np.sqrt(0.5))*0.5
    hinv_buf = hinv_sqrt2-(1.0-hinv_sqrt2)*np.cos(np.arange(half_windowsize, dtype='float')*2.0*np.pi/half_windowsize)

    # Collect the chunks and join them once at the end instead of growing the
    # output array with np.append on every hop.
    chunks = []
    while True:
        # Take the next window, zero-padded when it runs past the end.
        istart_pos = int(np.floor(start_pos))
        buf = data[istart_pos:istart_pos+windowsize]
        if len(buf) < windowsize:
            buf = np.append(buf, np.zeros(windowsize-len(buf)))
        buf = buf*window

        # Keep the magnitude spectrum and replace the phases by random ones.
        freqs = abs(np.fft.rfft(buf))
        ph = np.random.uniform(0, 2*np.pi, len(freqs))*1j
        freqs = freqs*np.exp(ph)

        buf = np.fft.irfft(freqs)
        buf *= window

        # Overlap-add the first half of this window with the second half of
        # the previous one.
        output = buf[0:half_windowsize]+old_windowed_buf[half_windowsize:windowsize]
        old_windowed_buf = buf

        output *= hinv_buf

        # Clamp to [-1, 1] before converting to 16-bit integers.
        output[output > 1.0] = 1.0
        output[output < -1.0] = -1.0

        chunks.append((output*32767.0).astype(np.int16))
        start_pos += displace_pos
        if start_pos >= len(data):
            break
        sys.stdout.write ("%d %% \r" % int(100.0*start_pos/len(data)))
        sys.stdout.flush()

    return np.concatenate(chunks)

In [4]:
def beatsinfo(bpm, samplerate, data):
    """Return how many whole beats fit in ``data`` and the beat length in samples.

    Args:
        bpm: Tempo in beats per minute.
        samplerate: Sampling rate of ``data`` in Hz.
        data: Array whose first dimension is the number of samples.

    Returns:
        Tuple ``(numberofbeats, step)``: the number of beats truncated to an
        integer, and the number of samples per beat truncated to an integer.
    """
    # Fractional number of beats contained in the signal.
    beats = bpm/60*(1/samplerate)*data.shape[0]
    # The builtin int replaces the np.int alias, which was removed from NumPy.
    numberofbeats = int(beats)
    step = int(data.shape[0]/beats)
    return numberofbeats, step

def framesize(step, numberofframes):
    """Return the analysis window length ``2 * step / (numberofframes + 1)``.

    With a hop of half this window, ``numberofframes`` windows span exactly
    ``step`` samples.  ``step`` may be a scalar or a NumPy array, in which
    case the result is computed element-wise.
    """
    doubled_step = 2*step
    half_windows = numberofframes+1
    return doubled_step/half_windows

def extract_feature_matrix(data, step, samplerate, numberofbeats, numberofframes,x):
    """Build a normalized feature matrix with one column per fixed-length beat.

    Args:
        data: Audio samples; beat ``n`` is ``data[n*step:(n+1)*step]``.
        step: Beat length in samples.
        samplerate: Sampling rate passed to the feature extractor.
        numberofbeats: Number of beats (columns) to extract.
        numberofframes: Used to derive the analysis window via ``framesize``.
        x: Indices of the feature rows to keep from the extractor output.

    Returns:
        Array with ``len(x) * frames_per_beat`` rows and ``numberofbeats``
        columns, passed through ``normalize(..., axis=1)``.

    Side effects:
        Prints the shape of the first beat's selected feature block.
    """
    window = framesize(step, numberofframes)
    hop = window*0.5

    # The first beat fixes the number of rows every later column must have.
    first_features, _ = audioFeatureExtraction.stFeatureExtraction(data[:step], samplerate, window, hop)
    selected = first_features[np.ix_(x)]
    numberofrows = selected.shape[0]*selected.shape[1]
    print(selected.shape)

    columns = [selected.reshape(numberofrows, 1)]
    for beat in range(1, numberofbeats):
        start = beat*step
        beat_features, _ = audioFeatureExtraction.stFeatureExtraction(data[start:start+step], samplerate, window, hop)
        columns.append(beat_features[np.ix_(x)].reshape(numberofrows, 1))

    return normalize(np.concatenate(columns, axis=1), axis=1)

def get_clusters_dict(labels,numberofclusters):
    """Map each cluster id (as a string key) to the indices carrying that label.

    Args:
        labels: Array of cluster labels, compared element-wise with ``==``.
        numberofclusters: Cluster ids ``0 .. numberofclusters-1`` become keys.

    Returns:
        Dict ``{"0": indices, "1": indices, ...}``; a cluster without members
        maps to an empty index array.
    """
    return {
        str(cluster_id): np.flatnonzero(labels == cluster_id)
        for cluster_id in range(numberofclusters)
    }

def generate_sequence(labels1, labels2, cluster_dict):
    """Pick, for every entry of ``labels1``, a random index from the matching cluster.

    Args:
        labels1: Cluster labels of the original file, one per beat.
        labels2: Cluster labels of the new file; only its length is used, as
            the range for the random fallback.
        cluster_dict: Maps ``str(label)`` to the indices of the new file that
            belong to that cluster.

    Returns:
        Integer array with one chosen index per entry of ``labels1``.
    """
    total = len(labels1)
    chosen = np.empty(total, dtype=int)
    for position in range(total):
        candidates = cluster_dict[str(labels1[position])]
        if len(candidates) != 0:
            pick = np.random.choice(candidates)
        else:
            # This cluster has no representative in the new file: fall back
            # to any index of the new file.
            pick = np.random.choice(len(labels2))
        chosen[position] = pick
    return chosen

def generate_data_array(data, beat_sequence, step):
    """Assemble a signal by copying fixed-length beats in the given order.

    Args:
        data: Samples of the new file; beat ``b`` is ``data[b*step:(b+1)*step]``.
        beat_sequence: Beat indices to copy, in output order.
        step: Beat length in samples.

    Returns:
        ``np.int16`` array of ``len(beat_sequence) * step`` samples.
    """
    total_beats = len(beat_sequence)
    assembled = np.zeros((total_beats*step,), dtype=np.int16)
    for slot in range(total_beats):
        source_start = beat_sequence[slot]*step
        target_start = slot*step
        assembled[target_start:target_start+step] = data[source_start:source_start+step]
    return assembled

def get_frames(data, sample_rate=44100):
    """Return the sample positions that delimit onset-based segments of ``data``.

    Args:
        data: 1-D array of samples.  It is divided by 32768 before onset
            detection, so it is presumably 16-bit PCM -- TODO confirm.
        sample_rate: Sampling rate handed to librosa's onset detector.

    Returns:
        Sorted array of unique sample indices made of 0, every detected
        (backtracked) onset and ``data.shape[0]``.
    """
    scaled = data/32768.0
    # The audio is passed by keyword: recent librosa releases no longer accept
    # it positionally, while the keyword form works on older releases too.
    onset_frames = librosa.onset.onset_detect(y=scaled, sr=sample_rate, backtrack=True)
    onset_samples = librosa.frames_to_samples(onset_frames)
    # Add 0 and the total sample count so the boundaries cover the whole
    # signal; np.unique sorts and drops duplicates (e.g. an onset at 0).
    boundaries = np.concatenate(([0], onset_samples, [data.shape[0]]))
    return np.unique(boundaries)

def extract_feature_matrix_w_onset(data, onset_array, samplerate, numberofframes, x):
    """Build a normalized feature matrix with one column per onset-delimited segment.

    Args:
        data: Audio samples.
        onset_array: Increasing sample indices; segment ``n`` is
            ``data[onset_array[n]:onset_array[n+1]]``.
        samplerate: Sampling rate passed to the feature extractor.
        numberofframes: Used with each segment length to derive that
            segment's analysis window via ``framesize``.
        x: Indices of the feature rows to keep from the extractor output.

    Returns:
        Array with one column per segment, passed through
        ``normalize(..., axis=1)``.  Every segment must yield the same number
        of frames as the first one, otherwise the reshape fails.

    Side effects:
        Prints the row count and the shape of the first segment's features.
    """
    steps = np.diff(onset_array)
    numberofbeats = len(steps)
    frame_size = framesize(steps, numberofframes)

    # Slice the first segment from the onset boundaries like every other
    # segment, so an onset_array that does not start at 0 is handled too.
    feat_0, _ = audioFeatureExtraction.stFeatureExtraction(
        data[onset_array[0]:onset_array[1]], samplerate, frame_size[0], frame_size[0]*0.5)

    X = feat_0[np.ix_(x)]
    numberofrows = X.shape[0]*X.shape[1]
    print(numberofrows)
    print("this is the shape of initial X: "+str(X.shape))

    # Gather the columns and join them once instead of re-allocating the
    # matrix for every segment.
    columns = [X.reshape(numberofrows, 1)]
    for n in range(1, numberofbeats):
        X_temp, _ = audioFeatureExtraction.stFeatureExtraction(
            data[onset_array[n]:onset_array[n+1]], samplerate, frame_size[n], frame_size[n]*0.5)
        columns.append(X_temp[np.ix_(x)].reshape(numberofrows, 1))

    return normalize(np.concatenate(columns, axis=1), axis=1)

def generate_data_array_w_onset(data02, beat_sequence, onset_sample_array, numberofbeats):
    """Concatenate onset-delimited segments of ``data02`` in the given order.

    Args:
        data02: Samples of the file the segments are taken from.
        beat_sequence: Segment indices to copy, in output order.
        onset_sample_array: Segment boundaries; segment ``b`` is
            ``data02[onset_sample_array[b]:onset_sample_array[b+1]]``.
        numberofbeats: Number of entries of ``beat_sequence`` to use.

    Returns:
        Flat array with the selected segments back to back (an empty
        ``np.int16`` array when ``numberofbeats`` is 0).
    """
    # The empty int16 seed keeps the result dtype identical to growing an
    # int16 array segment by segment.
    pieces = [np.empty(0, dtype=np.int16)]
    for position in range(numberofbeats):
        segment_id = beat_sequence[position]
        begin = onset_sample_array[segment_id]
        finish = onset_sample_array[segment_id+1]
        pieces.append(data02[begin:finish])
    return np.concatenate(pieces, axis=None)

def generate_data_array_w_onset_same_size(data02, beat_sequence, onset_sample_array1,onset_sample_array2):
    """Concatenate segments of ``data02``, each forced to the length of a target segment.

    Args:
        data02: Samples of the file the segments are taken from.
        beat_sequence: For every target segment, the index of the source
            segment to use.
        onset_sample_array1: Boundaries of the target segments; their
            differences give the required lengths.
        onset_sample_array2: Boundaries of the source segments in ``data02``.

    Returns:
        Flat array in which segment ``n`` has exactly
        ``onset_sample_array1[n+1] - onset_sample_array1[n]`` samples: shorter
        source segments are zero-padded at the end, longer ones truncated.
    """
    target_lengths = np.diff(onset_sample_array1)
    pieces = [np.empty(0, dtype=np.int16)]
    for position in range(len(onset_sample_array1)-1):
        source_id = beat_sequence[position]
        segment = data02[onset_sample_array2[source_id]:onset_sample_array2[source_id+1]]
        wanted = target_lengths[position]
        if len(segment) < wanted:
            segment = np.pad(segment, (0, wanted-len(segment)), 'constant', constant_values=0)
        else:
            segment = segment[:wanted]
        pieces.append(segment)
    return np.concatenate(pieces, axis=None)

def generate_data_array_w_stretch(data, labels_in,labels_out,frame_input, frame_output, class_seq, stretch_window=0.1,samplerate=44100.0):
    """Rebuild a signal from stretched segments of ``data``.

    For every input beat ``n`` the segment ``class_seq[n]`` of ``data`` is
    stretched with ``paulstretch`` by the ratio of the two segment lengths and
    then truncated or zero-padded to the input beat's length.

    Args:
        data: Samples the segments are taken from.
        labels_in: Only its length is used, as the number of beats to build.
        labels_out: Unused; kept for interface compatibility.
        frame_input: Boundaries (in samples) of the target segments.
        frame_output: Boundaries (in samples) of the segments in ``data``.
        class_seq: For every target beat, the index of the source segment.
        stretch_window: Window size in seconds handed to ``paulstretch``.
        samplerate: Sampling rate handed to ``paulstretch``.

    Returns:
        Tuple ``(data_new, frame_sizes)``: the concatenated ``np.int16``
        signal and a float array with the length of every generated segment.
    """
    numberofbeats = len(labels_in)
    steps_in = np.diff(frame_input)
    steps_out = np.diff(frame_output)
    frame_sizes = np.zeros(numberofbeats)

    # Collect the segments and join them once; the empty int16 seed covers
    # the case of zero beats.
    pieces = [np.empty(0, dtype=np.int16)]
    for n in range(numberofbeats):
        source = class_seq[n]
        target_len = steps_in[n]
        stretch = target_len/steps_out[source]
        # Pass a float copy rather than a view: paulstretch fades the end of
        # its input in place, which would otherwise alter ``data`` itself
        # every time a segment is used.
        segment = np.array(data[frame_output[source]:frame_output[source+1]], dtype=float)
        stretched = paulstretch(samplerate, segment, stretch, stretch_window)
        if len(stretched) >= target_len:
            stretched = stretched[:target_len]
        else:
            # Pad with int16 zeros so the output dtype does not silently
            # switch to float whenever a segment comes out too short.
            padded = np.zeros(target_len, dtype=np.int16)
            padded[:len(stretched)] = stretched
            stretched = padded
        frame_sizes[n] = len(stretched)
        pieces.append(stretched)
    return np.concatenate(pieces, axis=None), frame_sizes



#extract features and create a matrix with shape number of features x number of points
def extract_feature_matrix_w_onset_pitch(data, onset_array, samplerate, numberofframes, x):
    """Build the onset-based feature matrix plus a per-segment pitch index.

    Args:
        data: Audio samples.
        onset_array: Increasing sample indices; segment ``n`` is
            ``data[onset_array[n]:onset_array[n+1]]``.
        samplerate: Sampling rate passed to the feature extractor.
        numberofframes: Used with each segment length to derive that
            segment's analysis window via ``framesize``.
        x: Indices of the feature rows to keep from the extractor output.

    Returns:
        Tuple ``(X, pitch)``.  ``X`` has one column per segment and is passed
        through ``normalize(..., axis=1)``.  ``pitch[n]`` is the position,
        counted from the sixth selected feature row onwards, of the row with
        the largest mean over the segment's frames (presumably those rows are
        chroma features -- TODO confirm against the ``x`` used by callers).

    Side effects:
        Prints the shape of the first segment's selected features.
    """
    steps = np.diff(onset_array)
    numberofbeats = len(steps)
    pitch = np.zeros(numberofbeats)
    frame_size = framesize(steps, numberofframes)

    # Slice the first segment from the onset boundaries like every other
    # segment, so an onset_array that does not start at 0 is handled too.
    feat_0, _ = audioFeatureExtraction.stFeatureExtraction(
        data[onset_array[0]:onset_array[1]], samplerate, frame_size[0], frame_size[0]*0.5)
    X = feat_0[np.ix_(x)]
    pitch[0] = np.argmax(np.mean(X[5:, :], axis=1))
    numberofrows = X.shape[0]*X.shape[1]
    print("this is the shape of initial X: "+str(X.shape))

    # Gather the columns and join them once instead of re-allocating the
    # matrix for every segment.
    columns = [X.reshape(numberofrows, 1)]
    for n in range(1, numberofbeats):
        X_temp, _ = audioFeatureExtraction.stFeatureExtraction(
            data[onset_array[n]:onset_array[n+1]], samplerate, frame_size[n], frame_size[n]*0.5)
        X_temp = X_temp[np.ix_(x)]
        pitch[n] = np.argmax(np.mean(X_temp[5:, :], axis=1))
        columns.append(X_temp.reshape(numberofrows, 1))

    return normalize(np.concatenate(columns, axis=1), axis=1), pitch