<a href="https://colab.research.google.com/github/CeciliaMarson/CPAC-P3-Kandinsky/blob/main/pyscript/feature_kandinsky.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import librosa, librosa.display
import scipy.fftpack as sf
from scipy import signal
import matplotlib.pyplot as plt



In [79]:
def feature_extractor(signal, frame_length, sample_rate=None):
    """Extract the per-frame audio features that drive the drawing.

    Parameters
    ----------
    signal : np.ndarray
        Audio time series. Assumed to be mono so that every librosa feature
        comes back as a ``(1, n_frames)`` row -- TODO confirm for stereo input.
    frame_length : int
        Hop length in samples; also used as the analysis window wherever the
        underlying call accepts one.
    sample_rate : int, optional
        Sampling rate of ``signal``. When omitted, the notebook-level global
        ``sr`` is used, which is what this function has always relied on.

    Returns
    -------
    features : np.ndarray
        The eight feature rows stacked in this order: shape code,
        zero-crossing rate, spectral centroid, the three chroma values
        returned by ``extract_max_chroma`` for ``num`` = 0, 1, 2, pitch and
        RMS energy.
    f_min : int
        Index of the pitch class that is least often the strongest one
        (first such index on ties).

    Raises
    ------
    ValueError
        If the individual feature rows do not share the same shape.
    """
    if sample_rate is None:
        # Backward compatibility: fall back to the global `sr` created by the
        # cell that loads the audio file.
        sample_rate = sr

    # Onset strength decides whether a frame is drawn as a line or as a figure.
    onset_env = librosa.onset.onset_strength(
        y=signal, sr=sample_rate, hop_length=frame_length
    )
    on_set = np.expand_dims(onset_env, axis=0)

    # Zero-crossing rate drives the stroke. Keyword arguments are required by
    # recent librosa releases.
    zcr = librosa.feature.zero_crossing_rate(
        y=signal, frame_length=frame_length, hop_length=frame_length
    )

    # Spectral centroid drives the first dimension. The sampling rate is
    # passed so that the centroid is expressed in real Hz.
    s_centroid = librosa.feature.spectral_centroid(
        y=signal,
        sr=sample_rate,
        n_fft=frame_length,
        hop_length=frame_length,
        win_length=frame_length,
    )

    # The strongest note of the chromagram chooses the shape; together with
    # the second and third values it also chooses the colour.
    chroma_key0, max_c = extract_max_chroma(signal, frame_length, 0)
    chroma_key1 = extract_max_chroma(signal, frame_length, 1)
    chroma_key2 = extract_max_chroma(signal, frame_length, 2)

    chroma0 = np.expand_dims(chroma_key0, axis=0)
    chroma1 = np.expand_dims(chroma_key1, axis=0)
    chroma2 = np.expand_dims(chroma_key2, axis=0)
    max_ch = np.expand_dims(max_c, axis=0)

    # Count how often each of the 12 pitch classes is the strongest one and
    # keep the least frequent of them.
    note_counts = np.bincount(np.asarray(max_c, dtype=int).ravel(), minlength=12)
    f_min = np.argmin(note_counts)

    # Force a (1, n_frames) row regardless of whether assign_shape returns a
    # flat vector or a column, so it stacks with the other features.
    shapes = np.asarray(assign_shape(on_set, max_ch, f_min)).reshape(1, -1)

    # Pitch is used for the second dimension and for the position.
    pitch = np.expand_dims(detect_pitch(signal, frame_length), axis=0)

    # Energy is used to compute the transparency of the image.
    energy = librosa.feature.rms(y=signal, hop_length=frame_length)

    features = [
        shapes,
        zcr,
        s_centroid,
        chroma0,
        chroma1,
        chroma2,
        pitch,
        energy,
    ]

    # np.stack fails loudly on mismatching rows instead of silently building
    # a ragged object array.
    return np.stack(features), f_min




In [88]:
def assign_shape(onset, max_ch, f_min, onset_threshold=5):
    """Pick a shape code for every frame.

    Parameters
    ----------
    onset : array-like
        Onset strength per frame; any layout (``(n,)``, ``(1, n)``, ``(n, 1)``)
        is accepted and flattened.
    max_ch : array-like
        Index of the strongest pitch class per frame, same number of entries
        as ``onset``.
    f_min : int
        Pitch class that triggers the grid when it occurs on a "line" frame.
    onset_threshold : float, optional
        Frames whose onset strength is strictly above this value are drawn as
        lines, the others as filled figures. Defaults to 5.

    Returns
    -------
    np.ndarray
        Flat integer array with one code per frame:
        ``note % 3`` (0-2) for lines, ``note % 6 + 3`` (3-8) for filled
        figures and ``9`` for the special grid case.

    Raises
    ------
    ValueError
        If ``onset`` and ``max_ch`` do not contain the same number of frames.
    """
    onset_flat = np.asarray(onset).reshape(-1)
    note_flat = np.asarray(max_ch).reshape(-1).astype(int)
    if onset_flat.size != note_flat.size:
        raise ValueError(
            "onset and max_ch must describe the same number of frames "
            f"({onset_flat.size} != {note_flat.size})"
        )

    is_line = onset_flat > onset_threshold

    # Lines use three codes, filled figures the next six.
    shapes = np.where(is_line, note_flat % 3, note_flat % 6 + 3)

    # Special condition for the grid: a line frame on the f_min pitch class.
    shapes[is_line & (note_flat == f_min)] = 9

    # Always a homogeneous 1-D array of scalars, so it can be stacked with
    # the other per-frame features.
    return shapes

In [61]:
def create_dict(arr, f_min):
    """Convert the feature matrix into one drawing-parameter dict per frame.

    Parameters
    ----------
    arr : np.ndarray
        Feature matrix with one row per feature and one column per frame.
        Rows 0-7 are read as: shape code, stroke source, Y-dimension source,
        three colour values, X-dimension source and transparency source.
    f_min : int
        Accepted for interface compatibility; not used by the current mapping.

    Returns
    -------
    list of dict
        One dictionary per column of ``arr`` with the keys ``Shape``, ``R``,
        ``B``, ``G``, ``Y_dim``, ``Stroke``, ``X_dim`` and ``Transparency``.
    """
    frames = []
    for column in arr.T:
        frame = {}
        frame['Shape'] = int(column[0])                    # based on the note
        frame['R'] = int(column[3] * 255)                  # first chroma value (chord)
        frame['B'] = int(column[4] * 255)                  # second chroma value (chord)
        frame['G'] = int(column[5] * 255)                  # third chroma value (chord)
        frame['Y_dim'] = int(column[2])                    # based on brightness
        frame['Stroke'] = int(column[1] * 100)             # based on "roughness"
        frame['X_dim'] = int(column[6])                    # based on pitch
        frame['Transparency'] = float(column[7] * 10)      # based on the energy of the track
        frames.append(frame)
    return frames

In [54]:
def extract_max_chroma(signal, frame_length, num, sr=22050):
    """Return, for every frame, the ``num``-th largest chroma value.

    Parameters
    ----------
    signal : np.ndarray
        Audio time series.
    frame_length : int
        Hop length and window length, in samples, of the chromagram.
    num : int
        Rank of the value to extract: 0 is the largest chroma value of each
        frame, 1 the second largest, 2 the third largest, and so on.
    sr : int, optional
        Sampling rate handed to ``librosa.feature.chroma_stft``. Defaults to
        22050, the rate librosa assumes when none is given.

    Returns
    -------
    np.ndarray or tuple of np.ndarray
        For ``num == 0`` a tuple ``(values, strongest)`` where ``strongest``
        holds the index of the largest chroma bin of each frame; for any other
        ``num`` only ``values``.

    Notes
    -----
    Earlier revisions sorted each frame in ascending order and therefore
    returned the ``num``-th *smallest* value, contradicting the callers that
    treat the results as the top three notes of the chord.

    NOTE(review): chroma_stft normalises every frame by its maximum by
    default, so the ``num == 0`` values will usually be 1.0 -- confirm that
    this is the intended colour mapping.
    """
    chroma = librosa.feature.chroma_stft(
        y=signal, sr=sr, hop_length=frame_length, win_length=frame_length
    )

    # Rows are chroma bins and columns are frames: sort each column and flip
    # it so that row 0 holds the largest value of every frame.
    ranked = np.sort(chroma, axis=0)[::-1]
    values = ranked[num]

    if num == 0:
        return values, np.argmax(chroma, axis=0)
    return values

In [55]:
def detect_pitch(y, frame_length, sr=22050):
    """Estimate one pitch value per frame with ``librosa.piptrack``.

    Parameters
    ----------
    y : np.ndarray
        Audio time series.
    frame_length : int
        Hop length and window length of the STFT, in samples. It was
        previously ignored in favour of a hard-coded 2048, which only lined
        up with the other features for that one value.
    sr : int, optional
        Sampling rate handed to ``piptrack``. Defaults to 22050, the rate
        librosa assumes when none is given.

    Returns
    -------
    np.ndarray
        For every frame, the pitch candidate (between 80 and 1000 Hz) with the
        largest magnitude. Frames with no candidate yield the value stored in
        bin 0 of ``pitches``.
    """
    # The FFT size never drops below 2048 (the value used so far) and always
    # fits the requested window.
    n_fft = max(2048, frame_length)

    # The original passed the string 'false' for `center`, which is truthy,
    # so frames have always been centred; this is now spelled out.
    spectrum = librosa.stft(
        y=y,
        n_fft=n_fft,
        hop_length=frame_length,
        win_length=frame_length,
        center=True,
    )

    # piptrack expects a magnitude spectrogram, not a dB-scaled one.
    magnitude = np.abs(spectrum)
    pitches, magnitudes = librosa.piptrack(S=magnitude, sr=sr, fmin=80, fmax=1000)

    # For each frame (column) keep the pitch of the strongest bin.
    strongest_bin = np.argmax(magnitudes, axis=0)
    frame_index = np.arange(magnitudes.shape[1])
    return pitches[strongest_bin, frame_index]

In [None]:
# Load the loop at its native sampling rate (sr=None asks librosa not to resample).
# `sr` becomes a notebook-level global that feature_extractor reads.
x,sr=librosa.load("Loops.wav",sr=None)
# NOTE(review): frame_l is not used by any cell visible here -- confirm whether it is still needed.
frame_l=sr*110
# Extract the features with a frame length of 2048 samples.
audio_features, f_min=feature_extractor(x,2048)
# Display the shape of the stacked feature array as the cell output.
audio_features.shape

In [None]:

# Drop the middle axis of the feature stack so that rows are features and
# columns are frames.
features=np.reshape(audio_features,(audio_features.shape[0],audio_features.shape[2]))
# Debug aid: row 0 holds the shape code; print the codes below 3, which
# assign_shape uses for line shapes.
for i,c in enumerate(features.T):
  if(c[0]<3):
    print(c[0])
# Build one drawing-parameter dictionary per frame.
feat=create_dict(features, f_min)


In [None]:
# Dump the full list of per-frame parameter dictionaries.
print(feat)
