<a href="https://colab.research.google.com/github/CeciliaMarson/CPAC-P3-Kandinsky/blob/main/utility/feature_kandinsky.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import librosa, librosa.display
import scipy.fftpack as sf
from scipy import signal
import matplotlib.pyplot as plt



In [2]:
def feature_extractor(signal, frame_length, sr=None):
    """Build the per-frame feature stack used to draw the shapes.

    Parameters
    ----------
    signal : np.ndarray
        Mono audio samples.
    frame_length : int
        Hop/window size (in samples) shared by every per-frame feature.
    sr : int or None, optional
        Sampling rate handed to the onset detector. When omitted, the
        notebook-level ``sr`` variable is used (the behaviour this function
        had before the parameter existed); if that does not exist either,
        librosa's default of 22050 Hz is used.

    Returns
    -------
    np.ndarray
        Array of shape ``(8, 1, n_frames)`` whose rows are, in order:
        shape index, zero-crossing rate, spectral centroid, R, G, B,
        pitch and RMS energy.
    """
    if sr is None:
        # Backward compatibility: earlier callers set a global `sr` right
        # before calling this function instead of passing it in.
        sr = globals().get("sr", 22050)

    # Onset strength decides whether a frame becomes a line or a filled figure.
    oenv = librosa.onset.onset_strength(y=signal, sr=sr, hop_length=frame_length)
    on_set = np.expand_dims(oenv, axis=0)

    # Zero-crossing rate drives the stroke. Keyword arguments are required:
    # librosa >= 0.10 no longer accepts these positionally.
    zcr = librosa.feature.zero_crossing_rate(
        y=signal, frame_length=frame_length, hop_length=frame_length
    )
    # Spectral centroid.
    # NOTE(review): no `sr` is forwarded here, so the centroid is computed with
    # librosa's default sampling rate; kept as-is so existing values do not
    # change -- confirm whether that is intended.
    s_centroid = librosa.feature.spectral_centroid(
        y=signal, n_fft=frame_length, hop_length=frame_length, win_length=frame_length
    )

    # Three strongest chroma bins per frame: energies (chroma_key*) and
    # pitch-class indices (max_*). Together they pick the shape and the colour.
    chroma_key0, max_0 = extract_max_chroma(signal, frame_length, 0)
    chroma_key1, max_1 = extract_max_chroma(signal, frame_length, 1)
    chroma_key2, max_2 = extract_max_chroma(signal, frame_length, 2)

    chroma0 = np.expand_dims(chroma_key0, axis=0)
    chroma1 = np.expand_dims(chroma_key1, axis=0)
    chroma2 = np.expand_dims(chroma_key2, axis=0)
    max_ch = np.expand_dims(max_0, axis=0)

    # Histogram of the dominant pitch class over all frames; the least
    # frequent one is the trigger for the special "grid" shape.
    note_counts = np.bincount(np.asarray(max_0, dtype=int), minlength=12)
    f_min = np.argmin(note_counts)

    # 0 = major, 1 = minor, 2 = unclear.
    chords = major_minor(signal, frame_length)

    # Primary colour from the position of the top note relative to the others.
    colors = prevalent_color(max_0, max_1, max_2)

    # Shape comes from chroma information plus the onset strength.
    shap = assign_shape(on_set, max_ch, chords, colors, f_min)
    shapes = np.expand_dims(shap, axis=0)

    # Pitch of the strongest partial per frame.
    pitch = np.expand_dims(detect_pitch(signal, frame_length), axis=0)

    # RMS energy per frame.
    energy = librosa.feature.rms(y=signal, hop_length=frame_length)

    # RGB_offset returns (3, n_frames, 1); drop the trailing axis.
    RGB = RGB_offset(colors, chroma0, max_0, chroma1, max_1, chroma2, max_2)
    RGB = RGB.reshape(RGB.shape[0], RGB.shape[1])

    features = [
        shapes,
        zcr,
        s_centroid,
        np.expand_dims(RGB[0], axis=0),
        np.expand_dims(RGB[1], axis=0),
        np.expand_dims(RGB[2], axis=0),
        pitch,
        energy,
    ]
    return np.array(features)




In [3]:
def prevalent_color(A,B,C):
  """Classify each frame by where A sits relative to B and C.

  Parameters
  ----------
  A, B, C : array-like
      Equal-length sequences of comparable values (the notebook passes the
      pitch-class indices of the three strongest chroma bins per frame).

  Returns
  -------
  np.ndarray
      One integer code per element of ``A``:
      0 when ``A`` is strictly below both ``B`` and ``C``,
      2 when ``A`` is strictly above both,
      1 otherwise (``A`` lies between them, or ties with one of them).
  """
  a = np.asarray(A)
  # Only the first len(A) entries of B and C were ever read.
  b = np.asarray(B)[:a.shape[0]]
  c = np.asarray(C)[:a.shape[0]]

  lowest = (a < b) & (a < c)
  highest = (a > b) & (a > c)
  # Everything that is neither strictly lowest nor strictly highest is the
  # "middle" class. Making this the fallback means ties can no longer leave
  # the result undefined or carried over from the previous frame.
  return np.where(lowest, 0, np.where(highest, 2, 1))

In [4]:
def RGB_offset(colors, chroma0, max_0, chroma1, max_1, chroma2, max_2):
  """Distribute the three strongest chroma energies over the R, G, B channels.

  Parameters
  ----------
  colors : sequence of int
      Per-frame colour code (0, 1 or 2).
  chroma0, chroma1, chroma2 : np.ndarray
      Energies of the strongest, second and third chroma bin; indexed as
      ``chromaK.T[i]`` for frame ``i``.
  max_0 : sequence
      Unused; kept so the signature stays unchanged.
  max_1, max_2 : sequence
      Pitch-class indices of the second and third chroma bin; their order
      selects between the two channel layouts available for each colour code.

  Returns
  -------
  np.ndarray
      ``np.array([R, G, B])`` where each channel holds one ``chromaK.T[i]``
      entry per frame.

  Raises
  ------
  ValueError
      If a colour code is not 0, 1 or 2. Previously such a frame silently
      reused the previous frame's channels, or crashed on the first frame.
  """
  # (colour code, max_1 < max_2) -> which chroma rank feeds (R, G, B).
  channel_order = {
      (0, True): (0, 1, 2),
      (0, False): (0, 2, 1),
      (1, True): (1, 0, 2),
      (1, False): (2, 0, 1),
      (2, True): (1, 2, 0),
      (2, False): (2, 1, 0),
  }
  ranked = (chroma0.T, chroma1.T, chroma2.T)

  R, G, B = [], [], []
  for i, c in enumerate(colors):
    key = (int(c), bool(max_1[i] < max_2[i]))
    if key not in channel_order:
      raise ValueError("unknown colour code %r at frame %d" % (c, i))
    r_src, g_src, b_src = channel_order[key]
    R.append(ranked[r_src][i])
    G.append(ranked[g_src][i])
    B.append(ranked[b_src][i])
  return np.array([R, G, B])



In [5]:
def major_minor(signal, frame_length):
    """Label every chroma frame as major (0), minor (1) or unclear (2).

    For each frame the strongest pitch class is taken as the root. The
    energies of the major third (+4 semitones) and the minor third (+3) are
    compared to decide which chord quality to test; the frame gets that label
    when the chosen third or the fifth (+7) reaches the frame's mean chroma
    energy, and 2 otherwise.

    Parameters
    ----------
    signal : np.ndarray
        Audio samples.
    frame_length : int
        Hop and window length passed to ``librosa.feature.chroma_stft``.

    Returns
    -------
    np.ndarray
        One label (0, 1 or 2) per chroma frame.
    """
    chroma = librosa.feature.chroma_stft(y=signal, hop_length=frame_length, win_length=frame_length)
    chords = []
    for frame in chroma.T:
        mean = np.mean(frame)
        root = int(np.argmax(frame))
        major_third = (root + 4) % 12
        minor_third = (root + 3) % 12
        fifth = (root + 7) % 12
        fifth_present = frame[fifth] >= mean

        # Compare the *energies* of the two thirds. The earlier code compared
        # their indices, which made the minor branch reachable only when the
        # root index was 8 (the only root where (root+4)%12 < (root+3)%12).
        if frame[major_third] >= frame[minor_third]:
            key = 0 if (frame[major_third] >= mean or fifth_present) else 2
        else:
            key = 1 if (frame[minor_third] >= mean or fifth_present) else 2
        chords.append(key)
    return np.array(chords)
        
    

In [49]:
def assign_shape(onset, max_ch, chords, color, f_min, onset_threshold=5):
  """Choose a shape index for every frame.

  Shape indices: Arc=0, Line=1, Wave=2, Square=3, ArcFill=4, Circle=5,
  Rect=6, Triangle=7, Ellipse=8, Grid=9.

  Rules, checked in this order for each frame:
    1. dominant note equals ``f_min``        -> 9 (grid)
    2. onset strength > ``onset_threshold``  -> the chord label itself (0..2)
    3. chord label 2                         -> ``note % 6 + 3``
    4. chord label 0 or 1                    -> ``3 + 3 * chord + color``

  Parameters
  ----------
  onset : np.ndarray
      Onset strength per frame, shape ``(1, n_frames)`` or ``(n_frames,)``.
  max_ch : np.ndarray
      Dominant pitch-class index per frame, same layout as ``onset``.
  chords : sequence of int
      Chord label per frame (0, 1 or 2).
  color : sequence of int
      Colour code per frame (0, 1 or 2).
  f_min : int
      Pitch class that triggers the grid shape.
  onset_threshold : float, optional
      Onset strength above which a frame is drawn as a line (default 5,
      the value that used to be hard-coded).

  Returns
  -------
  np.ndarray
      One shape index per frame.

  Raises
  ------
  ValueError
      If a frame reaches rule 3/4 with a chord or colour code outside 0..2.
  """
  # Flatten first so each frame is a scalar: calling int() on a 1-element
  # array (as happened with `max_ch.T[i]`) is deprecated since NumPy 1.25.
  onset_env = np.ravel(onset)
  top_notes = np.ravel(max_ch)

  shapes = []
  for i, strength in enumerate(onset_env):
    note = int(top_notes[i])
    chord = int(chords[i])
    if note == f_min:
      shape = 9
    elif strength > onset_threshold:
      shape = chord
    elif chord == 2:
      shape = note % 6 + 3
    else:
      tint = int(color[i])
      if chord not in (0, 1) or tint not in (0, 1, 2):
        raise ValueError(
            "frame %d: unexpected chord/colour codes (%r, %r)" % (i, chords[i], color[i])
        )
      shape = 3 + 3 * chord + tint
    shapes.append(shape)
  return np.array(shapes)

In [7]:
def extract_max_chroma(signal, frame_length, num):
    """Return the (num+1)-th strongest chroma bin of every frame.

    Parameters
    ----------
    signal : np.ndarray
        Audio samples.
    frame_length : int
        Hop and window length passed to ``librosa.feature.chroma_stft``.
    num : int
        Rank to extract: 0 for the strongest bin, 1 for the second, and so on.

    Returns
    -------
    (np.ndarray, np.ndarray)
        Energy of the selected bin per frame, and its pitch-class index.
    """
    chroma = librosa.feature.chroma_stft(y=signal, hop_length=frame_length, win_length=frame_length)

    # Ascending sort: position 11 is the strongest of the 12 bins.
    rank = 11 - num
    picked_bins = [np.argsort(frame)[rank] for frame in chroma.T]
    picked_values = [frame[k] for frame, k in zip(chroma.T, picked_bins)]

    return np.array(picked_values), np.array(picked_bins)
    

In [8]:
def detect_pitch(y, frame_length):
    """Estimate one pitch value per frame from the strongest tracked partial.

    Parameters
    ----------
    y : np.ndarray
        Audio samples.
    frame_length : int
        FFT size, window length and hop length of the analysis STFT.
        (This argument used to be ignored in favour of a hard-coded 2048.)

    Returns
    -------
    np.ndarray
        For each frame, the ``librosa.piptrack`` pitch of the bin with the
        largest magnitude.
    """
    # `center` was previously the string 'false', which is truthy and so
    # enabled centring. State that explicitly: centred frames are what keeps
    # the frame count equal to the other per-frame features.
    S = librosa.stft(
        y=y,
        n_fft=frame_length,
        hop_length=frame_length,
        win_length=frame_length,
        center=True,
    )
    # NOTE(review): piptrack receives a dB-scaled spectrogram and no `sr`
    # (so librosa's default sampling rate applies); both are kept as they
    # were -- confirm that this is intended.
    Xmag = librosa.amplitude_to_db(np.abs(S))
    pitches, magnitudes = librosa.piptrack(S=Xmag, fmin=80, fmax=1000)

    # Per frame (column), take the pitch at the row of maximum magnitude.
    strongest = magnitudes.argmax(axis=0)
    return pitches[strongest, np.arange(pitches.shape[1])]

In [None]:
def create_dict(arr):
    """Convert a feature matrix into one drawing descriptor per frame.

    Parameters
    ----------
    arr : array-like, shape (8, n_frames)
        Rows in the order produced by ``feature_extractor``:
        0 shape index, 1 zero-crossing rate, 2 spectral centroid,
        3 R, 4 G, 5 B, 6 pitch, 7 RMS energy.

    Returns
    -------
    list of dict
        One dict per frame with the keys 'Shape', 'R', 'B', 'G', 'Y_dim',
        'Stroke', 'X_dim' and 'Transparency'.
    """
    out = [{
    'Shape': int(c[0]),               # shape index
    'R': int(c[3]*255),               # red channel, scaled by 255
    # Row 4 is the green channel and row 5 the blue one; the two keys
    # used to read each other's row.
    'B': int(c[5]*255),               # blue channel, scaled by 255
    'G': int(c[4]*255),               # green channel, scaled by 255
    'Y_dim': int(c[7]*1000),          # from RMS energy
    'Stroke': int(c[1]*100),          # from zero-crossing rate ("roughness")
    'X_dim': int(c[6]),               # from pitch
    'Transparency': float(c[2]/100),  # from spectral centroid (brightness)
        } for c in np.asarray(arr).T]
    return out


    #Shape indices:
    #Arc=0,Line=1,Wave=2,Square=3,ArcFill=4,Circle=5,Rect=6,Triangle=7,Ellipse=8,Grid=9

In [50]:
# Base names of the tracks to analyse; each is loaded from "<name>.wav".
names=["Astoria", "Defective Bleeding", "Glass Bottles", "Iron Lion", "Loops", "Music Box", "Pax 6", "We Make Our Own Holidays"]


# One 2-D feature matrix per track in `names`, appended in the same order.
features=[]

# Reference track, analysed separately from the list above.
# NOTE: `sr` has to be assigned before feature_extractor is called, because
# that function reads `sr` as a global rather than taking it as an argument.
x1,sr=librosa.load( "This One Is for You.wav",sr=None)
a_feat=feature_extractor(x1,2048)
# Prints the 3-D shape returned by feature_extractor (see the recorded output).
print(a_feat.shape)
# Keep the first and last axes only, dropping the middle one.
a_feat=np.reshape(a_feat,(a_feat.shape[0],a_feat.shape[2]))
for n in names:
  # Rebinds the global `sr` for every track, for the reason given above.
  x,sr=librosa.load(n+".wav",sr=None)
  audio_features=feature_extractor(x,2048)
  # Same reshape as for the reference track.
  audio_features=np.reshape(audio_features,(audio_features.shape[0],audio_features.shape[2]))
  features.append(audio_features)






(8, 1, 641)


In [51]:
# Tracks usually differ in length, so their feature matrices cannot always be
# stacked into one rectangular array: NumPy >= 1.24 raises ValueError for such
# ragged input. Stack only when every matrix has the same shape; otherwise keep
# one matrix per slot of a 1-D object array. Re-running this cell is harmless.
if len({np.shape(track) for track in features}) <= 1:
    features = np.asarray(features)
else:
    _ragged = np.empty(len(features), dtype=object)
    for _k, _track in enumerate(features):
        _ragged[_k] = _track
    features = _ragged

In [53]:

def _count_lines_and_grids(track):
    """Return (line_count, grid_count) for one (n_features, n_frames) matrix.

    Row 0 holds the shape index per frame: indices below 3 are the line-like
    shapes (Arc, Line, Wave) and index 9 is the grid.
    """
    shape_ids = np.asarray(track)[0]
    return int(np.count_nonzero(shape_ids < 3)), int(np.count_nonzero(shape_ids == 9))


def _print_shape_stats(title, track):
    """Print the line and grid counts of one track under its title."""
    lines, grids = _count_lines_and_grids(track)
    print(title)
    print("Line")
    print(lines)
    print("Grid")
    print(grids)


for i, s in enumerate(features):
    _print_shape_stats(names[i], s)

_print_shape_stats("This One Is for You", a_feat)

# `feat` used to be read here (`np.array(feat)`) without ever being assigned,
# which raises NameError on a fresh kernel. Build it explicitly instead: one
# list of per-frame descriptors for each track in `features`.
feat = [create_dict(track) for track in features]


Astoria
Line
73
Grid
4
Defective Bleeding
Line
15
Grid
2
Glass Bottles
Line
77
Grid
0
Iron Lion
Line
25
Grid
14
Loops
Line
29
Grid
5
Music Box
Line
40
Grid
1
Pax 6
Line
24
Grid
2
We Make Our Own Holidays
Line
100
Grid
6
This One Is for You
Line
94
Grid
4


In [None]:
# Dump the exported descriptors. `feat` is not assigned in this cell, so it
# must already have been populated by an earlier cell.
print(feat)
