<a href="https://colab.research.google.com/github/Jpw306/Music-Genre-RNN/blob/main/Copy_of_Music_Genre_RNN_v3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Import Libraries

In [1]:
import numpy as np
import librosa
import matplotlib.pyplot as plt
import pandas as pd
import math

Parse Music Data

In [2]:
# Import CSV File
df = pd.read_csv("features_3_sec.csv")

# Seperate columns into features
chroma_mean = df["chroma_stft_mean"].to_numpy()
chroma_var = df["chroma_stft_var"].to_numpy()
rms_mean = df["rms_mean"].to_numpy()
rms_var = df["rms_var"].to_numpy()
scm = df["spectral_centroid_mean"].to_numpy()
scv = df["spectral_centroid_var"].to_numpy()
sbm = df["spectral_bandwidth_mean"].to_numpy()
sbv = df["spectral_bandwidth_var"].to_numpy()

# Convert label from string to int
map = {}
y = []
for label in df['label']:
  if label not in map:
    map.update({label: len(map)})
  y.append(map.get(label))

Normalize Music Data

In [3]:
features = [chroma_mean, chroma_var, rms_mean, rms_var, scm, scv, sbm, sbv]

# normalize between -1 and 1
def normalize_range(arr):
  arr_min = arr.min()
  arr_max = arr.max()
  for i in range(len(arr)):
    arr[i] = (2 * (arr[i] - arr_min) / (arr_max - arr_min)) -1

# normalize all data
for x in features:
  normalize_range(x)

Set up RNN

Initializing parameters and weight

In [4]:
input_size = 8 # number of features
output_size = 2 # number of genres to detect (subject to change)
hidden_size = math.floor(input_size * 2 / 3) + output_size # (2/3 input size) + output size

# INITIALIZE WEIGHTS
# initalize weights for inputs to hidden layer
u = np.random.rand(input_size, hidden_size)

# initialize weights for hidden layer to hidden layer
w = np.random.rand(hidden_size, hidden_size)

# initialize weights for hidden layer to output layer
v = np.random.rand(output_size, hidden_size)

# INITIALIZE BIASES
# initialize bias for inputs to hidden layer
b_ih = np.random.rand(hidden_size)

# initialize bias for hidden layer to hidden layer
b_hh = np.random.rand(hidden_size)

# initialize bias for hidden layer to output
b_ho = np.random.rand(output_size)

Get all features from a sample

In [5]:
"""
Given an index of test data, return all features for that sample
"""
def get_features(sample_index):
  return_arr = [x[sample_index] for x in features]
  return return_arr

print(get_features(0))


[-0.28920239345338317, 0.43351398236789285, -0.4137334798119917, -0.784090682893563, -0.47565414285940366, -0.9304322634294081, -0.08159019673048362, -0.8117405004376876]


Recurrence Neural Network (Forward)

In [6]:
"""
This function does 1 iteration of a recurrence neural network training model
it takes in two arguments:
   mem_h:
     The current state of the hidden layer either calculated in previous rounds
       or defined as h_-1

   features_x:
     The features of the current audio sample. This is assumed to be from the
       list of 8 features defined earlier. This will be used in calculating the
       hidden layer state.
"""
def RNN(mem_h, features_x):
  # update state of memory
  mem_h = np.add(
      np.add(np.dot(w, mem_h), b_hh),
      np.add(np.dot(features_x, u), b_ih)
  )

  # calculate output Y
  # this will only be used on the final iteration of RNN
  output_y = np.add(
      np.dot(v, mem_h),
      b_ho
  )

  return mem_h, output_y


RNN Wrapper Function

In [7]:
"""
Target specifies a 30 second sample to pick. When selecting a target, the number
is multiplied by 10 and iterated upon
"""
def RNN_Wrapper(target):

  # initialize hidden memory with identity matrix
  init_mem_h = np.ones(hidden_size)

  # define containers for storing state
  hidden_states = np.zeros((10 + 1, hidden_size))
  hidden_states[0] = init_mem_h # initialize with filler value

  outputs = np.zeros((10, output_size))

  # loop through each round
  for i in range(10):
    print('Round: ' + str(i))
    true_target = (target * 10) + i
    hidden_states[i + 1], outputs[i] = RNN(hidden_states[i], get_features(true_target))
    print('output: ', outputs[i])
    print('hidden state: ', hidden_states[i+1])

  return hidden_states, outputs

RNN_Wrapper(0)

Round: 0
output:  [9.04391776 9.08418157]
hidden state:  [1.81362676 2.16045125 4.67890796 1.20664635 1.28763516 4.17960451
 3.57683175]
Round: 1
output:  [26.04727472 29.05141126]
hidden state:  [ 8.09296653  6.69696963 11.81287103  3.42111424  3.59902797 13.83604396
 11.31983111]
Round: 2
output:  [85.9923342  97.38912015]
hidden state:  [29.44733536 25.86148962 34.72320416 12.75192486 15.94149614 44.57928366
 35.66308094]
Round: 3
output:  [295.62837066 332.70935503]
hidden state:  [101.22666943  92.47593016 115.79421983  48.42250769  60.94419915
 150.82863155 118.36988804]
Round: 4
output:  [1029.09639084 1154.22456393]
hidden state:  [350.30713586 324.24200124 401.89656288 173.9767326  217.81606971
 519.9570582  409.59110084]
Round: 5
output:  [3591.33480226 4024.09547632]
hidden state:  [1220.09067356 1131.9822973  1402.02743455  612.58151616  764.45222935
 1810.67694713 1428.41684841]
Round: 6
output:  [12537.71393523 14047.19760161]
hidden state:  [4258.54374251 3952.34956049 4

(array([[1.00000000e+00, 1.00000000e+00, 1.00000000e+00, 1.00000000e+00,
         1.00000000e+00, 1.00000000e+00, 1.00000000e+00],
        [1.81362676e+00, 2.16045125e+00, 4.67890796e+00, 1.20664635e+00,
         1.28763516e+00, 4.17960451e+00, 3.57683175e+00],
        [8.09296653e+00, 6.69696963e+00, 1.18128710e+01, 3.42111424e+00,
         3.59902797e+00, 1.38360440e+01, 1.13198311e+01],
        [2.94473354e+01, 2.58614896e+01, 3.47232042e+01, 1.27519249e+01,
         1.59414961e+01, 4.45792837e+01, 3.56630809e+01],
        [1.01226669e+02, 9.24759302e+01, 1.15794220e+02, 4.84225077e+01,
         6.09441992e+01, 1.50828632e+02, 1.18369888e+02],
        [3.50307136e+02, 3.24242001e+02, 4.01896563e+02, 1.73976733e+02,
         2.17816070e+02, 5.19957058e+02, 4.09591101e+02],
        [1.22009067e+03, 1.13198230e+03, 1.40202743e+03, 6.12581516e+02,
         7.64452229e+02, 1.81067695e+03, 1.42841685e+03],
        [4.25854374e+03, 3.95234956e+03, 4.89462318e+03, 2.14131505e+03,
         2

Activation and Cost Function

In [8]:
def softmax(x, derivative = False):
  e = np.exp(x)
  softmax =  e / np.sum(e)

  if not derivative:
    return softmax
  else:
    softmax = softmax.reshape(-1, 1)
    return np.diagflat(softmax) - np.dot(softmax, softmax.T)

def cost_function(p, q, derivative = False):
  sum = 0

  # Find derivative for back progpogation implementation
  if derivative:
    for i in range(p.size):
      sum += math.log(p[i]/q[i]) + 1

  # Find standard cost from expected output
  else:
    for i in range(p.size):
      sum += p[i] * math.log(p[i]/q[i], 10)

  return sum

Gradient Descent Function

In [9]:
def gradient_descent():
  # TODO
  return;

Back Propagation

In [10]:
def back_progpagation(y_hat, y, h, w):
  for layer in range(h.size):
    d_activation = softmax(h[layer], True)

    # Check if back propagation in final layer
    if layer == 0:
      d_error = cost_function(y_hat, y, True)
    else:
      d_error = np.dot(delta, np.transpose(w))

    delta = np.dot(d_error, d_activation)
    gradient = np.dot(h[layer], delta)
    gradient_descent()