<a href="https://colab.research.google.com/github/PlasticRosewood/COP4331-P1-Group2/blob/main/Copy_of_Music_Genre_RNN_v3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Import Libraries

In [25]:
import numpy as np
import librosa
import matplotlib.pyplot as plt
import pandas as pd

Parse Music Data

In [26]:
# Import CSV File
df = pd.read_csv("features_3_sec.csv")

# Separate columns into one NumPy array per feature.
# copy=True gives every array its own writable buffer. A later cell rescales
# these arrays in place; without the copy, to_numpy() may hand back a view of
# df's data (which would silently rewrite df, or be read-only when pandas
# Copy-on-Write is active).
chroma_mean = df["chroma_stft_mean"].to_numpy(copy=True)
chroma_var = df["chroma_stft_var"].to_numpy(copy=True)
rms_mean = df["rms_mean"].to_numpy(copy=True)
rms_var = df["rms_var"].to_numpy(copy=True)
scm = df["spectral_centroid_mean"].to_numpy(copy=True)
scv = df["spectral_centroid_var"].to_numpy(copy=True)
sbm = df["spectral_bandwidth_mean"].to_numpy(copy=True)
sbv = df["spectral_bandwidth_var"].to_numpy(copy=True)

# Encode every label string as an integer id. Ids are handed out in order of
# first appearance: setdefault() stores len(map) only when the label is new and
# otherwise returns the id already assigned.
# NOTE: `map` shadows the built-in map(); the name is kept as-is for any cell
# that already refers to it.
map = {}
y = [map.setdefault(genre, len(map)) for genre in df['label']]

Normalize Music Data

In [30]:
# Feature arrays in a fixed order; position k here is position k in every
# per-sample feature vector built from this list.
features = [
  chroma_mean,
  chroma_var,
  rms_mean,
  rms_var,
  scm,
  scv,
  sbm,
  sbv,
]

def normalize_range(arr):
  """Rescale `arr` in place so its minimum maps to -1 and its maximum to 1.

  Args:
    arr: NumPy array to rescale. It is modified in place and nothing is
      returned, so callers keep using the array they passed in.

  Edge cases:
    * An empty array is left untouched (there is no min/max to scale by).
    * An array whose values are all equal has zero range; it is filled with 0
      instead of dividing by zero and producing NaN.
  """
  if len(arr) == 0:
    return

  arr_min = arr.min()
  arr_max = arr.max()
  span = arr_max - arr_min

  if span == 0:
    arr[:] = 0
    return

  # Vectorised form of the per-element formula. Assigning through arr[:] keeps
  # the update in place, so every reference to the array sees the new values.
  arr[:] = (2 * (arr - arr_min) / span) - 1

# Rescale every feature array. normalize_range works in place, so the arrays
# held in `features` are updated directly and nothing needs to be reassigned.
for feature_column in features:
  normalize_range(feature_column)

Set up RNN

Initializing parameters and weights

In [50]:
input_size = 8 # number of features
# NOTE(review): the rule of thumb originally quoted here,
# (2/3 input size) + output size, comes to about 7 for the sizes in this cell,
# not 5. The value is left unchanged -- confirm which one is intended.
hidden_size = 5
output_size = 2 # number of genres to detect (subject to change)

# Fixed seed so that Restart & Run All reproduces the same initial parameters.
RNG_SEED = 42
param_rng = np.random.default_rng(RNG_SEED)

# INITIALIZE WEIGHTS
# Every draw below is uniform on [0, 1), the same distribution np.random.rand
# produces; only the source of randomness is now seeded.
# NOTE(review): all initial values are therefore non-negative -- consider a
# zero-centred initialisation if the hidden state keeps growing.

# initialize weights for inputs to hidden layer
u = param_rng.random((input_size, hidden_size))

# initialize weights for hidden layer to hidden layer
w = param_rng.random((hidden_size, hidden_size))

# initialize weights for hidden layer to output layer
v = param_rng.random((output_size, hidden_size))

# INITIALIZE BIASES
# initialize bias for inputs to hidden layer
b_ih = param_rng.random(hidden_size)

# initialize bias for hidden layer to hidden layer
b_hh = param_rng.random(hidden_size)

# initialize bias for hidden layer to output
b_ho = param_rng.random(output_size)

Get all features from a sample

In [39]:
def get_features(sample_index):
  """Return every feature value of one sample.

  Args:
    sample_index: Position of the sample inside each array of the module-level
      `features` list.

  Returns:
    A list with one entry per array in `features`, in the same order as that
    list.
  """
  return [column[sample_index] for column in features]

# Sanity check: show the feature vector of the first sample.
print(get_features(sample_index=0))


[-0.28920239345338317, 0.43351398236789285, -0.4137334798119917, -0.784090682893563, -0.47565414285940366, -0.9304322634294081, -0.08159019673048362, -0.8117405004376876]


Recurrent Neural Network (Forward)

In [63]:
def RNN(mem_h, features_x, activation=None):
  """Run one forward step of the recurrent neural network.

  The step reads the module-level parameters w, u, v, b_hh, b_ih and b_ho.

  Args:
    mem_h: The current state of the hidden layer, either calculated in a
      previous round or the initial state chosen by the caller.
    features_x: The features of the current audio sample. It is multiplied by
      `u`, so it needs one value per row of `u`.
    activation: Optional callable applied to the new hidden state before it is
      stored and used for the output (for example np.tanh). The default, None,
      applies nothing: the update is then purely affine and nothing bounds the
      magnitude of the hidden state from one round to the next.

  Returns:
    A tuple (mem_h, output_y): the updated hidden state and the output computed
    from it. Callers that only need the final prediction can ignore output_y
    for every round but the last.
  """
  # Recurrent contribution plus input contribution, each with its own bias.
  recurrent_term = np.dot(w, mem_h) + b_hh
  input_term = np.dot(features_x, u) + b_ih
  mem_h = recurrent_term + input_term

  if activation is not None:
    mem_h = activation(mem_h)

  # Output is read from the updated hidden state.
  output_y = np.dot(v, mem_h) + b_ho

  return mem_h, output_y


RNN Wrapper Function

In [76]:
def RNN_Wrapper(target, num_segments=10, verbose=True):
  """Run the RNN forward over the consecutive segments of one sample.

  Segment i of sample `target` is read from feature row
  target * num_segments + i.
  NOTE(review): this assumes every sample occupies exactly `num_segments`
  consecutive rows of the feature arrays -- confirm against the CSV.

  Args:
    target: Index of the sample (a 30 second clip, per the original notes) to
      run through the network.
    num_segments: Number of consecutive feature rows that make up one sample,
      and therefore the number of RNN rounds. Defaults to 10.
    verbose: When True (the default) the round number, output and hidden state
      are printed after every round.

  Returns:
    A tuple (hidden_states, outputs). hidden_states has num_segments + 1 rows:
    row 0 is the initial state and row i + 1 is the state after round i.
    outputs has one row per round.
  """
  # Row 0 holds the initial hidden state: a vector of ones (not an identity
  # matrix).
  hidden_states = np.zeros((num_segments + 1, hidden_size))
  hidden_states[0] = np.ones(hidden_size)

  outputs = np.zeros((num_segments, output_size))

  # loop through each round
  for i in range(num_segments):
    if verbose:
      print('Round: ' + str(i))
    true_target = (target * num_segments) + i
    hidden_states[i + 1], outputs[i] = RNN(hidden_states[i], get_features(true_target))
    if verbose:
      print('output: ', outputs[i])
      print('hidden state: ', hidden_states[i+1])

  return hidden_states, outputs

# Demo run on the first sample; the returned (hidden_states, outputs) tuple is
# this cell's displayed value.
RNN_Wrapper(target=0)

Round: 0
output:  [3.20587652 6.75437116]
hidden state:  [1.83535773 3.50508337 1.6813549  0.35083865 2.60633849]
Round: 1
output:  [ 7.4121341  13.48085612]
hidden state:  [4.1820154  5.73538157 4.17552361 2.10389585 4.96880255]
Round: 2
output:  [18.44667552 31.88192006]
hidden state:  [10.63440556 13.36175452 10.81053301  6.25895289 10.92067151]
Round: 3
output:  [47.4734642  79.93072221]
hidden state:  [27.67281164 33.19994384 27.87505719 17.30373325 26.63948603]
Round: 4
output:  [124.08379508 206.74323819]
hidden state:  [72.56860378 85.28976691 72.89154072 46.49487768 68.31565309]
Round: 5
output:  [327.07155768 542.64297997]
hidden state:  [191.42621269 223.19187635 192.37190069 123.95115164 178.5274501 ]
Round: 6
output:  [ 863.28868504 1430.29360376]
hidden state:  [505.4918437  587.98211551 507.8578673  328.38840556 469.89062386]
Round: 7
output:  [2279.95283318 3775.35407529]
hidden state:  [1335.28343046 1551.47410975 1341.22574178  868.48314574 1239.80938283]
Round: 8
out

(array([[1.00000000e+00, 1.00000000e+00, 1.00000000e+00, 1.00000000e+00,
         1.00000000e+00],
        [1.83535773e+00, 3.50508337e+00, 1.68135490e+00, 3.50838649e-01,
         2.60633849e+00],
        [4.18201540e+00, 5.73538157e+00, 4.17552361e+00, 2.10389585e+00,
         4.96880255e+00],
        [1.06344056e+01, 1.33617545e+01, 1.08105330e+01, 6.25895289e+00,
         1.09206715e+01],
        [2.76728116e+01, 3.31999438e+01, 2.78750572e+01, 1.73037333e+01,
         2.66394860e+01],
        [7.25686038e+01, 8.52897669e+01, 7.28915407e+01, 4.64948777e+01,
         6.83156531e+01],
        [1.91426213e+02, 2.23191876e+02, 1.92371901e+02, 1.23951152e+02,
         1.78527450e+02],
        [5.05491844e+02, 5.87982116e+02, 5.07857867e+02, 3.28388406e+02,
         4.69890624e+02],
        [1.33528343e+03, 1.55147411e+03, 1.34122574e+03, 8.68483146e+02,
         1.23980938e+03],
        [3.52803347e+03, 4.09754005e+03, 3.54380889e+03, 2.29595251e+03,
         3.27426358e+03],
        [9