<a href="https://colab.research.google.com/github/Jpw306/Music-Genre-RNN/blob/main/Music_Genre_RNN_with_LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Import Libraries

In [5]:
import numpy as np
import librosa
import matplotlib.pyplot as plt
import pandas as pd
import math
import random

Parse Music Data

In [46]:
# Import CSV File
df = pd.read_csv("features_3_sec.csv")

# Seperate columns into features
chroma_mean = df["chroma_stft_mean"].to_numpy()
chroma_var = df["chroma_stft_var"].to_numpy()
rms_mean = df["rms_mean"].to_numpy()
rms_var = df["rms_var"].to_numpy()
scm = df["spectral_centroid_mean"].to_numpy()
scv = df["spectral_centroid_var"].to_numpy()
sbm = df["spectral_bandwidth_mean"].to_numpy()
sbv = df["spectral_bandwidth_var"].to_numpy()

# Convert label from string to int
map = {}
y = []
for label in df['label']:
  if label not in map:
    map.update({label: len(map)})
  y.append(map.get(label))

Normalize Music Data

In [47]:
features = [chroma_mean, chroma_var, rms_mean, rms_var, scm, scv, sbm, sbv]

# normalize between -1 and 1
def normalize_range(arr):
  arr_min = arr.min()
  arr_max = arr.max()
  for i in range(len(arr)):
    arr[i] = (2 * (arr[i] - arr_min) / (arr_max - arr_min)) -1

# normalize all data
for x in features:
  normalize_range(x)

Some Constants

In [4]:
input_size = 8 # number of features
output_size = len(df["label"].unique()) # number of genres to detect (subject to change)
hidden_size = math.floor(input_size * 2 / 3) + output_size # (2/3 input size) + output size

Some Functions

In [94]:
"""
Given an index of test data, return all features for that sample
"""
def get_features(sample_index):
  return_arr = [x[sample_index] for x in features]
  return np.array(return_arr).reshape(input_size, 1).T

Set up RNN / LSTM

In [16]:
"""WEIGHTS:"""

# calculate range for weights
# using xavier initialization
lower_x, upper_x = -1 / math.sqrt(input_size), 1 / math.sqrt(input_size) # input range for x_t
lower_h, upper_h = -1 / math.sqrt(hidden_size), 1 / math.sqrt(hidden_size) # input range for h_t-1

# function for h_size feedback neurons
def h_size_feedback():
  return np.random.uniform(lower_h, upper_h, (hidden_size, hidden_size))

def x_size_feedback():
  return np.random.uniform(lower_x, upper_x, (input_size, hidden_size))

# notation: (W)eight_(t)o(f)rom
W_fh = h_size_feedback() # weight into forget gate from h_t-1
W_fx = x_size_feedback() # weight into forget gate from x_t (current input)
W_ih = h_size_feedback() # weight into input gate from h_t-1 (For input gate)
W_ix = x_size_feedback() # weight into input gate from x_t (For input gate)
W_ch = h_size_feedback() # weight into input gate from h_t-1 (For candidate memory)
W_cx = x_size_feedback() # weight into input gate from x_t (For candidate memory)
W_oh = h_size_feedback() # weight into output gate from h_t-1
W_ox = x_size_feedback() # weight into output gate from x_t

weights = [W_fh, W_fx, W_ih, W_ix, W_ch, W_cx, W_oh, W_ox]

"""BIASES:"""

# notation: (B)ias_(t)o
b_f = np.ones([hidden_size, 1]) # bias for forget gate
b_i = np.ones([hidden_size, 1]) # bias for input gate
b_c = np.ones([hidden_size, 1]) # bias for candidate memory
b_o = np.ones([hidden_size, 1]) # bias for output gate

biases = [b_f, b_i, b_c, b_o]

Common Functions

In [64]:
def sigmoid(x):
  return 1 / (1 + np.exp(-x))

""" BUILT IN NUMPY FUNCTIONS TO USE """
# np.add() for element wise addition
# np.tanh() for tanh
# hadamard via * multiplication

' BUILT IN NUMPY FUNCTIONS TO USE '

LSTM Cell

In [107]:
def LSTM_cell(c_t, h_t, x_t):
  # Forget Gate
  f_t = np.dot(W_fh, h_t) + np.dot(x_t, W_fx) + b_f
  f_out = sigmoid(f_t)
  c_t = np.multiply(c_t, f_out)

  # Input Gate
  i_t = np.dot(W_ih, h_t) + np.dot(x_t, W_ix) + b_i
  i_out = sigmoid(i_t)
  cand_t = np.dot(h_t.T, W_ch) + np.dot(x_t, W_cx) + b_c
  cand_out = np.multiply(i_out, np.tanh(cand_t))
  c_t = np.add(c_t, cand_out)

  # Output Gate
  o_t = np.dot(W_oh, h_t) + np.dot(x_t, W_ox) + b_o
  o_out = sigmoid(o_t)
  h_t = np.multiply(np.tanh(c_t), o_out)

  return c_t, h_t

def LSTM_wrapper(num_rounds, data_index):
  # Define Cell and Hidden states
  c_t_mem = np.zeros([num_rounds + 1, hidden_size])
  h_t_mem = np.zeros([num_rounds + 1, hidden_size])
  print(c_t_mem.shape)
  print(h_t_mem.shape)

  # Create -1st entry for memory lines
  c_t_mem[0] = np.zeros(hidden_size)
  h_t_mem[0] = np.zeros(hidden_size)

  actual_index = data_index * 10

  for i in range(num_rounds):
    c_t_cur = c_t_mem[i].reshape(hidden_size, 1)
    h_t_cur = h_t_mem[i].reshape(hidden_size, 1)
    c_t_mem[i + 1], h_t_mem[i + 1] = LSTM_cell(c_t_cur, h_t_cur, get_features(actual_index + i))

  return c_t_mem, h_t_mem

res = LSTM_wrapper(10, 0)
print(res[0])
print(res[1])

(11, 15)
(11, 15)


ValueError: could not broadcast input array from shape (15,15) into shape (15,)