In [62]:
import pandas as pd
import numpy as np

data = pd.read_csv('./Dataset/data.csv')

# Here's how you reduce sample the data set. sample_data will contain every 100th row. This is just as an example
# Should use more than a 100th of the data
sample_data = data.iloc[::100, :]
sample_data.head()

# Splitting the data into features and song profiles
song_profiles = sample_data[['id', 'name', 'artists', 'release_date', 'year', 'id']].copy()
features = sample_data.drop('name', axis=1).copy().drop('artists', axis=1).drop('id', axis=1).drop('release_date', axis=1).values.tolist()
# Drop irrelevant columns in feature set
# features = features.drop('key', axis=1)
    # Key?
    # My guess is that the key is the western music scale key that the song is in
    # possibly normalized for major scales? (e.g., gmaj == cmin)
# sample_data
# song_profiles.head()
# features.head()

# I'm going to make this an RNN. Deal.
# we can use the index of the song in the features list as the class. Because we're 

In [174]:
import math
import random
import copy
# We define a our recurrent neural network as one where the features of the current song being played is the input, along 
# along with classifiers (i.e., their place in the features array) as other inputs.
# we return the value as a binary number representation of the index

def GetClassFromNum(num):
    out = [0 for _ in range(20)] #we have 600k songs as our input, thus we need 20 bits to hold any as a 
    i = 0
    while(num > 0):
        out[i] = num % 2
        num = num // 2
        i += 1
    print(out)
    return out

def GetNumFromClass(vals):
    out = 0
    for i in range(len(vals)):
        out += (i**2)*vals[i]
    return out

class Neuron:
    Weights = []
    Output = 1.0
    Delta = 0.5
    
    def __init__(self, prevLayerWidth):
        self.Weights = [random.random() for _ in range(prevLayerWidth + 1)]
    
    def Activate(self, inputs):
        activation_val = self.Weights[-1]
        for i in range(len(inputs)):
            activation_val += inputs[i] * self.Weights[i]
        self.Output = np.tanh(activation_val)
        return self.Output
    
    def UpdateWeights(self, regressive_outputs, learn_rate):
        for i in range(len(regressive_outputs)):
            self.Weights[i] += learn_rate * self.Delta * regressive_outputs[i]
        self.Weights[-1] += learn_rate * self.Delta
    
    def UpdateDelta(self, error):
        self.Delta = error * (1 - np.tanh(self.Output)**2)

class RecurrentNeuralNetwork:
    RecurrentInputs = 0
    OutputLength = 0
    NeuronLayers = []
    LearningRate = 0.0
    
    def __init__(self, features, recurrent_inputs, hidden_layers, output_length, learning_rate):
        print("Building neural network with", features, "features,", recurrent_inputs, "recurrent inputs, and", hidden_layers, "hidden layers.")
        self.RecurrentInputs = recurrent_inputs
        self.LearningRate = learning_rate
        self.OutputLength = output_length
        
        y_intercept = (features + recurrent_inputs*output_length)
        slope = (output_length - y_intercept)/hidden_layers #for quicker learning, we narrow the scope of each layer a bit
        prev_width = y_intercept
        for i in range(hidden_layers + 1):
            print("\tBuilding layer", i, "with neurons", prev_width)
            next_len = math.ceil(i*slope + y_intercept) if (i < hidden_layers) else output_length
            for j in range(next_len):
                self.NeuronLayers.append([Neuron(prev_width) for _ in range(prev_width)])
            prev_width = next_len
        print("Completed!")
    
    def ForwardPropagation(self, current_input, recurrent_inputs):
        # setup previous layers "output" as the total input
        layer_output = copy.deepcopy(current_input)
        for recurrent_input in recurrent_inputs:
            for val in recurrent_input:
                layer_output.append(val)
        
        for layer in self.NeuronLayers:
            next_layer_input = []
            for neuron in layer:
                next_layer_input(neuron.Activate(layer_output))
            layer_output = next_layer_input
        return layer_output
    
    def BackwardPropagation(self, expected_value):
        prev_layer = []
        for layer in reversed(self.NeuronLayers):
            layer_error = []
            if (layer is self.NeuronLayers[-1]):
                for i in range(len(layer)):
                    layer_error.append(expected_value[i] - layer[i].Output)
            else:
                for i in range(len(layer)):
                    error = 0.0
                    for neuron in prev_layer:
                        error += neuron.Weights[i] * neuron.Delta
                    layer_error.append(error)
            for i in range(len(layer)):
                layer[i].UpdateDelta(layer_error[i])
            prev_layer = layer

    def UpdateAllWeights(self, current_input, recurrent_inputs):
        layer_input = current_input
        for recurrent_input in recurrent_inputs:
            for val in recurrent_input:
                layer_input.append(val)
        
        for layer in self.NeuronLayers:
            next_layer_input = []
            for neuron in layer:
                neuron.UpdateWeights(layer_input, self.LearningRate)
                next_layer_input.append(neuron.Output)
            layer_input = next_layer_input

    def SimilarityCalculation(self, input_class, output_class):
        numer = 0
        denom = 0
        for i in range(len(input_class)):
            numer += (input_class[i]*output_class[i])
            denom *= ((input_class[i]**2) + (output_class[i]**2))**0.5
        if (denom != 0):
            return numer/denom
        else:
            return 0
    
    def Train(self, training_set, epochs):
        shuffled_set = copy.copy(training_set)
        print("RNN starting training with", epochs, "epochs and ", self.RecurrentInputs, " recurrent inputs. ")
        prev_inputs = []
        expected_similarity = 0.75
        blank = [0 for _ in range(self.OutputLength)]

        for i in range(self.RecurrentInputs):
            prev_inputs.append([0 for _ in range(self.OutputLength)])
        
        for epoch in range(epochs + 1):
            total_epoch_error = 0.0
            random.shuffle(shuffled_set)
            for value in shuffled_set:
                possible_output = self.ForwardPropagation(value, prev_inputs)
                calc_possible_output = []
                for el in possible_output:
                    calc_possible_output.append(1 if el > 0 else 0)

                similarity = 0
                print("\tInput:", value)
                print("\t\tDirect Output:", possible_output, "\tCalculated Output: ", calc_possible_output)
                classnum = GetNumFromClass(calc_possible_output)
                print("\t\tClass Number", classnum)
                if (classnum < len(training_set)):
                    features = training_set[classnum]
                    print("\t\tFeatures:", features)
                    similarity = self.SimilarityCalculation(value, features)
                expected = []
                
                print("\t\tSimilarity:", similarity)
                next_recurrent_inputs = prev_inputs
                
                if (similarity >= expected_similarity and
                    possible_output not in prev_inputs):
                    next_recurrent_inputs = (prev_inputs[1:]).append(possible_output)
                    expected = calc_possible_output
                elif (similarity < expected_similarity):
                    for el in possible_output:
                        expected.append(el * similarity)
                self.BackwardPropagation(expected)
                self.UpdateAllWeights(value, prev_inputs)
                prev_inputs = next_recurrent_inputs

In [175]:
new_network = RecurrentNeuralNetwork(len(features[0]), 20, 30, 20, 0.5)

Building neural network with 15 features, 20 recurrent inputs, and 30 hidden layers.
	Building layer 0 with neurons 415


KeyboardInterrupt: 

In [153]:
new_network.Train(features, 20)

RNN starting training with 20 epochs and  20  recurrent inputs. 
	Input: [0.0555, 0.263, 284496.0, 0.759, 0.0, 0.0, 0.0, 0.231, -7.2879999999999985, 1.0, 57.0, 0.0431, 179.90400000000002, 0.429, 1987.0]
		Direct Output: [0.4842812478978636] 	Calculated Output:  [0]
		Class Number 0
		Features: [0.926, 0.742, 184070.0, 0.243, 0.0, 0.0, 10.0, 0.0873, -11.013, 1.0, 12.0, 0.105, 73.438, 0.773, 1942.0]
		Similarity: 0
	Input: [0.628, 0.772, 164653.0, 0.7290000000000001, 0.0, 0.0169, 4.0, 0.0959, -7.934, 1.0, 44.0, 0.114, 146.537, 0.867, 1992.0]
		Direct Output: [0.2739576579137026] 	Calculated Output:  [0]
		Class Number 0
		Features: [0.926, 0.742, 184070.0, 0.243, 0.0, 0.0, 10.0, 0.0873, -11.013, 1.0, 12.0, 0.105, 73.438, 0.773, 1942.0]
		Similarity: 0
	Input: [0.418, 0.598, 288261.0, 0.402, 0.0, 0.3229999999999999, 11.0, 0.125, -14.019, 1.0, 1.0, 0.059, 95.018, 0.225, 2011.0]
		Direct Output: [0.12764188421795222] 	Calculated Output:  [0]
		Class Number 0
		Features: [0.926, 0.742, 18407

IndexError: list index out of range