In [98]:
import pandas as pd
import numpy as np
import csv
import copy

data = pd.read_csv('./Dataset/data.csv')

# Here's how you reduce sample the data set. sample_data will contain every 100th row. This is just as an example
# Should use more than a 100th of the data
sample_data = data.iloc[::2000, :]
sample_data.head()

# Splitting the data into features and song profiles
song_profiles = sample_data[['id', 'name', 'artists', 'release_date', 'year']].copy()
features = sample_data.copy().drop('name', axis=1).drop('artists', axis=1).drop('id', axis=1).drop('release_date', axis=1).values.tolist()
def Maxes(features):
    maxes = [0 for _ in range(len(features[0]))]
    for row in features:
        for i in range(len(row)):
            if (abs(row[i]) > maxes[i]):
                maxes[i] = abs(row[i])
    return maxes

def CalcNormalizations(features, maxes):
    ofeatures = copy.deepcopy(features)
    length = len(ofeatures)
    for row in ofeatures:
        for i in range(len(row)):
            row[i] = (row[i])/maxes[i]
    return ofeatures

def SimilarityCalculation(input_class, output_class):
    numer = 0.0
    mag1 = 0.0
    mag2 = 0.0

    for i in range(len(input_class)):
        numer += (input_class[i] * output_class[i])
        mag1 += input_class[i] ** 2
        mag2 += output_class[i] ** 2
    return numer / ((mag1 * mag2) ** 0.5)

def GetClassFromNum(num):
    out = [0 for _ in range(20)] #we have 600k songs as our input, thus we need 20 bits to hold any as a 
    i = 0
    while(num > 0):
        out[i] = num % 2
        num = num // 2
        i += 1
    return out

def GetNumFromClass(vals):
    out = 0
    for i in range(len(vals)):
        out += (2 ** i) * vals[i]
    return out

def CalculateClasses(data):
    # just takes the number in the list, and returns it as a little-endian
    # binary representation
    output = []
    for i in range(len(data)):
        data[i].append(GetClassFromNum(i))
    return data

maxes = Maxes(features)
normalized_features = CalcNormalizations(features, maxes)
normalized_data_with_classes = CalculateClasses(normalized_features)
# Drop irrelevant columns in feature set
# features = features.drop('key', axis=1)
    # Key?
    # My guess is that the key is the western music scale key that the song is in
    # possibly normalized for major scales? (e.g., gmaj == cmin)
# sample_data
# song_profiles.head()
# features.head()
# I'm going to make this an RNN. Deal.
# we can use the index of the song in the features list as the class. Because we're 

In [146]:
import math
import random
import copy
# We define a our recurrent neural network as one where the features of the current song being played is the input, along 
# along with classifiers (i.e., their place in the features array) as other inputs.
# we return the value as a binary number representation of the index
class Neuron:
    Weights = []
    Output = 1.0
    Delta = 0.5
    
    def __init__(self, prevLayerWidth):
        self.Weights = [random.random() for _ in range(prevLayerWidth + 1)]
    
    def Activate(self, inputs):
        activation_val = self.Weights[-1]
#         print("\tsanity check:\n\t\t", inputs, len(inputs), "\n\t\t", self.Weights, len(self.Weights))
        for i in range(len(inputs)):
            activation_val += inputs[i] * self.Weights[i]
        self.Output = max(0, activation_val)
        #print("output sanity check:", self.Output)
        return self.Output
    
    def UpdateWeights(self, regressive_outputs, learn_rate):
#         print("UpdateWeights sanity check: ", regressive_outputs)
        for i in range(len(regressive_outputs)):
            self.Weights[i] += learn_rate * self.Delta * regressive_outputs[i]
        self.Weights[-1] += learn_rate * self.Delta
    
    def UpdateDelta(self, error):
        self.Delta = error * (0 if self.Output < 1 else 1)

class RecurrentNeuralNetwork:
    RecurrentInputs = 0
    OutputLength = 0
    NeuronLayers = []
    LearningRate = 0.0
    
    def __init__(self, features, recurrent_inputs, hidden_layers, output_length, learning_rate):
        print("Building neural network with", features, "features,", recurrent_inputs, "recurrent inputs, and", hidden_layers, "hidden layers.")
        self.RecurrentInputs = recurrent_inputs
        self.LearningRate = learning_rate
        self.OutputLength = output_length
        
        y_intercept = features + recurrent_inputs * output_length
        slope = (output_length - y_intercept)/hidden_layers #for quicker learning, we narrow the scope of each layer a bit
        prev_width = y_intercept
        for i in range(hidden_layers + 1):
            next_len = math.ceil((i*slope + y_intercept)) if (i < hidden_layers) else output_length
            print("\tBuilding layer", i, "with", next_len, "neurons.")
            for j in range(next_len):
                self.NeuronLayers.append([Neuron(prev_width) for _ in range(next_len)])
            prev_width = next_len
        print("Completed!")
    
    def ForwardPropagation(self, current_input, recurrent_inputs):
        # setup previous layers "output" as the total input
        layer_output = copy.deepcopy(current_input)
#         print("\tsanity check: ", recurrent_inputs)
        for recurrent_input in recurrent_inputs:
            for el in recurrent_input:
                layer_output.append(el)
        
        for layer in self.NeuronLayers:
            next_layer_input = []
            for neuron in layer:
                next_layer_input.append(neuron.Activate(layer_output))
            layer_output = next_layer_input
        return layer_output
    
    def BackwardPropagation(self, expected_value):
        prev_layer = []
        for layer in reversed(self.NeuronLayers):
            layer_error = []
#             print("\t\t", len(expected_value), ",", len(layer))
            if (layer is self.NeuronLayers[-1]):
                for i in range(len(layer)):
                    layer_error.append(expected_value[i] - layer[i].Output)
            else:
                for i in range(len(layer)):
                    error = 0.0
                    for neuron in prev_layer:
                        error += neuron.Weights[i] * neuron.Delta
                    layer_error.append(error)
            for i in range(len(layer)):
                layer[i].UpdateDelta(layer_error[i])
            prev_layer = layer

    def UpdateAllWeights(self, current_input, recurrent_inputs):
        current_input = copy.deepcopy(current_input)
        layer_input = current_input
        for recurrent_input in recurrent_inputs:
            for el in recurrent_input:
                layer_input.append(el)
#         print("updateallweights sanity:", layer_input)
        for layer in self.NeuronLayers:
            next_layer_input = []
            for neuron in layer:
                neuron.UpdateWeights(layer_input, self.LearningRate)
                next_layer_input.append(neuron.Output)
            layer_input = next_layer_input

    def SimilarityCalculation(self, vector01, vector02):
        numer = 0.0
        mag1 = 0.0
        mag2 = 0.0

        for i in range(len(vector01)):
            numer += (vector01[i] * vector02[i])
            mag1 += vector01[i] ** 2
            mag2 += vector02[i] ** 2
        return numer / ((mag1 * mag2) ** 0.5)

    def FindClosestInTrainingData(self, output_class, training_data):
        output = training_data[0]
        max_similarity = 0
        for t in training_data:
            current_similarity = self.SimilarityCalculation(output_class, t)
            if (current_similarity > max_similarity):
                max_similarity = current_similarity
                output = t
        return output

    def Train(self, training_set, epochs):
        shuffled_set = copy.copy(training_set)
        print("RNN starting training with", epochs, "epochs and",
            self.RecurrentInputs, "recurrent inputs on",
            len(training_set),"pieces of data.")
        prev_inputs = []
        expected_similarity = 0.85
        blank = [0 for _ in range(self.OutputLength)]

        for i in range(self.RecurrentInputs):
            prev_inputs.append(copy.deepcopy(blank))
        
        for epoch in range(epochs + 1):
            print("Epoch:", epoch + 1)
            total_epoch_error = 0.0
            random.shuffle(shuffled_set)
            
            j = 1
            for value in shuffled_set:
#                 print("prepossible sanity", prev_inputs)
                possible_output = self.ForwardPropagation(value[:-1], prev_inputs)[:-1] # drop biaser for actual class
                print("posout:", possible_output)
                calculated_class = [0 if el <= 0 else 1 for el in possible_output]
                index = GetNumFromClass(calculated_class)

#                 print("sanity:", calculated_class)
                if (index < len(training_set)):
                    calculated_song = training_set[index]
                    similarity = self.SimilarityCalculation(value[:-1], calculated_song[:-1])
                    print("\t", j,"Index:", index, "\tSimilarity: ", similarity)
                else:
                    calculated_class = copy.deepcopy(value[-1])
                    similarity = 0
                    print("\t", j, "Index:", index, "\tOOB, 0 similarity")
                next_recurrent_inputs = prev_inputs

                if (similarity < expected_similarity or similarity == 1.0):
                    calculated_class.append(0)
                else:
                    calculated_class.append(1)
                    next_recurrent_inputs = prev_inputs[1:]
                    next_recurrent_inputs.append(calculated_class)

#                 print("\texpected:", expected)
                total_epoch_error = (1 - calculated_class[-1]) ** 2 
#                 print("sanity:", calculated_class)
                self.BackwardPropagation(calculated_class)
                self.UpdateAllWeights(value[:-1], prev_inputs)
                prev_inputs = next_recurrent_inputs
                
                j += 1
            print("\n\n\tError:", total_epoch_error)

In [148]:
test_features = normalized_data_with_classes
rnn = RecurrentNeuralNetwork(len(test_features[0]) - 1, 2, 3, len(test_features[0][-1]) + 1, 0.5)
rnn.Train(test_features, 20)

Building neural network with 15 features, 2 recurrent inputs, and 3 hidden layers.
	Building layer 0 with 57 neurons.
	Building layer 1 with 45 neurons.
	Building layer 2 with 33 neurons.
	Building layer 3 with 21 neurons.
Completed!
RNN starting training with 20 epochs and 2 recurrent inputs on 88 pieces of data.
Epoch: 1
posout: [1.2972372668780255e+204, 1.4085224174061907e+204, 1.2208593491798322e+204, 1.339210207262176e+204, 9.575061705040711e+203, 1.1282936089726316e+204, 1.2125852007753639e+204, 1.3414716934471215e+204, 1.2738207887441367e+204, 1.2388711524195376e+204, 1.3673879216645747e+204, 1.3526554016999603e+204, 1.293204152555941e+204, 1.2223916125079887e+204, 1.2916522975531377e+204, 9.809195841395378e+203, 1.3081856517569434e+204, 1.3467481918956527e+204, 1.2210277257559048e+204, 9.462196955151948e+203]
	 1 Index: 1048575 	OOB, 0 similarity
posout: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
	 2 Index: 0 	Similarity:  0.6387390740209391
posout: [0, 0, 0, 

KeyboardInterrupt: 

In [None]:
# this doesn't work, and I don't know why.

import copy
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

class TensorFlowColaborativeNetwork:
    Session = None
    Graph = None
    RecurrentInputs = 0
    OutputLength = 0
    Layers = []
    LearningRate = 0.0
    TrainingData = []

    def __init__(self, training_data, recurrent_inputs, hidden_layers, output_length, learning_rate):
        print("Building neural network with", len(training_data[0]), "features,", recurrent_inputs, "recurrent inputs, and", hidden_layers, "hidden layers.")
        self.TrainingData = training_data
        self.Graph = tf.Graph().as_default()
        self.RecurrentInputs = recurrent_inputs
        self.LearningRate = learning_rate
        self.OutputLength = output_length
        
        features = len(training_data[0])
        y_intercept = features * (1 + recurrent_inputs)
        slope = (output_length - y_intercept)/hidden_layers
        prev_width = y_intercept
        
        song_features  = tf.placeholder(tf.int32, shape=(features))
        song_var = tf.Variable(tf.random_normal([len(training_data), features], stddev=0.5), name="Song-Features")
        embedding = tf.keras.layers.Flatten()(tf.nn.embedding_lookup(song_features, song_var))
        recurrency = [copy.deepcopy(embedding) for _ in range(recurrent_inputs + 1)]
        concatenated = tf.keras.layers.concatenate(recurrency)
        dropout = tf.keras.layers.Dropout(0.2)(concatenated)
        
        for i in range(hidden_layers + 1):
            next_len = math.ceil((i*slope + y_intercept)) if (i < hidden_layers) else output_length
            lname = "Layer-"+str(i)
            bname = "BatchNorm-"+str(i)
            dname = "Dropout-"+str(i)
            
            self.Layers.append(tf.keras.layers.Dense(next_len, activation = 'relu', name = lname)(dropout if len(self.Layers) == 0 else self.Layers[-1]))
            if (len(self.Layers) < 2):
                normalized = tf.keras.layers.BatchNormalization(name = bname)(self.Layers[0])
                layerdropout = tf.keras.layers.Dropout(0.2, name = dname)(normalized)
        output_layer = tf.keras.layers.Dense(1, kernel_initializer="lecun_uniform", name="Layer-Output")(self.Layers[-1])
        
        labels = tf.placeholder(tf.int32, shape=(1))
        loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=our_labels, logits=output_layer))
        opt = tf.train.AdamOptimizer(learning_rate = self.LearningRate)
        init = tf.global_variable_initialier()
        self.Session = tf.Session(config=none, graph=self.Graph)
        self.Session.run(init)
tflownet = TensorFlowColaborativeNetwork(features, 20, 20, len(features[0]), 0.5)

In [None]:
np.tanh(0.5)