In [2]:
from matplotlib import pyplot as plt
import numpy as np
import math,random
from tqdm import tqdm
from random import shuffle
import pprint

import pandas as pd
from src.features.build_features import word_count_series
from src.features.text_blob_analysis import analyze_lyrics
from src.data.make_dataset import create_dataset
from src.data.util import unzip_file

def step_function(x):
    """Threshold activation: 1 when x is non-negative, otherwise 0."""
    if x >= 0:
        return 1
    return 0


def perceptron_output(weights, bias, x):
    """Return 1 if the perceptron 'fires' for input x, 0 if it does not.

    The activation is the dot product of `weights` and `x` plus `bias`,
    passed through `step_function`.
    """
    activation = np.dot(weights, x) + bias
    return step_function(activation)

def sigmoid(t):
    """Logistic function 1 / (1 + e^-t), evaluated without overflowing.

    The direct formula calls math.exp(-t), which raises OverflowError once
    -t exceeds roughly 709. For negative t the algebraically equivalent form
    e^t / (1 + e^t) is used instead, so the exponent passed to math.exp is
    never positive and the result simply underflows towards 0.0.

    Args:
        t: a real number (the weighted input of a neuron).

    Returns:
        A float in the closed interval [0.0, 1.0].
    """
    if t >= 0:
        return 1 / (1 + math.exp(-t))
    exp_t = math.exp(t)  # t < 0 here, so this is in (0, 1) or underflows to 0.0
    return exp_t / (1 + exp_t)

def neuron_output(weights, inputs):
    """Output of one sigmoid neuron: sigmoid of the dot product of
    `weights` and `inputs`."""
    weighted_sum = np.dot(weights, inputs)
    return sigmoid(weighted_sum)

def predict(input, network):
    """Forward-propagate `input` through `network` and return the outputs
    of the last layer only."""
    layer_outputs = feed_forward(network, input)
    return layer_outputs[-1]

def feed_forward(neural_network, input_vector):
    """Forward-propagate an input through the network.

    Args:
        neural_network: a list of layers; each layer is a list of neurons and
            each neuron is a list of weights whose last entry is the bias
            weight.
        input_vector: the input values for the first layer. Any iterable of
            numbers is accepted (list, tuple, 1-D NumPy array).

    Returns:
        A list with one entry per layer, each entry being the list of neuron
        outputs of that layer. The last entry is the network's output.
    """
    outputs = []

    for layer in neural_network:
        # Copy into a plain list before appending the bias input: `+ [1]` on a
        # NumPy array would add 1 to every element instead of extending it,
        # and on a tuple it would raise TypeError.
        input_with_bias = list(input_vector) + [1]
        output = [neuron_output(neuron, input_with_bias)  # compute the output
                  for neuron in layer]                    # for this layer
        outputs.append(output)                            # and remember it

        # the input to the next layer is the output of this one
        input_vector = output

    return outputs
    
def backpropagate(network, input_vector, targets):
    """Run one gradient-descent step on a two-layer network, in place.

    The network is fed `input_vector`, the squared-error gradient with respect
    to `targets` is propagated backwards, and the weights of both layers are
    adjusted in place (the step size is fixed at 1).

    Args:
        network: [hidden_layer, output_layer]; each layer is a list of neurons
            and each neuron a mutable list of weights with the bias weight
            last. Exactly two layers are required, otherwise unpacking the
            result of feed_forward raises ValueError.
        input_vector: iterable of input values for the hidden layer.
        targets: the desired output values, one per output neuron.

    Returns:
        None; `network` is modified in place.
    """
    hidden_outputs, outputs = feed_forward(network, input_vector)

    # Always take the output layer from the network that was passed in. The
    # previous code read a module-level `output_layer`, which fails with
    # NameError unless such a global happens to exist.
    output_neurons = network[-1]

    # the output * (1 - output) is from the derivative of sigmoid
    output_deltas = [output * (1 - output) * (output - target)
                     for output, target in zip(outputs, targets)]

    # adjust weights for output layer, one neuron at a time
    for i, output_neuron in enumerate(output_neurons):
        # focus on the ith output layer neuron
        for j, hidden_output in enumerate(hidden_outputs + [1]):
            # adjust the jth weight based on both
            # this neuron's delta and its jth input
            output_neuron[j] -= output_deltas[i] * hidden_output

    # back-propagate errors to hidden layer
    # NOTE: this reads the output weights after the adjustment above, which
    # keeps the update order of the original implementation.
    hidden_deltas = [hidden_output * (1 - hidden_output) *
                     np.dot(output_deltas, [neuron[i] for neuron in output_neurons])
                     for i, hidden_output in enumerate(hidden_outputs)]

    # adjust weights for hidden layer, one neuron at a time
    for i, hidden_neuron in enumerate(network[0]):
        # list(...) so that a tuple or NumPy array input also gets the bias
        # input appended rather than raising / broadcasting
        for j, input_value in enumerate(list(input_vector) + [1]):
            hidden_neuron[j] -= hidden_deltas[i] * input_value

In [3]:
# Load the dataset. create_dataset() returns a pair; only its second element
# (bound to `df`) is used by the cells visible in this notebook.
what, df = create_dataset()

# Number of rows loaded
print(len(df))


Creating missing paths...
Skipping unzip...
Skipping data filtering...
750


In [4]:
# targets: one one-hot row per song, with one column per distinct genre
series = df['genre'].value_counts()
genre_labels = series.index  # the distinct genre labels
targets = [
    [int(label == genre) for label in genre_labels]
    for genre in df['genre']
]

# features: one value per song from word_count_series (presumably the lyric
# word count) plus the first two values of each analyze_lyrics result
features_series = word_count_series(df['lyrics'])
sentiments = analyze_lyrics(df['lyrics'])

inputs = [
    [word_count, sentiment[0], sentiment[1]]
    for word_count, sentiment in zip(features_series, sentiments)
]
print(inputs[:20])
# NOTE: `inputs` and `targets` are aligned by position; shuffling or slicing
# `inputs` on its own would break that pairing.

Analyzing sentiment...: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 750/750 [00:01<00:00, 746.71it/s]


[[135, -0.05, 0.25227272727272726], [367, 0.2929487179487179, 0.6051282051282051], [174, 0.2529004329004329, 0.4183982683982685], [162, -0.04561965811965812, 0.4529914529914531], [264, -0.1270408163265306, 0.464030612244898], [219, 0.3233918128654971, 0.6573099415204676], [287, -0.07853535353535353, 0.311489898989899], [175, -0.28333333333333327, 0.6733333333333333], [301, -0.020205026455026462, 0.5448412698412698], [223, 0.35684523809523805, 0.544047619047619], [321, 0.19166666666666668, 0.7666666666666666], [322, 0.030833333333333334, 0.7305555555555558], [215, 0.28055555555555556, 0.5055555555555556], [210, 0.13045314900153607, 0.45410906298003073], [253, 0.22566844919786108, 0.5039215686274511], [222, 0.42063492063492064, 0.6126984126984126], [215, 0.28055555555555556, 0.5055555555555556], [226, -0.569234360410831, 0.9330532212885154], [196, 0.04718253968253968, 0.35635487528344684], [190, 0.13043478260869565, 0.3152173913043478]]


In [5]:
########### Model training ###########

###########
# Neural network setup
###########
random.seed(0)   # to get repeatable results
input_size = 3   # number of input nodes (same as the number of features)
num_hidden = 3   # number of hidden nodes
output_size = 3  # number of output nodes (in our case, genres)

# each hidden neuron has one weight per input, plus a bias weight
hidden_layer = [
    [random.random() for _ in range(input_size + 1)]
    for _ in range(num_hidden)
]

# each output neuron has one weight per hidden neuron, plus a bias weight
output_layer = [
    [random.random() for _ in range(num_hidden + 1)]
    for _ in range(output_size)
]

# the network starts out with random weights
network = [hidden_layer, output_layer]

# Training iterations: every pass runs one backpropagation step per sample.
# The weights are printed before training, at a few checkpoints, and at the end.
num = 0
print(network)
for __ in tqdm(range(50000)):
    num += 1
    if num in (10000, 20000, 40000, 45000):
        print(network)
    for input_vector, target_vector in zip(inputs, targets):
        backpropagate(network, input_vector, target_vector)
print(network)

[[[0.8444218515250481, 0.7579544029403025, 0.420571580830845, 0.25891675029296335], [0.5112747213686085, 0.4049341374504143, 0.7837985890347726, 0.30331272607892745], [0.4765969541523558, 0.5833820394550312, 0.9081128851953352, 0.5046868558173903]], [[0.28183784439970383, 0.7558042041572239, 0.6183689966753316, 0.25050634136244054], [0.9097462559682401, 0.9827854760376531, 0.8102172359965896, 0.9021659504395827], [0.3101475693193326, 0.7298317482601286, 0.8988382879679935, 0.6839839319154413]]]


 20%|██████████████████████████▉                                                                                                            | 9996/50000 [03:56<15:41, 42.49it/s]

[[[0.8444218515250481, 0.7579544029403025, 0.420571580830845, 0.25891675029296335], [0.511274698701284, 0.4049341374504143, 0.7837985890347726, 0.30331272565930606], [0.47659680292115125, 0.5833820394550312, 0.9081128851953352, 0.504686853016576]], [[-1.1206707106101983, -0.6467043510924753, -0.7841395595380324, -1.152002213643022], [0.9313519113066225, 1.004391131393791, 0.8318228913593951, 0.9237716057801838], [-1.366861203114904, -0.9471770237211868, -0.7781704820595584, -0.9930248405187927]]]


 40%|█████████████████████████████████████████████████████▌                                                                                | 19999/50000 [07:54<11:48, 42.35it/s]

[[[0.8444218515250481, 0.7579544029403025, 0.420571580830845, 0.25891675029296335], [0.51127467603164, 0.4049341374504143, 0.7837985890347726, 0.30331272523964176], [0.4765966516749135, 0.5833820394550312, 0.9081128851953352, 0.5046868502154833]], [[-1.120670710250486, -0.6467043509725712, -0.7841395603818019, -1.1520022132788688], [0.9313519112955203, 1.0043911314004523, 0.8318228913727178, 0.923771605771302], [-1.3668612038298877, -0.9471770239831995, -0.7781704803675785, -0.9930248412337763]]]


 80%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▏                          | 39996/50000 [15:43<03:51, 43.24it/s]

[[[0.8444218515250481, 0.7579544029403025, 0.420571580830845, 0.25891675029296335], [0.5112746306923521, 0.4049341374504143, 0.7837985890347726, 0.30331272440031315], [0.47659634917378657, 0.5833820394550312, 0.9081128851953352, 0.5046868446132979]], [[-1.1206707095310615, -0.646704350732763, -0.7841395620693409, -1.1520022125505625], [0.9313519112733158, 1.004391131413775, 0.8318228913993632, 0.9237716057535384], [-1.366861205259855, -0.9471770245072247, -0.7781704769836187, -0.9930248426637436]]]


 90%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌             | 44998/50000 [17:41<01:56, 42.89it/s]

[[[0.8444218515250481, 0.7579544029403025, 0.420571580830845, 0.25891675029296335], [0.5112746193575302, 0.4049341374504143, 0.7837985890347726, 0.303312724190481], [0.47659627354650436, 0.5833820394550312, 0.9081128851953352, 0.5046868432127516]], [[-1.1206707093512054, -0.646704350672811, -0.7841395624912256, -1.1520022123684859], [0.9313519112677647, 1.0043911314171057, 0.8318228914060245, 0.9237716057490976], [-1.3668612056173468, -0.9471770246382311, -0.7781704761376288, -0.9930248430212354]]]


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50000/50000 [19:40<00:00, 42.36it/s]


In [20]:
# Genre list kept from an earlier note (presumably the genres of the full dataset):
# 'Rock', 'Pop', 'Hip-Hop', 'Not Available', 'Metal', 'Country', 'Jazz', 'Electronic', 'Other', 'R&B', 'Indie', 'Folk'
# The labels actually used for the targets are the ones in genre_labels, printed below.
# Show the last ten feature rows for reference.
print(inputs[-10:])

print('##########################')
print(genre_labels)
# Probe the trained network with a hand-written feature vector. The targets were
# one-hot encoded in genre_labels order, so res[k] is the score for genre_labels[k].
# NOTE(review): the first feature here (1) is far below the values seen in `inputs`.
res = predict([1, -1,1], network)
print(res)


[[292, -0.09597883597883598, 0.28423280423280417], [102, -0.009999999999999988, 0.3516666666666667], [103, 0.06, 0.62], [340, 0.15690359477124188, 0.3385620915032678], [217, 0.07628855519480511, 0.6662822420634921], [153, 0.19627450980392155, 0.6133333333333333], [190, -0.1220959595959596, 0.33181818181818185], [199, 0.047708333333333346, 0.4533333333333333], [241, 0.2197425381635908, 0.5253645477329689], [121, 0.03166666666666666, 0.5516666666666666]]
##########################
Index(['Hip-Hop', 'Rock', 'Pop'], dtype='object')
[0.046076819280455414, 0.9518826118018368, 0.03678346163343867]
