In [1]:
from matplotlib import pyplot as plt
import numpy as np
import math,random
from tqdm import tqdm
from random import shuffle
import pprint

import pandas as pd
from src.features.build_features import word_count_series
from src.features.text_blob_analysis import analyze_sentiment
from src.data.make_dataset import create_dataset
from src.data.util import unzip_file

def step_function(x):
    return 1 if x >= 0 else 0


def perceptron_output(weights, bias, x):
    '''Returns 1 if the perceptrion 'fires', 0 if not '''
    return step_function(np.dot(weights, x) + bias)

def sigmoid(t):
    return 1 / (1 + math.exp(-t))

def neuron_output(weights, inputs):
    return sigmoid(np.dot(weights, inputs))

def predict(input, network):
    return feed_forward(network, input)[-1]

def feed_forward(neural_network, input_vector):
    """takes in a neural network (represented as a list of lists of lists of weights)
    and returns the output from forward-propagating the input"""

    outputs = []

    for layer in neural_network:

        input_with_bias = input_vector + [1]             # add a bias input
        output = [neuron_output(neuron, input_with_bias) # compute the output
                  for neuron in layer]                   # for this layer
        outputs.append(output)                           # and remember it

        # the input to the next layer is the output of this one
        input_vector = output

    return outputs
    
def backpropagate(network, input_vector, targets):
    hidden_outputs, outputs = feed_forward(network, input_vector)
    
  
    # the output * (1 - output) is from the derivative of sigmoid
    output_deltas = [output * (1 - output) * (output - target) for output, target in zip(outputs, targets)]
        # adjust weights for output layer, one neuron at a time
    for i, output_neuron in enumerate(network[-1]):
    # focus on the ith output layer neuron
        for j, hidden_output in enumerate(hidden_outputs + [1]):
            # adjust the jth weight based on both
            # this neuron's delta and its jth input
            output_neuron[j] -= output_deltas[i] * hidden_output
    # back-propagate errors to hidden layer
    hidden_deltas = [hidden_output * (1 - hidden_output) * np.dot(output_deltas, [n[i] for n in output_layer])for i, hidden_output in enumerate(hidden_outputs)]
        
    # adjust weights for hidden layer, one neuron at a time
    for i, hidden_neuron in enumerate(network[0]):
        for j, input in enumerate(input_vector + [1]):
            hidden_neuron[j] -= hidden_deltas[i] * input

In [2]:
# Importing data
training_df, df = create_dataset()

print(len(df))
#print(len(testdata_df))


Creating missing paths...
Skipping unzip...
Skipping data filtering...
750


In [12]:
# targets
series = df['genre'].value_counts()
genre_labels = series.keys() # getting genre labels
targets = [[1 if i == j else 0 for i in genre_labels] for j in df['genre']]
# features
features_series = word_count_series(df['lyrics'])
df = analyze_sentiment(df)
polarity = df['polarity']
subjectivity = df['subjectivity']

inputs = [[f, p, s]  for f,p,s in zip(features_series, polarity, subjectivity)]
print(inputs[0:10])

#shuffle(inputs)
#inputs = inputs[0:500]

Analyzing sentiment...: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 750/750 [00:00<00:00, 771.16it/s]
Analyzing sentiment...: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 750/750 [00:00<00:00, 369563.91it/s]
Analyzing sentiment...: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 750/750 [00:00<00:00, 375878.60it/s]


[[135, -0.05, 0.25227272727272726], [367, 0.2929487179487179, 0.6051282051282051], [174, 0.2529004329004329, 0.4183982683982685], [162, -0.04561965811965812, 0.4529914529914531], [264, -0.1270408163265306, 0.464030612244898], [219, 0.3233918128654971, 0.6573099415204676], [287, -0.07853535353535353, 0.311489898989899], [175, -0.28333333333333327, 0.6733333333333333], [301, -0.020205026455026462, 0.5448412698412698], [223, 0.35684523809523805, 0.544047619047619]]


In [19]:
########### Træning af model ###########

###########
# Opsætning af Neural Network
###########
random.seed(0) # to get repeatable results
input_size = 3 # antal af input noder (samme antal som feautures)
num_hidden = 1 # antal af hidden noder
output_size = 3 # antal af output noder (i vores tilfælde, genres)

# each hidden neuron has one weight per input, plus a bias weight
hidden_layer = [[random.random() for __ in range(input_size + 1)] for __ in range(num_hidden)]

# each output neuron has one weight per hidden neuron, plus a bias weight
output_layer = [[random.random() for __ in range(num_hidden + 1)] for __ in range(output_size)]

# the network starts out with random weights
network = [hidden_layer, output_layer]

# Iteration of training
num = 0
print(network)
for __ in  tqdm(range(10000)):
    num = num +1
    if num == 1000 or num == 2000 or num == 4000 or num == 6000 or num == 8000:
        print(network)
    for input_vector, target_vector in zip(inputs, targets):
        backpropagate(network, input_vector, target_vector)
print(network)

[[[0.8444218515250481, 0.7579544029403025, 0.420571580830845, 0.25891675029296335]], [[0.5112747213686085, 0.4049341374504143], [0.7837985890347726, 0.30331272607892745], [0.4765969541523558, 0.5833820394550312]]]


 10%|█████████▋                                                                                       | 998/10000 [00:37<05:41, 26.34it/s]

[[[0.8444218515250481, 0.7579544029403025, 0.420571580830845, 0.25891675029296335]], [[1.6984514608495225, 1.5921108769317691], [-1.4146082091810208, -1.8950940721377507], [-1.9082127145963963, -1.801427629294161]]]


 20%|███████████████████▏                                                                            | 1997/10000 [01:15<05:08, 25.93it/s]

[[[0.8444218515250481, 0.7579544029403025, 0.420571580830845, 0.25891675029296335]], [[1.6984514608493004, 1.5921108769319912], [-1.4146082091805767, -1.8950940721381948], [-1.9082127145961743, -1.801427629294383]]]


 40%|██████████████████████████████████████▍                                                         | 3999/10000 [02:32<03:50, 25.99it/s]

[[[0.8444218515250481, 0.7579544029403025, 0.420571580830845, 0.25891675029296335]], [[1.6984514608488563, 1.5921108769324352], [-1.4146082091796885, -1.895094072139083], [-1.9082127145957302, -1.801427629294827]]]


 60%|█████████████████████████████████████████████████████████▌                                      | 5997/10000 [03:48<02:28, 26.96it/s]

[[[0.8444218515250481, 0.7579544029403025, 0.420571580830845, 0.25891675029296335]], [[1.6984514608484123, 1.5921108769328793], [-1.4146082091788004, -1.8950940721399712], [-1.908212714595286, -1.8014276292952711]]]


 80%|████████████████████████████████████████████████████████████████████████████▊                   | 7999/10000 [05:03<01:15, 26.50it/s]

[[[0.8444218515250481, 0.7579544029403025, 0.420571580830845, 0.25891675029296335]], [[1.6984514608479682, 1.5921108769333234], [-1.4146082091779122, -1.8950940721408593], [-1.908212714594842, -1.8014276292957152]]]


100%|███████████████████████████████████████████████████████████████████████████████████████████████| 10000/10000 [06:18<00:00, 26.39it/s]


[[[0.8444218515250481, 0.7579544029403025, 0.420571580830845, 0.25891675029296335]], [[1.6984514608475239, 1.5921108769337677], [-1.4146082091770236, -1.895094072141748], [-1.9082127145943977, -1.8014276292961595]]]


In [16]:
# 'Rock', 'Pop', 'Hip-Hop', 'Not Available', 'Metal', 'Country', 'Jazz', 'Electronic', 'Other', 'R&B', 'Indie', 'Folk'
print(inputs[-10:])

print('##########################')
print(genre_labels)
res = predict([1, 0], network)
print(res)


[[-0.09597883597883598, 0.28423280423280417], [-0.009999999999999988, 0.3516666666666667], [0.06, 0.62], [0.15690359477124188, 0.3385620915032678], [0.07628855519480511, 0.6662822420634921], [0.19627450980392155, 0.6133333333333333], [-0.1220959595959596, 0.33181818181818185], [0.047708333333333346, 0.4533333333333333], [0.2197425381635908, 0.5253645477329689], [0.03166666666666666, 0.5516666666666666]]
##########################
Index(['Rock', 'Hip-Hop', 'Pop'], dtype='object')
[0.899601214060026, 0.015641430721431964, 0.14530625652855883]
