In [15]:
from matplotlib import pyplot as plt
import numpy as np
import math,random
from tqdm import tqdm
from random import shuffle
import pprint

import pandas as pd
from src.features.build_features import word_count, sentence_avg_word_length, normalize
from src.data.make_dataset import create_dataset
from src.data.util import unzip_file

def step_function(x):
    return 1 if x >= 0 else 0


def perceptron_output(weights, bias, x):
    '''Returns 1 if the perceptrion 'fires', 0 if not '''
    return step_function(np.dot(weights, x) + bias)

def sigmoid(t):
    return 1 / (1 + math.exp(-t))

def neuron_output(weights, inputs):
    return sigmoid(np.dot(weights, inputs))

def predict(input, network):
    return feed_forward(network, input)[-1]

def feed_forward(neural_network, input_vector):
    """takes in a neural network (represented as a list of lists of lists of weights)
    and returns the output from forward-propagating the input"""

    outputs = []

    for layer in neural_network:

        input_with_bias = input_vector + [1]             # add a bias input
        output = [neuron_output(neuron, input_with_bias) # compute the output
                  for neuron in layer]                   # for this layer
        outputs.append(output)                           # and remember it

        # the input to the next layer is the output of this one
        input_vector = output

    return outputs
    
def backpropagate(network, input_vector, targets):
    hidden_outputs, outputs = feed_forward(network, input_vector)
    
    print(hidden_outputs)
    print(outputs)
    
    # the output * (1 - output) is from the derivative of sigmoid
    output_deltas = [output * (1 - output) * (output - target) for output, target in zip(outputs, targets)]
        # adjust weights for output layer, one neuron at a time
    for i, output_neuron in enumerate(network[-1]):
    # focus on the ith output layer neuron
        for j, hidden_output in enumerate(hidden_outputs + [1]):
            # adjust the jth weight based on both
            # this neuron's delta and its jth input
            output_neuron[j] -= output_deltas[i] * hidden_output
    # back-propagate errors to hidden layer
    hidden_deltas = [hidden_output * (1 - hidden_output) * np.dot(output_deltas, [n[i] for n in output_layer])for i, hidden_output in enumerate(hidden_outputs)]
        
    # adjust weights for hidden layer, one neuron at a time
    for i, hidden_neuron in enumerate(network[0]):
        for j, input in enumerate(input_vector + [1]):
            hidden_neuron[j] -= hidden_deltas[i] * input

In [12]:
# Importing data
df, testdata_df = create_dataset()

print(len(df))
print(len(testdata_df))


Creating missing paths...
Skipping unzip...
Skipping data filtering...
12000
750


In [20]:
# targets
series = df['genre'].value_counts()
genre_labels = series.keys() # getting genre labels
targets = [[1 if i == j else 0 for i in genre_labels] for j in df['genre']]



# features
df = sentence_avg_word_length(df,"avg_word_len", 'lyrics')
df = word_count(df,"word_count", 'lyrics')
df = normalize(df, 'word_count_nm', 'word_count')

inputs = [[f, wl] for f, wl in zip(df['avg_word_len'],df["word_count_nm"])]

#shuffle(inputs)
#inputs = inputs[0:500]

In [6]:
########### Træning af model ###########

###########
# Opsætning af Neural Network
###########
random.seed(0) # to get repeatable results
input_size = 2 # antal af input noder (samme antal som feautures)
num_hidden = 1 # antal af hidden noder
output_size = 3 # antal af output noder (i vores tilfælde, genres)

# each hidden neuron has one weight per input, plus a bias weight
hidden_layer = [[random.random() for __ in range(input_size + 1)] for __ in range(num_hidden)]

# each output neuron has one weight per hidden neuron, plus a bias weight
output_layer = [[random.random() for __ in range(num_hidden + 1)] for __ in range(output_size)]

# the network starts out with random weights
network = [hidden_layer, output_layer]

# Iteration of training
for __ in  tqdm(range(1000)):
    for input_vector, target_vector in zip(inputs, targets):
        backpropagate(network, input_vector, target_vector)

  0%|          | 0/1000 [00:00<?, ?it/s]

[1.0, 1.0, 1.0, 1.0, 1.0]
[0.9650855891197063, 0.9904809373346448, 0.9608615493284269, 0.9864464062430583, 0.9598543479815351, 0.9427789460615875, 0.9614276290510481, 0.9242539085735637, 0.98486920784435, 0.9468375196915892, 0.9013138374686338, 0.9946296164527801]





IndexError: list index out of range

In [9]:
# denne blok er forbeholdt for egentlige predicts fra trænet model

network

In [11]:
 pprint.pprint([[[0.8444218515250481, 0.7579544029403025], [0.420571580830845, 0.25891675029296335], [0.5112747213686085, 0.4049341374504143], [0.7837985890347726, 0.30331272607892745], [0.4765969541523558, 0.5833820394550312]], [[0.9081128851953352, 0.5046868558173903, 0.28183784439970383, 0.7558042041572239, 0.6183689966753316, 0.25050634136244054], [0.9097462559682401, 0.9827854760376531, 0.8102172359965896, 0.9021659504395827, 0.3101475693193326, 0.7298317482601286], [0.8988382879679935, 0.6839839319154413, 0.47214271545271336, 0.1007012080683658, 0.4341718354537837, 0.6108869734438016], [0.9130110532378982, 0.9666063677707588, 0.47700977655271704, 0.8653099277716401, 0.2604923103919594, 0.8050278270130223], [0.5486993038355893, 0.014041700164018955, 0.7197046864039541, 0.39882354222426875, 0.824844977148233, 0.6681532012318508], [0.0011428193144282783, 0.49357786646532464, 0.8676027754927809, 0.24391087688713198, 0.32520436274739006, 0.8704712321086546], [0.19106709150239054, 0.5675107406206719, 0.23861592861522019, 0.9675402502901433, 0.80317946927987, 0.44796957143557037], [0.08044581855253541, 0.32005460467254576, 0.5079406425205739, 0.9328338242269067, 0.10905784593110368, 0.5512672460905512], [0.7065614098668896, 0.5474409113284238, 0.814466863291336, 0.540283606970324, 0.9638385459738009, 0.603185627961383], [0.5876170641754364, 0.4449890262755162, 0.5962868615831063, 0.38490114597266045, 0.5756510141648885, 0.290329502402758], [0.18939132855435614, 0.1867295282555551, 0.6127731798686067, 0.6566593889896288, 0.47653099200938076, 0.08982436119559367], [0.7576039219664368, 0.8767703708227748, 0.9233810159462806, 0.8424602231401824, 0.898173121357879, 0.9230824398201768]]])

In [26]:
# 'Rock', 'Pop', 'Hip-Hop', 'Not Available', 'Metal', 'Country', 'Jazz', 'Electronic', 'Other', 'R&B', 'Indie', 'Folk'
res = predict([50], network)
print(res)


[0.045110360852890705, 0.02308738715274462, 0.022737105198688566, 0.02550967069976677, 0.010197567411125874, 0.9038265500054475, 0.00012913281374445947, 0.00012913277785688227, 0.06411270536687758, 0.00012913279439198867, 0.00012913276791952792, 0.00012913318395252365]
