# Introduccion
en este notebook se analizará, un dataset de 2000 canciones, y se espera crear una red neuronal capaz de poder el valor de popularidad de una canción

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
from keras import models
from keras import layers
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor
import category_encoders as ce

In [3]:
spotify = pd.read_csv('Spotify-2000.csv',
                      dtype={
                          'Top Genre':'category',
                          'Year':'category'},
                     decimal=",")
spotify.head()

Unnamed: 0,Index,Title,Artist,Top Genre,Year,Beats Per Minute (BPM),Energy,Danceability,Loudness (dB),Liveness,Valence,Length (Duration),Acousticness,Speechiness,Popularity
0,1,Sunrise,Norah Jones,adult standards,2004,157,30,53,-14,11,68,201.0,94,3,71
1,2,Black Night,Deep Purple,album rock,2000,135,79,50,-11,17,81,207.0,17,7,39
2,3,Clint Eastwood,Gorillaz,alternative hip hop,2001,168,69,66,-9,7,52,341.0,2,17,69
3,4,The Pretender,Foo Fighters,alternative metal,2007,173,96,43,-4,3,37,269.0,0,4,76
4,5,Waitin' On A Sunny Day,Bruce Springsteen,classic rock,2002,106,82,58,-5,10,87,256.0,1,3,59


In [4]:
spotify.shape

(1994, 15)

#### Limpieza de datos

Eliminimo las columnas Index, Title (y Artist?) y que no son Features como tal 

In [29]:
spotify_clean = spotify.drop(columns=['Index', 'Title', 'Artist'])
spotify_clean.mean()

Beats Per Minute (BPM)    120.215647
Energy                     59.679539
Danceability               53.238215
Loudness (dB)              -9.008526
Liveness                   19.012036
Valence                    49.408726
Length (Duration)         259.842122
Acousticness               28.858074
Speechiness                 4.994985
Popularity                 59.526580
dtype: float64

Normalizamos las variables

In [30]:
numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']

columnas_numericas = spotify_clean.select_dtypes(include=numerics).columns.to_list()
columnas_numericas 

['Beats Per Minute (BPM)',
 'Energy',
 'Danceability',
 'Loudness (dB)',
 'Liveness',
 'Valence',
 'Length (Duration)',
 'Acousticness',
 'Speechiness',
 'Popularity']

In [31]:
spotify_clean[columnas_numericas] = (spotify_clean[columnas_numericas] - spotify_clean[columnas_numericas].min()) / (spotify_clean[columnas_numericas].max() - spotify_clean[columnas_numericas].min())
spotify_clean.head()

Unnamed: 0,Top Genre,Year,Beats Per Minute (BPM),Energy,Danceability,Loudness (dB),Liveness,Valence,Length (Duration),Acousticness,Speechiness,Popularity
0,adult standards,2004,0.710059,0.278351,0.5,0.52,0.092784,0.677083,0.207154,0.949495,0.018868,0.674157
1,album rock,2000,0.579882,0.783505,0.465116,0.64,0.154639,0.8125,0.213373,0.171717,0.09434,0.314607
2,alternative hip hop,2001,0.775148,0.680412,0.651163,0.72,0.051546,0.510417,0.35225,0.020202,0.283019,0.651685
3,alternative metal,2007,0.804734,0.958763,0.383721,0.92,0.010309,0.354167,0.27763,0.0,0.037736,0.730337
4,classic rock,2002,0.408284,0.814433,0.55814,0.88,0.082474,0.875,0.264156,0.010101,0.018868,0.539326


Ahora convierto la columna de Generos, de variable categorica (Top Genre y Year) a numerica, usando binary encoding

In [32]:
encoder = ce.BinaryEncoder(cols=['Top Genre', 'Year'],return_df=True)
spotify_clean = encoder.fit_transform(spotify_clean) 
spotify_clean.head()

  elif pd.api.types.is_categorical(cols):


Unnamed: 0,Top Genre_0,Top Genre_1,Top Genre_2,Top Genre_3,Top Genre_4,Top Genre_5,Top Genre_6,Top Genre_7,Top Genre_8,Year_0,...,Beats Per Minute (BPM),Energy,Danceability,Loudness (dB),Liveness,Valence,Length (Duration),Acousticness,Speechiness,Popularity
0,0,0,0,0,0,0,0,0,1,0,...,0.710059,0.278351,0.5,0.52,0.092784,0.677083,0.207154,0.949495,0.018868,0.674157
1,0,0,0,0,0,0,0,1,0,0,...,0.579882,0.783505,0.465116,0.64,0.154639,0.8125,0.213373,0.171717,0.09434,0.314607
2,0,0,0,0,0,0,0,1,1,0,...,0.775148,0.680412,0.651163,0.72,0.051546,0.510417,0.35225,0.020202,0.283019,0.651685
3,0,0,0,0,0,0,1,0,0,0,...,0.804734,0.958763,0.383721,0.92,0.010309,0.354167,0.27763,0.0,0.037736,0.730337
4,0,0,0,0,0,0,1,0,1,0,...,0.408284,0.814433,0.55814,0.88,0.082474,0.875,0.264156,0.010101,0.018868,0.539326


Separo el dataset, por un lado 'X' va a tener de columnas todos los posibles features que se puede user, 'y' va a ser nuestra columna target, el valor que queremos predecir

In [33]:
X, y = spotify_clean.iloc[:,:-1], spotify.iloc[:,-1]

In [34]:
n_features = X.columns.size

In [35]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = \
    train_test_split(X, y, test_size=0.2, random_state=123)

In [36]:
y_train[y_train >= 50] = 1
y_train[y_train < 50] = 0

In [37]:
X_train = np.asarray(X_train).astype(np.float32)
y_train = np.asarray(y_train).astype(np.float32)

In [38]:
def generadorRed():
    red = models.Sequential()

    red.add(layers.Dense(units=32, activation='relu', input_shape=(n_features, )))

    red.add(layers.Dense(units=64, activation='relu'))
    red.add(layers.Dense(units=64, activation='relu'))

    red.add(layers.Dense(units=1, activation='sigmoid'))

    # Compile neural network
    red.compile(loss='mean_absolute_error',  # Mean Error
                optimizer='rmsprop',  # Root Mean Square Propagation
                metrics=['accuracy'])  # Accuracy performance metric
    return red

In [39]:
red_neuronal = KerasRegressor(build_fn=generadorRed,
                              epochs=100,
                              batch_size=X_train.size)

In [40]:
score = cross_val_score(red_neuronal, X_train, y_train, cv=5)
score.mean()

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

-3.100590984104201e-05