# **A. - B. ADELANTE Y ATRÁS**

In [None]:
#Juan Sebastian Camargo Sanchez
#Juan Pablo Daza Pereira

import numpy as np
import sys
#------------------------------
def initialize_parameters(layers):
    """Build the initial weights and biases of a fully connected network.

    Args:
        layers: Sequence of layer sizes; ``layers[0]`` is the input size and
            every following entry is the number of units of that layer.

    Returns:
        dict with, for each ``i`` from 1 to ``len(layers) - 1``, ``"W<i>"``
        (shape ``(layers[i], layers[i-1])``, drawn with ``np.random.randn``
        and scaled by 0.01) and ``"b<i>"`` (zeros, shape ``(layers[i], 1)``).
    """
    params = {}
    # Walk consecutive (fan-in, fan-out) pairs; layer numbering starts at 1.
    for idx, (n_in, n_out) in enumerate(zip(layers, layers[1:]), start=1):
        params[f"W{idx}"] = np.random.randn(n_out, n_in) * 0.01
        params[f"b{idx}"] = np.zeros((n_out, 1))
    return params
#------------------------------
def forward_step(A_1, W, b):
    """Run one sigmoid layer forward.

    Args:
        A_1: Activations of the previous layer (or the network input).
        W: Weight matrix of this layer.
        b: Bias column of this layer; broadcast when added to ``W @ A_1``.

    Returns:
        ``(A, cache)`` where ``A`` is ``sigmoid(W @ A_1 + b)`` computed in
        extended precision and ``cache`` is the tuple ``(A_1, W, A)`` that the
        backward pass unpacks.
    """
    # np.longdouble is the portable name of the extended-precision type;
    # np.float128 is not defined on every platform.
    zeta = (W @ A_1 + b).astype(np.longdouble)
    A = 1 / (1 + np.exp(-zeta))
    # A plain tuple keeps the three arrays intact. Packing them with
    # np.array([...], dtype=object) makes NumPy try to merge them into one
    # block whenever their leading dimensions coincide (a layer with as many
    # units as inputs), which either raises or mangles the cache.
    return A, (A_1, W, A)
#------------------------------
def forward(X, parameters):
    """Propagate ``X`` through every layer stored in ``parameters``.

    Args:
        X: Network input, handed unchanged to the first layer.
        parameters: dict holding ``"W<i>"`` and ``"b<i>"`` for layers 1..L.

    Returns:
        ``(A, caches)``: the output of the last layer and the list of caches
        returned by ``forward_step``, in layer order. When ``parameters`` is
        empty the input itself is returned together with an empty list.
    """
    # Bound before the loop so a network without layers returns its input
    # instead of hitting an unbound local.
    A = X
    caches = []
    n_layers = len(parameters) // 2  # every layer contributes one W and one b
    for i in range(1, n_layers + 1):
        A, cache = forward_step(A, parameters["W" + str(i)], parameters["b" + str(i)])
        caches.append(cache)
    return A, caches
#------------------------------
def predict(X, parameters):
    """Compute the network output for ``X`` without keeping any caches.

    Args:
        X: Network input.
        parameters: dict holding ``"W<l>"`` and ``"b<l>"`` for layers 1..L.

    Returns:
        Sigmoid activations of the last layer (``X`` itself when
        ``parameters`` is empty).
    """
    n_layers = len(parameters) // 2
    A = X
    for l in range(1, n_layers + 1):
        Z = np.dot(parameters[f'W{l}'], A) + parameters[f'b{l}']
        # np.longdouble instead of np.float128: same extended precision where
        # the latter exists, but also defined on platforms that lack it.
        Z = np.asarray(Z, dtype=np.longdouble)
        A = 1 / (1 + np.exp(-Z))
    return A
#------------------------------
def accuracy(X, Y, parameters, threshold=0.999):
    """Fraction of thresholded network outputs that match the labels.

    Args:
        X: Network input passed to ``predict``.
        Y: Expected 0/1 labels. They are compared elementwise with the
            thresholded outputs using NumPy broadcasting, so ``Y`` should have
            the same shape as the network output.
        parameters: Trained weights and biases, as consumed by ``predict``.
        threshold: An output counts as 1 only when it is strictly greater
            than this value. Defaults to 0.999.

    Returns:
        Mean of the elementwise matches between predictions and ``Y``.
    """
    AL = predict(X, parameters)
    predictions = (AL > threshold).astype(int)
    return np.mean(predictions == Y)
#------------------------------
def cost(Yp, Y):
    """Cross-entropy between predictions ``Yp`` and labels ``Y``.

    The sum runs over every entry and is divided by ``Y.shape[1]``.
    Predictions are floored at the smallest positive normal float and the
    machine epsilon is added inside both logarithms, so ``log`` never
    receives zero.

    Args:
        Yp: Predicted probabilities.
        Y: Expected labels; its second dimension is used as the divisor.

    Returns:
        The cost after ``np.squeeze``.
    """
    tiny = sys.float_info.min
    eps = sys.float_info.epsilon
    n_samples = Y.shape[1]

    clipped = np.maximum(Yp, tiny)
    log_hit = np.log(clipped + eps)
    log_miss = np.log(1 - clipped + eps)
    total = np.sum(Y * log_hit + (1 - Y) * log_miss)
    return np.squeeze(-1 / n_samples * total)

#------------------------------
def backward_step(dA, cache):
    """Backpropagate through one sigmoid layer.

    Args:
        dA: Gradient of the cost with respect to this layer's activations.
        cache: The ``(A_1, W, A)`` triple stored by ``forward_step``: layer
            input, weight matrix and layer output.

    Returns:
        ``(dA_1, dW, db)``: gradients with respect to the layer input, the
        weights and the bias.
    """
    # The cache holds three items; W is needed below to push the gradient
    # back to the previous layer.
    A_1, W, A = cache
    dZ = dA * A * (1 - A)  # sigmoid derivative expressed through its output
    m = A_1.shape[1]

    dA_1 = np.dot(W.T, dZ)
    dW = (1 / m) * np.dot(dZ, A_1.T)
    db = (1 / m) * np.sum(dZ, axis=1, keepdims=True)

    return dA_1, dW, db
#------------------------------
def backward(AL, Y, caches):
    """Backpropagate the cross-entropy cost through all sigmoid layers.

    Args:
        AL: Output of the last layer.
        Y: Expected labels.
        caches: One ``(A_prev, W, A)`` triple per layer, in layer order.

    Returns:
        dict with ``"dW<l>"`` and ``"db<l>"`` for every layer ``l`` in 1..L
        and ``"dA<l-1>"`` for the gradient flowing into each layer's input.
    """
    grads = {}
    n_layers = len(caches)
    epsilon = 1e-12  # keeps both divisions away from a zero denominator

    # Gradient of the cost with respect to the network output.
    upstream = -(np.divide(Y, AL + epsilon) - np.divide(1 - Y, 1 - AL + epsilon))

    # The divisor is taken once, from the input of the last layer.
    last_input, _, _ = caches[n_layers - 1]
    m = last_input.shape[1]

    # Walk from the output layer to the first one, feeding each layer the
    # gradient produced by the layer after it.
    for layer in range(n_layers, 0, -1):
        A_prev, W, A = caches[layer - 1]
        dZ = upstream * A * (1 - A)
        upstream = np.dot(W.T, dZ)
        grads["dA" + str(layer - 1)] = upstream
        grads["dW" + str(layer)] = (1 / m) * np.dot(dZ, A_prev.T)
        grads["db" + str(layer)] = (1 / m) * np.sum(dZ, axis=1, keepdims=True)

    return grads
#------------------------------
def update_parameters(parameters, grads, learning_rate):
    """Apply one gradient-descent step to every weight and bias.

    Args:
        parameters: dict with ``"W<l>"`` and ``"b<l>"`` entries; its entries
            are rebound in place.
        grads: dict with the matching ``"dW<l>"`` and ``"db<l>"`` gradients.
        learning_rate: Step size multiplying each gradient.

    Returns:
        The same ``parameters`` dict, now holding the updated arrays.
    """
    n_layers = len(parameters) // 2
    for layer in range(1, n_layers + 1):
        for prefix in ("W", "b"):
            key = f"{prefix}{layer}"
            parameters[key] = parameters[key] - learning_rate * grads["d" + key]
    return parameters
#------------------------------
def train(X, Y, layers, learning_rate, iterations, print_cost):
    """Train a sigmoid network with full-batch gradient descent.

    Args:
        X: Training inputs.
        Y: Expected outputs.
        layers: Layer sizes handed to ``initialize_parameters``.
        learning_rate: Step size used by ``update_parameters``.
        iterations: Number of gradient-descent updates to run.
        print_cost: Progress interval; when truthy, the cost is printed every
            ``print_cost`` iterations. A falsy value disables progress output.

    Returns:
        The parameters dict after the last update.
    """
    parameters = initialize_parameters(layers)
    for i in range(iterations):
        AL, caches = forward(X, parameters)
        if print_cost and i % print_cost == 0:
            print(f"Cost after iteration {i}: {cost(AL, Y)}")
        grads = backward(AL, Y, caches)
        parameters = update_parameters(parameters, grads, learning_rate)

    # Evaluate once more so the final line reports the cost of the returned
    # parameters (the in-loop cost predates the last update) and so that
    # iterations == 0 still has a value to print.
    AL, _ = forward(X, parameters)
    print(f"Cost after iteration {iterations}: {cost(AL, Y)}")
    return parameters

# **Resultados**

Ingeniería de selección de características

In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier

# Load the Spotify songs dataset
df = pd.read_csv('/content/sample_data/spotify_songs.csv')

# Columns removed from the feature matrix: track/album/playlist identifiers
# and names, popularity, release date, and the genre/subgenre labels.
excluded_columns = [
    'track_id', 'track_name', 'track_artist', 'track_popularity',
    'track_album_id', 'track_album_name', 'track_album_release_date',
    'playlist_name', 'playlist_id', 'playlist_genre', 'playlist_subgenre',
]
X = df.drop(columns=excluded_columns)
y = df['playlist_genre']  # target: playlist genre

# Fit a random forest (fixed seed) and read its per-feature importances
clf = RandomForestClassifier(random_state=42)
clf.fit(X, y)
feature_importances = clf.feature_importances_

# Tabulate the importances, most important feature first
feature_importance_df = pd.DataFrame(
    {'Feature': X.columns, 'Importance': feature_importances}
).sort_values(by='Importance', ascending=False)

print("Feature Importance:")
print(feature_importance_df)



Red de 3 entradas (las mejores características) y 3 salidas

In [None]:

archivo_csv = "/content/sample_data/spotify_songs.csv"
datos = pd.read_csv(archivo_csv)

# Keep only the features selected for training this network
dataFrame = datos[['danceability', 'speechiness', 'tempo']].to_numpy()
dataFrameGenre = datos['playlist_genre'].to_numpy()

# Hard-coded CSV row ranges used for training; presumably each range holds
# the rows of a single genre (verify against the file ordering).
rock_train_rows = slice(11255 - 2, 15216 - 1)
pop_train_rows = slice(2 - 2, 4408 - 1)
randb_train_rows = slice(21361 - 2, 25706 - 1)

rockInput = dataFrame[rock_train_rows]
popInput = dataFrame[pop_train_rows]
randbInput = dataFrame[randb_train_rows]

# Stack rock, pop and r&b rows, then transpose to (features, samples)
allGenresInput = np.concatenate((rockInput, popInput, randbInput), axis=0).T

genres = np.concatenate(
    (
        dataFrameGenre[rock_train_rows],
        dataFrameGenre[pop_train_rows],
        dataFrameGenre[randb_train_rows],
    ),
    axis=0,
)

# One output row per genre: 1 where the sample belongs to it, 0 elsewhere
Y = np.zeros((3, len(genres)))
for row, genre_name in enumerate(('rock', 'pop', 'r&b')):
    Y[row] = np.where(genres == genre_name, 1, 0)

print(np.shape(allGenresInput))
print(np.shape(Y))
parameters1 = train(allGenresInput, Y, [3, 2, 3], 0.1, 5000, 5000)

Test de la Red

In [None]:
# Split the data frame by genre: each test range starts where the matching
# training range ended
rock_test_rows = slice(15216 - 1, 16205 - 1)
pop_test_rows = slice(4408 - 1, 5508 - 1)
randb_test_rows = slice(25706 - 1, 26791 - 1)

rockInput = dataFrame[rock_test_rows]
popInput = dataFrame[pop_test_rows]
randbInput = dataFrame[randb_test_rows]

# Stack rock, pop and r&b rows, then transpose to (features, samples)
allGenresInput = np.concatenate((rockInput, popInput, randbInput), axis=0).T
test = allGenresInput

genres = np.concatenate(
    (
        dataFrameGenre[rock_test_rows],
        dataFrameGenre[pop_test_rows],
        dataFrameGenre[randb_test_rows],
    ),
    axis=0,
)

# One label row per genre, in the same order used for training
Y_true = np.zeros((3, len(genres)))
for row, genre_name in enumerate(('rock', 'pop', 'r&b')):
    Y_true[row] = np.where(genres == genre_name, 1, 0)

print("Red 1 de [3,2,3] con 5 mil iteraciones:")
print(accuracy(test, Y_true, parameters1))

Red de 10 entradas (las mejores características) y 1 salida

In [None]:
archivo_csv = "/content/sample_data/spotify_songs.csv"
datos = pd.read_csv(archivo_csv)

# Keep only the features selected for training this network
selected_features = [
    'danceability', 'energy', 'key', 'loudness', 'mode',
    'speechiness', 'acousticness', 'instrumentalness', 'tempo', 'duration_ms',
]
dataFrame = datos[selected_features].to_numpy()
dataFrameGenre = datos['playlist_genre'].to_numpy()

# Hard-coded CSV row ranges used for training (rock block, then r&b block)
rock_train_rows = slice(11255 - 2, 15216 - 1)
randb_train_rows = slice(21361 - 2, 25706 - 1)

rockInput = dataFrame[rock_train_rows]
randbInput = dataFrame[randb_train_rows]

# Stack both genres, then transpose to (features, samples)
allGenresInput = np.concatenate((rockInput, randbInput), axis=0).T

genres = np.concatenate(
    (dataFrameGenre[rock_train_rows], dataFrameGenre[randb_train_rows]),
    axis=0,
)

# Single output: 1 for rock, 0 otherwise
Y = np.zeros((1, len(genres)))
Y[0] = np.where(genres == 'rock', 1, 0)

print(np.shape(allGenresInput))
print(np.shape(Y))
parameters1 = train(allGenresInput, Y, [10, 5, 3, 1], 0.1, 10000, 5000)

Test de la Red especializada con 10 entradas y 1 salida


In [None]:
# Split the data frame by genre (test rows for rock and r&b)
rockInput = dataFrame[15216-1:16205-1]
randbInput = dataFrame[25706-1:26791-1]

# Stack both genres, then transpose to (features, samples)
allGenresInput = np.append(rockInput, randbInput, axis=0).T

test = allGenresInput

genres = dataFrameGenre[15216-1:16205-1]
genres = np.append(genres, dataFrameGenre[25706-1:26791-1], axis=0)

# The network has a single output, so the labels need exactly one row.
# Extra all-zero rows would be broadcast against the prediction inside
# accuracy() and distort the reported score.
Y_true = np.zeros((1, len(genres)))
Y_true[0] = np.where(genres == 'rock', 1, 0)

# Label matches the network trained for this experiment: [10,5,3,1], 10000 iterations
print("Red 1 de [10,5,3,1] con 10 mil iteraciones:")
print(accuracy(test, Y_true, parameters1))

Red especializada en el género de rock con las tres mejores entradas y una salida

In [None]:
archivo_csv = "/content/sample_data/spotify_songs.csv"
datos = pd.read_csv(archivo_csv)

# Keep only the features selected for training this network
dataFrame = datos[['danceability', 'speechiness', 'tempo']].to_numpy()
dataFrameGenre = datos['playlist_genre'].to_numpy()

# Hard-coded CSV row ranges used for training (rock block, then r&b block)
rock_train_rows = slice(11255 - 2, 15216 - 1)
randb_train_rows = slice(21361 - 2, 25706 - 1)

rockInput = dataFrame[rock_train_rows]
randbInput = dataFrame[randb_train_rows]

# Stack both genres, then transpose to (features, samples)
allGenresInput = np.concatenate((rockInput, randbInput), axis=0).T

genres = np.concatenate(
    (dataFrameGenre[rock_train_rows], dataFrameGenre[randb_train_rows]),
    axis=0,
)

# Single output: 1 for rock, 0 otherwise
Y = np.zeros((1, len(genres)))
Y[0] = np.where(genres == 'rock', 1, 0)

print(np.shape(allGenresInput))
print(np.shape(Y))
parameters1 = train(allGenresInput, Y, [3, 5, 3, 1], 0.1, 30000, 5000)

Test de la Red especializada con las mejores características





In [None]:
# Split the data frame by genre (test rows for rock and r&b)

rockInput = dataFrame[15216-1:16205-1]
randbInput = dataFrame[25706-1:26791-1]

# Stack both genres, then transpose to (features, samples)
allGenresInput = np.append(rockInput, randbInput, axis=0).T

test = allGenresInput

genres = dataFrameGenre[15216-1:16205-1]
genres = np.append(genres, dataFrameGenre[25706-1:26791-1], axis=0)

# The network has a single output, so the labels need exactly one row.
# Extra all-zero rows would be broadcast against the prediction inside
# accuracy() and distort the reported score.
Y_true = np.zeros((1, len(genres)))
Y_true[0] = np.where(genres == 'rock', 1, 0)

# Label matches the network trained for this experiment: [3,5,3,1], 30000 iterations
print("Red 1 de [3,5,3,1] con 30 mil iteraciones:")
print(accuracy(test, Y_true, parameters1))

Red especializada en el género de rock con las mejores entradas seleccionadas por un experto y una salida

In [None]:
archivo_csv = "/content/sample_data/spotify_songs.csv"
datos = pd.read_csv(archivo_csv)

# Keep only the expert-selected features for training this network
dataFrame = datos[['danceability','loudness','tempo','acousticness']].to_numpy()
dataFrameGenre = datos['playlist_genre'].to_numpy()


# Split the data frame by genre (hard-coded training row ranges)

rockInput = dataFrame[11255-2:15216-1]
randbInput = dataFrame[21361-2:25706-1]

# Stack both genres, then transpose to (features, samples)
allGenresInput = np.append(rockInput, randbInput, axis=0).T

genres = dataFrameGenre[11255-2:15216-1]
genres = np.append(genres, dataFrameGenre[21361-2:25706-1], axis=0)

# Single output: 1 for rock, 0 otherwise
Y = np.zeros((1, len(genres)))
Y[0] = np.where(genres == 'rock', 1, 0)

print(np.shape(allGenresInput))
print(np.shape(Y))
# Train the 4-input network here: without this step parameters1 would still
# hold the weights of a network with a different input size, and evaluating
# it on these four features fails with a shape mismatch.
# NOTE(review): hidden sizes, learning rate and iteration count mirror the
# 3-feature rock network; adjust if a different setup was intended.
parameters1 = train(allGenresInput, Y, [4, 5, 3, 1], 0.1, 30000, 5000)

Test

In [None]:
# Split the data frame by genre (test rows for rock and r&b)

rockInput = dataFrame[15216-1:16205-1]
randbInput = dataFrame[25706-1:26791-1]

# Stack both genres, then transpose to (features, samples)
allGenresInput = np.append(rockInput, randbInput, axis=0).T

test = allGenresInput

genres = dataFrameGenre[15216-1:16205-1]
genres = np.append(genres, dataFrameGenre[25706-1:26791-1], axis=0)

# The network has a single output, so the labels need exactly one row.
# Extra all-zero rows would be broadcast against the prediction inside
# accuracy() and distort the reported score.
Y_true = np.zeros((1, len(genres)))
Y_true[0] = np.where(genres == 'rock', 1, 0)

# Describe the network from the weights actually being evaluated, so the
# label cannot disagree with parameters1.
capas = [parameters1['W1'].shape[1]] + [
    parameters1['W' + str(l)].shape[0]
    for l in range(1, len(parameters1) // 2 + 1)
]
print(f"Red 1 de {capas}:")
print(accuracy(test, Y_true, parameters1))