In [1]:
from typing import List, Optional, Tuple

import pandas as pd
from scipy.spatial.distance import euclidean


In [2]:
def get_sorted_nearest_cluster_from_atypical_point(atypical_point: pd.Series, cluster_centers: pd.DataFrame) -> List[Tuple[int, float]]:
    """Rank every cluster center by its Euclidean distance to a single point.

    Args:
        atypical_point: Feature vector of the point to compare. It is passed
            straight to ``euclidean`` together with each center's features, so
            it must hold the same features, in the same order, as the
            non-``cluster`` columns of ``cluster_centers``.
        cluster_centers: One row per center, with a ``cluster`` column holding
            the label and every other column holding a feature coordinate.

    Returns:
        ``(cluster_label, distance)`` pairs ordered from nearest to farthest.
        An empty frame yields an empty list.
    """
    ranked = [
        (center['cluster'], euclidean(atypical_point, center.drop('cluster')))
        for _, center in cluster_centers.iterrows()
    ]
    # sorted() is stable, so centers at equal distance keep their row order.
    return sorted(ranked, key=lambda pair: pair[1])


def calculate_clusters_centers_distance(cluster_start, cluster_target, cluster_centers: pd.DataFrame) -> float:
    """Return the Euclidean distance between the centers of two clusters.

    Args:
        cluster_start: Label of the first cluster.
        cluster_target: Label of the second cluster.
        cluster_centers: One row per center, with a ``cluster`` label column
            and the feature coordinates in the remaining columns.

    Returns:
        The distance between the two centers' feature vectors.

    Raises:
        IndexError: If either label has no row in ``cluster_centers``.
    """
    labels = cluster_centers["cluster"]
    features = cluster_centers.drop('cluster', axis=1)

    # If a label occurs more than once, the first matching row is used.
    origin = features[labels == cluster_start].iloc[0]
    destination = features[labels == cluster_target].iloc[0]

    return euclidean(origin, destination)


def get_sorted_nearest_clusters(start_cluster, cluster_centers: pd.DataFrame) -> List[Tuple[int, float]]:
    """Rank all other clusters by how close their centers are to one cluster.

    Args:
        start_cluster: Label of the reference cluster; it is left out of the
            result.
        cluster_centers: One row per center, with a ``cluster`` label column
            and the feature coordinates in the remaining columns.

    Returns:
        ``(cluster_label, distance)`` pairs for every other distinct label,
        ordered from nearest to farthest.
    """
    other_labels = (
        label
        for label in cluster_centers['cluster'].unique()
        if label != start_cluster
    )
    ranked = [
        (label, calculate_clusters_centers_distance(start_cluster, label, cluster_centers))
        for label in other_labels
    ]
    # sorted() is stable, so equally distant clusters keep their first-seen order.
    return sorted(ranked, key=lambda pair: pair[1])

def get_cluster_row_by_id(cluster_id: int, cluster_centers: pd.DataFrame) -> Optional[pd.Series]:
    """Look up the center row that belongs to one cluster label.

    Args:
        cluster_id: Label to search for in the ``cluster`` column.
        cluster_centers: One row per center, including a ``cluster`` column.

    Returns:
        The first row whose ``cluster`` value equals ``cluster_id`` (the
        ``cluster`` column is kept in the returned Series), or ``None`` when
        no row matches.
    """
    matches = cluster_centers[cluster_centers['cluster'] == cluster_id]

    # Callers must handle the "unknown label" case explicitly.
    if matches.empty:
        return None
    return matches.iloc[0]

In [3]:
# Read the two input tables from the local ./data directory. Judging by the file
# names, the first holds the cluster centers and the second the clustered songs;
# the songs table carries the 'cluster' label column used as the target below.
cluster_center = pd.read_csv("./data/cluster_center.csv")
clustered_data = pd.read_csv("./data/spotify_songs_clustered.csv")

In [4]:
# Print the column names, non-null counts and dtypes of the songs table.
clustered_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15405 entries, 0 to 15404
Data columns (total 20 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   danceability             15405 non-null  float64
 1   energy                   15405 non-null  float64
 2   key                      15405 non-null  float64
 3   loudness                 15405 non-null  float64
 4   speechiness              15405 non-null  float64
 5   acousticness             15405 non-null  float64
 6   instrumentalness         15405 non-null  float64
 7   valence                  15405 non-null  float64
 8   tempo                    15405 non-null  float64
 9   lyrics_sentiment         15405 non-null  float64
 10  album_name_sentiment     15405 non-null  float64
 11  track_name_sentiment     15405 non-null  float64
 12  playlist_name_sentiment  15405 non-null  float64
 13  genre_edm                15405 non-null  bool   
 14  genre_latin           

In [5]:
# Cast the genre_* indicator columns to float32 in a single astype call so that
# every feature column is a floating-point type.
genre_columns = ['genre_edm', 'genre_latin', 'genre_pop', 'genre_r&b', 'genre_rap', 'genre_rock']
clustered_data = clustered_data.astype({name: 'float32' for name in genre_columns})

In [6]:
from sklearn.model_selection import train_test_split

# Separate the feature columns from the 'cluster' column, which is the target.
X = clustered_data.drop(columns='cluster')
y = clustered_data['cluster']

# Split into training, validation and test sets: hold out 30% first, then cut
# that hold-out in half (70% / 15% / 15% overall). The fixed random_state keeps
# both splits reproducible.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.3, random_state=42
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42
)

In [7]:
# Replace any NaN with 0, then make sure every column is numeric
# (pd.to_numeric raises if a column holds values it cannot parse).
X_train = X_train.fillna(0).apply(pd.to_numeric)
X_val = X_val.fillna(0).apply(pd.to_numeric)
X_test = X_test.fillna(0).apply(pd.to_numeric)


In [8]:
from keras.models import Sequential
from keras.layers import Dense, Dropout


# Number of input features
input_dim = X_train.shape[1]

# Number of units in the output layer. Samples labelled -1 are removed before
# training, and sparse_categorical_crossentropy requires every label to lie in
# [0, num_clusters). Size the layer from the largest remaining label instead of
# counting unique values, which counted -1 as a class and silently assumed the
# labels are contiguous.
num_clusters = int(y[y != -1].max()) + 1

# Build the model: two hidden ReLU layers with dropout after the first, and a
# softmax output with one unit per cluster.
model = Sequential()
model.add(Dense(64, input_dim=input_dim, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(32, activation='relu'))
model.add(Dense(num_clusters, activation='softmax'))

# Compile the model; the sparse loss takes integer class labels directly.
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [9]:
# Drop the samples whose label is -1 from every split, keeping features and
# labels aligned. NOTE(review): -1 presumably marks noise / unassigned points
# from the clustering step — confirm against the code that produced the CSV.
X_train_filtered = X_train.loc[y_train.ne(-1)]
y_train_filtered = y_train.loc[y_train.ne(-1)]

X_val_filtered = X_val.loc[y_val.ne(-1)]
y_val_filtered = y_val.loc[y_val.ne(-1)]

X_test_filtered = X_test.loc[y_test.ne(-1)]
y_test_filtered = y_test.loc[y_test.ne(-1)]

In [10]:
# Train on the filtered splits only: a label of -1 is outside the range that
# sparse_categorical_crossentropy accepts and makes fit() fail.
history = model.fit(
    X_train_filtered,
    y_train_filtered,
    validation_data=(X_val_filtered, y_val_filtered),
    epochs=100,
    batch_size=10,
)


Epoch 1/100


InvalidArgumentError: Graph execution error:

Detected at node 'sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits' defined at (most recent call last):
    File "<frozen runpy>", line 198, in _run_module_as_main
    File "<frozen runpy>", line 88, in _run_code
    File "C:\Users\lp109\AppData\Roaming\Python\Python311\site-packages\ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "C:\Users\lp109\AppData\Roaming\Python\Python311\site-packages\traitlets\config\application.py", line 1043, in launch_instance
      app.start()
    File "C:\Users\lp109\AppData\Roaming\Python\Python311\site-packages\ipykernel\kernelapp.py", line 725, in start
      self.io_loop.start()
    File "C:\Users\lp109\AppData\Roaming\Python\Python311\site-packages\tornado\platform\asyncio.py", line 215, in start
      self.asyncio_loop.run_forever()
    File "c:\Users\lp109\AppData\Local\Programs\Python\Python311\Lib\asyncio\base_events.py", line 607, in run_forever
      self._run_once()
    File "c:\Users\lp109\AppData\Local\Programs\Python\Python311\Lib\asyncio\base_events.py", line 1922, in _run_once
      handle._run()
    File "c:\Users\lp109\AppData\Local\Programs\Python\Python311\Lib\asyncio\events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "C:\Users\lp109\AppData\Roaming\Python\Python311\site-packages\ipykernel\kernelbase.py", line 513, in dispatch_queue
      await self.process_one()
    File "C:\Users\lp109\AppData\Roaming\Python\Python311\site-packages\ipykernel\kernelbase.py", line 502, in process_one
      await dispatch(*args)
    File "C:\Users\lp109\AppData\Roaming\Python\Python311\site-packages\ipykernel\kernelbase.py", line 409, in dispatch_shell
      await result
    File "C:\Users\lp109\AppData\Roaming\Python\Python311\site-packages\ipykernel\kernelbase.py", line 729, in execute_request
      reply_content = await reply_content
    File "C:\Users\lp109\AppData\Roaming\Python\Python311\site-packages\ipykernel\ipkernel.py", line 422, in do_execute
      res = shell.run_cell(
    File "C:\Users\lp109\AppData\Roaming\Python\Python311\site-packages\ipykernel\zmqshell.py", line 540, in run_cell
      return super().run_cell(*args, **kwargs)
    File "C:\Users\lp109\AppData\Roaming\Python\Python311\site-packages\IPython\core\interactiveshell.py", line 3006, in run_cell
      result = self._run_cell(
    File "C:\Users\lp109\AppData\Roaming\Python\Python311\site-packages\IPython\core\interactiveshell.py", line 3061, in _run_cell
      result = runner(coro)
    File "C:\Users\lp109\AppData\Roaming\Python\Python311\site-packages\IPython\core\async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "C:\Users\lp109\AppData\Roaming\Python\Python311\site-packages\IPython\core\interactiveshell.py", line 3266, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "C:\Users\lp109\AppData\Roaming\Python\Python311\site-packages\IPython\core\interactiveshell.py", line 3445, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "C:\Users\lp109\AppData\Roaming\Python\Python311\site-packages\IPython\core\interactiveshell.py", line 3505, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "C:\Users\lp109\AppData\Local\Temp\ipykernel_2560\910705944.py", line 1, in <module>
      history = model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=100, batch_size=10)
    File "c:\Users\lp109\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\lp109\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\engine\training.py", line 1685, in fit
      tmp_logs = self.train_function(iterator)
    File "c:\Users\lp109\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\engine\training.py", line 1284, in train_function
      return step_function(self, iterator)
    File "c:\Users\lp109\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\engine\training.py", line 1268, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "c:\Users\lp109\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\engine\training.py", line 1249, in run_step
      outputs = model.train_step(data)
    File "c:\Users\lp109\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\engine\training.py", line 1051, in train_step
      loss = self.compute_loss(x, y, y_pred, sample_weight)
    File "c:\Users\lp109\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\engine\training.py", line 1109, in compute_loss
      return self.compiled_loss(
    File "c:\Users\lp109\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\engine\compile_utils.py", line 265, in __call__
      loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    File "c:\Users\lp109\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\losses.py", line 142, in __call__
      losses = call_fn(y_true, y_pred)
    File "c:\Users\lp109\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\losses.py", line 268, in call
      return ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "c:\Users\lp109\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\losses.py", line 2078, in sparse_categorical_crossentropy
      return backend.sparse_categorical_crossentropy(
    File "c:\Users\lp109\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\backend.py", line 5660, in sparse_categorical_crossentropy
      res = tf.nn.sparse_softmax_cross_entropy_with_logits(
Node: 'sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits'
Received a label value of -1 which is outside the valid range of [0, 29).  Label values: 2 0 -1 -1 -1 -1 2 -1 -1 0
	 [[{{node sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits}}]] [Op:__inference_train_function_1027]

In [None]:

# NOTE(review): this cell repeats the earlier model-definition cell. Running it
# after training rebinds `model` to a freshly initialised network, so the
# model.save(...) call that follows would store untrained weights. Confirm
# whether this cell is still needed.

# Number of input features
input_dim = X_train.shape[1]

# Number of units in the output layer. Samples labelled -1 are removed before
# training, and sparse_categorical_crossentropy requires every label to lie in
# [0, num_clusters). Size the layer from the largest remaining label instead of
# counting unique values, which counted -1 as a class and silently assumed the
# labels are contiguous.
num_clusters = int(y[y != -1].max()) + 1

# Build the model: two hidden ReLU layers with dropout after the first, and a
# softmax output with one unit per cluster.
model = Sequential()
model.add(Dense(64, input_dim=input_dim, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(32, activation='relu'))
model.add(Dense(num_clusters, activation='softmax'))

# Compile the model; the sparse loss takes integer class labels directly.
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# Save the current `model` object to an .h5 file in the working directory.
model.save('mi_modelo_cluster.h5')
