In [16]:
import os
import sys

dataset = "./dataset/spotify_songs_download.csv"
scaler = "./utils/scaler.joblib"
kmeans = "./utils/Kmeans.joblib"
histogram = "./utils/histogramas_acusticos.json"
funciones = "./process/extract_audio_functions.py"

archivos = [dataset, scaler, kmeans, histogram, funciones]
directorios = ["./audios_temp", "./audios_wav", "./dataset"]

faltan = False
for archivo in archivos:
    if not os.path.isfile(archivo):
        print(f"Falta: {archivo}")
        faltan = True
    else:
        print(f"✅{archivo}")

for d in directorios:
    if not os.path.isdir(d):
        print(f"falta :{d}")
    else:
        print(f"✅{d}")

if faltan:
    print("Archivos faltantes. Abortando.")
    sys.exit(1)

print("✅ Entorno verificado correctamente.")


✅./dataset/spotify_songs_download.csv
✅./utils/scaler.joblib
✅./utils/Kmeans.joblib
✅./utils/histogramas_acusticos.json
✅./process/extract_audio_functions.py
✅./audios_temp
✅./audios_wav
✅./dataset
✅ Entorno verificado correctamente.


In [17]:
# METODOS UTILES PARA PROCESAR EL AUDIO 
from process.extract_audio_functions import *
import pandas as pd

# Funciones para usar al procesar
def extraer_mfcc_por_path(path, scaler) -> list[list[float]]:
    mfcc_test = extract_mfcc(path)
    return scaler.transform(mfcc_test)



In [18]:
#--------------- TEST adudio MP3 --------------
#para poder hacer recomendacion de audio necesita path
# para el SQL 
def obtener_recomendaciones_por_audio_mp3(path_mp3,k=5,tipo="coseno"):
    """
        recibe un path mp3 : C:/dowload/coldplay.mpeg    ->  no guarda este audio lo deja como temp.wav

        retorna: array [(id1:score1),(id2:score2),...]
    """
    directorio='./audios_temp'
    
    path_salida = directorio+"/temp.wav"
    path_mp3_salida_wav=os.path.abspath(path_salida)
    
    #process
    transform_mp3_to_wav(path_mp3,path_mp3_salida_wav)
    scaler = cargar_objeto("./utils/scaler.joblib")
    kmeans_model = cargar_objeto("./utils/Kmeans.joblib")
    mfcc_normalizado=extraer_mfcc_por_path(path_mp3_salida_wav,scaler)

    histograma = histogram_audio(mfcc_normalizado, kmeans_model)

    if   tipo=="coseno":
        recomendaciones= knn_cosine(histograma, k=k)
    elif tipo=="manhatan":
        recomendaciones= knn_manhattan(histograma,k=k)
    else : 
        recomendaciones= knn_lineal(histograma,k=k) # distancia euclidiana   
    recomendaciones_dict=dict(recomendaciones)
    return recomendaciones_dict

# Prueba
audio="C:\\Users\\jefersson\\Downloads\\coldplay.mpeg"

auidos_recomendados=obtener_recomendaciones_por_audio_mp3(audio,k=5,tipo="manhatan")
auidos_recomendados


Convertido: C:\Users\jefersson\Downloads\coldplay.mpeg → c:\Users\jefersson\Desktop\ANACONDA\audios_temp\temp.wav


{'2815': np.float64(2.58099405973851),
 '2847': np.float64(2.8705755271880626),
 '633': np.float64(2.877964552583518),
 '582': np.float64(2.9159674530347925),
 '2772': np.float64(2.923192466445492)}

In [19]:
# -- RECOMENDACION POR ID --      (el usuario selecciona un audio del dataset y debajo le salen musicas recomendadas k=5)
import json

def sacar_histograma_con_id(id, json_path="./utils/histogramas_acusticos.json"):
    try:
        with open(json_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        return data.get(str(id))  
    except FileNotFoundError:
        print(f"[ERROR] Archivo {json_path} no encontrado.")
    except Exception as e:
        print(f"[ERROR] {e}")
    
def obtener_recomendaciones_por_song_id(id,tipo,k=5):
    histograma=sacar_histograma_con_id(id)

    if   tipo=="coseno":
        recomendaciones= knn_cosine(histograma, k=k)
    elif tipo=="manhatan":
        recomendaciones= knn_manhattan(histograma,k=k)
    else : 
        recomendaciones= knn_lineal(histograma,k=k) # distancia euclidiana   
    recomendaciones_dict=dict(recomendaciones)
    return recomendaciones_dict

# PRUEBA

recomendaciones = obtener_recomendaciones_por_song_id(2913, tipo="manhattan", k=5)
recomendaciones


{'2913': np.float64(0.0),
 '2584': np.float64(0.9398874530419487),
 '2077': np.float64(0.9504187165987075),
 '1536': np.float64(0.9585690256109476),
 '2333': np.float64(0.9622359089620979)}

In [20]:
# ---- INSERT AUDIO ---- 
import json

def insertar_csv(name, id, path_wav, csv_path="./dataset/spotify_songs_download.csv"):
    df = pd.read_csv(csv_path)

    nueva_fila = {
        'track_id': '',
        'track_artist': '',
        'track_name': name,
        'path_download': '',
        'id': str(id),
        'path_download_wav': "./"+path_wav.replace("\\", "/")

    }

    df = pd.concat([df, pd.DataFrame([nueva_fila])], ignore_index=True)
    df.to_csv(csv_path, index=False)

def max_key(json_path):
    try:
        with open(json_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        if not data:
            return 0

        ultima_clave = list(data.keys())[-1]
        return int(ultima_clave) if ultima_clave.isdigit() else 0

    except FileNotFoundError:
        return 0
    except Exception as e:
        print(f"Error al leer {json_path}: {e}")
        return 0

def insert_audio(path_mp3: str, id: str = None, json_path: str = "./utils/histogramas_acusticos.json"):
    """
    recibe un path mp3 : C:/dowload/coldplay.mpeg 
    id : id del audio que se insertara 
    
    Inserta un nuevo audio: 
    extrae MFCC, genera su histograma y lo guarda en JSON, ademas guarda su nombre.wav.
    """
    name=os.path.basename(path_mp3)
    name=name.replace(".mpeg","")
    name_wav=os.path.join('audios_wav',name+".wav")
    path_wav = os.path.abspath(name_wav)

    #insert en audios_wav
    transform_mp3_to_wav(path_mp3, path_wav, tiempo_recorte=30)

    scaler = cargar_objeto("./utils/scaler.joblib")
    kmeans_model = cargar_objeto("./utils/Kmeans.joblib")

    mfcc = extract_mfcc(path_wav)
    mfcc_normalizado = scaler.transform(mfcc)
    hist = histogram_audio(mfcc_normalizado, kmeans_model)

    #insert histogramas
    try:
        with open("./utils/histogramas_acusticos.json") as f:
            diccionario = json.load(f)
    except FileNotFoundError:
        diccionario = {}
    diccionario[id] = hist.tolist()
    guardar_json(diccionario, json_path)

    #TODO :POR AHORA INSERT CSV   -> DEBE INSERTAR EN TABLA 
    insertar_csv(name,id,name_wav)

    print("[DEBUG]  isertado el audio ",name," id: ",id ,"correctamente")

# PRUEBA
id=max_key("./utils/histogramas_acusticos.json")+1
audio="C:\\Users\\jefersson\\Downloads\\coldplay.mpeg"

insert_audio(audio,id)


Convertido: C:\Users\jefersson\Downloads\coldplay.mpeg → c:\Users\jefersson\Desktop\ANACONDA\audios_wav\coldplay.wav
Histogramas guardados en: ./utils/histogramas_acusticos.json
[DEBUG]  isertado el audio  coldplay  id:  2914 correctamente


In [7]:
# Haciendo JOIN con el CSV para ver caules son las musicas recomendadas
import pandas as pd

audio = "C:\\Users\\jefersson\\Downloads\\coldplay.mpeg"
recomendaciones = obtener_recomendaciones_por_audio_mp3(audio, k=5)




df = pd.read_csv("./dataset/spotify_songs_download.csv")
df['id'] = df['id'].astype(str) # tpecasteo el id ->str
# JOIN
filas = df[df['id'].isin(recomendaciones)]
filas['score'] = filas['id'].map(recomendaciones) # add column score

filas = filas.sort_values('score', ascending=False)
filas

Convertido: C:\Users\jefersson\Downloads\coldplay.mpeg → c:\Users\jefersson\Desktop\ANACONDA\audios_temp\temp.wav


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filas['score'] = filas['id'].map(recomendaciones)


Unnamed: 0,track_id,track_album_id,id,track_artist,track_name,playlist_genre,playlist_subgenre,path_download_wav,lyrics,path_download,score
2771,3jBCTvWRsLVgzryMaPJKhw,7tGNMJ8LL9pBbxfySrAZ1S,2772,Wiwek,Chemistry,edm,progressive electro house,"./audios_wav/wiwek, rachel west - chemistry.wav",I spy your face in the dark Lit with a glow fr...,,0.779034
2814,056uvAVDFOa7kv9idKPpf0,5wkQo9u8qrKOdfGqoizWPo,2815,Gloria Estefan,I See Your Smile,latin,latin pop,./audios_wav/gloria estefan - i see your smile...,I get a little tongue twisted Every time I tal...,,0.778525
1348,31F0KxmTD4rz3o0tJht5RL,7lXkGYfPd8ygerG681NxQG,1349,Toro y Moi,Ordinary Pleasure,r&b,hip pop,./audios_wav/toro y moi - ordinary pleasure.wav,It's always the same as always It's a game tha...,,0.768655
2846,6LBcHFbzmKSgGjZapRE2B5,3m1Nxg2YS7QIs0v428yNLP,2847,Akon,Angel,latin,latin hip hop,./audios_wav/akon - angel.wav,I'm looking at an angel And believe me when I ...,,0.758072
581,7pgv0D1HCBAFbGHNqHmegV,7tB40pGzj6Tg0HePj2jWZt,582,Queen,It's Late - Remastered 2011,rock,album rock,./audios_wav/queen - it's late - remastered 20...,You say you love me And I hardly know your nam...,,0.753449
