In [1]:
import sys
print("✅ Python activo:", sys.executable)

✅ Python activo: c:\Users\wladi\Desktop\Data Science\Portafolio\mood_music\.venv\Scripts\python.exe


In [2]:
from dotenv import load_dotenv
import os

# Escapa los espacios usando una raw string (r"")
env_path = r"C:\Users\wladi\Desktop\Data Science\Portafolio\mood_music\.env"

# Cargar el archivo .env desde la ruta absoluta
load_dotenv(dotenv_path=env_path, override=True)

# Verificar valores cargados
print("CLIENT ID:", os.getenv("SPOTIPY_CLIENT_ID"))
print("SECRET:", os.getenv("SPOTIPY_CLIENT_SECRET")[:6] + "********")
print("URI:", os.getenv("SPOTIPY_REDIRECT_URI"))

CLIENT ID: a67980b418c04ab88af01012e40c204e
SECRET: 901d81********
URI: http://127.0.0.1:8080


In [6]:
# ==========================================
# 01_data_collection.ipynb
# Proyecto: Mood vs Música 🎧
# Autor: Eder Cabascango
# ==========================================

# --------------------------
# 1. Librerías necesarias
# --------------------------
import os
import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyOAuth
from datetime import datetime
from dotenv import load_dotenv

# --------------------------
# 2. Cargar credenciales del archivo .env
# --------------------------
env_path = r"C:\Users\wladi\Desktop\Data Science\Portafolio\mood_music\.env"
load_dotenv(dotenv_path=env_path, override=True)

CLIENT_ID = os.getenv("SPOTIPY_CLIENT_ID")
CLIENT_SECRET = os.getenv("SPOTIPY_CLIENT_SECRET")
REDIRECT_URI = os.getenv("SPOTIPY_REDIRECT_URI")

if not CLIENT_ID or not CLIENT_SECRET:
    raise ValueError("⚠️ No se encontraron las credenciales de Spotify. Verifica el archivo .env")

# --------------------------
# 3. Autenticación con Spotify API
# --------------------------
scope = "user-read-recently-played,user-top-read,user-read-playback-state"

sp = spotipy.Spotify(auth_manager=SpotifyOAuth(
    client_id=CLIENT_ID,
    client_secret=CLIENT_SECRET,
    redirect_uri=REDIRECT_URI,
    scope=scope
))

print("✅ Autenticado como:", sp.current_user()["display_name"])

# --------------------------
# 4. Función auxiliar para obtener canciones y filtrar válidas
# --------------------------
def get_valid_tracks(source="recently", limit=50):
    """Obtiene canciones válidas (con audio features disponibles)."""
    
    if source == "recently":
        results = sp.current_user_recently_played(limit=limit)
        items = results["items"]
        print(f"🎧 Canciones recientes encontradas: {len(items)}")
    else:
        results = sp.current_user_top_tracks(limit=limit, time_range="medium_term")
        items = results["items"]
        print(f"🔥 Canciones más escuchadas encontradas: {len(items)}")

    tracks_data = []
    for item in items:
        track = item["track"] if source == "recently" else item

        # Ignorar podcasts o canciones sin ID válido
        if not track.get("id"):
            continue

        try:
            features = sp.audio_features(track["id"])
            if not features or not features[0]:
                continue

            f = features[0]
            tracks_data.append({
                "source": source,
                "played_at": item["played_at"] if source == "recently" else None,
                "date": item["played_at"][:10] if source == "recently" else None,
                "track_name": track["name"],
                "artist": track["artists"][0]["name"],
                "danceability": f["danceability"],
                "energy": f["energy"],
                "valence": f["valence"],
                "tempo": f["tempo"],
                "acousticness": f["acousticness"],
                "instrumentalness": f["instrumentalness"]
            })
        except spotipy.exceptions.SpotifyException:
            continue

    return pd.DataFrame(tracks_data)

# --------------------------
# 5. Intentar obtener canciones recientes primero
# --------------------------
df_tracks = get_valid_tracks("recently")

if df_tracks.empty:
    print("⚠️ No se encontraron canciones recientes válidas. Probando con tus top tracks...")
    df_tracks = get_valid_tracks("top")

if df_tracks.empty:
    raise RuntimeError("❌ No se pudieron obtener canciones con audio features.")

# --------------------------
# 6. Mostrar resultados
# --------------------------
print(f"\n✅ Canciones procesadas correctamente: {len(df_tracks)}")
display(df_tracks.head())

# --------------------------
# 7. Guardar datos crudos
# --------------------------
os.makedirs("data/raw", exist_ok=True)
filename = f"data/raw/spotify_tracks_{datetime.today().strftime('%Y%m%d')}.csv"
df_tracks.to_csv(filename, index=False, encoding="utf-8")

print(f"💾 Archivo guardado en: {filename}")
print("🎵 Dataset listo para análisis.")


✅ Autenticado como: zerow9


HTTP Error for GET to https://api.spotify.com/v1/audio-features/?ids=569xAmXCkpM3IsEOiG4hWe with Params: {} returned 403 due to None


🎧 Canciones recientes encontradas: 50


HTTP Error for GET to https://api.spotify.com/v1/audio-features/?ids=6e2xraDTaq3NyfngeuwsdX with Params: {} returned 403 due to None
HTTP Error for GET to https://api.spotify.com/v1/audio-features/?ids=48vdWqCEBHtxZOEu1McS76 with Params: {} returned 403 due to None
HTTP Error for GET to https://api.spotify.com/v1/audio-features/?ids=2JLMWMR9Q3HCIFADdPpGiW with Params: {} returned 403 due to None
HTTP Error for GET to https://api.spotify.com/v1/audio-features/?ids=2DjPGjQZrWdVHx2Bh9uvcL with Params: {} returned 403 due to None
HTTP Error for GET to https://api.spotify.com/v1/audio-features/?ids=3Y2IOJTjVsOuzvHsllpQDW with Params: {} returned 403 due to None
HTTP Error for GET to https://api.spotify.com/v1/audio-features/?ids=0Zj3TMvZlJoqFgqyDkuTGr with Params: {} returned 403 due to None
HTTP Error for GET to https://api.spotify.com/v1/audio-features/?ids=5MUy9g1yOErICSjY5Wro5x with Params: {} returned 403 due to None
HTTP Error for GET to https://api.spotify.com/v1/audio-features/?ids=

⚠️ No se encontraron canciones recientes válidas. Probando con tus top tracks...


HTTP Error for GET to https://api.spotify.com/v1/audio-features/?ids=6aaPUBUFw9KEW1p1inVQv9 with Params: {} returned 403 due to None


🔥 Canciones más escuchadas encontradas: 50


HTTP Error for GET to https://api.spotify.com/v1/audio-features/?ids=7HKRWMTErKh56EIBeFcmdf with Params: {} returned 403 due to None
HTTP Error for GET to https://api.spotify.com/v1/audio-features/?ids=3lCEKPzDk6VZRUZtD6OusZ with Params: {} returned 403 due to None
HTTP Error for GET to https://api.spotify.com/v1/audio-features/?ids=4wDk9P2WSvHOsRW6SDYE7L with Params: {} returned 403 due to None
HTTP Error for GET to https://api.spotify.com/v1/audio-features/?ids=0kqspGAduSicKRpmF0wiU5 with Params: {} returned 403 due to None
HTTP Error for GET to https://api.spotify.com/v1/audio-features/?ids=1Hb8GNhC9n0rBKQGqd2O0M with Params: {} returned 403 due to None
HTTP Error for GET to https://api.spotify.com/v1/audio-features/?ids=3ddJ8Vbc5tYPxq5zKRVke6 with Params: {} returned 403 due to None
HTTP Error for GET to https://api.spotify.com/v1/audio-features/?ids=50mG9Vcem553GV9RkSVwe3 with Params: {} returned 403 due to None
HTTP Error for GET to https://api.spotify.com/v1/audio-features/?ids=

RuntimeError: ❌ No se pudieron obtener canciones con audio features.

In [7]:
# Prueba directa: Avenged Sevenfold - Hail to the King
test_id = "0xMCEp7mK5B9lxYtY6yK1R"  # ID real de "Hail to the King"

features = sp.audio_features(test_id)

if features and features[0]:
    print("✅ Audio features disponibles:")
    display(pd.DataFrame(features))
else:
    print("❌ No se pudieron obtener las características de la canción.")


HTTP Error for GET to https://api.spotify.com/v1/audio-features/?ids=0xMCEp7mK5B9lxYtY6yK1R with Params: {} returned 403 due to None


SpotifyException: http status: 403, code: -1 - https://api.spotify.com/v1/audio-features/?ids=0xMCEp7mK5B9lxYtY6yK1R:
 None, reason: None