## imports

In [1]:
import os
import time
from datetime import datetime

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd
from dotenv import load_dotenv

# --- PARÁMETROS ESPECÍFICOS PARA LA TAREA ---


In [2]:
# Years to query.
# NOTE(review): the extraction function iterates over this list element by
# element, so only 2010 and 2024 are searched, not the years in between.
# Confirm whether a full 2010-2024 range was intended.
YEARS = [2010, 2024]

In [3]:
# Funk-related genre names; each one is used as a search term
funk_related_genres = [
    # Classic funk
    "funk",
    "classic funk",
    "deep funk",
    "funky",
    "funky breaks",
    "p-funk",
    "funk rock",
    "funk metal",
    "afro funk",
    "jazz funk",
    "disco funk",
    "boogie",
    "old school funk",

    # Soul / neo soul
    "soul",
    "classic soul",
    "neo soul",
    "contemporary r&b",
    "motown",
    "funk soul",

    # Funk house / dance
    "funk house",
    "disco house",
    "funky house",
    "deep funk house",
    "electro funk",
    "dance funk",
    "groove house"
]

# Search terms: an alias of the list above (same object, not a copy)
query_terms = funk_related_genres

## CARGAMOS CREDENCIALES DESDE .env

In [4]:
# Load the variables declared in the .env file
load_dotenv()

SPOTIPY_CLIENT_ID = os.environ.get("SPOTIPY_CLIENT_ID")
SPOTIPY_CLIENT_SECRET = os.environ.get("SPOTIPY_CLIENT_SECRET")
LASTFM_API_KEY = os.environ.get("LASTFM_API_KEY")
# The user agent is optional and falls back to a project default
LASTFM_USER_AGENT = os.environ.get("LASTFM_USER_AGENT", "spotify-lastfm-project")

# Fail fast when a mandatory credential is missing or empty
if not (SPOTIPY_CLIENT_ID and SPOTIPY_CLIENT_SECRET):
    raise ValueError("Faltan SPOTIPY_CLIENT_ID o SPOTIPY_CLIENT_SECRET en el archivo .env")
if not LASTFM_API_KEY:
    raise ValueError("Falta LASTFM_API_KEY en el archivo .env")

## INICIALIZAMOS CLIENTE SPOTIFY

In [5]:
# Build the Spotify client with the client-credentials flow.
# NOTE(review): the success message is printed as soon as the client objects
# are constructed; presumably no request has been sent to Spotify at this
# point, so invalid credentials may only surface on the first API call - confirm.
try:
    sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(
        client_id=SPOTIPY_CLIENT_ID,
        client_secret=SPOTIPY_CLIENT_SECRET
    ))
    print("‚úÖ Conexi√≥n a Spotify exitosa.")
except Exception as e:
    print(f"‚ùå Error de conexi√≥n a Spotify: {e}")
    # Later cells test `sp` for truthiness before using it
    sp = None

print("Objeto sp:", type(sp))

‚úÖ Conexi√≥n a Spotify exitosa.
Objeto sp: <class 'spotipy.client.Spotify'>


## EXTRAEMOS CANCIONES / TRACKS (FUNK Y SUBGÉNEROS)

In [6]:
def extract_spotify_data_funk_ampliada(
    sp,
    years,
    query_terms,
    max_total_songs=500,
    max_songs_per_year=250,
    page_delay=0.5,
    error_delay=2
):
    """
    Search Spotify tracks for every (year, genre term) pair and collect them.

    Iterates over each year in `years` and, inside it, over each term in
    `query_terms`, following the result pagination until a limit is reached.

    Parameters
    ----------
    sp : client object exposing `search(q=..., type=..., limit=...)` and
        `next(page)` (a spotipy client in this notebook).
    years : iterable of years; each element is queried individually.
    query_terms : iterable of genre names used in the `genre:"..."` filter.
    max_total_songs : global cap on the number of collected tracks.
    max_songs_per_year : cap on the number of tracks collected per year.
    page_delay : seconds to wait after processing each result page.
    error_delay : seconds to wait after a failed search before moving on.

    Returns
    -------
    pandas.DataFrame with one row per unique track ID. The columns are always
    present, even when no track was found, so downstream column access works.
    """
    columns = [
        "Artista",
        "G√©nero musical",
        "Tipo",
        "Nombre",
        "A√±o de lanzamiento",
        "ID_Spotify",
        "ID_Album",
    ]
    spotify_data = []
    seen_track_ids = set()  # O(1) duplicate detection across all years/terms
    total_songs = 0  # global counter

    for year in years:
        year_range = f"{year}-{year}"
        songs_in_year = 0  # per-year counter
        print(f"\n--- INICIANDO EXTRACCI√ìN PARA EL A√ëO {year} ---")

        for term in query_terms:
            # Stop everything once the global limit has been reached
            if total_songs >= max_total_songs:
                print("\n‚ö†Ô∏è L√≠mite global de canciones alcanzado. Deteniendo extracci√≥n.")
                return pd.DataFrame(spotify_data, columns=columns)

            # Move on to the next year once this year's quota is full
            if songs_in_year >= max_songs_per_year:
                print(f"‚ö†Ô∏è L√≠mite de canciones para el a√±o {year} alcanzado ({songs_in_year}). Pasando al siguiente a√±o.")
                break

            query = f'genre:"{term}" year:{year_range}'
            print(f"   -> Buscando con t√©rmino: {term}")

            try:
                results = sp.search(q=query, type="track", limit=50)
                total_resultados_api = results["tracks"]["total"] if results and "tracks" in results else 0
                print(f"   -> Spotify reporta {total_resultados_api} resultados para '{term}'.")

                while results and total_songs < max_total_songs and songs_in_year < max_songs_per_year:
                    for track in results["tracks"]["items"]:
                        if total_songs >= max_total_songs or songs_in_year >= max_songs_per_year:
                            break

                        # A null entry must not abort the whole term
                        if not track:
                            continue

                        track_id = track["id"]

                        # Skip tracks already collected under another term/year
                        if track_id in seen_track_ids:
                            continue

                        spotify_data.append({
                            "Artista": track["artists"][0]["name"],
                            "G√©nero musical": f"funk (Origen: {term})",
                            "Tipo": "Canci√≥n",
                            "Nombre": track["name"],
                            # The queried year, not the album's release date
                            "A√±o de lanzamiento": year,
                            "ID_Spotify": track_id,
                            "ID_Album": track["album"]["id"]
                        })
                        seen_track_ids.add(track_id)
                        total_songs += 1
                        songs_in_year += 1

                    # Pagination: only request another page while under both limits
                    if results["tracks"]["next"] and total_songs < max_total_songs and songs_in_year < max_songs_per_year:
                        results = sp.next(results["tracks"])
                    else:
                        results = None

                    time.sleep(page_delay)

                print(f"      -> Acumulado a√±o {year}: {songs_in_year} | Total global: {total_songs}")

            except Exception as e:
                # Best effort: report the failure and continue with the next term
                print(f"   -> Error al buscar con t√©rmino '{term}': {e}")
                time.sleep(error_delay)

    return pd.DataFrame(spotify_data, columns=columns)

## EXTRAEMOS DETALLES DE ÁLBUM

In [7]:
def extract_album_details(sp, album_ids, sleep_time=0.2):
    """
    Fetch album metadata (name, release year, main artist, release type).

    Parameters
    ----------
    sp : client object exposing `album(album_id)` (a spotipy client in this
        notebook).
    album_ids : iterable of album IDs; duplicates and null values are ignored.
    sleep_time : seconds to wait after each request, successful or not.

    Returns
    -------
    pandas.DataFrame with one row per album that could be fetched, in order of
    first appearance in `album_ids`. The columns are always present, even when
    no album was fetched. Albums whose request or parsing fails are reported
    and skipped.
    """
    columns = [
        "ID_Album",
        "Nombre_Album",
        "A√±o_Lanzamiento_Album",
        "Artista_Principal_Album",
        "Tipo_Lanzamiento",
    ]
    album_details = []

    # Unique album IDs, keeping first-seen order so runs are reproducible
    unique_album_ids = [
        album_id for album_id in dict.fromkeys(album_ids) if pd.notna(album_id)
    ]

    print(f"\n--- INICIANDO EXTRACCI√ìN DE METADATOS DE {len(unique_album_ids)} √ÅLBUMES ---")

    for album_id in unique_album_ids:
        try:
            album_info = sp.album(album_id)

            album_details.append({
                "ID_Album": album_id,
                "Nombre_Album": album_info["name"],
                "A√±o_Lanzamiento_Album": album_info["release_date"][:4],  # year only
                "Artista_Principal_Album": album_info["artists"][0]["name"],
                "Tipo_Lanzamiento": album_info["album_type"]  # album, single, compilation
            })
        except Exception as e:
            # Best effort: report the failure and continue with the next album
            print(f"   -> Error al extraer detalles del √°lbum {album_id}: {e}")

        # Throttle every attempt, including failed ones
        time.sleep(sleep_time)

    print(f"‚úÖ Extracci√≥n de {len(album_details)} √°lbumes completada.")
    return pd.DataFrame(album_details, columns=columns)


## ALBUMES + TRACKS

In [8]:
if not sp:
    print("‚ùå 'sp' no est√° inicializado. Revisa las credenciales y la conexi√≥n.")
else:
    # Step 1: collect the tracks (500 overall, at most 250 per year)
    df_spotify_raw = extract_spotify_data_funk_ampliada(
        sp, YEARS, query_terms,
        max_total_songs=500,
        max_songs_per_year=250,
    )

    print("\n--- RESULTADO FINAL DE FUNK (ESTRATEGIA ROBUSTA) ---")
    print(df_spotify_raw.head(10))
    total_canciones = len(df_spotify_raw)
    print(f"\nüî¢ N√∫mero total de canciones extra√≠das: {total_canciones}")

    # Step 2: fetch the metadata of the albums those tracks belong to
    album_ids = df_spotify_raw["ID_Album"].tolist()
    df_albums = extract_album_details(sp, album_ids)

    print("\n--- RESUMEN √ÅLBUMES ---")
    print(df_albums.head(10))
    print(f"\nüî¢ N√∫mero total de √°lbumes con detalles: {len(df_albums)}")
    print(f"üî¢ √Ålbumes √∫nicos en df_spotify_raw: {df_spotify_raw['ID_Album'].nunique()}")


--- INICIANDO EXTRACCI√ìN PARA EL A√ëO 2010 ---
   -> Buscando con t√©rmino: funk
   -> Spotify reporta 85 resultados para 'funk'.
      -> Acumulado a√±o 2010: 85 | Total global: 85
   -> Buscando con t√©rmino: classic funk
   -> Spotify reporta 5 resultados para 'classic funk'.
      -> Acumulado a√±o 2010: 85 | Total global: 85
   -> Buscando con t√©rmino: deep funk
   -> Spotify reporta 10 resultados para 'deep funk'.
      -> Acumulado a√±o 2010: 85 | Total global: 85
   -> Buscando con t√©rmino: funky
   -> Spotify reporta 9 resultados para 'funky'.
      -> Acumulado a√±o 2010: 85 | Total global: 85
   -> Buscando con t√©rmino: funky breaks
   -> Spotify reporta 7 resultados para 'funky breaks'.
      -> Acumulado a√±o 2010: 85 | Total global: 85
   -> Buscando con t√©rmino: p-funk
   -> Spotify reporta 21 resultados para 'p-funk'.
      -> Acumulado a√±o 2010: 85 | Total global: 85
   -> Buscando con t√©rmino: funk rock
   -> Spotify reporta 5 resultados para 'funk rock'.
    

In [9]:
# VERIFICACI√ìN DE CANCIONES
print(f"N√∫mero de canciones encontradas en df_spotify_raw: {len(df_spotify_raw)}")
print(f"Primeras filas (head) de df_spotify_raw:\n{df_spotify_raw.head()}")

N√∫mero de canciones encontradas en df_spotify_raw: 500
Primeras filas (head) de df_spotify_raw:
                         Artista       G√©nero musical     Tipo  \
0                    Art Neville  funk (Origen: funk)  Canci√≥n   
1                    Art Neville  funk (Origen: funk)  Canci√≥n   
2                    Ursula 1000  funk (Origen: funk)  Canci√≥n   
3  Ernie and The Top Notes, Inc.  funk (Origen: funk)  Canci√≥n   
4                       Eddie Bo  funk (Origen: funk)  Canci√≥n   

               Nombre  A√±o de lanzamiento              ID_Spotify  \
0  I'm a Fool to Care                2010  1ABUVk83wzhmgK2QGhK33A   
1   Arabian Love Call                2010  3XVaEtvOiTOxG2UfC3v5sv   
2             Stinger                2010  1dmfqU3Fzfa6mP87cbLN36   
3   Just A Little Bit                2010  4MPB8JiWnfXgd3mvjZaVE1   
4         Roamin-Itis                2010  1rhNln9ZuBUaho7dofca4x   

                 ID_Album  
0  3v2MCcbzJGPau9T9ViBkwr  
1  3v2MCcbzJGPau9T9ViBkwr  


## HACEMOS EXTRACCIÓN DESDE LAST.FM

In [17]:
import requests

def get_lastfm_artist_info(artist_name):
    """
    Query Last.fm `artist.getInfo` for one artist.

    Reads the module-level LASTFM_API_KEY and LASTFM_USER_AGENT.

    Parameters
    ----------
    artist_name : artist name to look up (autocorrect is enabled).

    Returns
    -------
    dict with the keys "Artista" (the name as passed in), "Biografia_Resumen",
    "Listeners_LastFM", "Playcount_LastFM" and "Artistas_Similares" (up to five
    names joined by ", "), or None when the response has no "artist" entry or
    the request/parsing fails. Missing or empty sections of the payload yield
    empty/None fields instead of discarding the whole record.
    """
    base_url = "https://ws.audioscrobbler.com/2.0/"
    params = {
        "method": "artist.getInfo",
        "api_key": LASTFM_API_KEY,
        "artist": artist_name,
        "format": "json",
        "autocorrect": 1
    }
    headers = {"user-agent": LASTFM_USER_AGENT}

    try:
        resp = requests.get(base_url, params=params, headers=headers, timeout=10)
        data = resp.json()

        if not isinstance(data, dict) or "artist" not in data:
            return None

        a = data["artist"]

        # Popularity / statistics (`or {}` also covers an explicit null/empty value)
        stats = a.get("stats") or {}
        listeners = stats.get("listeners")
        playcount = stats.get("playcount")

        # Biography: take the short version (summary) and cut it at the first link
        bio = (a.get("bio") or {}).get("summary") or ""
        bio_short = bio.split("<a href=")[0].strip()

        # Similar artists (names).
        # NOTE(review): a single similar artist presumably may arrive as one
        # object instead of a list - handled defensively here; confirm.
        similars = (a.get("similar") or {}).get("artist") or []
        if isinstance(similars, dict):
            similars = [similars]
        similar_names = [
            s["name"]
            for s in similars[:5]
            if isinstance(s, dict) and s.get("name")
        ]

        return {
            "Artista": artist_name,
            "Biografia_Resumen": bio_short,
            "Listeners_LastFM": listeners,
            "Playcount_LastFM": playcount,
            "Artistas_Similares": ", ".join(similar_names)
        }

    except Exception as e:
        # Best effort: report the failure; the caller treats None as "no data"
        print(f"Error Last.fm (artist.getInfo) para {artist_name}: {e}")
        return None


In [18]:
def build_lastfm_artist_df(df_spotify_raw, sleep_time=0.2, verbose=False, info_fetcher=None):
    """
    Build a DataFrame of Last.fm data for the unique artists of a track frame.

    Parameters
    ----------
    df_spotify_raw : DataFrame with an "Artista" column; null names are ignored.
    sleep_time : seconds to wait after each artist lookup.
    verbose : when True, print one progress line per processed artist.
    info_fetcher : optional callable `name -> dict | None` used to look up each
        artist; defaults to `get_lastfm_artist_info`.

    Returns
    -------
    pandas.DataFrame with one row per unique artist. Artists whose lookup
    returns None get a row with only "Artista" filled in. The columns are
    always present, even for an empty input.
    """
    if info_fetcher is None:
        info_fetcher = get_lastfm_artist_info

    columns = [
        "Artista",
        "Biografia_Resumen",
        "Listeners_LastFM",
        "Playcount_LastFM",
        "Artistas_Similares",
    ]
    unique_artists = df_spotify_raw["Artista"].dropna().unique()
    total_artists = len(unique_artists)
    lastfm_rows = []

    print(f"\n--- EXTRACCI√ìN LAST.FM PARA {total_artists} ARTISTAS ---")

    for idx, artist in enumerate(unique_artists, start=1):
        info = info_fetcher(artist)

        if info is None:
            # Placeholder row so the artist is still present in the result
            info = dict.fromkeys(columns)
            info["Artista"] = artist

        lastfm_rows.append(info)
        if verbose:
            print(f"Procesado artista Last.fm {idx}/{total_artists}: {artist}")
        time.sleep(sleep_time)

    return pd.DataFrame(lastfm_rows, columns=columns)


In [26]:
# Last.fm lookup for every distinct artist found on Spotify
df_lastfm = build_lastfm_artist_df(df_spotify_raw, sleep_time=0.2)

print("\n--- EJEMPLO DE ARTISTAS LAST.FM (10 FILAS) ---")
print(df_lastfm.head(10))

# Left join on the artist name: every track row is kept
df_proyecto_final = df_spotify_raw.merge(df_lastfm, how="left", on="Artista")

# Persist the combined dataset
output_filename = "musicstream_funk_lastfm_2010_2024.csv"
df_proyecto_final.to_csv(output_filename, encoding="utf-8", index=False)

print("\nüéâ ¬°FASE 1 COMPLETA CON SPOTIFY + LAST.FM! üéâ")
print(f"Archivo guardado como: {output_filename}")
print(f"Filas totales en df_proyecto_final: {len(df_proyecto_final)}")
print("\n--- EJEMPLO DE 10 FILAS DEL DF FINAL ---")
print(df_proyecto_final.head(10))



--- EXTRACCI√ìN LAST.FM PARA 148 ARTISTAS ---

--- EJEMPLO DE ARTISTAS LAST.FM (10 FILAS) ---
                         Artista  \
0                    Art Neville   
1                    Ursula 1000   
2  Ernie and The Top Notes, Inc.   
3                       Eddie Bo   
4                         Latino   
5                           Roni   
6                        Mudbone   
7             Antonio Sorrentino   
8                        Di Paul   
9                     Sr Mandril   

                                   Biografia_Resumen Listeners_LastFM  \
0  Art Neville (Arthur Lanon Neville, New Orleans...            24871   
1  Ursula 1000 is a lounge music project of DJ Al...           221406   
2  Considered by most to be the pioneer in what w...             7382   
3  Eddie Bo (born Edwin Joseph Bocage, New Orlean...            68058   
4  The name Latino can be attributed to:\n\n1)  R...           134947   
5  There are several artists known as 'Roni':\n- ...             8232 

## fase 2: BBDD SQL

In [27]:
import pandas as pd

# Reload the CSV exported in phase 1 and preview its first rows
df = pd.read_csv("musicstream_funk_lastfm_2010_2024.csv")
df.head()


Unnamed: 0,Artista,G√©nero musical,Tipo,Nombre,A√±o de lanzamiento,ID_Spotify,ID_Album,Biografia_Resumen,Listeners_LastFM,Playcount_LastFM,Artistas_Similares
0,Art Neville,funk (Origen: funk),Canci√≥n,I'm a Fool to Care,2010,1ABUVk83wzhmgK2QGhK33A,3v2MCcbzJGPau9T9ViBkwr,"Art Neville (Arthur Lanon Neville, New Orleans...",24871,96973,"Jessie Hill, Dave Bartholomew, Warren Lee, Ben..."
1,Art Neville,funk (Origen: funk),Canci√≥n,Arabian Love Call,2010,3XVaEtvOiTOxG2UfC3v5sv,3v2MCcbzJGPau9T9ViBkwr,"Art Neville (Arthur Lanon Neville, New Orleans...",24871,96973,"Jessie Hill, Dave Bartholomew, Warren Lee, Ben..."
2,Ursula 1000,funk (Origen: funk),Canci√≥n,Stinger,2010,1dmfqU3Fzfa6mP87cbLN36,2LVt2AhBDjQSL3S0aRS9DV,Ursula 1000 is a lounge music project of DJ Al...,221406,2598979,"Soma Sonic, Rainstick Orchestra, Marsmobil, Zi..."
3,"Ernie and The Top Notes, Inc.",funk (Origen: funk),Canci√≥n,Just A Little Bit,2010,4MPB8JiWnfXgd3mvjZaVE1,0twe7YjMxsBncxZuGR1j1z,Considered by most to be the pioneer in what w...,7382,25011,"The Explosions, Lee Dorsey & Betty Harris, Mar..."
4,Eddie Bo,funk (Origen: funk),Canci√≥n,Roamin-Itis,2010,1rhNln9ZuBUaho7dofca4x,3ajZCsPQc48KL5ik17x7QC,"Eddie Bo (born Edwin Joseph Bocage, New Orlean...",68058,365931,"Betty Harris, Joe Tex, Johnny Adams, Lee Dorse..."


In [28]:
# Column names available for the SQL table definition
print(df.columns.tolist())


['Artista', 'G√©nero musical', 'Tipo', 'Nombre', 'A√±o de lanzamiento', 'ID_Spotify', 'ID_Album', 'Biografia_Resumen', 'Listeners_LastFM', 'Playcount_LastFM', 'Artistas_Similares']


In [29]:
!pip install mysql-connector-python
# O usa el que suele ser m√°s robusto con Pandas:
!pip install sqlalchemy mysqlclient




[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


Collecting sqlalchemy
  Downloading sqlalchemy-2.0.45-cp313-cp313-win_amd64.whl.metadata (9.8 kB)
Collecting mysqlclient
  Downloading mysqlclient-2.2.7-cp313-cp313-win_amd64.whl.metadata (4.8 kB)
Collecting greenlet>=1 (from sqlalchemy)
  Downloading greenlet-3.3.0-cp313-cp313-win_amd64.whl.metadata (4.2 kB)
Downloading sqlalchemy-2.0.45-cp313-cp313-win_amd64.whl (2.1 MB)
   ---------------------------------------- 0.0/2.1 MB ? eta -:--:--
   ---------------------------------------- 2.1/2.1 MB 19.2 MB/s  0:00:00
Downloading mysqlclient-2.2.7-cp313-cp313-win_amd64.whl (208 kB)
Downloading greenlet-3.3.0-cp313-cp313-win_amd64.whl (301 kB)
Installing collected packages: mysqlclient, greenlet, sqlalchemy

   ---------------------------------------- 0/3 [mysqlclient]
   ------------- -------------------------- 1/3 [greenlet]
   ------------- -------------------------- 1/3 [greenlet]
   ------------- -------------------------- 1/3 [greenlet]
   -------------------------- ------------- 2/3 [


[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [30]:
import mysql.connector
import pandas as pd

# =====================================
# 1. Connection parameters and names
# =====================================
# Connection settings are read from the environment (.env file) so that no
# credential is stored in the notebook itself.
MYSQL_HOST = os.getenv("MYSQL_HOST", "localhost")
MYSQL_USER = os.getenv("MYSQL_USER", "root")
MYSQL_PASSWORD = os.getenv("MYSQL_PASSWORD")  # must be defined in .env / the environment

DB_NAME = "funk_db"                  # database name
TABLE_NAME = "tracks_funk"           # table name
CSV_FILE = "musicstream_funk_lastfm_2010_2024.csv"   # final CSV from phase 1


# =====================================
# 2. Create the database if it does not exist
# =====================================
# Server-level connection (no database selected yet)
conn_root = mysql.connector.connect(
    host=MYSQL_HOST,
    user=MYSQL_USER,
    password=MYSQL_PASSWORD
)
try:
    cursor_root = conn_root.cursor()
    try:
        cursor_root.execute(f"CREATE DATABASE IF NOT EXISTS {DB_NAME}")
        print(f"‚úÖ Base de datos '{DB_NAME}' creada o verificada en el servidor.")
    finally:
        cursor_root.close()
finally:
    # Release the connection even when the statement fails
    conn_root.close()


# =====================================
# 3. Connect to the funk_db database
# =====================================
# This connection and cursor stay open; the statements below reuse them.
conn = mysql.connector.connect(
    host=MYSQL_HOST,
    user=MYSQL_USER,
    password=MYSQL_PASSWORD,
    database=DB_NAME
)
cursor = conn.cursor()
print(f"‚úÖ Conexi√≥n establecida a la base de datos MySQL: {DB_NAME}")


# =====================================
# 4. Load the CSV with pandas
# =====================================
df = pd.read_csv(CSV_FILE)
filas = len(df)

# (OPTIONAL) Rename columns here if needed, for example:
# df = df.rename(columns={"Artista": "artista", "Nombre": "titulo_cancion"})

# No column is renamed above, so the message only reports the load
print(f"‚úÖ Datos cargados: {filas} filas.")


# =====================================
# 5. Create the table (simple example)
#    Adjust the types to match your real columns
# =====================================

# Drop the table if it already exists (optional during development)
cursor.execute(f"DROP TABLE IF EXISTS {TABLE_NAME}")

# NOTE(review): the CSV also carries an "Artistas_Similares" column, which is
# neither declared here nor listed in the INSERT below - confirm whether
# leaving it out is intentional.
create_table_sql = f"""
CREATE TABLE {TABLE_NAME} (
    id INT AUTO_INCREMENT PRIMARY KEY,
    Artista VARCHAR(255),
    `G√©nero musical` VARCHAR(255),
    Tipo VARCHAR(50),
    Nombre VARCHAR(255),
    `A√±o de lanzamiento` INT,
    ID_Spotify VARCHAR(50),
    ID_Album VARCHAR(50),
    Biografia_Resumen TEXT,
    Listeners_LastFM BIGINT,
    Playcount_LastFM BIGINT
);
"""
cursor.execute(create_table_sql)
conn.commit()


# =====================================
# 6. Insert the pandas data into MySQL
# =====================================

# Parameterized statement: one %s placeholder per listed column (10 in total)
insert_sql = f"""
INSERT INTO {TABLE_NAME} (
    Artista, `G√©nero musical`, Tipo, Nombre, `A√±o de lanzamiento`,
    ID_Spotify, ID_Album, Biografia_Resumen, Listeners_LastFM, Playcount_LastFM
) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
"""

for _, row in df.iterrows():
    cursor.execute(insert_sql, (
        row.get("Artista"),
        row.get("G√©nero musical"),
        row.get("Tipo"),
        row.get("Nombre"),
        int(row.get("A√±o de lanzamiento")) if pd.notna(row.get("A√±o de lanzamiento")) else None,
        row.get("ID_Spotify"),
        row.get("ID_Album"),


_IncompleteInputError: incomplete input (1516372998.py, line 104)