## imports

In [1]:
import os
import time
from datetime import datetime

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd
from dotenv import load_dotenv

# --- PAR√ÅMETROS ESPEC√çFICOS PARA LA TAREA ---


In [2]:
# Rango de a√±os
YEARS = [2010, 2024]

In [3]:
# Lista de g√©neros relacionados con funk
funk_related_genres = [
    # Funk cl√°sico
    "funk",
    "classic funk",
    "deep funk",
    "funky",
    "funky breaks",
    "p-funk",
    "funk rock",
    "funk metal",
    "afro funk",
    "jazz funk",
    "disco funk",
    "boogie",
    "old school funk",

    # Soul / neo soul
    "soul",
    "classic soul",
    "neo soul",
    "contemporary r&b",
    "motown",
    "funk soul",

    # Funk house / dance
    "funk house",
    "disco house",
    "funky house",
    "deep funk house",
    "electro funk",
    "dance funk",
    "groove house"
]

# T√©rminos para b√∫squeda
query_terms = funk_related_genres

## CARGAMOS CREDENCIALES DESDE .env

In [4]:
load_dotenv()  # Carga variables del archivo .env

SPOTIPY_CLIENT_ID = os.getenv("SPOTIPY_CLIENT_ID")
SPOTIPY_CLIENT_SECRET = os.getenv("SPOTIPY_CLIENT_SECRET")
LASTFM_API_KEY = os.getenv("LASTFM_API_KEY")
LASTFM_USER_AGENT = os.getenv("LASTFM_USER_AGENT", "spotify-lastfm-project")

if not SPOTIPY_CLIENT_ID or not SPOTIPY_CLIENT_SECRET:
    raise ValueError("Faltan SPOTIPY_CLIENT_ID o SPOTIPY_CLIENT_SECRET en el archivo .env")
if not LASTFM_API_KEY:
    raise ValueError("Falta LASTFM_API_KEY en el archivo .env")

## INICIALIZAMOS CLIENTE SPOTIFY

In [5]:
try:
    sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(
        client_id=SPOTIPY_CLIENT_ID,
        client_secret=SPOTIPY_CLIENT_SECRET
    ))
    print("‚úÖ Conexi√≥n a Spotify exitosa.")
except Exception as e:
    print(f"‚ùå Error de conexi√≥n a Spotify: {e}")
    sp = None

print("Objeto sp:", type(sp))

‚úÖ Conexi√≥n a Spotify exitosa.
Objeto sp: <class 'spotipy.client.Spotify'>


## EXTRAEMOS CANCIONES TRACKS (FUNk Y SUBGENEROS)

## EXTRAEMOS DETALLES DE ALBUM

In [6]:
import pandas as pd
import time

def extract_spotify_data_funk(
    sp,
    years,
    query_terms,
    max_total_songs=500,
    max_songs_per_year=250
):
    spotify_data = []
    total_songs = 0

    for year in years:
        songs_in_year = 0
        print(f"\n--- EXTRAYENDO A√ëO {year} ---")

        for term in query_terms:
            if total_songs >= max_total_songs:
                print("‚ö†Ô∏è L√≠mite global alcanzado")
                return pd.DataFrame(spotify_data)

            query = f'{term} year:{year}'
            print(f"üîé {query}")

            results = sp.search(q=query, type="track", limit=50)

            while results and songs_in_year < max_songs_per_year and total_songs < max_total_songs:

                for track in results["tracks"]["items"]:
                    track_id = track["id"]

                    # evitar duplicados
                    if any(d["ID_Spotify"] == track_id for d in spotify_data):
                        continue

                    spotify_data.append({
                        "Artista": track["artists"][0]["name"],
                        "Genero": "funk",
                        "Origen_busqueda": term,
                        "Cancion": track["name"],
                        "Year": year,
                        "ID_Spotify": track_id,
                        "ID_Album": track["album"]["id"],
                        "Popularidad": track["popularity"]
                    })

                    songs_in_year += 1
                    total_songs += 1

                    if songs_in_year >= max_songs_per_year or total_songs >= max_total_songs:
                        break

                # paginaci√≥n
                if results["tracks"]["next"]:
                    results = sp.next(results["tracks"])
                    time.sleep(1)
                else:
                    break

        print(f"‚úÖ A√±o {year}: {songs_in_year} canciones")

    return pd.DataFrame(spotify_data)               

In [7]:
# DIAGN√ìSTICO: Verificar par√°metros de entrada
print("\n=== DIAGN√ìSTICO ===")
print(f"Conexi√≥n sp: {sp is not None}")
print(f"YEARS: {YEARS}")
print(f"query_terms: {query_terms}")
print(f"Tipo de query_terms: {type(query_terms)}")

# Prueba manual de b√∫squeda simple
try:
    test_search = sp.search(q='funk', type='track', limit=5)
    print(f"\n‚úÖ B√∫squeda manual exitosa: {len(test_search['tracks']['items'])} resultados")
    if test_search['tracks']['items']:
        print(f"Ejemplo: {test_search['tracks']['items'][0]['name']}")
except Exception as e:
    print(f"‚ùå Error en b√∫squeda manual: {e}")


=== DIAGN√ìSTICO ===
Conexi√≥n sp: True
YEARS: [2010, 2024]
query_terms: ['funk', 'classic funk', 'deep funk', 'funky', 'funky breaks', 'p-funk', 'funk rock', 'funk metal', 'afro funk', 'jazz funk', 'disco funk', 'boogie', 'old school funk', 'soul', 'classic soul', 'neo soul', 'contemporary r&b', 'motown', 'funk soul', 'funk house', 'disco house', 'funky house', 'deep funk house', 'electro funk', 'dance funk', 'groove house']
Tipo de query_terms: <class 'list'>

‚úÖ B√∫squeda manual exitosa: 5 resultados
Ejemplo: Funk de Beleza - Slowed


## ALBUMES + TRACKS

In [15]:
df_spotify_raw = extract_spotify_data_funk(
    sp,
    YEARS,
    query_terms,
    max_total_songs=500,
    max_songs_per_year=250
)

print(f"N√∫mero de canciones encontradas en df_spotify_raw: {len(df_spotify_raw)}")
print(df_spotify_raw.head())



--- EXTRAYENDO A√ëO 2010 ---
üîé funk year:2010
üîé classic funk year:2010
üîé deep funk year:2010
üîé funky year:2010
üîé funky breaks year:2010
üîé p-funk year:2010
üîé funk rock year:2010
üîé funk metal year:2010
üîé afro funk year:2010
üîé jazz funk year:2010
üîé disco funk year:2010
üîé boogie year:2010
üîé old school funk year:2010
üîé soul year:2010
üîé classic soul year:2010
üîé neo soul year:2010
üîé contemporary r&b year:2010
üîé motown year:2010
üîé funk soul year:2010
üîé funk house year:2010
üîé disco house year:2010
üîé funky house year:2010
üîé deep funk house year:2010
üîé electro funk year:2010
üîé dance funk year:2010
üîé groove house year:2010
‚úÖ A√±o 2010: 250 canciones

--- EXTRAYENDO A√ëO 2024 ---
üîé funk year:2024
üîé classic funk year:2024
üîé deep funk year:2024
üîé funky year:2024
‚ö†Ô∏è L√≠mite global alcanzado
N√∫mero de canciones encontradas en df_spotify_raw: 500
          Artista Genero Origen_busqueda              Cancion

In [16]:
# VERIFICACI√ìN DE CANCIONES
print(f"N√∫mero de canciones encontradas en df_spotify_raw: {len(df_spotify_raw)}")
print(f"Primeras filas (head) de df_spotify_raw:\n{df_spotify_raw.head()}")

N√∫mero de canciones encontradas en df_spotify_raw: 500
Primeras filas (head) de df_spotify_raw:
          Artista Genero Origen_busqueda              Cancion  Year  \
0     Bryce Janey   funk            funk   Funky Guitar Blues  2010   
1     Funky Bijou   funk            funk   Funky Bijou Anthem  2010   
2           Kas√≠r   funk            funk     The Funky Spider  2010   
3  La Discoth√®que   funk            funk            Funkytown  2010   
4      Tom Browne   funk            funk  Funkin' for Jamaica  2010   

               ID_Spotify                ID_Album  Popularidad  
0  2dqkDzJPwSIduZlKJh0lvB  1ULW18kqQ47Qp3YUATkvFQ           42  
1  3vqJENUjIAwBiWs5zVLWha  4ieRodIyGcBr9uU20D2MQU           35  
2  5LXgUQ5VRmm7PsVBTnQyOn  6tZRWMS69RdXXvcuSC0ILj           34  
3  3fehLpIWjlE3YnTXRkqPN1  6CXNfnJUwEHFYUaiUNmeby           34  
4  1es1B258zAVeXx6uzksrJd  34psLr9vLAgw49ehAgo6SJ           28  


## HACEMOS EXTRACCION DESDE LAST.FM

In [17]:
import requests

def get_lastfm_artist_info(artist_name):
    """
    Usa artist.getInfo para obtener:
    - Biograf√≠a breve
    - Listeners y playcount
    - Lista de artistas similares
    """
    base_url = "https://ws.audioscrobbler.com/2.0/"
    params = {
        "method": "artist.getInfo",
        "api_key": LASTFM_API_KEY,
        "artist": artist_name,
        "format": "json",
        "autocorrect": 1
    }
    headers = {"user-agent": LASTFM_USER_AGENT}

    try:
        resp = requests.get(base_url, params=params, headers=headers, timeout=10)
        data = resp.json()

        if "artist" not in data:
            return None

        a = data["artist"]

        # Popularidad / estad√≠sticas
        stats = a.get("stats", {})
        listeners = stats.get("listeners")
        playcount = stats.get("playcount")

        # Biograf√≠a: cogemos la versi√≥n corta (summary) y la recortamos un poco
        bio = a.get("bio", {}).get("summary", "")
        bio_short = bio.split("<a href=")[0].strip() if bio else ""

        # Artistas similares (nombres)
        similars = a.get("similar", {}).get("artist", [])
        similar_names = [s.get("name") for s in similars][:5]

        return {
            "Artista": artist_name,
            "Biografia_Resumen": bio_short,
            "Listeners_LastFM": listeners,
            "Playcount_LastFM": playcount,
            "Artistas_Similares": ", ".join(similar_names)
        }

    except Exception as e:
        print(f"Error Last.fm (artist.getInfo) para {artist_name}: {e}")
        return None


In [19]:
def build_lastfm_artist_df(df_spotify_raw, sleep_time=0.2):
    unique_artists = df_spotify_raw["Artista"].dropna().unique()
    lastfm_rows = []

    print(f"\n--- EXTRACCI√ìN LAST.FM PARA {len(unique_artists)} ARTISTAS ---")

    for idx, artist in enumerate(unique_artists, start=1):
        info = get_lastfm_artist_info(artist)
        if info is None:
            info = {
                "Artista": artist,
                "Biografia_Resumen": None,
                "Listeners_LastFM": None,
                "Playcount_LastFM": None,
                "Artistas_Similares": None
            }

        lastfm_rows.append(info)
        # comenta esta l√≠nea para que no saque cada artista:
        # print(f"Procesado artista Last.fm {idx}/{len(unique_artists)}: {artist}")
        time.sleep(sleep_time)

    df_lastfm = pd.DataFrame(lastfm_rows)
    return df_lastfm


In [20]:
df_lastfm = build_lastfm_artist_df(df_spotify_raw, sleep_time=0.2)

print("\n--- EJEMPLO DE ARTISTAS LAST.FM (10 FILAS) ---")
print(df_lastfm.head(10))

df_proyecto_final = pd.merge(
    df_spotify_raw,
    df_lastfm,
    on="Artista",
    how="left"
)

output_filename = "musicstream_funk_lastfm_2010_2024.csv"
df_proyecto_final.to_csv(output_filename, index=False, encoding="utf-8")

print("\nüéâ ¬°FASE 1 COMPLETA CON SPOTIFY + LAST.FM! üéâ")
print(f"Archivo guardado como: {output_filename}")
print(f"Filas totales en df_proyecto_final: {len(df_proyecto_final)}")
print("\n--- EJEMPLO DE 10 FILAS DEL DF FINAL ---")
print(df_proyecto_final.head(10))



--- EXTRACCI√ìN LAST.FM PARA 312 ARTISTAS ---

--- EJEMPLO DE ARTISTAS LAST.FM (10 FILAS) ---
            Artista                                  Biografia_Resumen  \
0       Bryce Janey  Bryce Janey began his career at age thirteen i...   
1       Funky Bijou                                                      
2             Kas√≠r  Kas√≠r are a young Danish trio, who play Irish ...   
3    La Discoth√®que                                                      
4        Tom Browne  Tom Browne (born October 30, 1954, Queens, New...   
5    The Funky Town                                                      
6       Mr. Talkbox                                                      
7      The Sequence  "The Sequence" can refer to either a 1980's hi...   
8          Maxipaul                                                      
9  The Jive Turkeys                                                      

  Listeners_LastFM Playcount_LastFM  \
0            25748           142353   
1        

## fase 2: BBDD SQL

In [35]:
import pandas as pd

# 1Ô∏è‚É£ Cargar CSV
df = pd.read_csv("musicstream_funk_lastfm_2010_2024.csv")

# 2Ô∏è‚É£ Eliminar columnas de audio features (aunque no existan)
df = df.drop(
    columns=['Bailabilidad', 'Energia', 'Valence', 'Tempo'],
    errors='ignore'
)

# 3Ô∏è‚É£ (Opcional pero muy recomendado) Normalizar nombres de columnas
df = df.rename(columns={
    'Artista': 'artista',
    'G√©nero musical': 'genero',
    'Tipo': 'tipo',
    'Nombre': 'cancion',
    'A√±o de lanzamiento': 'year',
    'ID_Spotify': 'spotify_id',
    'ID_Album': 'album_id',
    'Popularidad': 'spotify_popularity',
    'Biografia_Resumen': 'bio_lastfm',
    'Listeners_LastFM': 'lastfm_listeners',
    'Playcount_LastFM': 'lastfm_playcount',
    'Artistas_Similares': 'similar_artists'
})

# 4Ô∏è‚É£ Verificaci√≥n final
print(df.columns.tolist())
df.head()


['artista', 'genero', 'tipo', 'cancion', 'year', 'spotify_id', 'album_id', 'spotify_popularity', 'bio_lastfm', 'lastfm_listeners', 'lastfm_playcount', 'similar_artists']


Unnamed: 0,artista,genero,tipo,cancion,year,spotify_id,album_id,spotify_popularity,bio_lastfm,lastfm_listeners,lastfm_playcount,similar_artists
0,Bryce Janey,funk (Origen: funk),Canci√≥n,Funky Guitar Blues,2010,2dqkDzJPwSIduZlKJh0lvB,1ULW18kqQ47Qp3YUATkvFQ,42,Bryce Janey began his career at age thirteen i...,25748.0,142353.0,"Tony Spinner, Philip Sayce Group, Jason Elmore..."
1,Funky Bijou,funk (Origen: funk),Canci√≥n,Funky Bijou Anthem,2010,3vqJENUjIAwBiWs5zVLWha,4ieRodIyGcBr9uU20D2MQU,35,,8626.0,56316.0,"Atomic Project, A.Skillz, Dj Lean Rock, Illag,..."
2,Kas√≠r,funk (Origen: funk),Canci√≥n,The Funky Spider,2010,5LXgUQ5VRmm7PsVBTnQyOn,6tZRWMS69RdXXvcuSC0ILj,34,"Kas√≠r are a young Danish trio, who play Irish ...",11027.0,66571.0,"The Outside Track, P√°draig Rynne, John McSherr..."
3,La Discoth√®que,funk (Origen: funk),Canci√≥n,Funkytown,2010,3fehLpIWjlE3YnTXRkqPN1,6CXNfnJUwEHFYUaiUNmeby,34,,4609.0,14002.0,"The Disco Orchestra, The Disco Brothers, Toni ..."
4,Tom Browne,funk (Origen: funk),Canci√≥n,Funkin' for Jamaica,2010,1es1B258zAVeXx6uzksrJd,34psLr9vLAgw49ehAgo6SJ,28,"Tom Browne (born October 30, 1954, Queens, New...",173816.0,828300.0,"Bobbi Humphrey, Loose Ends, Con Funk Shun, Ron..."


In [22]:
!pip install mysql-connector-python
# O usa el que suele ser m√°s robusto con Pandas:
!pip install sqlalchemy mysqlclient




[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip





[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [23]:
import mysql.connector
import pandas as pd

# =====================================
# 1. Par√°metros de conexi√≥n y nombres
# =====================================
MYSQL_HOST = "localhost"
MYSQL_USER = "root"
MYSQL_PASSWORD = "AlumnaAdalab"

DB_NAME = "funk_db"                  # nombre de tu BBDD
TABLE_NAME = "tracks_funk"           # nombre de tabla
CSV_FILE = "musicstream_funk_lastfm_2010_2024.csv"   # tu CSV final


# =====================================
# 2. Crear BBDD si no existe
# =====================================
conn_root = mysql.connector.connect(
    host=MYSQL_HOST,
    user=MYSQL_USER,
    password=MYSQL_PASSWORD
)
cursor_root = conn_root.cursor()

cursor_root.execute(f"CREATE DATABASE IF NOT EXISTS {DB_NAME}")
print(f"‚úÖ Base de datos '{DB_NAME}' creada o verificada en el servidor.")

cursor_root.close()
conn_root.close()


# =====================================
# 3. Conectarse a la BBDD funk_db
# =====================================
conn = mysql.connector.connect(
    host=MYSQL_HOST,
    user=MYSQL_USER,
    password=MYSQL_PASSWORD,
    database=DB_NAME
)
cursor = conn.cursor()
print(f"‚úÖ Conexi√≥n establecida a la base de datos MySQL: {DB_NAME}")


# =====================================
# 4. Cargar CSV con pandas
# =====================================
df = pd.read_csv(CSV_FILE)
filas = len(df)

# (OPCIONAL) Renombrar alguna columna si lo necesitas, por ejemplo:
# df = df.rename(columns={"Artista": "artista", "Nombre": "titulo_cancion"})

print(f"‚úÖ Datos cargados y columna renombrada: {filas} filas.")


# =====================================
# 5. Crear tabla (ejemplo sencillo)
#    Ajusta tipos seg√∫n tus columnas reales
# =====================================

# Borro la tabla si ya existe (opcional durante desarrollo)
cursor.execute(f"DROP TABLE IF EXISTS {TABLE_NAME}")

create_table_sql = f"""
CREATE TABLE {TABLE_NAME} (
    id INT AUTO_INCREMENT PRIMARY KEY,
    Artista VARCHAR(255),
    `G√©nero musical` VARCHAR(255),
    Tipo VARCHAR(50),
    Nombre VARCHAR(255),
    `A√±o de lanzamiento` INT,
    ID_Spotify VARCHAR(50),
    ID_Album VARCHAR(50),
    Biografia_Resumen TEXT,
    Listeners_LastFM BIGINT,
    Playcount_LastFM BIGINT
);
"""
cursor.execute(create_table_sql)
conn.commit()


# =====================================
# 6. Insertar datos de pandas en MySQL
# =====================================

insert_sql = f"""
INSERT INTO {TABLE_NAME} (
    Artista, `G√©nero musical`, Tipo, Nombre, `A√±o de lanzamiento`,
    ID_Spotify, ID_Album, Biografia_Resumen, Listeners_LastFM, Playcount_LastFM
) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
"""

for _, row in df.iterrows():
    cursor.execute(insert_sql, (
        row.get("Artista"),
        row.get("G√©nero musical"),
        row.get("Tipo"),
        row.get("Nombre"),
        int(row.get("A√±o de lanzamiento")) if pd.notna(row.get("A√±o de lanzamiento")) else None,
        row.get("ID_Spotify"),
        row.get("ID_Album"),
        row.get("Biografia_Resumen"),
        int(row.get("Listeners_LastFM")) if pd.notna(row.get("Listeners_LastFM")) else None,
        int(row.get("Playcount_LastFM")) if pd.notna(row.get("Playcount_LastFM")) else None
    ))

conn.commit()
print(f"‚úÖ Insertadas {filas} filas en la tabla {TABLE_NAME}.")

cursor.close()
conn.close()
print("‚úÖ Conexi√≥n cerrada.")

‚úÖ Base de datos 'funk_db' creada o verificada en el servidor.
‚úÖ Conexi√≥n establecida a la base de datos MySQL: funk_db
‚úÖ Datos cargados y columna renombrada: 500 filas.


ProgrammingError: 1054 (42S22): Unknown column 'nan' in 'field list'