In [None]:
import json
import os
import time

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import seaborn as sns
from scipy import stats
from scipy.stats import zscore

# Last.fm credentials. Each value can be overridden through an environment
# variable so the notebook can run without a secret committed to source.
# NOTE(review): the literal fallbacks are kept only so existing runs keep
# working; the committed API key should be rotated and the fallback removed.
API_KEY = os.environ.get('LASTFM_API_KEY', '1a9ea75b5f92dfda381d817fc00e5458')
USER_AGENT = os.environ.get('LASTFM_USER_AGENT', '808e0a8bbaf9d30bf181edb60c742824')
LIMIT = 20  # Number of results requested from each endpoint

# Headers sent with every request to the Last.fm API.
headers = {
    'user-agent': USER_AGENT
}

## Fetch the global top artists
def get_top_artists():
    """Request the global top-artists chart from the Last.fm API.

    Calls the ``chart.getTopArtists`` method using the module-level
    ``API_KEY``, ``LIMIT`` and ``headers``.

    Returns:
        pandas.DataFrame: the ``artists.artist`` list of the JSON response,
        flattened with ``pd.json_normalize``. An empty DataFrame is returned
        when the HTTP status is not 200 (the status code is printed).

    Raises:
        requests.exceptions.RequestException: if the request fails or exceeds
            the timeout.
    """
    params = {
        'method': 'chart.getTopArtists',
        'api_key': API_KEY,
        'format': 'json',
        'limit': LIMIT
    }
    # A timeout keeps a stalled connection from blocking the notebook forever.
    response = requests.get('https://ws.audioscrobbler.com/2.0/', headers=headers, params=params, timeout=30)
    # Start from an empty frame so there is always something to return;
    # previously `df` was unbound on a non-200 response (UnboundLocalError).
    df = pd.DataFrame()
    if response.status_code == 200:
        data = response.json()
        df = pd.json_normalize(data['artists']['artist'])
    else:
        print(f"Error en la solicitud de top artistas globales: {response.status_code}")
    # Pause one second after every request (presumably to respect the API rate limit).
    time.sleep(1)
    return df

## Fetch the top artists for a country
def get_top_artists_by_country(country):
    """Request the top artists of one country from the Last.fm API.

    Calls the ``geo.getTopArtists`` method using the module-level
    ``API_KEY``, ``LIMIT`` and ``headers``.

    Args:
        country: value sent as the ``country`` request parameter
            (e.g. ``"Germany"``).

    Returns:
        pandas.DataFrame: the ``topartists.artist`` list of the JSON response,
        flattened with ``pd.json_normalize``. An empty DataFrame is returned
        when the HTTP status is not 200 (the status code is printed).

    Raises:
        requests.exceptions.RequestException: if the request fails or exceeds
            the timeout.
    """
    params = {
        'method': 'geo.getTopArtists',
        'country': country,
        'api_key': API_KEY,
        'format': 'json',
        'limit': LIMIT
    }
    # A timeout keeps a stalled connection from blocking the notebook forever.
    response = requests.get('https://ws.audioscrobbler.com/2.0/', headers=headers, params=params, timeout=30)
    # Start from an empty frame so there is always something to return;
    # previously `df` was unbound on a non-200 response (UnboundLocalError).
    df = pd.DataFrame()
    if response.status_code == 200:
        data = response.json()
        df = pd.json_normalize(data['topartists']['artist'])
    else:
        print(f"Error en la solicitud de top artistas por país: {response.status_code}")
    # Pause one second after every request (presumably to respect the API rate limit).
    time.sleep(1)
    return df

## Fetch the top tracks for a country
def get_top_tracks_by_country(country):
    """Request the top tracks of one country from the Last.fm API.

    Calls the ``geo.getTopTracks`` method using the module-level
    ``API_KEY``, ``LIMIT`` and ``headers``.

    Args:
        country: value sent as the ``country`` request parameter
            (e.g. ``"Spain"``).

    Returns:
        pandas.DataFrame: the ``tracks.track`` list of the JSON response,
        flattened with ``pd.json_normalize``. An empty DataFrame is returned
        when the HTTP status is not 200 (the status code is printed).

    Raises:
        requests.exceptions.RequestException: if the request fails or exceeds
            the timeout.
    """
    params = {
        'method': 'geo.getTopTracks',
        'country': country,
        'api_key': API_KEY,
        'format': 'json',
        'limit': LIMIT
    }
    # A timeout keeps a stalled connection from blocking the notebook forever.
    response = requests.get('https://ws.audioscrobbler.com/2.0/', headers=headers, params=params, timeout=30)
    # Start from an empty frame so there is always something to return;
    # previously `df` was unbound on a non-200 response (UnboundLocalError).
    df = pd.DataFrame()
    if response.status_code == 200:
        data = response.json()
        df = pd.json_normalize(data['tracks']['track'])
    else:
        print(f"Error en la solicitud de top canciones por país: {response.status_code}")
    # Pause one second after every request (presumably to respect the API rate limit).
    time.sleep(1)
    return df

## Fetch the top albums of an artist
def get_top_albums_by_artist(artist):
    """Request the top albums of one artist from the Last.fm API.

    Calls the ``artist.getTopAlbums`` method using the module-level
    ``API_KEY``, ``LIMIT`` and ``headers``.

    Args:
        artist: value sent as the ``artist`` request parameter.

    Returns:
        pandas.DataFrame: the ``topalbums.album`` list of the JSON response,
        flattened with ``pd.json_normalize``. An empty DataFrame is returned
        when the HTTP status is not 200 (the status code is printed).

    Raises:
        requests.exceptions.RequestException: if the request fails or exceeds
            the timeout.
    """
    params = {
        'method': 'artist.getTopAlbums',
        'artist': artist,
        'api_key': API_KEY,
        'format': 'json',
        'limit': LIMIT
    }
    # A timeout keeps a stalled connection from blocking the notebook forever.
    response = requests.get('https://ws.audioscrobbler.com/2.0/', headers=headers, params=params, timeout=30)
    # Start from an empty frame so there is always something to return;
    # previously `df` was unbound on a non-200 response (UnboundLocalError).
    df = pd.DataFrame()
    if response.status_code == 200:
        data = response.json()
        df = pd.json_normalize(data['topalbums']['album'])
    else:
        print(f"Error en la solicitud de top álbumes del artista: {response.status_code}")
    # Pause one second after every request (presumably to respect the API rate limit).
    time.sleep(1)
    return df

## Clean and sort a DataFrame
def limpiar_y_ordenar(df, columnas_a_eliminar=None):
    """Drop unwanted columns and rank rows by listener count.

    Args:
        df: input DataFrame; it is not modified.
        columnas_a_eliminar: optional list of column names to drop. When
            None or empty, no column is dropped.

    Returns:
        pandas.DataFrame: a new frame without the dropped columns, ordered by
        ``listeners`` from highest to lowest when that column exists, with a
        fresh 0..n-1 index.

    Raises:
        KeyError: if a name in ``columnas_a_eliminar`` is not a column of ``df``.
    """
    if columnas_a_eliminar:
        df = df.drop(columns=columnas_a_eliminar)
    if 'listeners' in df.columns:
        # Compare by numeric value: when the column holds text, a plain sort
        # is lexicographic and would rank "9" above "100". The key only
        # affects ordering; the stored values and dtype are left untouched.
        # Values that cannot be parsed become NaN and are placed last.
        df = df.sort_values(
            by='listeners',
            ascending=False,
            key=lambda col: pd.to_numeric(col, errors='coerce'),
        )
    return df.reset_index(drop=True)

In [None]:
# Download the top-artist ranking of each country, one request per country,
# in the order Germany, Spain, France, Russia, United Kingdom, United States.
(
    df_artist_DE,
    df_artist_ES,
    df_artist_FR,
    df_artist_RUS,
    df_artist_UK,
    df_artist_US,
) = [
    get_top_artists_by_country(country_name)
    for country_name in (
        "Germany",
        "Spain",
        "France",
        "Russian Federation",
        "United Kingdom",
        "United States",
    )
]

# Show the German frame as a sample of the raw response.
df_artist_DE

In [None]:
# Remove the columns the analysis does not use and rank each country frame
# by listeners (see limpiar_y_ordenar).
unused_columns = ["mbid", "url", "streamable", "image"]
(
    df_artist_DE_filtered,
    df_artist_ES_filtered,
    df_artist_FR_filtered,
    df_artist_RUS_filtered,
    df_artist_UK_filtered,
    df_artist_US_filtered,
) = [
    limpiar_y_ordenar(raw_frame, unused_columns)
    for raw_frame in (
        df_artist_DE,
        df_artist_ES,
        df_artist_FR,
        df_artist_RUS,
        df_artist_UK,
        df_artist_US,
    )
]

# Show the German frame as a sample of the cleaned result.
df_artist_DE_filtered

In [None]:
# Tag every row with the country its ranking came from, so the frames can be
# told apart once they are combined. The column is added in place.
for frame, country_label in (
    (df_artist_DE_filtered, "Germany"),
    (df_artist_ES_filtered, "Spain"),
    (df_artist_FR_filtered, "France"),
    (df_artist_RUS_filtered, "Russia"),
    (df_artist_UK_filtered, "United Kingdom"),
    (df_artist_US_filtered, "United States"),
):
    frame["Country"] = country_label

# Inspect column dtypes, then display the German frame.
df_artist_DE_filtered.info()
df_artist_DE_filtered

In [None]:
# Cast the listeners column of every country frame to float (in place) so
# the statistics computed in the following cells operate on numbers.
for frame in (
    df_artist_DE_filtered,
    df_artist_ES_filtered,
    df_artist_FR_filtered,
    df_artist_RUS_filtered,
    df_artist_UK_filtered,
    df_artist_US_filtered,
):
    frame["listeners"] = frame["listeners"].astype(float)

# Confirm the new dtype, then display the German frame.
df_artist_DE_filtered.info()
df_artist_DE_filtered

In [None]:
# Mean listeners of the ranked artists, computed separately per country.
(
    media_listeners_DE,
    media_listeners_ES,
    media_listeners_FR,
    media_listeners_RUS,
    media_listeners_UK,
    media_listeners_US,
) = [
    frame["listeners"].mean()
    for frame in (
        df_artist_DE_filtered,
        df_artist_ES_filtered,
        df_artist_FR_filtered,
        df_artist_RUS_filtered,
        df_artist_UK_filtered,
        df_artist_US_filtered,
    )
]

# One line per country, in the same order as above.
print(
    f"La media de oyentes en Alemania: {media_listeners_DE}",
    f"La media de oyentes en España: {media_listeners_ES}",
    f"La media de oyentes en Francia: {media_listeners_FR}",
    f"La media de oyentes en Rusia: {media_listeners_RUS}",
    f"La media de oyentes en Reino Unido: {media_listeners_UK}",
    f"La media de oyentes en Estados Unidos: {media_listeners_US}",
    sep="\n",
)

In [None]:
# Median listeners of the ranked artists, computed separately per country.
(
    mediana_listeners_DE,
    mediana_listeners_ES,
    mediana_listeners_FR,
    mediana_listeners_RUS,
    mediana_listeners_UK,
    mediana_listeners_US,
) = [
    frame["listeners"].median()
    for frame in (
        df_artist_DE_filtered,
        df_artist_ES_filtered,
        df_artist_FR_filtered,
        df_artist_RUS_filtered,
        df_artist_UK_filtered,
        df_artist_US_filtered,
    )
]

# One line per country, in the same order as above.
print(
    f"La mediana de oyentes en Alemania: {mediana_listeners_DE}",
    f"La mediana de oyentes en España: {mediana_listeners_ES}",
    f"La mediana de oyentes en Francia: {mediana_listeners_FR}",
    f"La mediana de oyentes en Rusia: {mediana_listeners_RUS}",
    f"La mediana de oyentes en Reino Unido: {mediana_listeners_UK}",
    f"La mediana de oyentes en Estados Unidos: {mediana_listeners_US}",
    sep="\n",
)

In [None]:
# Stack the six per-country frames into one long table.
# ignore_index=True gives the result a fresh 0..n-1 index; without it each
# country's 0..n-1 labels are repeated, and duplicate labels make any later
# index-aligned operation ambiguous (or fail outright on reindexing).
df_artists_country_concat = pd.concat(
    [
        df_artist_DE_filtered,
        df_artist_ES_filtered,
        df_artist_FR_filtered,
        df_artist_RUS_filtered,
        df_artist_UK_filtered,
        df_artist_US_filtered,
    ],
    ignore_index=True,
)

df_artists_country_concat

In [None]:
# Rank every (artist, country) row of the combined table from most to
# fewest listeners.
df_artists_country_concat_sorted = df_artists_country_concat.sort_values(
    by="listeners",
    ascending=False,
)
df_artists_country_concat_sorted

In [None]:
# Bind `countries` to the sorted frame: this is a second name for the same
# DataFrame object, not a copy.
# NOTE(review): a later cell rebinds `countries` to the array of unique
# country names and no cell in between reads it, so this alias looks unused;
# confirm before removing.
countries = df_artists_country_concat_sorted

In [None]:
# Summary counts for the combined table. They are computed outside the
# f-string because reusing double quotes inside a replacement field is a
# SyntaxError on Python versions before 3.12.
total_rows = df_artists_country_concat_sorted["name"].count()  # non-null artist entries
total_countries = df_artists_country_concat_sorted["Country"].nunique()
total_artists = df_artists_country_concat_sorted["name"].nunique()
print(f"En un total de {total_rows} artistas analizados en {total_countries} paises, se encuentran {total_artists} artistas únicos.")

In [None]:
# Number of rows in which each artist name appears in the combined table,
# most frequent first.
countries_by_artist_count = (
    df_artists_country_concat_sorted
    .loc[:, "name"]
    .value_counts()
)
countries_by_artist_count

In [None]:
# Bar chart of listeners per country (seaborn aggregates the rows of each
# country into one bar).
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(
    data=df_artists_country_concat_sorted,
    x="Country",
    y="listeners",
    hue="Country",
    palette="Paired",
    legend=False,
    ax=ax,
)

ax.set_xlabel("Países")
ax.set_ylabel("Oyentes")
ax.set_title("Oyentes por País")
ax.grid(axis="y")

plt.show()

In [None]:
# Add a log(1 + listeners) column to the combined table (in place) and draw
# its distribution per country as box plots.
listeners_column = df_artists_country_concat_sorted["listeners"]
df_artists_country_concat_sorted["Oyentes_Log"] = np.log1p(listeners_column)

fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(
    data=df_artists_country_concat_sorted,
    x="Country",
    y="Oyentes_Log",
    hue="Country",
    palette="Paired",
    legend=False,
    ax=ax,
)

ax.set_xlabel("Paises")
ax.set_ylabel("Log (Oyentes)")
ax.set_title("Oyentes por País Log")
ax.grid()

plt.show()

In [None]:
# Overall mean and median of listeners across every row of the combined table.
listeners_all = df_artists_country_concat_sorted["listeners"]
media_oyentes = listeners_all.mean()
mediana_oyentes = listeners_all.median()

# Horizontal bars per artist, with the mean and median drawn as vertical
# reference lines.
fig, ax = plt.subplots(figsize=(10, 10))
ax.barh(df_artists_country_concat_sorted["name"], listeners_all, color="steelblue")

ax.axvline(x=media_oyentes, color="yellowgreen", linestyle="--", label=f"Media: {int(media_oyentes)}")
ax.axvline(x=mediana_oyentes, color="coral", linestyle="--", label=f"Mediana: {int(mediana_oyentes)}")

ax.set_xlabel("Oyentes")
ax.set_ylabel("Artista")
ax.set_title("Oyentes por Artista con Media y Mediana")
ax.legend()
ax.grid()

plt.show()

In [None]:
# |z| value at which the outlier reference lines are drawn. Kept in a single
# constant so the drawn lines and their legend labels cannot drift apart
# (the labels previously said 3 while the lines were drawn at 2.5).
Z_OUTLIER_THRESHOLD = 2.5

# Standardise listeners across the whole combined table and store the
# result as a new column (in place).
df_artists_country_concat_sorted["Z_Score"] = zscore(df_artists_country_concat_sorted["listeners"])

plt.figure(figsize = (10, 10))
sns.barplot(y = df_artists_country_concat_sorted["name"], 
            x = df_artists_country_concat_sorted["Z_Score"], 
            hue = df_artists_country_concat_sorted["name"], 
            legend = False, palette = "Spectral")

plt.axvline(x = Z_OUTLIER_THRESHOLD, color = "coral", linestyle = "--", label = f"Outlier (>{Z_OUTLIER_THRESHOLD})")
plt.axvline(x = -Z_OUTLIER_THRESHOLD, color = "coral", linestyle = "--", label = f"Outlier (<-{Z_OUTLIER_THRESHOLD})")
plt.axvline(x = 0, color = "yellowgreen", linestyle = "--", label = "Media (Z-score = 0)")

# The x axis shows the standardised value, not the raw listener count.
plt.xlabel("Z-score de oyentes")
plt.ylabel("Artistas")
plt.title("Oyentes por Artista con Z-score")
plt.grid(axis = "x")
plt.legend()

plt.show()

In [None]:
# Distinct country labels, in order of first appearance in the sorted table.
countries = df_artists_country_concat_sorted["Country"].unique()

# 2 x 3 grid of panels, flattened so panels can be addressed by position.
fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(18, 10))
axes = axes.flatten()

# One panel per country: listeners of each artist ranked in that country.
for i in range(len(countries)):
    country = countries[i]
    is_country = df_artists_country_concat_sorted["Country"] == country
    df_country = df_artists_country_concat_sorted.loc[is_country]

    panel = axes[i]
    sns.barplot(
        data=df_country,
        x="listeners",
        y="name",
        hue="Country",
        palette="viridis",
        legend=False,
        ax=panel,
    )
    panel.set_title(f"Oyentes por artista en {country}")
    panel.set_xlabel("Oyentes")
    panel.set_ylabel("Artistas")

plt.tight_layout()
plt.show()

In [None]:
# Sum the listeners of every row that shares an artist name: one row per artist.
df_total_listeners_by_artist = (
    df_artists_country_concat_sorted
    .groupby("name", as_index=False)["listeners"]
    .sum()
)

fig, ax = plt.subplots(figsize=(10, 10))
sns.barplot(
    data=df_total_listeners_by_artist,
    x="listeners",
    y="name",
    hue="name",
    palette="Spectral",
    legend=False,
    ax=ax,
)

ax.set_xlabel("Oyentes Totales")
ax.set_ylabel("Artistas")
ax.set_title("Oyentes Totales por Artista")
ax.grid(axis="x")

plt.show()

In [None]:
# Listeners summed per (artist, country) pair, then restricted to the pairs
# above five million listeners.
df_total_listeners_by_artist = (
    df_artists_country_concat_sorted
    .groupby(["name", "Country"], as_index=False)["listeners"]
    .sum()
)
above_5m = df_total_listeners_by_artist["listeners"] > 5e6
df_most_popular = df_total_listeners_by_artist.loc[above_5m]

# Bubble chart: one marker per (country, artist), sized by listeners.
fig, ax = plt.subplots(figsize=(10, 8))
sns.scatterplot(
    data=df_most_popular,
    x="Country",
    y="name",
    size="listeners",
    sizes=(100, 1000),
    alpha=0.6,
    hue="Country",
    palette="Paired",
    ax=ax,
)

ax.set_xlabel("Paises")
ax.set_ylabel("Artistas")
ax.set_title("Artistas por país con mas de 5M de oyentes")
ax.grid()
ax.legend(title="País")

plt.show()

In [None]:
# Listeners summed per (artist, country) pair, then restricted to the pairs
# above five million listeners.
df_total_listeners_by_artist = (
    df_artists_country_concat_sorted
    .groupby(["name", "Country"], as_index=False)["listeners"]
    .sum()
)
above_5m = df_total_listeners_by_artist["listeners"] > 5e6
df_most_popular = df_total_listeners_by_artist.loc[above_5m]

# Violin plot of the listener distribution of those pairs, per country.
fig, ax = plt.subplots(figsize=(10, 8))
sns.violinplot(
    data=df_most_popular,
    x="Country",
    y="listeners",
    hue="Country",
    palette="Paired",
    legend=False,
    ax=ax,
)

# Chart customisation
ax.set_xlabel("Países")
ax.set_ylabel("Oyentes")
ax.set_title("Distribución de oyentes por país para artistas con más de 5M de oyentes")
ax.grid()

plt.show()

In [None]:
# df_total_listeners_by_artist = df_artists_country_concat_sorted.groupby(["name", "Country"], as_index = False)["listeners"].sum()
# df_most_popular = df_total_listeners_by_artist[df_total_listeners_by_artist["listeners"] > 6e6]

# plt.figure(figsize = (8, 6))
# sns.scatterplot(x = "listeners", y = "name", hue = "Country", size = "listeners", sizes = (100, 1000), alpha = 0.7, palette = "Paired", data = df_most_popular)

# plt.xlabel("Oyentes")
# plt.ylabel("Artistas")
# plt.title("Artistas más populares: Oyentes por País")

# plt.show()

In [None]:
# df_artists_country_concat_sorted["name_num"] = pd.factorize(df_artists_country_concat_sorted["name"])[0]

# plt.figure(figsize = (8, 6))
# sns.kdeplot(
#     x = df_artists_country_concat_sorted["listeners"], 
#     y = df_artists_country_concat_sorted["name_num"], 
#     hue = df_artists_country_concat_sorted["Country"], 
#     fill = True, alpha = 0.5, palette = "Paired", clip = ((0, None), (0, None))
# )

# plt.xlabel("Número de Oyentes")
# plt.ylabel("Artista (Codificado)")
# plt.title("Distribución KDE de Oyentes por Artista y País")
# plt.grid()

# plt.show()