In [None]:
import json
import os
import time

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import scipy
import seaborn as sns
from scipy import stats
from scipy.stats import zscore

# Last.fm credentials. An environment variable takes precedence so the key can be
# rotated or replaced without editing the notebook; the literals are only fallbacks
# that keep the notebook runnable as before.
# NOTE(review): these values are committed in the notebook — consider revoking them
# and supplying LASTFM_API_KEY / LASTFM_USER_AGENT from the environment only.
API_KEY = os.environ.get('LASTFM_API_KEY', '1a9ea75b5f92dfda381d817fc00e5458')
USER_AGENT = os.environ.get('LASTFM_USER_AGENT', '808e0a8bbaf9d30bf181edb60c742824')
LIMIT = 20  # Number of results requested per API call

# HTTP headers sent with every request made by the fetch helpers below.
headers = {
    'user-agent': USER_AGENT
}

##sacar top artists
def get_top_artists():
    """Fetch the global top-artists chart from the Last.fm API.

    Sends a ``chart.getTopArtists`` request built from the module-level
    ``API_KEY``, ``LIMIT`` and ``headers``, then sleeps one second before
    returning.

    Returns:
        pandas.DataFrame: ``data['artists']['artist']`` flattened with
        ``pd.json_normalize`` when the response status is 200; an empty
        DataFrame (after printing the status code) otherwise.
    """
    params = {
        'method': 'chart.getTopArtists',
        'api_key': API_KEY,
        'format': 'json',
        'limit': LIMIT
    }
    # A timeout keeps a stalled connection from hanging the notebook indefinitely.
    response = requests.get('https://ws.audioscrobbler.com/2.0/', headers=headers, params=params, timeout=10)
    if response.status_code == 200:
        data = response.json()
        df = pd.json_normalize(data['artists']['artist'])
    else:
        print(f"Error en la solicitud de top artistas globales: {response.status_code}")
        # Without this assignment `df` is unbound on the error path and the
        # `return` below raises UnboundLocalError instead of reporting the failure.
        df = pd.DataFrame()
    time.sleep(1)
    return df

##sacar top artistas por pais
def get_top_artists_by_country(country):
    """Fetch the top artists for one country from the Last.fm API.

    Sends a ``geo.getTopArtists`` request built from the module-level
    ``API_KEY``, ``LIMIT`` and ``headers``, then sleeps one second before
    returning.

    Args:
        country: Value passed as the ``country`` request parameter.

    Returns:
        pandas.DataFrame: ``data['topartists']['artist']`` flattened with
        ``pd.json_normalize`` when the response status is 200; an empty
        DataFrame (after printing the status code) otherwise.
    """
    params = {
        'method': 'geo.getTopArtists',
        'country': country,
        'api_key': API_KEY,
        'format': 'json',
        'limit': LIMIT
    }
    # A timeout keeps a stalled connection from hanging the notebook indefinitely.
    response = requests.get('https://ws.audioscrobbler.com/2.0/', headers=headers, params=params, timeout=10)
    if response.status_code == 200:
        data = response.json()
        df = pd.json_normalize(data['topartists']['artist'])
    else:
        print(f"Error en la solicitud de top artistas por país: {response.status_code}")
        # Without this assignment `df` is unbound on the error path and the
        # `return` below raises UnboundLocalError instead of reporting the failure.
        df = pd.DataFrame()
    time.sleep(1)
    return df

##sacar top canciones por pais
def get_top_tracks_by_country(country):
    """Fetch the top tracks for one country from the Last.fm API.

    Sends a ``geo.getTopTracks`` request built from the module-level
    ``API_KEY``, ``LIMIT`` and ``headers``, then sleeps one second before
    returning.

    Args:
        country: Value passed as the ``country`` request parameter.

    Returns:
        pandas.DataFrame: ``data['tracks']['track']`` flattened with
        ``pd.json_normalize`` when the response status is 200; an empty
        DataFrame (after printing the status code) otherwise.
    """
    params = {
        'method': 'geo.getTopTracks',
        'country': country,
        'api_key': API_KEY,
        'format': 'json',
        'limit': LIMIT
    }
    # A timeout keeps a stalled connection from hanging the notebook indefinitely.
    response = requests.get('https://ws.audioscrobbler.com/2.0/', headers=headers, params=params, timeout=10)
    if response.status_code == 200:
        data = response.json()
        df = pd.json_normalize(data['tracks']['track'])
    else:
        print(f"Error en la solicitud de top canciones por país: {response.status_code}")
        # Without this assignment `df` is unbound on the error path and the
        # `return` below raises UnboundLocalError instead of reporting the failure.
        df = pd.DataFrame()
    time.sleep(1)
    return df


##sacar top canciones de x artista
def get_top_tracks_by_artist(artist):
    """Fetch the top tracks of one artist from the Last.fm API.

    Sends an ``artist.getTopTracks`` request built from the module-level
    ``API_KEY``, ``LIMIT`` and ``headers``, then sleeps one second before
    returning.

    Args:
        artist: Value passed as the ``artist`` request parameter.

    Returns:
        pandas.DataFrame: ``data['toptracks']['track']`` flattened with
        ``pd.json_normalize`` when the response status is 200; an empty
        DataFrame (after printing the status code) otherwise.
    """
    params = {
        'method': 'artist.getTopTracks',
        'artist': artist,
        'api_key': API_KEY,
        'format': 'json',
        'limit': LIMIT
    }
    # A timeout keeps a stalled connection from hanging the notebook indefinitely.
    response = requests.get('https://ws.audioscrobbler.com/2.0/', headers=headers, params=params, timeout=10)
    if response.status_code == 200:
        data = response.json()
        df = pd.json_normalize(data['toptracks']['track'])
    else:
        # The request is for tracks, so the message names tracks (it used to say albums).
        print(f"Error en la solicitud de top canciones del artista: {response.status_code}")
        # Without this assignment `df` is unbound on the error path and the
        # `return` below raises UnboundLocalError instead of reporting the failure.
        df = pd.DataFrame()
    time.sleep(1)
    return df
    
##limpia y ordena dataframe
def limpiar_y_ordenar(df, columnas_a_eliminar=None):
    """Drop unwanted columns and order the rows by listener count.

    Args:
        df: DataFrame to clean. It is not modified; a new frame is returned.
        columnas_a_eliminar: Optional list of column names to drop. Every name
            must exist in ``df`` (``DataFrame.drop`` raises ``KeyError`` otherwise).

    Returns:
        pandas.DataFrame: The remaining columns, sorted by ``listeners`` in
        descending numeric order when that column exists, with a fresh
        0..n-1 index. Column dtypes are left untouched.
    """
    if columnas_a_eliminar:
        df = df.drop(columns=columnas_a_eliminar)
    if 'listeners' in df.columns:
        # The notebook calls this before casting `listeners` to float, so the
        # column may still hold strings; comparing those directly would order
        # "999" above "1000000". Sorting on a numeric view gives the intended
        # ranking without changing the stored values.
        df = df.sort_values(
            by='listeners',
            ascending=False,
            key=lambda columna: pd.to_numeric(columna, errors='coerce'),
        )
    df = df.reset_index(drop=True)
    return df

In [None]:
# Download the global top-artists chart (one API request) and display the raw frame.
df_top_artist = get_top_artists()
df_top_artist

In [None]:
# Drop the metadata columns that the analysis does not use; limpiar_y_ordenar also
# orders the rows by `listeners` and resets the index.
df_top_artist_filtered = limpiar_y_ordenar(df_top_artist, ["mbid", "url", "streamable", "image"])
df_top_artist_filtered

In [None]:
# Inspect column dtypes and non-null counts before any type conversion.
df_top_artist_filtered.info()

In [None]:
# Cast both count columns to float so they can be averaged and plotted numerically,
# then re-check the dtypes.
df_top_artist_filtered[["playcount", "listeners"]] = df_top_artist_filtered[["playcount", "listeners"]].astype(float)
df_top_artist_filtered.info()

In [None]:
# Mean play count across the global top artists.
media_playcount = df_top_artist_filtered["playcount"].mean()
media_playcount

In [None]:
# Mean listener count across the global top artists.
media_listeners = df_top_artist_filtered["listeners"].mean()
media_listeners

In [None]:
# Median play count across the global top artists.
mediana_playcount = df_top_artist_filtered["playcount"].median()
mediana_playcount

In [None]:
# Median listener count across the global top artists.
mediana_listeners = df_top_artist_filtered["listeners"].median()
mediana_listeners

In [None]:
# Play counts per artist, with dashed reference lines for the median (red) and mean (green).
ax = sns.barplot(data=df_top_artist_filtered, x="name", y="playcount")
ax.axhline(mediana_playcount, linestyle='--', color='red', label=f'Mediana: {mediana_playcount}')
ax.axhline(media_playcount, linestyle='--', color='green', label=f'Media: {media_playcount}')
plt.xticks(rotation=90)  # artist names are long; rotate so they do not overlap
ax.legend()
plt.show()

In [None]:
# Listeners per artist, with dashed reference lines for the median (red) and mean (green).
ax = sns.barplot(data=df_top_artist_filtered, x="name", y="listeners")
ax.axhline(mediana_listeners, linestyle='--', color='red', label=f'Mediana: {mediana_listeners}')
ax.axhline(media_listeners, linestyle='--', color='green', label=f'Media: {media_listeners}')
plt.xticks(rotation=90)  # artist names are long; rotate so they do not overlap
ax.legend()
ax.set_title("Oyentes por Artista con Media y Mediana")
ax.set_xlabel("Artistas")
ax.set_ylabel("Oyentes")
plt.show()

In [None]:
# Play counts against listeners for the global top artists.
# NOTE(review): a bar plot places each distinct `listeners` value as its own category,
# so horizontal spacing is presumably not proportional to the values — confirm that
# this is the intended view rather than a scatter plot.
ax = sns.barplot(data=df_top_artist_filtered, x="listeners", y="playcount")
ax.set_title("Reproducciones por Oyentes")
ax.set_xlabel("Oyentes")
ax.set_ylabel("Reproducciones")
plt.xticks(rotation=90)
plt.show()

In [None]:
# Pearson correlation coefficient between listeners and play counts of the global top
# artists; index [0] keeps the coefficient and discards the rest of the result.
listeners= df_top_artist_filtered["listeners"]
playcount= df_top_artist_filtered["playcount"]
stats.pearsonr(listeners, playcount)[0]

In [None]:
# Listeners (x) against play counts (y) for the global top artists.
# NOTE: sns.set changes the global seaborn theme, so every figure drawn after this
# cell also uses the 'whitegrid' style.
sns.set(style='whitegrid')
sns.lineplot(data=df_top_artist_filtered, x='listeners', y='playcount', marker='o')

plt.title("Relación entre Reproducciones y Oyentes")
# Axis labels follow the plotted columns: x is `listeners`, y is `playcount`
# (they were previously swapped).
plt.xlabel('Oyentes')
plt.ylabel('Reproducciones')
plt.tight_layout()
plt.show()
# Author's conclusion: an artist's number of listeners does not directly drive the number of plays.

In [None]:
# Top tracks per country (LIMIT rows each); one API request per country, in this order.
(
    df_tracks_Spain,
    df_tracks_france,
    df_tracks_germany,
    df_tracks_uk,
    df_tracks_usa,
    df_tracks_Russia,
) = map(
    get_top_tracks_by_country,
    ("Spain", "France", "Germany", "United Kingdom", "United States", "Russian Federation"),
)

df_tracks_Spain

In [None]:
# Columns of the per-country track frames that the analysis does not use.
columnas_track_descartadas = [
    "mbid", "url", "image",
    "streamable.#text", "streamable.fulltrack",
    "artist.mbid", "artist.url",
    "@attr.rank",
]

# Clean every country frame with the same column list.
(
    df_tracks_Spain_filtered,
    df_tracks_france_filtered,
    df_tracks_germany_filtered,
    df_tracks_uk_filtered,
    df_tracks_usa_filtered,
    df_tracks_Russia_filtered,
) = (
    limpiar_y_ordenar(tabla, columnas_track_descartadas)
    for tabla in (
        df_tracks_Spain,
        df_tracks_france,
        df_tracks_germany,
        df_tracks_uk,
        df_tracks_usa,
        df_tracks_Russia,
    )
)

df_tracks_Spain_filtered

In [None]:
# Cast duration and listeners to float in each country frame (modified in place).
for tabla in (
    df_tracks_Spain_filtered,
    df_tracks_france_filtered,
    df_tracks_germany_filtered,
    df_tracks_uk_filtered,
    df_tracks_usa_filtered,
    df_tracks_Russia_filtered,
):
    tabla[["duration", "listeners"]] = tabla[["duration", "listeners"]].astype(float)

df_tracks_Spain_filtered.info()

In [None]:
# Tag every row with its country so the frames can be told apart after concatenation.
for tabla, nombre_pais in (
    (df_tracks_Spain_filtered, 'Spain'),
    (df_tracks_france_filtered, "France"),
    (df_tracks_germany_filtered, "Germany"),
    (df_tracks_uk_filtered, "United Kingdom"),
    (df_tracks_usa_filtered, "United States"),
    (df_tracks_Russia_filtered, "Russian Federation"),
):
    tabla['pais'] = nombre_pais

df_tracks_Spain_filtered

In [None]:
# Stack the six country frames row-wise; each frame keeps its own index values.
tablas_tracks_por_pais = [
    df_tracks_Spain_filtered,
    df_tracks_france_filtered,
    df_tracks_germany_filtered,
    df_tracks_uk_filtered,
    df_tracks_usa_filtered,
    df_tracks_Russia_filtered,
]
df_tracks_concat = pd.concat(tablas_tracks_por_pais, axis=0)
df_tracks_concat

In [None]:
# Order the combined track table by listeners, highest first.
df_tracks_concat_sorted = df_tracks_concat.sort_values("listeners", ascending = False)
df_tracks_concat_sorted

In [None]:
# Mean listeners of each country's top tracks.
(
    media_Spain,
    media_france,
    media_germany,
    media_uk,
    media_usa,
    media_Russia,
) = (
    tabla["listeners"].mean()
    for tabla in (
        df_tracks_Spain_filtered,
        df_tracks_france_filtered,
        df_tracks_germany_filtered,
        df_tracks_uk_filtered,
        df_tracks_usa_filtered,
        df_tracks_Russia_filtered,
    )
)

print(f"La media de oyentes en Spain es {media_Spain}")
print(f"La media de oyentes en France es {media_france}")
print(f"La media de oyentes en Germany es {media_germany}")
print(f"La media de oyentes en United Kingdom es {media_uk}")
print(f"La media de oyentes en United States es {media_usa}")
print(f"La media de oyentes en Russia es {media_Russia}")

In [None]:
# Median listeners of each country's top tracks.
(
    mediana_Spain,
    mediana_france,
    mediana_germany,
    mediana_uk,
    mediana_usa,
    mediana_Russia,
) = (
    tabla["listeners"].median()
    for tabla in (
        df_tracks_Spain_filtered,
        df_tracks_france_filtered,
        df_tracks_germany_filtered,
        df_tracks_uk_filtered,
        df_tracks_usa_filtered,
        df_tracks_Russia_filtered,
    )
)

print(f"La mediana de oyentes en Spain es {mediana_Spain}")
print(f"La mediana de oyentes en France es {mediana_france}")
print(f"La mediana de oyentes en Germany es {mediana_germany}")
print(f"La mediana de oyentes en United Kingdom es {mediana_uk}")
print(f"La mediana de oyentes en United States es {mediana_usa}")
print(f"La mediana de oyentes en Russia es {mediana_Russia}")

In [None]:
# Number of rows per artist in the combined country table; a track that appears in
# several countries counts once per country.
canciones_por_artista = df_tracks_concat_sorted['artist.name'].value_counts()
canciones_por_artista 

In [None]:
# Bar chart of how many chart entries each artist has across the six countries.
plt.figure(figsize=(10, 6))
ax = sns.barplot(x=canciones_por_artista.index, y=canciones_por_artista.values)
ax.set_xlabel("Artistas")
ax.set_ylabel("Canciones en el TOP")
ax.set_title("Canciones por artista")
plt.xticks(rotation=90, fontsize=8)
plt.show()

In [None]:
# One panel per country showing the listeners of its top tracks.
# The array of countries gets its own name so the loop variable `pais` no longer
# overwrites the collection it is iterating over.
paises = df_tracks_concat_sorted['pais'].unique()

# 2 x 3 grid: one axes per country; flatten so panels can be addressed by position.
fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(18, 10))
axes = axes.flatten()

for i, pais in enumerate(paises):
    datos_pais = df_tracks_concat_sorted[df_tracks_concat_sorted['pais'] == pais]
    sns.barplot(x='listeners', y='name', data=datos_pais, ax=axes[i])
    axes[i].set_title(f'Oyentes de {pais}')
    axes[i].set_xlabel('Oyentes')
    axes[i].set_ylabel('Canción')

plt.tight_layout()
plt.show()

In [None]:
# Durations that are not > 0 are treated as missing and replaced by the mean duration.
# The mean is computed once, and only over the valid (> 0) durations, so the
# placeholders being replaced do not pull the fill value down.
duracion = df_tracks_concat_sorted["duration"]
duracion_media_valida = duracion[duracion > 0].mean()
df_duration_media = duracion.map(lambda x: x if x > 0 else duracion_media_valida)

# Bivariate density of track duration against listeners.
sns.kdeplot(x = df_duration_media,
            y = df_tracks_concat_sorted["listeners"])

plt.xlabel("Duración")
plt.ylabel("Oyentes")
plt.title("Relación entre Oyentes y duración de canción")
plt.xticks(rotation=90)
plt.show()

In [None]:
# Number of chart entries per artist, computed separately for each country.
(
    artista_pais_esp,
    artista_pais_fr,
    artista_pais_ger,
    artista_pais_uk,
    artista_pais_usa,
    artista_pais_Russia,
) = (
    tabla['artist.name'].value_counts()
    for tabla in (
        df_tracks_Spain_filtered,
        df_tracks_france_filtered,
        df_tracks_germany_filtered,
        df_tracks_uk_filtered,
        df_tracks_usa_filtered,
        df_tracks_Russia_filtered,
    )
)

print(f"Spain \n {artista_pais_esp}")
print(f"France \n {artista_pais_fr}")
print(f"Germany \n {artista_pais_ger}")
print(f"United Kingdom \n {artista_pais_uk}")
print(f"United States \n {artista_pais_usa}")
print(f"Russia \n {artista_pais_Russia}")

In [None]:
# Histogram (with KDE overlay) of listeners over all country chart entries.
sns.histplot(df_tracks_concat_sorted["listeners"], color = "green", kde = True)

plt.xlabel("Oyentes")
plt.ylabel("Frecuencia")
plt.title("Frecuencia de Oyentes")
plt.show()

# Sample skewness of the same column, shown as the cell output.
stats.skew(df_tracks_concat_sorted["listeners"])
# Author's reading: negative skew (tail to the left).
# NOTE(review): this depends on the sign of the value above and on the data fetched
# at run time; confirm it against the displayed result.

In [None]:
# Top tracks for six hand-picked artists; one API request per artist, in this order.
(
    df_lana,
    df_ic3peak,
    df_billie,
    df_sia,
    df_estopa,
    df_badbunny,
) = map(
    get_top_tracks_by_artist,
    ("Lana del Rey", "Ic3peak", "Billie Eilish", "Sia", "Estopa", "Bad Bunny"),
)

In [None]:
# Columns dropped for every artist, plus the MusicBrainz id columns that are dropped
# only for some of them (the per-artist lists differ in the original notebook).
columnas_comunes = ["url", "streamable", "image", "artist.url", "@attr.rank"]
columnas_con_mbid = columnas_comunes + ["mbid", "artist.mbid"]

df_lana_filtered = limpiar_y_ordenar(df_lana, columnas_con_mbid)
df_ic3peak_filtered = limpiar_y_ordenar(df_ic3peak, columnas_comunes + ["artist.mbid"])
df_billie_filtered = limpiar_y_ordenar(df_billie, columnas_comunes)
df_sia_filtered = limpiar_y_ordenar(df_sia, columnas_con_mbid)
df_estopa_filtered = limpiar_y_ordenar(df_estopa, columnas_con_mbid)
df_badbunny_filtered = limpiar_y_ordenar(df_badbunny, columnas_comunes)

In [None]:
# Cast playcount and listeners to float in each artist frame (modified in place).
for tabla in (
    df_lana_filtered,
    df_ic3peak_filtered,
    df_billie_filtered,
    df_sia_filtered,
    df_estopa_filtered,
    df_badbunny_filtered,
):
    tabla[["playcount", "listeners"]] = tabla[["playcount", "listeners"]].astype(float)

In [None]:
# Combine the six artist frames into one table, then rank all songs by listeners.
tablas_por_artista = [
    df_lana_filtered,
    df_ic3peak_filtered,
    df_billie_filtered,
    df_sia_filtered,
    df_estopa_filtered,
    df_badbunny_filtered,
]
df_concatenated = pd.concat(tablas_por_artista, ignore_index=True)
df_top_canciones = (
    df_concatenated
    .sort_values(by="listeners", ascending=False)
    .reset_index(drop=True)
)

In [None]:
# Total plays and total listeners per artist, each ordered from highest to lowest.
canciones_agrupadas = df_top_canciones.groupby('artist.name')

reproducciones_por_artista = canciones_agrupadas['playcount'].sum().sort_values(ascending=False)
oyentes_por_artista = canciones_agrupadas['listeners'].sum().sort_values(ascending=False)

In [None]:
# Horizontal bar chart of total plays per artist.
ax = sns.barplot(x=reproducciones_por_artista.values, y=reproducciones_por_artista.index)
ax.set_title('Reproducciones por Artista')
ax.set_xlabel('Reproducciones')
ax.set_ylabel('Artista')
plt.show()

# Horizontal bar chart of total listeners per artist.
ax = sns.barplot(x=oyentes_por_artista.values, y=oyentes_por_artista.index)
ax.set_title('Oyentes por Artista')
ax.set_xlabel('Oyentes')
ax.set_ylabel('Artista')
plt.show()

In [None]:
# The ten songs with the most plays across the six artists, coloured by artist.
top_canciones = df_top_canciones.sort_values(by='playcount', ascending=False).head(10)

ax = sns.barplot(data=top_canciones, x='playcount', y='name', hue='artist.name')
ax.set_title('Top 10 Canciones por Reproducciones')
ax.set_xlabel('Reproducciones')
ax.set_ylabel('Canción')
ax.legend(title='Artista')
plt.show()


In [None]:
# Pearson correlation between listeners and plays over the songs of the six artists.
# This rebinds the notebook-level `listeners` / `playcount` names used in an earlier cell.
listeners= df_top_canciones["listeners"]
playcount= df_top_canciones["playcount"]
stats.pearsonr(listeners, playcount)[0]

In [None]:
# Listeners against plays, one point per song, coloured by artist.
ax = sns.scatterplot(data=df_top_canciones, x='listeners', y='playcount', hue='artist.name')
ax.set_title('Oyentes vs Reproducciones por Canción')
ax.set_xlabel('Oyentes')
ax.set_ylabel('Reproducciones')
ax.legend(title='Artista')
plt.show()

In [None]:
# Average number of plays per listener for every song (new column, added in place).
df_top_canciones['Reproducciones_por_Oyente'] = df_top_canciones['playcount'].div(df_top_canciones['listeners'])

# Plays-per-listener ratio against total plays, coloured by artist.
ax = sns.scatterplot(data=df_top_canciones, x='Reproducciones_por_Oyente', y='playcount', hue='artist.name')
ax.set_title('Reproducciones por Oyente vs Reproducciones Totales')
ax.set_xlabel('Reproducciones por Oyente')
ax.set_ylabel('Reproducciones Totales')
ax.legend(title='Artista')
plt.show()

In [None]:
# Median of the plays-per-listener ratio, drawn as a reference line below.
mediana = df_top_canciones['Reproducciones_por_Oyente'].median()

# Interquartile range of the ratio.
Q1 = df_top_canciones['Reproducciones_por_Oyente'].quantile(0.25)
Q3 = df_top_canciones['Reproducciones_por_Oyente'].quantile(0.75)
IQR = Q3 - Q1

# Outlier fences: values more than 1.5 IQR below Q1 or above Q3.
limite_inferior = Q1 - 1.5 * IQR
limite_superior = Q3 + 1.5 * IQR

# Label every song as 'Outlier' or 'Normal' (new column, added in place).
df_top_canciones['Es_Outlier'] = df_top_canciones['Reproducciones_por_Oyente'].apply(
    lambda x: 'Outlier' if x < limite_inferior or x > limite_superior else 'Normal')

# Stacked histogram of the ratio, split by the outlier label.
sns.histplot(data=df_top_canciones, x='Reproducciones_por_Oyente', hue='Es_Outlier', bins=30, palette={'Normal': 'skyblue', 'Outlier': 'orange'}, multiple='stack')

# Reference lines. The fences reuse the values computed above and are labelled by
# what they are; the previous labels were swapped (the Q1-based fence was tagged
# "Q3" and vice versa).
plt.axvline(mediana, color='green', linestyle='--', label=f'Mediana: {mediana:.2f}')
plt.axvline(x = limite_inferior, color = "coral", linestyle = "--", label = "Límite inferior (Q1 - 1.5 IQR)")
plt.axvline(x = limite_superior, color = "coral", linestyle = "--", label = "Límite superior (Q3 + 1.5 IQR)")


# Titles, axis labels and legend.
# NOTE(review): plt.legend() may replace the hue legend that histplot created,
# leaving only the three reference lines in the legend — confirm in the rendered figure.
plt.title('Distribución de Reproducciones por Oyente con Outliers')
plt.xlabel('Reproducciones por Oyente')
plt.ylabel('Frecuencia')
plt.legend()
plt.show()

In [None]:
# One panel per artist showing the plays of each of their songs.
artistas = df_top_canciones['artist.name'].unique()

# 2 x 3 grid flattened so panels can be addressed by position.
fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(18, 10))
axes = axes.flatten()

for i, artista in enumerate(artistas):
    panel = axes[i]
    es_del_artista = df_top_canciones['artist.name'] == artista
    datos_artista = df_top_canciones[es_del_artista]
    sns.barplot(data=datos_artista, x='playcount', y='name', ax=panel)
    panel.set_title(f'Reproducciones de {artista}')
    panel.set_xlabel('Reproducciones')
    panel.set_ylabel('Canción')

plt.tight_layout()
plt.show()

In [None]:
# Histograms (with KDE overlay) of plays and of listeners, one figure each.
for columna, titulo, etiqueta_x in (
    ('playcount', 'Distribución de Reproducciones', 'Reproducciones'),
    ('listeners', 'Distribución de Oyentes', 'Oyentes'),
):
    ax = sns.histplot(df_top_canciones[columna], bins=20, kde=True)
    ax.set_title(titulo)
    ax.set_xlabel(etiqueta_x)
    ax.set_ylabel('Frecuencia')
    plt.show()

In [None]:
# log(1 + x) transforms of plays and listeners (new columns, added in place).
df_top_canciones['Reproducciones_Log'] = np.log1p(df_top_canciones['playcount'])
df_top_canciones['Oyentes_Log'] = np.log1p(df_top_canciones['listeners'])

# Distribution of log-transformed plays. Title and x-label now say the values are
# log-scaled; before, they were identical to those of the raw-value histograms.
sns.histplot(df_top_canciones['Reproducciones_Log'], bins=20, kde=True)
plt.title('Distribución de Reproducciones (log)')
plt.xlabel('log(1 + Reproducciones)')
plt.ylabel('Frecuencia')
plt.show()

# Distribution of log-transformed listeners.
sns.histplot(df_top_canciones['Oyentes_Log'], bins=20, kde=True)
plt.title('Distribución de Oyentes (log)')
plt.xlabel('log(1 + Oyentes)')
plt.ylabel('Frecuencia')
plt.show()

In [None]:
# Top artists per country; one API request per country, in this order.
(
    df_artist_DE,
    df_artist_ES,
    df_artist_FR,
    df_artist_RUS,
    df_artist_UK,
    df_artist_US,
) = map(
    get_top_artists_by_country,
    ("Germany", "Spain", "France", "Russian Federation", "United Kingdom", "United States"),
)

df_artist_DE

In [None]:
# Columns of the per-country artist frames that the analysis does not use.
columnas_artist_descartadas = ["mbid", "url", "streamable", "image"]

# Clean every country frame with the same column list.
(
    df_artist_DE_filtered,
    df_artist_ES_filtered,
    df_artist_FR_filtered,
    df_artist_RUS_filtered,
    df_artist_UK_filtered,
    df_artist_US_filtered,
) = (
    limpiar_y_ordenar(tabla, columnas_artist_descartadas)
    for tabla in (
        df_artist_DE,
        df_artist_ES,
        df_artist_FR,
        df_artist_RUS,
        df_artist_UK,
        df_artist_US,
    )
)

df_artist_DE_filtered

In [None]:
# Tag every row with its country so the frames can be told apart after concatenation.
for tabla, nombre_pais in (
    (df_artist_DE_filtered, "Germany"),
    (df_artist_ES_filtered, "Spain"),
    (df_artist_FR_filtered, "France"),
    (df_artist_RUS_filtered, "Russia"),
    (df_artist_UK_filtered, "United Kingdom"),
    (df_artist_US_filtered, "United States"),
):
    tabla["Country"] = nombre_pais

df_artist_DE_filtered.info()
df_artist_DE_filtered

In [None]:
# Cast listeners to float in each country frame (modified in place).
for tabla in (
    df_artist_DE_filtered,
    df_artist_ES_filtered,
    df_artist_FR_filtered,
    df_artist_RUS_filtered,
    df_artist_UK_filtered,
    df_artist_US_filtered,
):
    tabla["listeners"] = tabla["listeners"].astype(float)

df_artist_DE_filtered.info()
df_artist_DE_filtered

In [None]:
# Mean listeners of each country's top artists.
(
    media_listeners_DE,
    media_listeners_ES,
    media_listeners_FR,
    media_listeners_RUS,
    media_listeners_UK,
    media_listeners_US,
) = (
    tabla["listeners"].mean()
    for tabla in (
        df_artist_DE_filtered,
        df_artist_ES_filtered,
        df_artist_FR_filtered,
        df_artist_RUS_filtered,
        df_artist_UK_filtered,
        df_artist_US_filtered,
    )
)

print(f"La media de oyentes en Alemania: {media_listeners_DE}")
print(f"La media de oyentes en Spain: {media_listeners_ES}")
print(f"La media de oyentes en France: {media_listeners_FR}")
print(f"La media de oyentes en Russia: {media_listeners_RUS}")
print(f"La media de oyentes en Reino Unido: {media_listeners_UK}")
print(f"La media de oyentes en Estados Unidos: {media_listeners_US}")

In [None]:
# Median listeners of each country's top artists.
(
    mediana_listeners_DE,
    mediana_listeners_ES,
    mediana_listeners_FR,
    mediana_listeners_RUS,
    mediana_listeners_UK,
    mediana_listeners_US,
) = (
    tabla["listeners"].median()
    for tabla in (
        df_artist_DE_filtered,
        df_artist_ES_filtered,
        df_artist_FR_filtered,
        df_artist_RUS_filtered,
        df_artist_UK_filtered,
        df_artist_US_filtered,
    )
)

print(f"La mediana de oyentes en Alemania: {mediana_listeners_DE}")
print(f"La mediana de oyentes en Spain: {mediana_listeners_ES}")
print(f"La mediana de oyentes en France: {mediana_listeners_FR}")
print(f"La mediana de oyentes en Russia: {mediana_listeners_RUS}")
print(f"La mediana de oyentes en Reino Unido: {mediana_listeners_UK}")
print(f"La mediana de oyentes en Estados Unidos: {mediana_listeners_US}")

In [None]:
# Stack the six country frames row-wise; each frame keeps its own index values.
tablas_artist_por_pais = [
    df_artist_DE_filtered,
    df_artist_ES_filtered,
    df_artist_FR_filtered,
    df_artist_RUS_filtered,
    df_artist_UK_filtered,
    df_artist_US_filtered,
]
df_artists_country_concat = pd.concat(tablas_artist_por_pais)

df_artists_country_concat

In [None]:
# Order the combined artist table by listeners, highest first.
df_artists_country_concat_sorted = df_artists_country_concat.sort_values("listeners", ascending = False)
df_artists_country_concat_sorted

In [None]:
# Number of rows per artist name in the combined table, i.e. in how many of the
# six country charts each artist appears.
countries_by_artist_count = df_artists_country_concat_sorted["name"].value_counts()
countries_by_artist_count

In [None]:
# Bar per country of the listeners of its top artists (barplot aggregates the rows
# of each country into a single bar).
plt.figure(figsize = (10, 6))
sns.barplot(x = "Country", y = "listeners", hue = "Country", data = df_artists_country_concat_sorted, palette = "Paired", legend = False)

plt.xlabel("Países")
plt.ylabel("Oyentes")
plt.title("Oyentes por País")
# Pass visible=True explicitly: without it plt.grid() *toggles* the grid, and with
# the 'whitegrid' theme set earlier in the notebook that switches the y grid off.
plt.grid(True, axis = "y")

plt.show()

In [None]:
# log(1 + listeners) column (added in place) to compress the range before plotting.
df_artists_country_concat_sorted["Oyentes_Log"] = np.log1p(df_artists_country_concat_sorted["listeners"])

# Box plot of log-listeners per country.
plt.figure(figsize = (10, 6))
sns.boxplot(x = "Country", y = "Oyentes_Log", hue = "Country", legend = False, data = df_artists_country_concat_sorted, palette = "Paired")

plt.xlabel("Paises")
plt.ylabel("Log (Oyentes)")
plt.title("Oyentes por País Log")
# Pass visible=True explicitly: without it plt.grid() *toggles* each axis' grid, so
# under the 'whitegrid' theme set earlier in the notebook it can hide the grid.
plt.grid(True)

plt.show()

In [None]:
# Mean and median listeners over all rows of the combined country table.
media_oyentes = df_artists_country_concat_sorted["listeners"].mean()
mediana_oyentes = df_artists_country_concat_sorted["listeners"].median()

# Horizontal bar per artist name.
# NOTE(review): an artist present in several country charts has several rows with the
# same name; those bars are presumably drawn on top of each other — confirm whether
# the table should be de-duplicated or aggregated first.
plt.figure(figsize =(10, 10))
plt.barh(df_artists_country_concat_sorted["name"], df_artists_country_concat_sorted["listeners"], color="steelblue")

# Dashed reference lines for the mean and the median.
plt.axvline(x = media_oyentes, color = "yellowgreen", linestyle = "--", label = f"Media: {int(media_oyentes)}")
plt.axvline(x = mediana_oyentes, color = "coral", linestyle = "--", label = f"Mediana: {int(mediana_oyentes)}")

plt.xlabel("Oyentes")
plt.ylabel("Artista")
plt.title("Oyentes por Artista con Media y Mediana")
plt.legend()
# Pass visible=True explicitly: without it plt.grid() *toggles* the grid, which hides
# it under the 'whitegrid' theme set earlier in the notebook.
plt.grid(True)

plt.show()

In [None]:
# Z-score of listeners over all rows of the combined country table (new column, in place).
df_artists_country_concat_sorted["Z_Score"] = zscore(df_artists_country_concat_sorted["listeners"])

plt.figure(figsize = (10, 10))
sns.barplot(y = df_artists_country_concat_sorted["name"], 
            x = df_artists_country_concat_sorted["Z_Score"], 
            hue = df_artists_country_concat_sorted["name"], 
            legend = False, palette = "Spectral")

# One threshold drives both the line positions and their labels so they cannot
# disagree; previously the lines sat at +/-2.5 while the labels said 3.
# NOTE(review): 2.5 keeps the lines where they were drawn; switch to 3 if that was
# the intended cut-off.
umbral_z = 2.5
plt.axvline(x = umbral_z, color = "coral", linestyle = "--", label = f"Outlier (>{umbral_z})")
plt.axvline(x = -umbral_z, color = "coral", linestyle = "--", label = f"Outlier (<-{umbral_z})")
plt.axvline(x = 0, color = "yellowgreen", linestyle = "--", label = "Media (Z-score = 0)")

# The x axis shows z-scores, not raw listener counts.
plt.xlabel("Z-score de oyentes")
plt.ylabel("Artistas")
plt.title("Oyentes por Artista con Z-score")
# Pass visible=True explicitly: without it plt.grid() *toggles* the grid, which can
# hide it under the 'whitegrid' theme set earlier in the notebook.
plt.grid(True, axis = "x")
plt.legend()

plt.show()

In [None]:
# One panel per country showing the listeners of each of its top artists.
countries = df_artists_country_concat_sorted["Country"].unique()

# 2 x 3 grid flattened so panels can be addressed by position.
fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(18, 10))
axes = axes.flatten()

for i, country in enumerate(countries):
    panel = axes[i]
    es_del_pais = df_artists_country_concat_sorted["Country"] == country
    df_country = df_artists_country_concat_sorted[es_del_pais]

    sns.barplot(data=df_country, x="listeners", y="name", hue="Country", palette="viridis", legend=False, ax=panel)
    panel.set_title(f"Oyentes por artista en {country}")
    panel.set_xlabel("Oyentes")
    panel.set_ylabel("Artistas")

plt.tight_layout()
plt.show()

In [None]:
# Sum of the `listeners` values of every row sharing an artist name (one row per
# country chart the artist appears in).
df_total_listeners_by_artist = df_artists_country_concat_sorted.groupby("name", as_index = False)["listeners"].sum()

plt.figure(figsize = (10, 10))
sns.barplot(x = "listeners", y = "name", hue = "name", data = df_total_listeners_by_artist, palette = "Spectral", legend = False)

plt.xlabel("Oyentes Totales")
plt.ylabel("Artistas")
plt.title("Oyentes Totales por Artista")
# Pass visible=True explicitly: without it plt.grid() *toggles* the grid, which can
# hide it under the 'whitegrid' theme set earlier in the notebook.
plt.grid(True, axis = "x")

plt.show()

In [None]:
# Listeners per (artist, country) pair; this rebinds df_total_listeners_by_artist,
# which now has a Country column. Keep only the pairs above 5 million listeners.
df_total_listeners_by_artist = df_artists_country_concat_sorted.groupby(["name", "Country"], as_index = False)["listeners"].sum()
df_most_popular = df_total_listeners_by_artist[df_total_listeners_by_artist["listeners"] > 5e6]

# Bubble chart: one marker per (country, artist), sized by listeners.
plt.figure(figsize = (10, 8))
sns.scatterplot(x = "Country", y = "name", size = "listeners", sizes = (100, 1000), alpha = 0.6, hue = "Country", palette = "Paired", data = df_most_popular)

plt.xlabel("Paises")
plt.ylabel("Artistas")
plt.title("Artistas por país con mas de 5M de oyentes")
# Pass visible=True explicitly: without it plt.grid() *toggles* the grid, which hides
# it under the 'whitegrid' theme set earlier in the notebook.
plt.grid(True)
plt.legend(title = "País")

plt.show()

In [None]:
# Listeners per (artist, country) pair, restricted to pairs above 5 million listeners.
# Recomputed here so the cell does not depend on the previous one having run.
df_total_listeners_by_artist = df_artists_country_concat_sorted.groupby(["name", "Country"], as_index = False)["listeners"].sum()
df_most_popular = df_total_listeners_by_artist[df_total_listeners_by_artist["listeners"] > 5e6]

# Violin plot of the listener distribution per country for those artists.
plt.figure(figsize = (10, 8))
sns.violinplot(x = "Country", y = "listeners", hue = "Country", legend = False, data = df_most_popular, palette = "Paired")

plt.xlabel("Países")
plt.ylabel("Oyentes")
plt.title("Distribución de oyentes por país para artistas con más de 5M de oyentes")
# Pass visible=True explicitly: without it plt.grid() *toggles* each axis' grid, so
# under the 'whitegrid' theme set earlier in the notebook it can hide the grid.
plt.grid(True)

plt.show()