# Exploration des données de Kronyx67

### Importation

In [52]:
import pandas as pd
import plotly.express as px
import numpy as np

### Chargement des données

In [36]:
# Read the listening-history CSV export into the notebook-wide DataFrame.
df = pd.read_csv(filepath_or_buffer="../data/Gautier.csv")

# Quick preview: the first five rows are rendered as the cell output.
df.head(n=5)

Unnamed: 0,uts,utc_time,artist,artist_mbid,album,album_mbid,track,track_mbid
0,1756792590,"02 Sep 2025, 05:56",eyeto8,,Boy Meets the World,,Sound of Time Passing,
1,1756792013,"02 Sep 2025, 05:46",eyeto8,,The Old Electronic Eldorado,,The Shift,
2,1756791598,"02 Sep 2025, 05:39",eyeto8,,The Old Electronic Eldorado,,Audentity,
3,1756791295,"02 Sep 2025, 05:34",eyeto8,,The Old Electronic Eldorado,,Urgent,
4,1756705616,"01 Sep 2025, 05:46",eyeto8,,The Old Electronic Eldorado,,Lost Disco Thrills,


### Ajout de colonnes supplémentaires

In [37]:
def _add_time_columns(frame):
    """Parse ``utc_time`` and add calendar columns derived from it, in place.

    ``utc_time`` is parsed with the explicit format ``"%d %b %Y, %H:%M"``
    (e.g. ``"02 Sep 2025, 05:56"``) and overwritten with the parsed
    timestamps. The columns ``date``, ``year``, ``hour``, ``weekday`` and
    ``week`` are then appended, in that order.
    """
    stamps = pd.to_datetime(frame["utc_time"], format="%d %b %Y, %H:%M")
    frame["utc_time"] = stamps

    frame["date"] = stamps.dt.date
    frame["year"] = stamps.dt.year
    frame["hour"] = stamps.dt.hour
    frame["weekday"] = stamps.dt.day_name()
    # ISO week number; near a year boundary it can belong to the adjacent ISO year.
    frame["week"] = stamps.dt.isocalendar().week


_add_time_columns(df)

In [38]:
# Display the first rows again to check the parsed timestamps and derived columns.
df.head()

Unnamed: 0,uts,utc_time,artist,artist_mbid,album,album_mbid,track,track_mbid,date,year,hour,weekday,week
0,1756792590,2025-09-02 05:56:00,eyeto8,,Boy Meets the World,,Sound of Time Passing,,2025-09-02,2025,5,Tuesday,36
1,1756792013,2025-09-02 05:46:00,eyeto8,,The Old Electronic Eldorado,,The Shift,,2025-09-02,2025,5,Tuesday,36
2,1756791598,2025-09-02 05:39:00,eyeto8,,The Old Electronic Eldorado,,Audentity,,2025-09-02,2025,5,Tuesday,36
3,1756791295,2025-09-02 05:34:00,eyeto8,,The Old Electronic Eldorado,,Urgent,,2025-09-02,2025,5,Tuesday,36
4,1756705616,2025-09-01 05:46:00,eyeto8,,The Old Electronic Eldorado,,Lost Disco Thrills,,2025-09-01,2025,5,Monday,36


In [62]:
# --- Option: pick the calendar year to display ---
year_selected = 2025
df_year = df[df["year"] == year_selected]

# Row order of the heatmap, Monday first. These match the English names
# written to the "weekday" column by `dt.day_name()`.
jours_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

# --- Count plays per (ISO year, ISO week, weekday) ---
# An ISO week number belongs to an ISO year, which differs from the calendar
# year around 1 January (e.g. 29-31 Dec 2025 are week 1 of ISO year 2026).
# Grouping by the week number alone would merge those days into the week-1
# column of January, so the ISO year is kept as part of the key.
iso_calendar = df_year["utc_time"].dt.isocalendar()
heatmap_data = (
    df_year.assign(iso_year=iso_calendar["year"], week=iso_calendar["week"])
    .groupby(['iso_year', 'week', 'weekday'])
    .size()
    .reset_index(name='plays')
)

# Pivot into a weekday x week matrix. Combinations without any play are
# absent from the counts, so they come out as NaN and render as empty cells.
matrix = (
    heatmap_data
    .pivot(index='weekday', columns=['iso_year', 'week'], values='plays')
    .reindex(jours_order)
    .sort_index(axis=1)  # chronological: (ISO year, week)
)

# --- Axis labels ---
# Weeks that belong to a neighbouring ISO year get the year appended so that
# every label stays unique.
semaines = [
    f"W{week}" if iso_year == year_selected else f"W{week} ({iso_year})"
    for iso_year, week in matrix.columns
]
jours = matrix.index.tolist()

# --- Build the heatmap ---
fig = px.imshow(
    matrix.to_numpy(),
    x=semaines,
    y=jours,
    text_auto=True,
    color_continuous_scale='Turbo'
)

fig.update_traces(
    hovertemplate="%{y}, %{x}: %{z}<extra></extra>"
)

# Anchor the colour scale at 0. px.imshow colours the heatmap through a shared
# coloraxis, and a lower bound only takes effect when the upper bound is set
# as well, so both are given explicitly.
peak_plays = heatmap_data["plays"].max()
if pd.notna(peak_plays):
    fig.update_coloraxes(cmin=0, cmax=int(peak_plays))

# White plot and paper backgrounds so that empty (NaN) cells show as white.
fig.update_layout(
    title=f"Activité hebdomadaire par semaine - Année {year_selected}",
    xaxis_title="Semaine de l'année",
    yaxis_title="Jour de la semaine",
    plot_bgcolor='white',   # background of the plotting area
    paper_bgcolor='white'   # background around the plot
)

fig.show()

In [None]:
# Number of plays per calendar day. Built in this cell because no earlier cell
# defines `daily_counts`, so the notebook would otherwise fail on a fresh kernel.
daily_counts = df.groupby("date").size().reset_index(name="plays")

fig = px.line(daily_counts, 
              x="date", 
              y="plays", 
              title="Écoutes par jour", 
              markers=True)

# Highest number of plays recorded on a single day
max_plays = daily_counts["plays"].max()
line_y = max_plays * 1.1  # 10% of vertical headroom above the peak

# Dashed reference line at the peak, labelled with its value.
fig.add_hline(
    y=max_plays,
    line_dash="dash",
    line_color="red",
    annotation_text=f"Top nombre d'écoute: {max_plays}",
    annotation_position="top left"
)

# Extend the y range so the annotation above the line is not clipped
fig.update_yaxes(range=[0, line_y])

fig.show()

In [None]:

# Plays per artist, most played first (value_counts sorts in descending order).
artist_counts = df["artist"].value_counts().reset_index()
artist_counts.columns = ["artist", "plays"]

fig = px.bar(
    artist_counts.head(10),
    x="plays",
    y="artist",
    orientation="h",
    title="Top 10 artistes écoutés",
    color="plays",
    color_continuous_scale="viridis"
)

# Horizontal bars are laid out bottom-to-top in data order, which would put
# the most-played artist at the bottom. Reverse the axis so rank 1 is on top.
fig.update_yaxes(autorange="reversed")

fig.show()


In [21]:
# Plays per hour of the day. Hours without any play are missing from the
# groupby result; reindex to all 24 hours with 0 so the area chart drops to
# zero there instead of interpolating between the neighbouring hours.
hourly = (
    df.groupby("hour")
    .size()
    .reindex(range(24), fill_value=0)
    .rename_axis("hour")
    .reset_index(name="plays")
)
px.area(hourly, x="hour", y="plays", title="Répartition des écoutes par heure", markers=True)
