# 🎶 Objectif 1 : Visualisation des Collaborations Musicales

#### Rappel

Visualisation interactive du réseau des collaborations entre artistes basée sur leurs morceaux communs. L'objectif est d'explorer les connexions artistiques en mettant en évidence l'importance des artistes à travers leur nombre de streams et leurs collaborations.

### Installation des packages nécessaires

In [1]:
%pip install pandas networkx plotly dash

Note: you may need to restart the kernel to use updated packages.


### Imports et configuration

In [2]:
import pandas as pd
import networkx as nx
import plotly.graph_objects as go
from dash import Dash, dcc, html, Input, Output
import re
import numpy as np
from itertools import combinations

## Chargement et préparation des données
Cette section contient les fonctions pour charger et préparer les données des collaborations musicales.

In [11]:
# Separators between artist names: "feat.", "ft.", "&", "," and the standalone
# word "and". The word boundaries keep names that merely contain those letters
# (e.g. "Brandy", "Anderson") in one piece.
_ARTIST_SEPARATOR = re.compile(
    r'\s*(?:\bfeat\.|\bft\.|&|,|\band\b)\s*',
    flags=re.IGNORECASE,
)


def _extract_artists(artist_string):
    """Split a raw artist credit into a list of individual artist names."""
    if pd.isna(artist_string):
        # A missing credit has no artists (instead of a fake artist "nan").
        return []
    parts = _ARTIST_SEPARATOR.split(str(artist_string))
    # Drop the empty names left behind by leading/trailing/doubled separators.
    return [name.strip() for name in parts if name.strip()]


def load_and_prepare_data(file_path='dataset/dataset_filtered.csv'):
    """Load the tracks CSV and add a parsed list of artists to every row.

    Args:
        file_path: Path of the CSV file. It must provide at least the
            ``streams`` and ``artistname`` columns.

    Returns:
        A DataFrame restricted to the rows whose ``streams`` value is numeric,
        with ``streams`` converted to a numeric dtype and an extra
        ``artists_list`` column holding the individual artist names.
    """
    df = pd.read_csv(file_path)

    # Non-numeric stream counts become NaN and the matching rows are dropped.
    df['streams'] = pd.to_numeric(df['streams'], errors='coerce')
    df = df.dropna(subset=['streams'])

    df['artists_list'] = df['artistname'].apply(_extract_artists)
    return df

def build_graph(df, top_n_artists=500):
    """Build the undirected collaboration graph between the top artists.

    An artist is kept when their total streams (summed over every track they
    are credited on) reach the median of all artists' totals. Two kept artists
    are linked when they share at least one track; the edge accumulates the
    streams of all their common tracks.

    Args:
        df: DataFrame with the ``artists_list``, ``streams`` and ``track``
            columns.
        top_n_artists: Currently unused; the selection is made with the median
            threshold described above. Kept so existing callers keep working.

    Returns:
        A ``(G, artist_streams_total)`` tuple where ``G`` is a ``networkx.Graph``
        whose edges carry ``streams``, ``weight`` (same value as ``streams``)
        and ``tracks`` (list of track names), and ``artist_streams_total`` is a
        Series of total streams indexed by artist name. ``G`` is empty when no
        collaboration is found.
    """
    artist_streams_total = df.explode('artists_list').groupby('artists_list')['streams'].sum()
    stream_threshold = artist_streams_total.quantile(0.5)
    top_artists = set(artist_streams_total[artist_streams_total >= stream_threshold].index)

    G = nx.Graph()
    numeric_streams = pd.to_numeric(df['streams'], errors='coerce')
    for artists, streams, track in zip(df['artists_list'], numeric_streams, df['track']):
        if pd.isna(streams):
            continue

        # Keep each top artist once per track, in credit order: a name repeated
        # in the credit must not create a self-loop or count the track twice.
        kept = list(dict.fromkeys(a for a in artists if a in top_artists))

        for artist1, artist2 in combinations(kept, 2):
            if G.has_edge(artist1, artist2):
                edge = G[artist1][artist2]
                edge['streams'] += streams
                edge['weight'] += streams
                edge['tracks'].append(track)
            else:
                G.add_edge(artist1, artist2,
                           streams=streams,
                           weight=streams,
                           tracks=[track])

    return G, artist_streams_total

## Construction du graphe de collaborations
Cette section construit le graphe des collaborations entre artistes.

### Initialisation des données et de l'application

In [13]:
# Load the data and build the collaboration graph once, at start-up.
df = load_and_prepare_data()
G, artist_streams_total = build_graph(df)

# Create the Dash application.
app = Dash(__name__)

# Inline styles shared by the layout. Dash passes these dictionaries to React,
# so every key is written in camelCase.
styles = {
    'container': {
        'maxWidth': '1200px',
        'margin': '0 auto',
        'padding': '20px'
    },
    'header': {
        'backgroundColor': '#f8f9fa',
        'padding': '20px',
        'borderRadius': '10px',
        'marginBottom': '20px',
        'boxShadow': '0 2px 4px rgba(0,0,0,0.1)'
    },
    'controls': {
        'display': 'flex',
        'justifyContent': 'space-between',
        'alignItems': 'center',
        'marginBottom': '20px'
    }
}

### Layout de l'application

In [14]:
# Highest total stream count of any artist; it bounds the threshold slider.
peak_streams = max(artist_streams_total)

# Page header: title and one-line description.
header_section = html.Div(
    children=[
        html.H1(
            "Réseau des Collaborations entre Artistes",
            style={'textAlign': 'center', 'color': '#2c3e50'},
        ),
        html.P(
            "Explorez les collaborations musicales entre artistes et découvrez leurs connexions.",
            style={'textAlign': 'center', 'color': '#7f8c8d'},
        ),
    ],
    style=styles['header'],
)

# Artist search box, listing every node of the graph in alphabetical order.
artist_dropdown = dcc.Dropdown(
    id='artist-search',
    options=[dict(label=name, value=name) for name in sorted(G.nodes())],
    value=None,
    placeholder='Sélectionnez un artiste...',
    style={'width': '300px'},
)
search_section = html.Div(
    children=[
        html.Div(
            children=[
                html.Div(
                    children=[
                        html.Label(
                            "Rechercher un artiste:",
                            style={'fontWeight': 'bold', 'marginBottom': '5px'},
                        ),
                        artist_dropdown,
                    ],
                    style={'marginBottom': '20px'},
                ),
            ],
            style={'flex': '1'},
        ),
    ],
    style=styles['controls'],
)

# Network figure (left) next to the statistics panel (right).
network_column = html.Div(
    children=[
        dcc.Graph(
            id='network-graph',
            style={'height': '700px'},
            config={
                'scrollZoom': True,
                'displayModeBar': True,
                'modeBarButtonsToAdd': ['select2d', 'lasso2d'],
            },
        ),
    ],
    style={'flex': '3'},
)
stats_column = html.Div(
    children=[
        html.H3("Statistiques", style={'textAlign': 'center', 'color': '#2c3e50'}),
        html.Div(
            id='stats-panel',
            children=[
                html.Div(id='selected-artist-stats'),
                html.Hr(style={'margin': '20px 0'}),
                html.Div(id='general-stats'),
            ],
            style={'padding': '20px'},
        ),
    ],
    style={'flex': '1', 'backgroundColor': '#f8f9fa', 'margin': '0 0 0 20px', 'borderRadius': '10px'},
)
graph_section = html.Div(
    children=[network_column, stats_column],
    style={'display': 'flex', 'marginBottom': '20px'},
)

# Slider selecting the minimum total streams an artist needs to be displayed.
threshold_section = html.Div(
    children=[
        html.Label(
            'Seuil minimum de streams :',
            style={'fontWeight': 'bold', 'marginBottom': '10px', 'display': 'block'},
        ),
        dcc.Slider(
            id='stream-threshold-slider',
            min=0,
            max=int(peak_streams),
            value=int(peak_streams * 0.1),
            marks={
                int(tick): f"{int(tick/1e6)}M"
                for tick in np.linspace(0, int(peak_streams), 6)
            },
            tooltip={'placement': 'bottom', 'always_visible': True},
        ),
    ],
    style={'padding': '20px', 'backgroundColor': '#f8f9fa', 'borderRadius': '10px'},
)

app.layout = html.Div(
    children=[header_section, search_section, graph_section, threshold_section],
    style=styles['container'],
)

### Fonctions auxiliaires et callback

In [15]:
def adjust_positions(pos, min_distance, max_iterations=25):
    """Push apart nodes that are closer than ``min_distance``.

    Every pair of nodes closer than ``min_distance`` is moved symmetrically
    along the line joining them until it is exactly ``min_distance`` apart.
    Passes are repeated until no pair moves or ``max_iterations`` is reached,
    so the result is a best effort, not a guarantee for every pair.

    Args:
        pos: Mapping of node -> 2D coordinates (any sequence of two numbers).
            It is left untouched.
        min_distance: Minimum distance wanted between two nodes.
        max_iterations: Maximum number of relaxation passes.

    Returns:
        A new dict mapping each node to a float NumPy array of coordinates.
    """
    # Copy the coordinates: a shallow dict copy would share the arrays with
    # the caller and the in-place updates below would modify its layout.
    adjusted_pos = {node: np.array(coords, dtype=float) for node, coords in pos.items()}
    nodes = list(adjusted_pos)

    for _ in range(max_iterations):
        moved = False
        for i, node1 in enumerate(nodes):
            p1 = adjusted_pos[node1]
            for node2 in nodes[i + 1:]:
                p2 = adjusted_pos[node2]
                dx = p1[0] - p2[0]
                dy = p1[1] - p2[1]
                distance = np.hypot(dx, dy)

                if not distance < min_distance:
                    continue

                if distance > 0:
                    force = (min_distance - distance) / distance
                    move_x = dx * force * 0.5
                    move_y = dy * force * 0.5
                else:
                    # Coincident nodes have no separation direction (and would
                    # divide by zero): split them along the x axis.
                    move_x = min_distance * 0.5
                    move_y = 0.0

                p1[0] += move_x
                p1[1] += move_y
                p2[0] -= move_x
                p2[1] -= move_y
                moved = True

        if not moved:
            break

    return adjusted_pos

@app.callback(
    [
        Output('network-graph', 'figure'),
        Output('selected-artist-stats', 'children'),
        Output('general-stats', 'children')
    ],
    [
        Input('stream-threshold-slider', 'value'),
        Input('artist-search', 'value')
    ],
)
def update_visualization(threshold, selected_artist):
    """Rebuild the network figure and both statistics panels.

    Args:
        threshold: Minimum total streams an artist needs to stay in the graph
            (value of the slider).
        selected_artist: Artist chosen in the dropdown, or ``None``. When the
            artist is still in the filtered graph, only that artist and its
            direct collaborators are drawn.

    Returns:
        A ``(figure, selected_artist_stats, general_stats)`` tuple matching the
        three callback outputs. When no artist passes the threshold, an empty
        figure and two placeholder messages are returned.
    """
    # Guard against a missing slider value: treat it as "no filtering".
    if threshold is None:
        threshold = 0

    # Filter the graph: drop the artists below the stream threshold.
    G_filtered = G.copy()
    nodes_to_remove = [
        node for node in G_filtered.nodes()
        if artist_streams_total.get(node, 0) < threshold
    ]
    G_filtered.remove_nodes_from(nodes_to_remove)

    # Restrict the view to the selected artist and its direct neighbours.
    if selected_artist and selected_artist in G_filtered:
        neighbors = list(G_filtered.neighbors(selected_artist))
        G_filtered = G_filtered.subgraph([selected_artist] + neighbors)

    # Nothing left to draw.
    if len(G_filtered.nodes()) == 0:
        return go.Figure(), "Aucun artiste sélectionné", "Aucune donnée à afficher"

    # Nodes whose name is printed on the figure: the selected artist and its
    # neighbours. Computed once instead of scanning the neighbours per node.
    labelled_nodes = set()
    if selected_artist and selected_artist in G_filtered:
        labelled_nodes = set(G_filtered.neighbors(selected_artist))
        labelled_nodes.add(selected_artist)

    # Force-directed layout; fewer iterations for the selected-artist view.
    pos = nx.spring_layout(
        G_filtered,
        k=2.0 if not selected_artist else 1.5,
        iterations=100 if not selected_artist else 50,
        weight='weight'
    )

    # Spread the nodes that ended up too close to each other.
    pos = adjust_positions(pos, min_distance=0.2 if selected_artist else 0.1)

    # --- Edge traces ---
    edge_traces = []
    streams_list = [G_filtered[u][v]['streams'] for u, v in G_filtered.edges()]

    if streams_list:
        max_streams = max(streams_list)
        min_streams = min(streams_list)
        # Avoid dividing by zero when every edge has the same stream count.
        stream_range = max_streams - min_streams if max_streams != min_streams else 1

        for edge in G_filtered.edges(data=True):
            x0, y0 = pos[edge[0]]
            x1, y1 = pos[edge[1]]
            streams = edge[2]['streams']

            # Edge width and opacity grow with the shared stream count.
            relative_streams = (streams - min_streams) / stream_range
            width = 0.5 + relative_streams * 2
            color_intensity = 0.1 + relative_streams * 0.4

            # Edges touching the selected artist are drawn in red.
            if selected_artist and (edge[0] == selected_artist or edge[1] == selected_artist):
                edge_color = f'rgba(255, 0, 0, {color_intensity * 2})'
            else:
                edge_color = f'rgba(0, 0, 200, {color_intensity})'

            # Points sampled along the edge so hovering works anywhere on it.
            x_hover = np.linspace(x0, x1, 50)
            y_hover = np.linspace(y0, y1, 50)

            # Hover details: artists, streams, and up to three track names.
            tracks_info = edge[2]['tracks']
            hover_text = (
                f"{edge[0]} & {edge[1]}<br>"
                f"Streams: {streams:,.0f}<br>"
                f"Collaborations: {len(tracks_info)}<br>"
                f"Titres: {', '.join(tracks_info[:3])}"
                f"{'...' if len(tracks_info) > 3 else ''}"
            )

            # One visible line plus one wide transparent line carrying the hover.
            edge_traces.extend([
                go.Scatter(
                    x=[x0, x1, None],
                    y=[y0, y1, None],
                    line=dict(width=width, color=edge_color),
                    hoverinfo='skip',
                    mode='lines'
                ),
                go.Scatter(
                    x=x_hover,
                    y=y_hover,
                    mode='lines',
                    line=dict(width=10, color='rgba(0,0,0,0)'),
                    hoverinfo='text',
                    text=hover_text,
                    showlegend=False
                )
            ])

    # --- Node trace ---
    node_x, node_y, node_size, node_color, node_text, node_hovertext = [], [], [], [], [], []

    # Stream totals are clamped to 1 so that the logarithm is always defined.
    streams_values = [max(artist_streams_total.get(node, 1), 1) for node in G_filtered.nodes()]
    log_streams = np.log10(streams_values)
    min_log = min(log_streams)
    max_log = max(log_streams)
    size_range = 45
    min_size = 5

    for node in G_filtered.nodes():
        # Small random offset added to every node position.
        x = pos[node][0] + np.random.uniform(-0.02, 0.02)
        y = pos[node][1] + np.random.uniform(-0.02, 0.02)
        node_x.append(x)
        node_y.append(y)

        # Marker size follows the log of the streams; colour the raw streams.
        streams = max(artist_streams_total.get(node, 1), 1)
        log_size = np.log10(streams)

        # Avoid NaN when every node has the same stream count.
        if max_log > min_log:
            normalized_size = min_size + size_range * ((log_size - min_log) / (max_log - min_log)) ** 2
        else:
            normalized_size = min_size

        if node == selected_artist:
            normalized_size *= 1.5

        node_size.append(normalized_size)
        node_color.append(streams)

        node_text.append(node if node in labelled_nodes else '')

        # Hover text of the node.
        hover_text = (
            f"{'🎤 ' if node == selected_artist else ''}{node}<br>"
            f"Streams totaux: {streams:,.0f}<br>"
            f"Collaborateurs: {len(G_filtered[node])}"
        )
        node_hovertext.append(hover_text)

    node_trace = go.Scatter(
        x=node_x,
        y=node_y,
        mode='markers+text',
        text=node_text,
        textposition='top center',
        hoverinfo='text',
        hovertext=node_hovertext,
        marker=dict(
            colorscale='Viridis',
            color=node_color,
            size=node_size,
            line=dict(color='black', width=1),
            showscale=True,
            colorbar=dict(
                # Nested title form: the flat `titleside` attribute is deprecated.
                title=dict(text='Streams totaux', side='right'),
                tickformat=',.0f',
                ticksuffix=' streams',
                thickness=20
            )
        )
    )

    # --- Statistics of the selected artist ---
    selected_stats = []
    if selected_artist:
        artist_streams = artist_streams_total.get(selected_artist, 0)
        if artist_streams >= threshold:
            neighbors = list(G_filtered.neighbors(selected_artist))
            total_collabs = sum(len(G_filtered[selected_artist][neighbor]['tracks'])
                                for neighbor in neighbors)
            # Collaborator sharing the most streams with the selected artist.
            if neighbors:
                main_collaborator = max(
                    neighbors,
                    key=lambda x: G_filtered[selected_artist][x]['streams']
                )
            else:
                main_collaborator = 'Aucun'

            selected_stats = html.Div([
                html.H4(f"Statistiques de {selected_artist}"),
                html.Ul([
                    html.Li(f"Streams totaux: {artist_streams:,.0f}"),
                    html.Li(f"Nombre de collaborateurs: {len(neighbors)}"),
                    html.Li(f"Nombre total de collaborations: {total_collabs}"),
                    html.Li(f"Collaborateur principal: {main_collaborator}")
                ], style={'listStyleType': 'none', 'padding': '0'})
            ])
        else:
            # The artist was filtered out: tell the user how to bring it back.
            selected_stats = html.Div([
                html.H4(f"Statistiques de {selected_artist}"),
                html.P(f"Cet artiste a {artist_streams:,} streams. Réglez le seuil en dessous de cette valeur pour le voir.",
                       style={'color': 'red'})
            ])

    # --- Global statistics of the displayed graph ---
    mean_streams = np.mean([artist_streams_total.get(node, 0) for node in G_filtered.nodes()])
    general_stats = html.Div([
        html.H4("Statistiques globales"),
        html.Ul([
            html.Li(f"Nombre d'artistes visibles: {len(G_filtered.nodes())}"),
            html.Li(f"Nombre de collaborations: {len(G_filtered.edges())}"),
            html.Li(f"Moyenne de streams: {mean_streams:,.0f}")
        ], style={'listStyleType': 'none', 'padding': '0'})
    ])

    # --- Figure layout ---
    title_text = 'Réseau de ' + selected_artist if selected_artist else 'Réseau complet des collaborations'
    layout = go.Layout(
        title=dict(
            text=title_text,
            x=0.5,
            font=dict(color='black')
        ),
        paper_bgcolor='white',
        plot_bgcolor='white',
        showlegend=False,
        hovermode='closest',
        margin=dict(b=20, l=5, r=5, t=40),
        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False, color='black'),
        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False, color='black'),
        annotations=[
            dict(
                text="Utilisez la molette pour zoomer",
                showarrow=False,
                x=0.5,
                y=1.1,
                xref='paper',
                yref='paper',
                font=dict(color='black', size=12)
            )
        ]
    )

    return go.Figure(data=edge_traces + [node_trace], layout=layout), selected_stats, general_stats


### Lancement de l'application

In [16]:
# Start the Dash server only when this code is run as the main program.
if __name__ == '__main__':
    app.run(debug=True)