In [2]:
import networkx as nx
import pandas as pd
from collections import defaultdict
import numpy as np

from Skeletons.Lab_AGX_202223_S1_skeleton import get_track_data

In [19]:
def retrieve_bidirectional_edges(g: nx.DiGraph, out_filename: str) -> nx.Graph:
    """
    Build an undirected graph that keeps only the reciprocated edges of a digraph.

    A directed edge (u, v) is kept only when the reverse edge (v, u) is also present
    in ``g``. The endpoints of the kept edges carry over their node attributes; edge
    attributes are not copied. The result is always written to ``out_filename`` in
    GraphML format.

    :param g: a networkx digraph.
    :param out_filename: name of the file that will be saved.
    :return: a networkx undirected graph.
    """
    # Every directed edge whose opposite direction exists as well.
    mutual_edges = [(src, dst) for src, dst in g.edges() if g.has_edge(dst, src)]

    # Attribute dictionaries of all nodes that take part in a mutual edge.
    kept_attributes = {}
    for src, dst in mutual_edges:
        kept_attributes[dst] = g.nodes[dst]
        kept_attributes[src] = g.nodes[src]

    result = nx.Graph()
    result.add_edges_from(mutual_edges)  # also creates the endpoint nodes
    nx.set_node_attributes(result, kept_attributes)
    nx.write_graphml_lxml(result, out_filename)
    return result

def prune_low_degree_nodes(g: nx.Graph, min_degree: int, out_filename: str) -> nx.Graph:
    """
    Drop the nodes whose degree is below ``min_degree``, then drop isolated nodes.

    The graph is modified in place (the same object is returned). Degrees are
    evaluated once for the threshold pass and once more for the zero-degree pass;
    the pruning is not repeated until convergence. The pruned graph is always
    written to ``out_filename`` in GraphML format.

    :param g: a networkx graph.
    :param min_degree: lower bound value for the degree.
    :param out_filename: name of the file that will be saved.
    :return: a pruned networkx graph.
    """
    def _drop_where(predicate):
        # Collect first, remove afterwards: the degree view must not change mid-iteration.
        doomed = [node for node, deg in g.degree() if predicate(deg)]
        g.remove_nodes_from(doomed)

    # Pass 1: nodes under the requested degree threshold.
    _drop_where(lambda deg: deg < min_degree)
    # Pass 2: nodes left without any neighbour by pass 1.
    _drop_where(lambda deg: deg == 0)

    nx.write_graphml_lxml(g, out_filename)
    return g


def prune_low_weight_edges(g: nx.Graph, min_weight=None, min_percentile=None, out_filename: str = None) -> nx.Graph:
    """
    Prune a graph by removing edges with weight < threshold, then drop isolated nodes.

    The threshold is given either directly (``min_weight``) or as a percentile of the
    edge-weight distribution (``min_percentile``). Exactly one of the two must be
    provided. The graph is modified in place and the same object is returned.

    :param g: a weighted networkx graph (every edge must have a "weight" attribute).
    :param min_weight: lower bound value for the weight.
    :param min_percentile: lower bound percentile for the weight, on the 0-100 scale
        used by ``numpy.percentile``.
    :param out_filename: name of the file that will be saved; nothing is written when None.
    :return: a pruned networkx graph.
    :raises ValueError: if neither or both of ``min_weight`` / ``min_percentile`` are given,
        or if ``min_percentile`` lies outside [0, 100].
    """
    # Exactly one of the two thresholds has to be supplied.
    if (min_weight is None) == (min_percentile is None):
        raise ValueError("Specify exactly one of min_weight or min_percentile.")

    if min_percentile is not None:
        weights = [data["weight"] for _, _, data in g.edges(data=True)]
        if weights:
            # Translate the percentile into an absolute weight threshold.
            min_weight = np.percentile(weights, min_percentile)

    # With a percentile on an edgeless graph there is no threshold and nothing to prune.
    if min_weight is not None:
        light_edges = [(u, v) for u, v, data in g.edges(data=True) if data["weight"] < min_weight]
        g.remove_edges_from(light_edges)

    # Nodes that lost all their edges are removed as well.
    isolated_nodes = [node for node, degree in g.degree() if degree == 0]
    g.remove_nodes_from(isolated_nodes)

    # Saving is optional because out_filename defaults to None.
    if out_filename is not None:
        nx.write_graphml_lxml(g, out_filename)
    return g

def compute_mean_audio_features(tracks_df: pd.DataFrame, out_filename) -> pd.DataFrame:
    """
    Compute the mean audio features for tracks of the same artist.

    Tracks are grouped by artist id; the artist name reported for an id is the first
    one seen for it. Means are rounded to two decimals. The input dataframe is NOT
    modified, so the function can be called repeatedly on the same frame.

    :param tracks_df: tracks dataframe (with audio features per each track). The
        ``artists`` and ``audio_feature`` columns hold dicts, or the string
        representation of dicts as produced when such a frame is stored as CSV.
    :param out_filename: path (or writable text buffer) where the result is saved as
        CSV, without the index.
    :return: artist dataframe (with mean audio features per each artist), indexed by
        artist id, with columns ``artist_id``, ``artist_name`` and one column per
        audio feature. Rows follow the order of first appearance of each artist.
    """
    import ast  # local import: only needed to parse the stringified dict columns

    feature_names = ['danceability', 'energy', 'loudness',
                     'speechiness', 'acousticness', 'instrumentalness',
                     'liveness', 'valence', 'tempo']

    def _as_dict(cell):
        # The CSV stores dicts as text. literal_eval replaces the former eval():
        # it only accepts Python literals and cannot execute arbitrary code.
        # Cells that are already parsed are passed through untouched.
        return ast.literal_eval(cell) if isinstance(cell, str) else cell

    # Parse into local lists so the caller's dataframe is left as it was.
    artists = [_as_dict(cell) for cell in tracks_df["artists"]]
    audio = [_as_dict(cell) for cell in tracks_df["audio_feature"]]

    if not artists:
        # No tracks: return an empty table that still has the expected columns.
        artist_df = pd.DataFrame(columns=["artist_id", "artist_name"] + feature_names)
    else:
        artist_ids = pd.Series([artist["id"] for artist in artists], name="artist_id")

        # First name seen for each artist id.
        display_name = {}
        for artist in artists:
            display_name.setdefault(artist["id"], artist.get("name"))

        # One row per track, restricted to the features of interest; any other key
        # of the audio-feature dict (numeric or not) is ignored.
        per_track = pd.DataFrame(audio, columns=feature_names).astype(float)
        means = per_track.groupby(artist_ids, sort=False).mean().round(2)

        artist_df = means.copy()
        artist_df.insert(0, "artist_name", [display_name[key] for key in means.index])
        artist_df.insert(0, "artist_id", list(means.index))
        artist_df.index.name = None

    artist_df.to_csv(out_filename, index=False)
    return artist_df


def cosine_similiarity(vector1, vector2):
    """
    Cosine similarity between two 1-D vectors.

    :param vector1: first vector (numpy array or any sequence of numbers).
    :param vector2: second vector, same length as ``vector1``.
    :return: dot(v1, v2) / (|v1| * |v2|); 0.0 when either vector has zero norm,
        since the angle is undefined in that case and the plain formula would yield NaN.
    """
    first = np.asarray(vector1, dtype=float)
    second = np.asarray(vector2, dtype=float)
    norm_product = np.linalg.norm(first) * np.linalg.norm(second)
    if norm_product == 0:
        return 0.0
    return np.dot(first, second) / norm_product

def euclidean_similiarity(vector1, vector2):
    """
    Similarity derived from the Euclidean distance: 1 / (1 + distance).

    Identical vectors score 1; the score tends to 0 as the vectors move apart.

    :param vector1: first vector (numpy array or any sequence of numbers).
    :param vector2: second vector, same length as ``vector1``.
    :return: a value in (0, 1].
    """
    # Coerce to arrays so plain Python lists can be subtracted element-wise too.
    difference = np.asarray(vector1, dtype=float) - np.asarray(vector2, dtype=float)
    return 1 / (1 + np.linalg.norm(difference))

def create_similarity_graph(artist_audio_features_df: pd.DataFrame, similarity: str, out_filename: str = None) -> \
        nx.Graph:
    """
    Create a similarity graph from a dataframe with mean audio features per artist.

    Every artist becomes a node and every pair of distinct artists is joined by an
    edge whose "weight" is the similarity of their audio-feature vectors. Rows are
    matched to artists by position, so the dataframe index does not need to be the
    artist id.

    :param artist_audio_features_df: dataframe with mean audio features per artist.
        Must contain an ``artist_id`` column and the nine audio-feature columns.
    :param similarity: the name of the similarity metric to use. "cosine" selects the
        cosine similarity; any other value selects the Euclidean similarity.
    :param out_filename: name of the file that will be saved (GraphML); nothing is
        written when None.
    :return: a networkx graph with the similarity between artists as edge weights.
    """
    from itertools import combinations

    audio_features = ['danceability', 'energy', 'loudness',
                      'speechiness', 'acousticness', 'instrumentalness', 'liveness',
                      'valence', 'tempo']

    # Resolve the metric once. The result of each comparison is stored under a
    # different name so the `similarity` argument is never overwritten.
    metric = cosine_similiarity if similarity == 'cosine' else euclidean_similiarity

    artist_ids = artist_audio_features_df['artist_id'].tolist()
    vectors = artist_audio_features_df[audio_features].to_numpy(dtype=float)

    graph = nx.Graph()
    graph.add_nodes_from(artist_ids)

    # Both metrics are symmetric, so each unordered pair is evaluated once.
    for (row_u, u), (row_v, v) in combinations(enumerate(artist_ids), 2):
        if u == v:
            continue  # repeated artist id: do not create a self-loop
        # Plain Python floats keep the edge attribute serialisable to GraphML.
        weight = float(metric(vectors[row_u], vectors[row_v]))
        graph.add_edge(u, v, weight=weight)

    if out_filename is not None:
        nx.write_graphml(graph, out_filename)
    return graph

In [47]:
# Load a per-track table from CSV; the 'song_id' column becomes the index.
# NOTE(review): absolute, user-specific path -- this cell only runs on the author's machine.
Graph_D_pandas = pd.read_csv('/Users/nbiescas/Desktop/Graphs/Graphs_data/Pandas_Graph_D.csv', index_col='song_id')

In [48]:
# Aggregate the track table into per-artist mean audio features (also saved as CSV),
# then build the artist similarity graph, requesting the 'cosine' metric.
Mean_audio_features_data = compute_mean_audio_features(Graph_D_pandas, 'Mean_Audio_Features_graph_D.csv')
result = create_similarity_graph(Mean_audio_features_data, similarity = 'cosine')

In [20]:
# Load the track table stored in D.csv (default integer index).
# NOTE(review): absolute, user-specific path -- this cell only runs on the author's machine.
artists_audio_feat  = pd.read_csv('/Users/nbiescas/Desktop/Graphs/Graphs_data/D.csv')

In [21]:
# Display the loaded track table (dict-valued columns are still stored as strings here).
artists_audio_feat

Unnamed: 0,song_id,song_data,audio_feature,albums,artists
0,6S4WM9LW6oGkAtBFePffh2,"{'id': '6S4WM9LW6oGkAtBFePffh2', 'duration_ms'...","{'danceability': 0.731, 'energy': 0.663, 'loud...","{'id': '1GG6U2SSJPHO6XsFiBzxYv', 'name': 'CALL...","{'id': '68kEuyFKyqrdQQLLsmiatm', 'name': 'Vinc..."
1,0lqAn1YfFVQ3SdoF7tRZO2,"{'id': '0lqAn1YfFVQ3SdoF7tRZO2', 'duration_ms'...","{'danceability': 0.593, 'energy': 0.947, 'loud...","{'id': '2G549zeda2XNICgLmU0pNW', 'name': 'RAMO...","{'id': '68kEuyFKyqrdQQLLsmiatm', 'name': 'Vinc..."
2,4uQ7wYsuL0DryknoDc11Hk,"{'id': '4uQ7wYsuL0DryknoDc11Hk', 'duration_ms'...","{'danceability': 0.925, 'energy': 0.549, 'loud...","{'id': '4Csoz10NhNJOrCTUoPBdUD', 'name': ""Summ...","{'id': '68kEuyFKyqrdQQLLsmiatm', 'name': 'Vinc..."
3,7uX3gufAoBVqKVoD3dBLLD,"{'id': '7uX3gufAoBVqKVoD3dBLLD', 'duration_ms'...","{'danceability': 0.776, 'energy': 0.379, 'loud...","{'id': '6eQpKhYCtvD0TwXQVHsgC2', 'name': '&bur...","{'id': '68kEuyFKyqrdQQLLsmiatm', 'name': 'Vinc..."
4,5OFEJMirg4NPz7G1zgXDYu,"{'id': '5OFEJMirg4NPz7G1zgXDYu', 'duration_ms'...","{'danceability': 0.859, 'energy': 0.423, 'loud...","{'id': '2suR5CCbtL2Wq8ShFo8rFr', 'name': 'Vinc...","{'id': '68kEuyFKyqrdQQLLsmiatm', 'name': 'Vinc..."
...,...,...,...,...,...
3638,5lrxyg4P8SjFHZMcMMugJD,"{'id': '5lrxyg4P8SjFHZMcMMugJD', 'duration_ms'...","{'danceability': 0.832, 'energy': 0.687, 'loud...","{'id': '7vdWpL6rL5GsgnCDhWR1yT', 'name': 'H-To...","{'id': '4VIcSZlwRxIF6gPgtwSrbq', 'name': 'Dat ..."
3639,5ZQJcNKPMrBNLgj31nyTVM,"{'id': '5ZQJcNKPMrBNLgj31nyTVM', 'duration_ms'...","{'danceability': 0.868, 'energy': 0.602, 'loud...","{'id': '6JoX770NDDxQ8HK01lADMG', 'name': 'Thoe...","{'id': '4VIcSZlwRxIF6gPgtwSrbq', 'name': 'Dat ..."
3640,7sKoOlt8LsH1hEVglkhyKR,"{'id': '7sKoOlt8LsH1hEVglkhyKR', 'duration_ms'...","{'danceability': 0.549, 'energy': 0.743, 'loud...","{'id': '1ydQb2XD9O5z4afVIM6SjD', 'name': 'Pict...","{'id': '4VIcSZlwRxIF6gPgtwSrbq', 'name': 'Dat ..."
3641,75cR9rCCz7GuSDlbtmWiBg,"{'id': '75cR9rCCz7GuSDlbtmWiBg', 'duration_ms'...","{'danceability': 0.667, 'energy': 0.686, 'loud...","{'id': '1ydQb2XD9O5z4afVIM6SjD', 'name': 'Pict...","{'id': '4VIcSZlwRxIF6gPgtwSrbq', 'name': 'Dat ..."


In [22]:
# Per-artist mean audio features for the D.csv track table; the result is also saved as CSV.
# Note: this rebinds Mean_audio_features_data, replacing the value computed in an earlier cell.
Mean_audio_features_data = compute_mean_audio_features(artists_audio_feat, 'All_Mean_Audio_Features_graph.csv')

In [23]:
# Display the per-artist mean audio features (one row per artist, indexed by artist id).
Mean_audio_features_data

Unnamed: 0,artist_id,artist_name,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo
68kEuyFKyqrdQQLLsmiatm,68kEuyFKyqrdQQLLsmiatm,Vince Staples,0.75,0.62,-7.31,0.23,0.34,0.01,0.32,0.42,117.90
0VRj0yCOv2FXJNP47XQnx5,0VRj0yCOv2FXJNP47XQnx5,Quavo,0.76,0.65,-5.36,0.16,0.12,0.00,0.15,0.42,143.10
6extd4B6hl8VTmnlhpl2bY,6extd4B6hl8VTmnlhpl2bY,Huncho Jack,0.78,0.70,-3.87,0.10,0.03,0.00,0.18,0.30,144.23
7v9bFXCdSMA2o3gS4nvp0F,7v9bFXCdSMA2o3gS4nvp0F,Pimp C,0.74,0.72,-5.84,0.22,0.05,0.00,0.22,0.52,106.04
46SHBwWsqBkxI7EeeBEQG7,46SHBwWsqBkxI7EeeBEQG7,Kodak Black,0.80,0.61,-6.29,0.13,0.15,0.00,0.16,0.45,120.63
...,...,...,...,...,...,...,...,...,...,...,...
09My0T8N3gNusVZtEiprhc,09My0T8N3gNusVZtEiprhc,Yungstar,0.73,0.56,-8.41,0.26,0.27,0.00,0.16,0.53,111.06
6aLoJJxz7MV2iZ423S8tJC,6aLoJJxz7MV2iZ423S8tJC,Doe Boy,0.77,0.60,-6.89,0.34,0.06,0.00,0.17,0.44,145.75
2OqENqJFXPORP4BUGnu2Qq,2OqENqJFXPORP4BUGnu2Qq,Youngbloodz,0.80,0.64,-7.61,0.28,0.03,0.00,0.24,0.53,109.79
0Ug6OsFk2B0mLvXrpmpRs6,0Ug6OsFk2B0mLvXrpmpRs6,Brown Boy,0.70,0.57,-6.92,0.11,0.35,0.00,0.17,0.61,109.19
