# Graph Making
This notebook builds a graph in which each node is a genre and an edge connects two genres that share at least one artist.

In [None]:
import pandas as pd

# Read the artist table from disk
data = pd.read_csv("../Data/artists_dataset.csv")

# Normalise the genres column: each entry is converted to text and every
# character that is neither a word character nor whitespace is removed
import re
genre_list = []
for raw_entry in data['genres'].tolist():
    genre_list.append(re.sub(r'[^\w\s]', '', str(raw_entry)))

# Tally how many rows carry each cleaned genre string
from collections import Counter
genre_counts = Counter(genre_list)

# Distinct genre strings (set iteration order, so effectively arbitrary)
unique_genres = list(set(genre_list))

# Frequency table ordered from most to least common; most_common() ranks by
# count in descending order and keeps first-seen order among equal counts
genre_frequency = dict(genre_counts.most_common())
sorted_genre_frequency = dict(genre_frequency)
# Get the artists for each genre
#
# The keys of sorted_genre_frequency had punctuation stripped when they were
# built, so the same normalisation is applied to the column before matching;
# comparing against the raw text would miss every genre whose original
# spelling contained punctuation. Missing values stay NaN here and are
# excluded by na=False below instead of breaking the boolean mask.
cleaned_genres = data['genres'].str.replace(r'[^\w\s]', '', regex=True)

artists_by_genre = {}

for genre in sorted_genre_frequency:
    if not genre.strip():
        # A blank key is a substring of every row and would link this
        # pseudo-genre to all artists, so it gets no artists at all.
        artists_by_genre[genre] = []
        continue
    # Plain substring test: the key is literal text, not a regex pattern.
    genre_mask = cleaned_genres.str.contains(genre, regex=False, na=False)
    genre_artists = data.loc[genre_mask, 'artist_name'].tolist()
    artists_by_genre[genre] = genre_artists

#Build the graph
%pip install networkx
import networkx as nx
import matplotlib.pyplot as plt

# Create a new graph
G = nx.Graph()

# Add nodes for each genre
for genre in artists_by_genre.keys():
    G.add_node(genre)

# Add edges between genres that share an artist
for genre1 in artists_by_genre.keys():
    for genre2 in artists_by_genre.keys():
        if genre1 != genre2:
            shared_artists = set(artists_by_genre[genre1]).intersection(artists_by_genre[genre2])
            if shared_artists:
                G.add_edge(genre1, genre2, weight=len(shared_artists))

#Build the Communities using the Louvain method

%pip install python-louvain
import community as community_louvain
import matplotlib.pyplot as plt

# compute the best partition
partition = community_louvain.best_partition(G)

# draw the graph
pos = nx.spring_layout(G)

# color the nodes according to their partition
cmap = cm.get_cmap('viridis', max(partition.values()) + 1)
nx.draw_networkx_nodes(G, pos, partition.keys(), node_size=40, cmap=cmap, node_color=list(partition.values()))
nx.draw_networkx_edges(G, pos, alpha=0.5)
plt.show()