# Relating band members to the individual artists

Demonstrate doing this using a graph, and see how fast it is.

In [11]:
import pandas as pd
import numpy as np
import networkx as nx
import requests
import pprint
from pprint import pprint as pp

## Get some data

We get a list of names which we'll pretend are "artists".
Names are easy for humans to work with/read/remember.

In [5]:
# Download the list of first names (one name per line).
# An explicit timeout is passed because requests never times out by default,
# which would leave this cell hanging if the server stops responding.
r = requests.get(
    'https://raw.githubusercontent.com/dominictarr/random-name/master/first-names.txt',
    timeout=30,
)
# Fail loudly on HTTP errors instead of parsing an error page as names.
r.raise_for_status()
# strip() removes surrounding whitespace so splitlines() yields no empty trailing entry.
names = r.text.strip().splitlines()

In [12]:
# Sanity-check the download: total number of names, then the first five entries.
print(len(names))
pp(names[:5])

4945
['Aaren', 'Aarika', 'Abagael', 'Abagail', 'Abbe']


In [90]:
def make_bands(names, n_bands, max_bandsize, seed=None):
    """Randomly combine names into bands of between 2 and max_bandsize distinct names.

    Parameters
    ----------
    names : sequence of str
        Pool of names that band members are drawn from.
    n_bands : int
        Number of bands to generate.
    max_bandsize : int
        Largest allowed band size (inclusive). Must be at least 2.
    seed : int, optional
        When given, a dedicated ``np.random.RandomState(seed)`` is used so the
        result is reproducible. When omitted, NumPy's global random state is
        used (and can still be controlled with ``np.random.seed``).

    Returns
    -------
    list of list of str
        ``n_bands`` lists, each holding between 2 and ``max_bandsize`` names
        with no name repeated inside a band.

    Raises
    ------
    ValueError
        Raised by NumPy if ``max_bandsize`` is smaller than 2, or if a drawn
        band size exceeds ``len(names)`` (members are sampled without
        replacement).
    """
    rng = np.random if seed is None else np.random.RandomState(seed)
    # randint's upper bound is exclusive; add 1 so max_bandsize itself can occur.
    band_sizes = rng.randint(2, max_bandsize + 1, (n_bands,))
    # replace=False: the same person must not appear twice in one band.
    return [rng.choice(names, size, replace=False).tolist() for size in band_sizes]


# Smoke test: generate a handful of small bands and eyeball the result
make_bands(names, n_bands=5, max_bandsize=8)

[['Joanna', 'Daniella', 'Krysta', 'Ashia', 'Ingunna', 'Norri', 'Inger'],
 ['Breanne', 'Lynette', 'Aubine', 'Bernice', 'Nancie', 'Lorita', 'Darryl'],
 ['Lissie', 'Celeste', 'Petronilla', 'Amelita', 'Angie', 'Gilly'],
 ['Reeva', 'Amaleta', 'Lucy', 'Tracie', 'Allie', 'Adda', 'Stormy'],
 ['Merissa', 'Pooh']]

In [91]:
# One row per band; the single "Bands" column holds each band's list of member names.
bands_series = pd.Series(make_bands(names, 10_000, 9), name="Bands")
df = bands_series.to_frame()

In [92]:
# Peek at the first rows: each cell in "Bands" is a list of member names.
df.head() 

Unnamed: 0,Bands
0,"[Tisha, Sukey, Timmie]"
1,"[Dion, Halette, Suzy, Caron, Mindy, Marybelle,..."
2,"[Mireille, Ludovika]"
3,"[Maisey, Ursa, Tomasina]"
4,"[Jacinta, Malory, Albertine]"


This is the base data foundation.
We can now choose to either ignore the ordering of the names within each band or preserve it: by casting the `Bands` column to `frozenset` we ignore the ordering, whereas if we were to use `tuple` instead, we would preserve it.
`frozenset` is an immutable (and thus hashable) version of a `set`, just like `tuple` is an immutable and hashable version of a `list`.

In [94]:
# Make every band hashable and order-insensitive by turning it into a frozenset
# (use tuple instead to keep the original member order).
df["Bands"] = df["Bands"].apply(frozenset)

In [95]:
# Peek at the first rows again, after the "Bands" column was converted with frozenset.
df.head() 

Unnamed: 0,Bands
0,"(Sukey, Timmie, Tisha)"
1,"(Caron, Dion, Selie, Suzy, Mindy, Marybelle, H..."
2,"(Ludovika, Mireille)"
3,"(Tomasina, Ursa, Maisey)"
4,"(Jacinta, Malory, Albertine)"


## Construct the bands and bandmembers graph

In [99]:
%%time

# Undirected graph in which every band node is connected to each of its member names.
g = nx.Graph()
g.add_edges_from((band, member) for band in df.Bands for member in band)

CPU times: user 127 ms, sys: 5.59 ms, total: 133 ms
Wall time: 133 ms


Now test it

In [100]:
# Number of bands Catriona is participating in (edges only ever connect a band to a member)
g.degree('Catriona')

13

In [101]:
# List her bands, one per line (g.neighbors returns an iterator, which the
# star-unpacking consumes directly).

print(*g.neighbors('Catriona'), sep='\n')

frozenset({'Danella', 'Chrissie', 'Sibylla', 'Drusi', 'Gerry', 'Catriona'})
frozenset({'Vonni', 'Margalo', 'Christal', 'Koralle', 'Donielle', 'Catriona', 'Michele', 'Jerrine'})
frozenset({'Phebe', 'Fara', 'Blinny', 'Sascha', 'Catriona'})
frozenset({'Trude', 'Marta', 'Roana', 'Livia', 'Zaneta', 'Catriona'})
frozenset({'Ruthanne', 'Kimberlee', 'Mariam', 'Vanny', 'Kerrin', 'Catriona'})
frozenset({'Cathee', 'Ali', 'Zsazsa', 'Margette', 'Monika', 'Edita', 'Catriona', 'Happy'})
frozenset({'Drona', 'Lucita', 'Josselyn', 'Catriona', 'Fernandina', 'Doe', 'Alisa', 'Jessica'})
frozenset({'Catriona', 'Ardine'})
frozenset({'Theresa', 'Catriona', 'Patience'})
frozenset({'Margalo', 'Ciel', 'Windy', 'Phedra', 'Catriona', 'Rozelle', 'Alleen'})
frozenset({'Anastassia', 'Christabel', 'Micaela', 'Rubina', 'Catriona', 'Allyn', 'Susannah'})
frozenset({'Brena', 'Paloma', 'Nelie', 'Kary', 'Tonya', 'Ginelle', 'Catriona'})
frozenset({'Verna', 'Fanchon', 'Gavra', 'Donelle', 'Kathe', 'Catriona', 'Elayne'})
