# Generate the association network

In [68]:
import pandas as pd
from helper.constantes import *
from ast import literal_eval

In [69]:
networks = pd.read_csv(cleaned_folder+"selected_chars.csv",converters={"associated_groups": literal_eval,"connection_label": literal_eval})

In [70]:
networks = networks.rename(columns={'Unnamed: 0':"id"})

In [71]:
networks_assoc = networks.loc[:,['name','associated_groups']]

In [72]:
networks_assoc.head()

Unnamed: 0,name,associated_groups
0,Aragog,[]
1,Ludovic Bagman,[British Ministry of Magic]
2,Bane,[Hogwarts School of Witchcraft and Wizardry]
3,Cuthbert Binns,[Hogwarts School of Witchcraft and Wizardry]
4,Regulus Black,"[Slytherin, Hogwarts School of Witchcraft and ..."


In [73]:
all_assoc = {}

def add_assoc_to_dico(index, list_groups, dico):
    for l in list_groups:
        if l in dico: 
            cur_set = dico[l]
        else:
            cur_set = set()
        cur_set.add(index)
        dico[l] = cur_set
networks_assoc.apply(lambda x: add_assoc_to_dico(x.name,x['associated_groups'],all_assoc),axis=1)
None

In [74]:
len(all_assoc["Hogwarts School of Witchcraft and Wizardry"])

109

In [75]:
len(all_assoc["Gryffindor"])

37

In [76]:
len(all_assoc["Slytherin"])

20

In [77]:
len(all_assoc["Hufflepuff"])

12

We can see that in the Hogwarts "association", all the students and teacher are included. This results in 109 different people, meaning that there will be $\frac{109\cdot 108}{2}$ different links in the network just for this association. This will quickly become not possible to visualise. We will therefore remove the Hogwarts school of Witchcraft and wizardry association. To get more meaningful results, we will add a bit of preprocessing about the association. We will also remove the different Hogwarts houses as they add many links and there are already explored in a previous visualisation.

The rest of the notebook will be split in the following way:
- Define the more insightful "association" and add their members. 
- Regenerate the dictionnary with the different person belonging to different associations 
- Generate and export the graph for the visualisation using NetworkX

In [78]:
def row_checking_predicate(predicate, df):
    return df[df.apply(lambda x: predicate(x),axis=1)]

## Association of interest
The associations or groups we will focus on are the following:
- Ministry of magic employee (*)
- Hogwarts staff
- Order of the Phoenix 1st generation (1st wave)
- Order of the Phoenix 2nd generate (2nd wave) 
- Weasley family (*)
- Potter family
- Death eaters (*)
- Dumbledore's Army (*)
- Gryffindor Quidditch team (*)
- House of Black 
- House of Gaunt 
- Order of Merlin (*)
- Slug club (*)
- Advanced guard (*)
- Hogwarts Headmasters

The first step we will have to do is to remove any of the association that are not in the above list. In our exploratory data anaalysis, we already started to do some cleaning (the name of the above association is copied from the exploratory data analysis results). This will become more intensive from now on. The association marked with (\*) correspond to association available in our dataset, we will therefore not double check tham. The others are handcrafted groups that will be made according to the Harry Potter fandom wiki website and wikipedia. 

In [79]:
lst = ["Advanced Guard", "Order of Merlin", "Gryffindor Quidditch team", "Dumbledore's Army", "Death Eaters", "Weasley family","British Ministry of Magic", "Potter family"]
networks_assoc['associated_groups'] = networks_assoc.apply(lambda x: [y for y in x['associated_groups'] if y in lst],axis=1)


In [80]:
def add_assoc_to_list(row, name):
    row.append(name)
    return row 

In [81]:
def add_association_to_people(people_list, df, association_name):
    lower_people_list = [x.lower() for x in people_list]
    people_in_df = row_checking_predicate(lambda x: x['name'].lower() in lower_people_list, df)
    print(f"Number of matching people in the dataframe: {len(people_in_df)}")
    df['associated_groups'] = df.apply(lambda x: add_assoc_to_list(x['associated_groups'], association_name)  if x['name'].lower() in lower_people_list else x['associated_groups'],axis=1)
    people_with_new_name = row_checking_predicate(lambda x: association_name in x['associated_groups'], df)
    print(f"Number of people with new association name: {len(people_with_new_name)}")
    print(f"Consistent numbers ? {len(people_with_new_name)==len(people_in_df)}")
    return df

We will start with Hogwarts teaching staff (based on this [wikipedia page](https://en.wikipedia.org/wiki/Hogwarts_staff))

In [82]:
hogwarts_staff = [
    "Albus Dumbledore", # Transfiguration then Headmaster
    "Alastor Moody", #Defense against the Dark Arts (DADA)
    "Argus Filch",#Caretaker
    "Aurora Sinistra",#Astronomy
    "Charity Burbage", # Muggle studies
    "Cuthbert Binns", # history of magic
    "Dolores Umbridge", #DADA
    "Filius Flitwick",  # Charms
    "Firenze", #divination
    "Gilderoy Lockhart", # DADA
    "Horace Slughorn", # Potions
    "Irma Pince", #Librarian
    "Minerva McGonagall",# transfiguration, headmistress
    "Pomona Sprout", #herbology
    "Poppy Pomfrey", # matron
    "Quirinus Quirrell", #DADA
    "Remus Lupin", # DADA
    "Rolanda Hooch",# Flying
    "Rubeus Hagrid",# Care of magical creatures
    "Septima Vector", # arithmancy
    "Severus Snape", # Potions, DADA, Headmasters
    "Silvanus Kettleburn", # care of magical creatures
    "Sybill Trelawney", # Divination
    "Wilhelmina Grubbly-Plank", #Care of magical creatures
]

In [83]:
networks_assoc = add_association_to_people(hogwarts_staff, networks_assoc, "Hogwarts staff")

Number of matching people in the dataframe: 22
Number of people with new association name: 22
Consistent numbers ? True


In [84]:
# based on the following list: https://harrypotter.fandom.com/wiki/Hogwarts_Headmaster
hogwarts_headmasters = [
    "Albus Dumbledore",
    "Dolores Umbridge",
    "Minerva McGonagall",
    "Severus Snape",
    "Armando Dippet",
    "Phineas Black"
]

In [85]:
networks_assoc = add_association_to_people(hogwarts_headmasters, networks_assoc, "Hogwarts headmasters")

Number of matching people in the dataframe: 4
Number of people with new association name: 4
Consistent numbers ? True


In [86]:
gaunt_house = [
    "Salazar Slytherin",
    "Lord Voldemort",
    "Marvolo Gaunt",
    "Morfin Gaunt",
    "Merope Gaunt"
]

In [87]:
networks_assoc = add_association_to_people(gaunt_house, networks_assoc, "House of Gaunt")

Number of matching people in the dataframe: 5
Number of people with new association name: 5
Consistent numbers ? True


In [88]:
ootp_1 = [
      "Albus Dumbledore",
    "Alastor Moody",
    "Aberforth Dumbledore",
    "Arabella Fig",
    "Alice Longbottom",
    "Benjy Fenwick",
    "Caradoc Dearborn",
    "Dedalus Diggle",
    "Dorcas Meadowes",
    "Edgar Bones",
    "Elphias Doge",
    "Emmeline Vance",
    "Fabian Prewett",
    "Frank Longbottom",
    "Gideon Prewett",
    "James Potter",
    "Lily Potter",
    "Marlene McKinnon",
    "Mundungus Fletcher",
    "Peter Pettigrew",
    "Remus Lupin",
    "Rubeus Hagrid",
    "Severus Snape",
    "Sirius Black",
    "Sturgis Podmore"
]

In [106]:
ootp_2 = [
    "Albus Dumbledore",
    "Alastor Moody",
    "Kingsley Shacklebot",
    "Aberforth Dumbledore",
    "Arabella Fig",
    "Dedalus Diggle",
    "Elphias Doge",
    "Emmeline Vance",
    "Minerva McGonagall",
    "Mundungus Fletcher",
    "Remus Lupin",
    "Rubeus Hagrid",
    "Severus Snape",
    "Sirius Black",
    "Sturgis Podmore",
    "Arthur Weasley",
    "Bill Weasley",
    "Charlie Weasley",
    "Hestia Jones",
    "Molly Weasley",
    "Nymphadora Tonks",
    "Fleur Delacour",
    "George Weasley",
    "Fred Weasley",
    "Harry Potter",
    "Hermione Granger",
    "Ron Weasley"
]

In [107]:
networks_assoc = add_association_to_people(ootp_1, networks_assoc, "1st Order of the Phoenix")

Number of matching people in the dataframe: 17
Number of people with new association name: 28
Consistent numbers ? False


In [108]:
networks_assoc = add_association_to_people(ootp_2, networks_assoc, "2nd Order of the Phoenix")

Number of matching people in the dataframe: 23
Number of people with new association name: 28
Consistent numbers ? False


In [109]:
black = [
    "Rodolphus Lestrange",
    "Ted Tonks",
    "Bellatrix Lestrange",
    "Narcissa Malfoy",
    "Lucius Malfoy",
    "Andromeda Tonks",
    "Sirius Black",
    "Regulus Black",
    "Phineas Nigellus Black"
]

In [110]:
networks_assoc = add_association_to_people(black, networks_assoc, "House of Black")

Number of matching people in the dataframe: 8
Number of people with new association name: 8
Consistent numbers ? True


In [111]:
all_assoc = {}
networks_assoc.apply(lambda x: add_assoc_to_dico(x.name,x['associated_groups'],all_assoc),axis=1)
None

In [112]:
all_perm = [(x,y,k) for k,v in all_assoc.items() for x in list(v) for y in list(v)  if x != y]
edges_assoc = pd.DataFrame(all_perm, columns=['source', 'target', 'association']).groupby(['source','target'])['association'].apply(list).reset_index()

In [113]:
all_assoc

{'British Ministry of Magic': {1,
  12,
  20,
  22,
  25,
  43,
  64,
  73,
  75,
  84,
  91,
  92,
  93,
  99,
  103,
  107,
  108,
  131,
  134,
  165,
  175},
 'Hogwarts staff': {3,
  35,
  38,
  41,
  55,
  62,
  66,
  73,
  86,
  95,
  98,
  120,
  135,
  137,
  146,
  153,
  154,
  163,
  166,
  167,
  170,
  175},
 'House of Black': {4, 5, 61, 70, 101, 140, 144, 164},
 'Potter family': {5, 66, 67, 84, 85, 103, 108, 113, 134, 155, 172, 173},
 '2nd Order of the Phoenix': {5,
  21,
  25,
  27,
  40,
  55,
  63,
  66,
  73,
  79,
  83,
  84,
  85,
  98,
  103,
  104,
  105,
  106,
  109,
  125,
  134,
  141,
  146,
  155,
  165,
  168,
  173,
  175},
 '1st Order of the Phoenix': {5,
  21,
  25,
  27,
  40,
  55,
  63,
  66,
  73,
  79,
  83,
  84,
  85,
  98,
  103,
  104,
  105,
  106,
  109,
  125,
  134,
  141,
  146,
  155,
  165,
  168,
  173,
  175},
 "Dumbledore's Army": {7,
  9,
  13,
  16,
  24,
  36,
  37,
  46,
  56,
  64,
  68,
  71,
  84,
  100,
  105,
  106,
  108,
  1

### Generate graph in networkx

In [114]:
import networkx as nx
from networkx.readwrite import json_graph
import json

In [115]:
def add_nodes_attributes_and_save(filepath, edge_list, node_info):
    edge_list = edge_list.reset_index().rename(columns={'index':'id'})
    g=nx.from_pandas_edgelist(edge_list, source='source', target='target',edge_attr='id')
    node_info.fillna("",inplace=True)
    
    node_attr = node_info.to_dict(orient='index') 
    nx.set_node_attributes(g, node_attr)
    nx.set_node_attributes(g,{k:{"x":v[0], "y":v[1], "color":"#EEEEEE","size":2} for k,v in nx.spring_layout(g).items()})
    
    json_net = json_graph.node_link_data(g)
    edges = json_net['links']
    json_net["edges"]=edges.copy()
    del json_net['links']
    
    with open(filepath, 'w') as f:
        json.dump(json_net, f)

In [116]:
networks['associated_groups'] = networks_assoc['associated_groups']
networks.head(5)

Unnamed: 0,id,name,birth,death,species,ancestry,gender,hair_color,eye_color,patronus,...,core,Note,id.1,label,link,link_image,connection_label,nb_associations,nb_interactions,image_name
0,0,Aragog,c. 1942,"20 April, 1997 (aged around 55)",Acromantula,,Male,Brown,Black & Grey/Milky-White (Blind),,...,,,150,Aragog,Aragog,http://img4.wikia.nocookie.net/__cb20100611145...,"[Albus Dumbledore, Argus Filch, Cornelius Fudg...",0,13,aragog
1,1,Ludovic Bagman,Before 1964 (most likely),,Human,Pure-blood or Half-blood,Male,Blond,Baby blue,,...,,,1,Ludovic Bagman,Ludo_Bagman,http://img1.wikia.nocookie.net/__cb20111028215...,"[Alastor Moody, Augustus Rookwood, Bartemius C...",1,11,ludovic_bagman
2,2,Bane,,,Centaur,,Male,Black,,,...,,,151,Bane,Bane,http://img3.wikia.nocookie.net/__cb20100712171...,"[Dolores Umbridge, Firenze, Harry Potter, Rube...",1,4,bane
3,3,Cuthbert Binns,,"Pre 1970s, Staffroom, Hogwarts Castle",Ghost,,Male,White (balding),Black,,...,,,4,Cuthbert Binns,Cuthbert_Binns,http://img1.wikia.nocookie.net/__cb20130907183...,"[Argus Filch, Bathilda Bagshot, Dolores Umbrid...",1,16,cuthbert_binns
4,4,Regulus Black,1961,1979 (aged 18),Human,Pure-blood,Male,Black,,Non-corporeal,...,,,5,Regulus Black,Regulus_Black,http://img2.wikia.nocookie.net/__cb20111004231...,"[Albus Dumbledore, Andromeda Tonks, Bartemius ...",2,16,regulus_black


In [117]:
add_nodes_attributes_and_save(cleaned_folder + "association_network.json", edges_assoc, networks)


  node_attr = node_info.to_dict(orient='index')


In [118]:
networks_assoc.apply(lambda x: print(x['associated_groups']),axis=1)

[]
['British Ministry of Magic']
[]
['Hogwarts staff']
['House of Black', 'House of Black']
['Potter family', '2nd Order of the Phoenix', '1st Order of the Phoenix', 'House of Black', '1st Order of the Phoenix', '2nd Order of the Phoenix', 'House of Black']
[]
["Dumbledore's Army"]
[]
["Dumbledore's Army"]
[]
['Death Eaters']
['British Ministry of Magic']
["Dumbledore's Army"]
['Death Eaters']
[]
["Dumbledore's Army"]
[]
[]
['Death Eaters']
['British Ministry of Magic']
['2nd Order of the Phoenix', '1st Order of the Phoenix', '1st Order of the Phoenix', '2nd Order of the Phoenix']
['British Ministry of Magic']
[]
["Dumbledore's Army"]
['British Ministry of Magic', '2nd Order of the Phoenix', '1st Order of the Phoenix', '1st Order of the Phoenix', '2nd Order of the Phoenix']
['Death Eaters']
['2nd Order of the Phoenix', '1st Order of the Phoenix', '1st Order of the Phoenix', '2nd Order of the Phoenix']
[]
[]
[]
[]
[]
[]
[]
['Hogwarts staff']
["Dumbledore's Army"]
["Dumbledore's Army"]
[

0      None
1      None
2      None
3      None
4      None
       ... 
171    None
172    None
173    None
174    None
175    None
Length: 176, dtype: object