# Quakers and the Centres for the Development of Anthropology in Britain (CEDA) 1830 - 1870 SNA #

## This project explores the membership of 593 Quakers in the CEDA ##


In [7]:
import csv
from operator import itemgetter
import networkx as nx
from networkx.algorithms import community 
#This part of networkx, for community detection, needs to be imported separately.
import nbconvert

<img src="quaker_ceda.png">

In [28]:
# Input files: one row per person (nodes) and one row per person-CEDA membership (edges).
NODES_CSV = 'vw_1_quaker_all_202109031112.csv'
EDGES_CSV = 'vw_4_quakers_ceda_202109021829.csv'

# newline='' is what the csv module expects of the files it reads: it lets the
# reader handle line endings itself, so quoted fields that contain line breaks
# are parsed as one field instead of being split.
with open(NODES_CSV, 'r', newline='') as nodecsv:  # Open the nodes csv file
    nodereader = csv.reader(nodecsv)  # Read the csv
    next(nodereader, None)  # Skip the header row (no error if the file is empty)
    # One list of column values per person; blank lines (parsed as []) are
    # dropped so that indexing column 0 below cannot fail on them.
    nodes = [row for row in nodereader if row]
    node_names = [n[0] for n in nodes]  # First column only: the node names

with open(EDGES_CSV, 'r', newline='') as edgecsv:  # Open the edges csv file
    edgereader = csv.reader(edgecsv)  # Read the csv
    edge_list = list(edgereader)  # Header row followed by the data rows

# Split every data row (header and blank lines excluded) into its two parts:
#   columns 1-2 -> the edge itself as a (source, target) tuple
#   columns 3-4 -> the edge attributes as a (first year, last year) tuple
edge_rows = [row for row in edge_list[1:] if row]
edges = [tuple(row[0:2]) for row in edge_rows]
edges_attributes = [tuple(row[2:4]) for row in edge_rows]

edge_names = [e[0] for e in edges]  # Source endpoint (first column) of every edge

In [29]:
# Report how many records were parsed; the two edge lists are built row by row
# from the same file, so their lengths should match.
for label, records in (
    ("Nodes length: ", node_names),
    ("Edges length: ", edges),
    ("Edges attributes length: ", edges_attributes),
):
    print(label, len(records))

Nodes length:  593
Edges length:  644
Edges attributes length:  644


In [30]:
# Preview the first five entries of each list as a sanity check on the parsing.
# The output will appear below this code cell.
preview = slice(0, 5)
for label, records in (
    ("First 5 nodes:", node_names),
    ("First 5 edges:", edges),
    ("First 5 edges attributes:", edges_attributes),
):
    print(label, records[preview])

First 5 nodes: ['William Spicer Wood', 'William Wilson', 'James Wilson', 'E T Wakefield', 'John Ross']
First 5 edges: [('William Spicer Wood', 'APS'), ('William Spicer Wood', 'ASL'), ('William Spicer Wood', 'AI'), ('William Wilson', 'APS'), ('William Wilson', 'ASL')]
First 5 edges attributes: [('1864', '1867'), ('1863', '1871'), ('1863', '1871'), ('1838', '1865'), ('1865', '1866')]


In [31]:
# Build an undirected graph: every name from the node file becomes a node and
# every (source, target) pair from the edge file becomes an edge. Endpoints that
# are not already nodes are created automatically when the edges are added.
G = nx.Graph()
G.add_nodes_from(node_names)
G.add_edges_from(edges)

# nx.info() was deprecated and then removed in NetworkX 3.0, so the same summary
# is printed here from the graph's own methods, which work on every version.
node_count = G.number_of_nodes()
edge_count = G.number_of_edges()
print(f"Name: {G.name}")
print(f"Type: {type(G).__name__}")
print(f"Number of nodes: {node_count}")
print(f"Number of edges: {edge_count}")
if node_count > 0:  # Avoid dividing by zero on an empty graph
    # Every undirected edge adds one to the degree of each of its two endpoints.
    print(f"Average degree: {2 * edge_count / node_count:8.4f}")

Name: 
Type: Graph
Number of nodes: 598
Number of edges: 644
Average degree:   2.1538


In [32]:
# Nodes attributes
birth_year_dict = {}
death_year_dict = {}
religion_dict = {}
confirmed_dict = {}
notes_dict = {}

# Edges attributes
person_ceda_first_year_dict = {}
person_ceda_last_year_dict = {}
religion_name_dict = {}

In [33]:
# Columns 1-5 of every node row, in order, feed these lookup dicts; each entry
# is keyed by the value in column 0 of the same row.
column_targets = (
    birth_year_dict,
    death_year_dict,
    religion_dict,
    confirmed_dict,
    notes_dict,
)
for row in nodes:  # One row at a time
    for column, target in enumerate(column_targets, start=1):
        target[row[0]] = row[column]
       

In [38]:
# Walk the edges and their attribute tuples in step: element 0 of the attribute
# tuple goes to the first-year lookup and element 1 to the last-year lookup,
# both keyed by the edge's two endpoints.
for endpoints, years in zip(edges, edges_attributes):
    pair = (endpoints[0], endpoints[1])
    person_ceda_first_year_dict[pair] = years[0]
    person_ceda_last_year_dict[pair] = years[1]


In [21]:
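# Debugging aids, left commented out. NOTE(review): relationship_type_id_dict is not defined in any cell shown in this notebook - confirm the intended dictionary before re-enabling.
#print(relationship_type_id_dict) # List source, target and first_year (all records). This shows the data that will be exported to Gephi.
#print(len(relationship_type_id_dict)) # Print a count of all first_year records (check: 3946)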


In [40]:
# Attach every lookup dict to the graph under its attribute name.

# Nodes
for attribute, values in (
    ('birth_year', birth_year_dict),
    ('death_year', death_year_dict),
    ('religion_id', religion_dict),
    ('confirmed', confirmed_dict),
    ('notes', notes_dict),
):
    nx.set_node_attributes(G, values, attribute)

# Edges
for attribute, values in (
    ('ceda_first_year_type_id', person_ceda_first_year_dict),
    ('ceda_last_year_type_id', person_ceda_last_year_dict),
):
    nx.set_edge_attributes(G, values, attribute)


In [39]:
#for n in G.nodes(): # Loop through every node; in our data "n" will be the name of the person
#    print(n, G.nodes[n]['birth_year']) # Access every node by its name, and then by the attribute "birth_year"

In [41]:
# Save the graph, with the attributes set above, as a GEXF file.
gexf_path = 'quakers_ceda.gexf'
nx.write_gexf(G, gexf_path)