# Quakers  SNA #

## This project explores the 593 Quakers found amongst the entire community of 3095 ##

19% of the community are Quakers.

In [1]:
import csv
from operator import itemgetter
import networkx as nx
from networkx.algorithms import community #This part of networkx, for community detection, needs to be imported separately.
import nbconvert

<img src="quakers.png">

In [2]:
with open('vw_4_quaker_nodes_202108191716.csv', 'r') as nodecsv: # Open the Nodes csv file
    nodereader = csv.reader(nodecsv) # Read the csv
    nodes = [n for n in nodereader][1:]# Retrieve the data (using Python list comprhension and list slicing
                                       # to remove the header row
    node_names = [n[0] for n in nodes] # Get a list of only the node names

with open('vw_5_person1_person2.csv', 'r') as edgecsv: # Open the file
    edgereader = csv.reader(edgecsv) # Read the csv
    edge_list = list(edgereader) # Convert to list, so can iterate below in for loop
    
    # Create empty arrays to store edge data and edge attribute data
    edges = []
    edges_attributes = []
    
    # Fill the arrays with data from CSV
    for e in edge_list[1:]:
        edges.append(tuple(e[0:2])) # Get the first 2 columns (source, target) and add to array
        edges_attributes.append(tuple(e[2:3])) # Get the 3rd column (relationship_type_id) and add to array
    
edge_names = [e[0] for e in edges] # Get a list of only the edge names   

In [3]:
print("Nodes length: ", len(node_names))
print("Edges length: ", len(edges))
print("Edges attributes length: ", len(edges_attributes)) # This should be the same length as edges

Nodes length:  644
Edges length:  2099
Edges attributes length:  2099


In [4]:
print("First 5 nodes:", node_names[0:5])
print("First 5 edges:", edges[0:5])
print("First 5 edges attributes:", edges_attributes[0:5])

# The output will appear below this code cell.

First 5 nodes: ['William Spicer Wood', 'William Spicer Wood', 'William Spicer Wood', 'William Wilson', 'William Wilson']
First 5 edges: [('William Aldam', 'x Fox'), ('William Jun Aldam', 'x Fox'), ('Frederick Alexander', 'R D Alexander'), ('G W Alexander', 'R D Alexander'), ('Henry Alexander', 'R D Alexander')]
First 5 edges attributes: [('1',), ('1',), ('1',), ('1',), ('1',)]


In [5]:
G = nx.Graph()
G.add_nodes_from(node_names)
G.add_edges_from(edges)
print(nx.info(G))

Name: 
Type: Graph
Number of nodes: 593
Number of edges: 2099
Average degree:   7.0793


In [7]:
# Nodes attributes
birth_year_dict = {}
death_year_dict = {}
religion_name_dict = {}
ceda_name_dict = {}
person_ceda_first_year_dict = {}
person_ceda_last_year_dict = {}

# Edges attributes
relationship_type_id_dict = {}

In [8]:
for node in nodes: # Loop through the list, one row at a time
    birth_year_dict [node[0]] = node[1]
    death_year_dict [node[0]] = node[2]
    religion_name_dict [node[0]] = node[3]
    ceda_name_dict [node[0]] = node[4]
    person_ceda_first_year_dict [node[0]] = node[5]
    person_ceda_last_year_dict [node[0]] = node[6]
    

In [9]:
for i, edge in enumerate(edges): # Loop through the list, one row at a time
    relationship_type_id_dict [(edge[0], edge[1])] = edges_attributes[i][0]    

In [9]:
print(religion_id_dict)# list Source, target and first_year (all records). This shows the data that will be exported to Gephi.
print(len(religion_id_dict))# At the end of the file print a count of all first_year check 3946)


{'William Aldam': '1', 'S  Stafford Allen': '1', 'Edward Backhouse': '1', 'James (1) Backhouse': '1', 'James Bell': '1', 'Antonio Brady': '1', 'William Bull': '1', 'Charles Buxton': '1', 'Henry Christy': '1', 'William Clay': '1', 'x Collier': '1', 'Henry Crowley': '1', 'David Dale': '1', 'James T J Doyle': '1', 'Robert Nicholas Fowler': '1', 'William Fowler': '1', 'Charles Henry Fox': '1', 'George Stacey Gibson': '1', 'John Henry Gurney': '1', 'Thomas (1) Hodgkin': '1', 'William Holmes': '1', 'Jonathan Hutchinson': '1', 'Joseph Lister': '1', 'William Horton Lloyd': '1', 'J Robinson': '1', 'John Ross': '1', 'E T Wakefield': '1', 'James Wilson': '1', 'William Wilson': '1', 'William Spicer Wood': '1', 'Arthur Albright': '1', 'Rachel Albright': '1', 'William Albright': '1', 'William Jun Aldam': '1', 'Frederick Alexander': '1', 'Samuel Allen': '1', 'John Arch': '1', 'R Arthington': '1', 'R Jun Arthington': '1', 'C Ashby': '1', 'T Jun Ashby': '1', 'Thomas Ashby': '1', 'Thomas Jun Ashby': '1'

In [13]:
# Nodes

nx.set_node_attributes(G, birth_year_dict, 'birth_year')
nx.set_node_attributes(G, death_year_dict, 'death_year')
nx.set_node_attributes(G, religion_name_dict, 'religion')
nx.set_node_attributes(G, ceda_name_dict,'ceda')
nx.set_node_attributes(G, person_ceda_first_year_dict, 'first_year') 
nx.set_node_attributes(G, person_ceda_last_year_dict, 'last_year')                    

# Edges
nx.set_node_attributes(G, relationship_type_id_dict, 'relationship_weight')

In [14]:
for n in G.nodes(): # Loop through every node, in our data "n" will be the name of the person
    print(n, G.nodes[n]['birth_year']) # Access every node by its name, and then by the attribute "birth_year"

William Spicer Wood 
William Wilson 1785
James Wilson 
E T Wakefield 
John Ross 
J Robinson 
William Horton Lloyd 
Joseph Lister 1827
Jonathan Hutchinson 1828
William Holmes 
Thomas (1) Hodgkin 1798
John Henry Gurney 1819
George Stacey Gibson 1818
Charles Henry Fox 
William Fowler 
Robert Nicholas Fowler 1828
James T J Doyle 
David Dale 1829
Henry Crowley 
x Collier 
William Clay 1791
Henry Christy 1810
Charles Buxton 1823
William Bull 1828
Antonio Brady 1811
James Bell 1818
James (1) Backhouse 1794
Edward Backhouse 1808
S  Stafford Allen 1840
William Aldam 1813
Arthur Albright 
Rachel Albright 
William Albright 
William Jun Aldam 
Frederick Alexander 
Samuel Allen 
John Arch 
R Arthington 
R Jun Arthington 
C Ashby 
T Jun Ashby 
Thomas Ashby 
Thomas Jun Ashby 
Catherine Backhouse 
Jonathan Backhouse 
S Baker 
Robert Barclay 
Richard Barrington 
Edward Beck 
Eliza Bell 
Frederick J Bell 
S S Bell 
Samuel Bewley 
x Biffin 
J Binyon 
E L Birkbeck 
H Birkbeck 
W Blaine 
Elizabeth Bowley 


In [15]:
nx.write_gexf(G, 'ceda_quakers.gexf')