# Quaker families: dynamic relationships #

## This project explores the 592 Quakers found amongst the entire community of 3095 ##

19% of the community are Quakers.

<img src="quakers_dyn.png">

In [13]:
import csv
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from operator import itemgetter
import networkx as nx
from networkx.algorithms import community 
#This part of networkx, for community detection, needs to be imported separately.
import nbconvert
import seaborn as sns
# Default size (width, height in inches) for every figure drawn in this notebook.
plt.rcParams['figure.figsize'] = (20, 10)
# <img src=" xxx.png">

In [14]:
# Load the node (person) table.
# The csv module documentation asks for files to be opened with newline=''
# so that line breaks inside quoted fields are parsed correctly.
with open('vw_4_ceda_membership_quakers2_py.csv', 'r', newline='') as nodecsv:
    nodereader = csv.reader(nodecsv)  # Read the csv
    # Drop the header row (list slicing) and skip completely blank rows,
    # which would otherwise raise IndexError on row[0] below.
    nodes = [row for row in list(nodereader)[1:] if row]
    # First column of every remaining row; the same name can appear on
    # several rows.
    node_names = [row[0] for row in nodes]

# Load the edge (person1, person2, relationship) table.
with open('vw_5_person1_person2.csv', 'r', newline='') as edgecsv:
    edgereader = csv.reader(edgecsv)  # Read the csv
    edge_list = list(edgereader)  # Materialise the rows so they can be reused below

# Data rows only: header removed, blank rows skipped.
edge_rows = [row for row in edge_list[1:] if row]

# First 2 columns (source, target) of each row, as a tuple.
edges = [tuple(row[0:2]) for row in edge_rows]
# 3rd column (relationship_type_id) of each row, kept as a 1-tuple so it
# stays index-aligned with `edges`.
edges_attributes = [tuple(row[2:3]) for row in edge_rows]

# Source (first) name of every edge.
edge_names = [edge[0] for edge in edges]

In [15]:
# Report how many entries were loaded. The edge and edge-attribute counts
# should be identical because both lists are built from the same rows.
for label, collection in (
    ("Nodes length: ", node_names),
    ("Edges length: ", edges),
    ("Edges attributes length: ", edges_attributes),
):
    print(label, len(collection))

Nodes length:  688
Edges length:  2006
Edges attributes length:  2006


In [16]:
# Show the first five entries of each loaded list as a quick visual check.
for caption, loaded in (
    ("First 5 nodes:", node_names),
    ("First 5 edges:", edges),
    ("First 5 edges attributes:", edges_attributes),
):
    print(caption, loaded[0:5])

# The output will appear below this code cell.

First 5 nodes: ['William Spicer Wood', 'William Spicer Wood', 'William Spicer Wood', 'William Wilson', 'William Wilson']
First 5 edges: [('William Aldam', 'x Fox'), ('William Jun Aldam', 'x Fox'), ('Frederick Alexander', 'R D Alexander'), ('G W Alexander', 'R D Alexander'), ('Henry Alexander', 'R D Alexander')]
First 5 edges attributes: [('1',), ('1',), ('1',), ('1',), ('1',)]


In [17]:
# Build an undirected graph from the loaded names and (source, target) pairs.
# nx.Graph stores each node and each edge once, so repeated names or
# repeated pairs in the input lists do not create duplicates.
G = nx.Graph()
G.add_nodes_from(node_names)
G.add_edges_from(edges)

# nx.info() was removed in NetworkX 3.0, so the same summary is printed
# explicitly here; this works on both old and new NetworkX versions.
node_count = G.number_of_nodes()
edge_count = G.number_of_edges()
print(f"Name: {G.name}")
print(f"Type: {type(G).__name__}")
print(f"Number of nodes: {node_count}")
print(f"Number of edges: {edge_count}")
if node_count > 0:
    # In an undirected graph the degrees sum to twice the edge count.
    print(f"Average degree: {2 * edge_count / node_count:8.4f}")

Name: 
Type: Graph
Number of nodes: 590
Number of edges: 2001
Average degree:   6.7831


In [18]:
# Nodes attributes
# Lookup tables for node attributes, keyed by person name.
birth_year_dict = dict()
death_year_dict = dict()
religion_name_dict = dict()
ceda_name_dict = dict()
person_ceda_first_year_dict = dict()
person_ceda_last_year_dict = dict()

# Lookup table for the edge attribute, keyed by (source, target).
relationship_type_id_dict = dict()

In [19]:
# Which column of a node row feeds which lookup table (column 0 is the name).
node_attribute_columns = (
    (birth_year_dict, 1),
    (death_year_dict, 2),
    (religion_name_dict, 3),
    (ceda_name_dict, 4),
    (person_ceda_first_year_dict, 5),
    (person_ceda_last_year_dict, 6),
)

# Copy every row into the lookup tables. A name that appears on several
# rows ends up with the values from its last row, because each assignment
# overwrites the previous entry for that name.
for node in nodes:
    person = node[0]
    for table, column in node_attribute_columns:
        table[person] = node[column]
    

In [20]:
# Pair each (source, target) edge with its attribute tuple; both lists are
# index-aligned, so they can be walked together.
for edge, attribute_values in zip(edges, edges_attributes):
    relationship_type_id_dict[(edge[0], edge[1])] = attribute_values[0]

In [21]:
#print(relationship_type_id_dict) # List every (source, target) pair with its relationship_type_id (all records). This shows the edge data that will be exported to Gephi.
#print(len(relationship_type_id_dict)) # Print a count of all the entries in the dictionary (check: 3946).


In [22]:
# Nodes

nx.set_node_attributes(G, birth_year_dict, 'birth_year')
nx.set_node_attributes(G, death_year_dict, 'death_year')
nx.set_node_attributes(G, religion_name_dict, 'religion')
nx.set_node_attributes(G, ceda_name_dict,'ceda')
nx.set_node_attributes(G, person_ceda_first_year_dict, 'first_year') 
nx.set_node_attributes(G, person_ceda_last_year_dict, 'last_year')                    

# Edges
nx.set_edge_attributes(G, relationship_type_id_dict, 'relationship_type_id')

In [23]:
# for n in G.nodes(): # Loop through every node, in our data "n" will be the name of the person
#    print(n, G.nodes[n]['birth_year']) # Access every node by its name, and then by the attribute "birth_year"

In [24]:
# Save the graph, with its node and edge attributes, as a GEXF file.
gexf_path = 'quakers_dynamic.gexf'
nx.write_gexf(G, gexf_path)