# HDDT analysis - Quakers and the CEDA (all) #

<img src="quakers_ceda.png">

In [1]:
import csv
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from operator import itemgetter
import networkx as nx
from networkx.algorithms import community 
#This part of networkx, for community detection, needs to be imported separately.
import nbconvert
import seaborn as sns
# Default size (width, height in inches) for every figure drawn in this notebook.
plt.rcParams['figure.figsize'] = (20, 10)
# <img src=" xxx.png">

In [2]:
# Load the node list. Each row describes one node; only the first column (the
# node's name) is used. newline='' is what the csv module asks for so that
# quoted fields containing line breaks are parsed correctly.
with open('vw_1_quakers.csv', 'r', newline='') as nodecsv: # Open the nodes csv file
    nodereader = csv.reader(nodecsv) # Read the csv
    nodes = list(nodereader)[1:] # Retrieve the rows, slicing off the header row
    node_names = [n[0] for n in nodes] # Get a list of only the node names

# Load the edge list. Columns 0-1 hold the two endpoints of an edge and
# columns 3-4 hold the two values stored further down as 'first_year' and
# 'last_year'. Column 2 is not used.
with open('vw_hddt_quakers_ceda_tuples_dates_py.csv', 'r', newline='') as edgecsv: # Open the edges csv file
    edgereader = csv.reader(edgecsv) # Read the csv
    edge_list = list(edgereader) # Convert to list (header row included) so it can be sliced below

    # Parallel lists: edges_attributes[i] describes edges[i]
    edges = []
    edges_attributes = []

    # Fill the lists with data from the csv, skipping the header row
    for e in edge_list[1:]:
        edges.append(tuple(e[0:2])) # Columns 0-1: (source, target)
        edges_attributes.append(tuple(e[3:5])) # Columns 3-4: (first year, last year)

edge_names = [e[0] for e in edges] # First endpoint (source) of every edge

In [3]:
# Sanity check on what was loaded. edges and edges_attributes are filled in
# lockstep, so their lengths should be identical.
for label, items in (
    ("Nodes length: ", node_names),
    ("Edges length: ", edges),
    ("Edges attributes length: ", edges_attributes),
):
    print(label, len(items))

Nodes length:  589
Edges length:  639
Edges attributes length:  639


In [4]:
# Peek at the first five entries of each list to confirm the columns were
# picked up as intended. The output appears below this code cell.
head = slice(0, 5)
print("First 5 nodes:", node_names[head])
print("First 5 edges:", edges[head])
print("First 5 edges attributes:", edges_attributes[head])

First 5 nodes: ['William Aldam', 'S  Stafford Allen', 'Edward Backhouse', 'James (1) Backhouse', 'James Bell']
First 5 edges: [('William Aldam', 'ESL'), ('Edward Backhouse', 'ESL'), ('James (1) Backhouse', 'ESL'), ('James Bell', 'ESL'), ('Henry Christy', 'ESL')]
First 5 edges attributes: [('1844', '1849'), ('1870', '1872'), ('1869', '1870'), ('1852', '1863'), ('1854', '1866')]


In [5]:
# Build an undirected graph from the loaded data. add_edges_from() also
# creates any endpoint that is not already a node, so the graph can end up
# with more nodes than node_names has entries.
G = nx.Graph()
G.add_nodes_from(node_names)
G.add_edges_from(edges)

# nx.info() was removed in NetworkX 3.0, so the same summary is printed by
# hand to keep this cell working on both old and new releases.
n_nodes = G.number_of_nodes()
n_edges = G.number_of_edges()
print("Name: %s" % G.name)
print("Type: %s" % type(G).__name__)
print("Number of nodes: %d" % n_nodes)
print("Number of edges: %d" % n_edges)
if n_nodes > 0:
    # Every edge of an undirected graph adds 2 to the total degree.
    # Skipped for an empty graph to avoid dividing by zero.
    print("Average degree: %8.4f" % (2.0 * n_edges / n_nodes))

Name: 
Type: Graph
Number of nodes: 594
Number of edges: 639
Average degree:   2.1515


In [6]:
# Edge attribute lookups, keyed by (source, target); filled in the next cell.
first_year_dict, last_year_dict = {}, {}

In [7]:
# Walk the two parallel lists by position and record, for every edge, the
# first and second value of its attribute tuple under the (source, target) key.
for idx in range(len(edges)):
    years = edges_attributes[idx]
    key = (edges[idx][0], edges[idx][1])
    first_year_dict[key] = years[0]
    last_year_dict[key] = years[1]

In [8]:
# Attach each lookup to the graph's edges under its attribute name.
for attr_name, attr_values in (
    ('first_year', first_year_dict),
    ('last_year', last_year_dict),
):
    nx.set_edge_attributes(G, attr_values, attr_name)

In [9]:
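# Disabled example of reading a node attribute. No 'birth_year' attribute is set
# on the nodes in the cells shown above, so it would need to be added first.
#for n in G.nodes(): # Loop through every node, in our data "n" will be the name of the person
#    print(n, G.nodes[n]['birth_year']) # Access every node by its name, and then by the attribute "birth_year"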

In [10]:
# Export the graph, including the edge attributes set above, to a GEXF file.
gexf_path = 'quakers_ceda_dyn.gexf'
nx.write_gexf(G, gexf_path)