In [1]:
import csv
from operator import itemgetter
import networkx as nx
from networkx.algorithms import community #This part of networkx, for community detection, needs to be imported separately.
import nbconvert

In [2]:
# Load the node table. The csv module expects its file objects to be opened
# with newline='' so that quoted fields containing line breaks parse correctly.
with open('vw_1_quakers.csv', 'r', newline='') as nodecsv:
    nodereader = csv.reader(nodecsv)
    # Drop the header row (list slicing) and skip blank lines: csv.reader
    # yields [] for them, which would raise IndexError on the n[0] lookup below.
    nodes = [n for n in list(nodereader)[1:] if n]
    node_names = [n[0] for n in nodes]  # First column of every row: the node name

# Load the edge table the same way.
with open('vw_5_person1_person2.csv', 'r', newline='') as edgecsv:
    edgereader = csv.reader(edgecsv)
    edge_list = list(edgereader)  # Materialise the rows (header included) so they can be sliced

# Data rows only: everything after the header, minus blank lines.
edge_rows = [e for e in edge_list[1:] if e]

# Columns 1-2 of each row are the two endpoints of an edge.
edges = [tuple(e[0:2]) for e in edge_rows]

# Whatever is present in columns 3-4 of each row is kept as that edge's
# attribute tuple; the slice simply yields fewer items for shorter rows.
edges_attributes = [tuple(e[2:4]) for e in edge_rows]

# Despite the name, this is the first endpoint of every edge, in row order.
edge_names = [e[0] for e in edges]

In [3]:
# Report how many entries each loaded list holds. The edge and edge-attribute
# counts are expected to be equal.
for label, collection in (
    ("Nodes length: ", node_names),
    ("Edges length: ", edges),
    ("Edges attributes length: ", edges_attributes),
):
    print(label, len(collection))

Nodes length:  593
Edges length:  2099
Edges attributes length:  2099


In [4]:
# Preview the leading entries of each loaded list; the output is shown
# directly below this cell.
preview_size = 5
leading_nodes = node_names[:preview_size]
leading_edges = edges[:preview_size]
leading_edge_attributes = edges_attributes[:preview_size]

print("First 5 nodes:", leading_nodes)
print("First 5 edges:", leading_edges)
print("First 5 edges attributes:", leading_edge_attributes)

First 5 nodes: ['William Aldam', 'S  Stafford Allen', 'Edward Backhouse', 'James (1) Backhouse', 'James Bell']
First 5 edges: [('William Aldam', 'x Fox'), ('William Jun Aldam', 'x Fox'), ('Frederick Alexander', 'R D Alexander'), ('G W Alexander', 'R D Alexander'), ('Henry Alexander', 'R D Alexander')]
First 5 edges attributes: [('1',), ('1',), ('1',), ('1',), ('1',)]


In [5]:
# Build an undirected graph from the node names and the (source, target) pairs.
G = nx.Graph()
G.add_nodes_from(node_names)
G.add_edges_from(edges)

# nx.info() was deprecated in NetworkX 2.7 and removed in 3.0, so the summary
# is assembled by hand here, using the layout nx.info() used to print.
node_count = G.number_of_nodes()
edge_count = G.number_of_edges()
summary_lines = [
    "Name: {}".format(G.name),
    "Type: {}".format(type(G).__name__),
    "Number of nodes: {}".format(node_count),
    "Number of edges: {}".format(edge_count),
]
if node_count > 0:  # Avoid dividing by zero on an empty graph
    total_degree = sum(degree for _, degree in G.degree())
    summary_lines.append(
        "Average degree: {:8.4f}".format(float(total_degree) / node_count)
    )
print("\n".join(summary_lines))

Name: 
Type: Graph
Number of nodes: 593
Number of edges: 2099
Average degree:   7.0793


In [6]:
# Empty lookups for node attributes, one dictionary per attribute.
gender_id_dict = dict()
birth_year_dict = dict()
death_year_dict = dict()
data_source_id_dict = dict()
religion_id_dict = dict()

# Empty lookup for the edge attribute.
relationship_type_id_dict = dict()

In [7]:
# Column 0 of every node row is the key; columns 1-5 feed the five attribute
# dictionaries, in this order.
node_attribute_dicts = (
    gender_id_dict,
    birth_year_dict,
    death_year_dict,
    data_source_id_dict,
    religion_id_dict,
)
for row in nodes:
    person = row[0]
    for column, attribute_dict in enumerate(node_attribute_dicts, start=1):
        attribute_dict[person] = row[column]

In [8]:
# Key each edge by its (first endpoint, second endpoint) pair and store the
# first value of the attribute tuple found at the same position.
for position in range(len(edges)):
    pair = edges[position]
    relationship_type_id_dict[pair[0], pair[1]] = edges_attributes[position][0]

In [9]:
# Show the complete node-name -> religion_id lookup, followed by the number of
# entries it holds (one per distinct node name).
religion_entry_count = len(religion_id_dict)
print(religion_id_dict)
print(religion_entry_count)


{'William Aldam': '1', 'S  Stafford Allen': '1', 'Edward Backhouse': '1', 'James (1) Backhouse': '1', 'James Bell': '1', 'Antonio Brady': '1', 'William Bull': '1', 'Charles Buxton': '1', 'Henry Christy': '1', 'William Clay': '1', 'x Collier': '1', 'Henry Crowley': '1', 'David Dale': '1', 'James T J Doyle': '1', 'Robert Nicholas Fowler': '1', 'William Fowler': '1', 'Charles Henry Fox': '1', 'George Stacey Gibson': '1', 'John Henry Gurney': '1', 'Thomas (1) Hodgkin': '1', 'William Holmes': '1', 'Jonathan Hutchinson': '1', 'Joseph Lister': '1', 'William Horton Lloyd': '1', 'J Robinson': '1', 'John Ross': '1', 'E T Wakefield': '1', 'James Wilson': '1', 'William Wilson': '1', 'William Spicer Wood': '1', 'Arthur Albright': '1', 'Rachel Albright': '1', 'William Albright': '1', 'William Jun Aldam': '1', 'Frederick Alexander': '1', 'Samuel Allen': '1', 'John Arch': '1', 'R Arthington': '1', 'R Jun Arthington': '1', 'C Ashby': '1', 'T Jun Ashby': '1', 'Thomas Ashby': '1', 'Thomas Jun Ashby': '1'

In [10]:
# Nodes: attach each lookup to the graph under the attribute name that
# matches the dictionary it came from.
node_attribute_values = (
    ('gender_id', gender_id_dict),
    ('birth_year', birth_year_dict),
    ('death_year', death_year_dict),
    ('data_source_id', data_source_id_dict),
    ('religion_id', religion_id_dict),
)
for attribute_name, values_by_node in node_attribute_values:
    nx.set_node_attributes(G, values_by_node, attribute_name)

# Edges: the lookup holds relationship type ids, so store it under that name.
# It was previously stored as 'first_year', which mislabelled the values in
# the graph and in any file exported from it.
nx.set_edge_attributes(G, relationship_type_id_dict, 'relationship_type_id')

In [11]:
# Walk every node together with its attribute dictionary and print the node
# (a person's name in this data) next to its "birth_year" attribute.
for person, attributes in G.nodes(data=True):
    print(person, attributes['birth_year'])

William Aldam 1813
S  Stafford Allen 1840
Edward Backhouse 1808
James (1) Backhouse 1794
James Bell 1818
Antonio Brady 1811
William Bull 1828
Charles Buxton 1823
Henry Christy 1810
William Clay 1791
x Collier NA
Henry Crowley NA
David Dale 1829
James T J Doyle NA
Robert Nicholas Fowler 1828
William Fowler NA
Charles Henry Fox NA
George Stacey Gibson 1818
John Henry Gurney 1819
Thomas (1) Hodgkin 1798
William Holmes NA
Jonathan Hutchinson 1828
Joseph Lister 1827
William Horton Lloyd NA
J Robinson NA
John Ross NA
E T Wakefield NA
James Wilson NA
William Wilson 1785
William Spicer Wood NA
Arthur Albright NA
Rachel Albright NA
William Albright NA
William Jun Aldam NA
Frederick Alexander NA
Samuel Allen NA
John Arch NA
R Arthington NA
R Jun Arthington NA
C Ashby NA
T Jun Ashby NA
Thomas Ashby NA
Thomas Jun Ashby NA
Catherine Backhouse NA
Jonathan Backhouse NA
S Baker NA
Robert Barclay NA
Richard Barrington NA
Edward Beck NA
Eliza Bell NA
Frederick J Bell NA
S S Bell NA
Samuel Bewley NA
x Bi

In [12]:
# Export the graph to a GEXF file.
gexf_path = 'ceda_quakers.gexf'
nx.write_gexf(G, gexf_path)