# HDDT Visualisations - CEDA bigraph #

In [28]:
import csv
from operator import itemgetter
import networkx as nx
from networkx.algorithms import community #This part of networkx, for community detection, needs to be imported separately.
import nbconvert

<img src="ceda.png">

In [29]:
with open('vw_2_ceda_with_attributes_202108091312.csv', 'r') as nodecsv: # Open the Nodes csv file
    nodereader = csv.reader(nodecsv) # Read the csv
    nodes = [n for n in nodereader][1:]# Retrieve the data (using Python list comprhension and list slicing
                                       # to remove the header row
    node_names = [n[0] for n in nodes] # Get a list of only the node names

with open('vw_4_ceda_membership_dates_xid_202108091147.csv', 'r') as edgecsv: # Open the file
    edgereader = csv.reader(edgecsv) # Read the csv
    edge_list = list(edgereader) # Convert to list, so can iterate below in for loop
    
    # Create empty arrays to store edge data and edge attribute data
    edges = []
    edges_attributes = []
    
    # Fill the arrays with data from CSV
    for e in edge_list[1:]:
        edges.append(tuple(e[0:2])) # Get the first 2 columns (source, target) and add to array
        edges_attributes.append(tuple(e[2:4])) # Get the 3rd and 4th columns (first_year, last_year) and add to array
    
edge_names = [e[0] for e in edges] # Get a list of only the edge names   


In [30]:
print("Nodes length: ", len(node_names))
print("Edges length: ", len(edges))
print("Edges attributes length: ", len(edges_attributes)) # This should be the same length as edges


Nodes length:  3095
Edges length:  3894
Edges attributes length:  3894


In [31]:
print("First 5 nodes:", node_names[0:5])
print("First 5 edges:", edges[0:5])
print("First 5 edges attributes:", edges_attributes[0:5])

# The output will appear below this code cell.


First 5 nodes: ['Arthur William A Beckett', 'Andrew Mercer Adam', 'H R Adam', 'William Adam', 'Henry John Adams']
First 5 edges: [('William Adam', 'ESL'), ('William (1) Adams', 'ESL'), ('William (2) Adams', 'ESL'), ('Louis Agassiz', 'ESL'), ('Augustine Aglio', 'ESL')]
First 5 edges attributes: [('1844', '1844'), ('1844', '1844'), ('1858', '1871'), ('1860', '1871'), ('1843', '1845')]


In [32]:
G = nx.Graph()
G.add_nodes_from(node_names)
G.add_edges_from(edges)
print(nx.info(G))

Name: 
Type: Graph
Number of nodes: 3100
Number of edges: 3894
Average degree:   2.5123


In [33]:
# Nodes

birth_year_dict = {}
death_year_dict = {}
religion_id_dict = {}

# Edges
first_year_dict = {}
last_year_dict = {}

In [34]:
for node in nodes: # Loop through the list, one row at a time
    
    birth_year_dict [node[0]] = node[1]
    death_year_dict [node[0]] = node[2]
    religion_id_dict[node[0]] = node[3]
    
   

In [35]:
for i, edge in enumerate(edges): # Loop through the list, one row at a time
    first_year_dict [(edge[0], edge[1])] = edges_attributes[i][0]
    last_year_dict [(edge[0], edge[1])] = edges_attributes[i][1]

In [36]:
print(first_year_dict)# list Source, target and first_year (all records). This shows the data that will be exported to Gephi.
print(len(first_year_dict))# At the end of the file print a count of all first_year check 3946)
print(len(last_year_dict))# At the end of the file print a count of all last_year check 3946)

{('William Adam', 'ESL'): '1844', ('William (1) Adams', 'ESL'): '1844', ('William (2) Adams', 'ESL'): '1858', ('Louis Agassiz', 'ESL'): '1860', ('Augustine Aglio', 'ESL'): '1843', ('William Francis Harrison Ainsworth', 'ESL'): '1856', ('Alexander Muirhead Aitken', 'ESL'): '1864', ('Rutherford Alcock', 'ESL'): '1862', ('William Aldam', 'ESL'): '1844', ('William Allen', 'ESL'): '1858', ('William Amhurst Tyssen Amhurst', 'ESL'): '1862', ('W  P Andrew', 'ESL'): '1844', ('x Andrews', 'ESL'): '1850', ('Matthew John Anketell', 'ESL'): '1861', ('Thomas Chisholm Anstey', 'ESL'): '1852', ('x Antrim', 'ESL'): '1870', ('William Appleyard', 'ESL'): '1854', ('Frederick Scott Archer', 'ESL'): '1844', ('William Armstrong', 'ESL'): '1865', ('William Arthur', 'ESL'): '1853', ('James Ashbury', 'ESL'): '1867', ('William Henry Ashurst', 'ESL'): '1863', ('Henry Ashworth', 'ESL'): '1866', ('Charles A Atkins', 'ESL'): '1862', ('E Atkinson', 'ESL'): '1861', ('Alois Auer', 'ESL'): '1853', ('Arthur Daniel Aulton

In [37]:
# Nodes
nx.set_node_attributes(G, birth_year_dict, 'birth_year')
nx.set_node_attributes(G, death_year_dict, 'death_year')
nx.set_node_attributes(G, religion_id_dict, 'religion_id')

# Edges
nx.set_edge_attributes(G, first_year_dict, 'first_year')
nx.set_edge_attributes(G, last_year_dict, 'last_year')

In [38]:
for n in G.nodes(): # Loop through every node, in our data "n" will be the name of the person
    print(n, G.nodes[n]['birth_year']) # Access every node by its name, and then by the attribute "birth_year"

Arthur William A Beckett 1844
Andrew Mercer Adam NA
H R Adam NA
William Adam NA
Henry John Adams NA
William (1) Adams NA
William (2) Adams 1820
William Adlam NA
Louis Agassiz 1807
Anastasius Agathides 1805
Augustine Aglio 1777
Joseph Agnew NA
William Francis Harrison Ainsworth 1807
William Baird Airston NA
Alexander Muirhead Aitken NA
Thomas Aitken NA
Rutherford Alcock 1809
William Aldam 1813
Frederick W Aley NA
J  McGrigor Allan NA
S  Stafford Allen 1840
William Allen NA
George Allin NA
Charles Hamond Alpe 1837
Crewe Alston 1828
William Amhurst Tyssen Amhurst 1835
George Amner 1873
Edward C Anderson NA
John Anderson NA
Joseph Anderson 1832
W  P Andrew NA
x Andrews NA
Matthew John Anketell NA
Thomas Chisholm Anstey 1816
x Antrim 1851
William Appleyard NA
William Arbuthnot NA
Frederick Scott Archer 1813
Richard Edward Arden 1804
Alexander Aria NA
William Armitage 1815
T  B Armitstead NA
William Armstrong 1810
Edward Arnold NA
Richard Arnold 1808
William Arthur 1819
Rodolph Arundell 1837

x Tasmania 1803
Alexander Norman Tate 1837
George Tate 1805
George Ralph Tate 1835
Ralph (1) Tate 1840
Thomas (1) Tate 1807
Thomas (2) Tate 1807
Thomas R Tatham NA
E B Tawney 1840
John Taylor 1833
John George Taylor NA
Richard Stephens jnr Taylor 1843
W Taylor NA
W E Taylor NA
Richard Temple 1826
Edward Tenison Ryan Tenison NA
John Tennant 1796
James Emerson Tennent 1804
Frederick Theed 1814
William, Jun Theobald 1829
Robert Thin 1843
Connop Thirlwall 1797
J L Thomas NA
Alfred Robert Thompson NA
Fred Thompson NA
Frederick Thompson NA
George C Thompson NA
Joseph Thompson NA
Richard John Thompson NA
W Thompson (2) NA
Christian JÃƒÂ¼rgensen Thomsen 1788
John Thomson 1837
L E Threlkeld 1788
J  T Thresh NA
John Thrupp 1817
Edward Thurlow NA
John Thurnam 1810
Richard Hill Tiddeman 1842
Samuel Timmins 1826
x Tinsley NA
E Tinsley NA
John Tolhurst NA
C D Tolme NA
x Tonna 1812
James Tonnere NA
William Tooke 1777
Albert Tootal 1838
John Towers NA
Frederick Travers 1808
S  Smith Travers 1826
Willi

KeyError: 'birth_year'

In [39]:
nx.write_gexf(G, 'ceda_all_data.gexf')
