# Bigraph nodes (Names) and edges (with attributes) #

## This workbook  ##

In [13]:
import csv
from operator import itemgetter
import networkx as nx
from networkx.algorithms import community #This part of networkx, for community detection, needs to be imported separately.
import nbconvert

In [14]:
# Load the node table. Files handed to csv.reader are opened with newline=''
# (as the csv module documentation requires) so that quoted fields containing
# embedded newlines are parsed correctly on every platform.
with open('vw_3_all_names_attributes.csv', 'r', newline='') as nodecsv:
    nodereader = csv.reader(nodecsv)  # Row iterator over the nodes CSV
    nodes = list(nodereader)[1:]  # Every row except the header row
    node_names = [n[0] for n in nodes]  # First column of each row is the node name

# Load the edge table the same way; the header row is still at index 0 here.
with open('vw_3_all_bipartite_attributes.csv', 'r', newline='') as edgecsv:
    edgereader = csv.reader(edgecsv)  # Row iterator over the edges CSV
    edge_list = list(edgereader)  # Materialise the rows so they can be looped over below

# Parallel lists: edges[i] is a (source, target) pair and edges_attributes[i]
# holds the (first_year, last_year) columns taken from the same CSV row.
edges = []
edges_attributes = []

for e in edge_list[1:]:  # Skip the header row
    edges.append(tuple(e[0:2]))  # Columns 1-2: source, target
    edges_attributes.append(tuple(e[2:4]))  # Columns 3-4: first_year, last_year

edge_names = [e[0] for e in edges]  # Source endpoint (first column) of every edge


In [15]:
# Report how many rows were loaded into each list. The edge list and the
# edge-attribute list are built from the same rows, so their lengths should match.
for label, collection in (
    ("Nodes length: ", node_names),
    ("Edges length: ", edges),
    ("Edges attributes length: ", edges_attributes),
):
    print(label, len(collection))


Nodes length:  3609
Edges length:  10118
Edges attributes length:  10118


In [16]:
# Preview the first five entries of each list as a quick sanity check.
# The output will appear below this code cell.
for label, collection in (
    ("First 5 nodes:", node_names),
    ("First 5 edges:", edges),
    ("First 5 edges attributes:", edges_attributes),
):
    print(label, collection[:5])


First 5 nodes: ['A  Mackintosh Shaw', 'A  de Fullner', 'A , jun Ramsay', 'A A Stewart', 'A Ambrose']
First 5 edges: [('Arthur William A Beckett', 'ASL'), ('Arthur William A Beckett', 'London'), ('Arthur William A Beckett', 'literary'), ('Andrew Mercer Adam', 'ASL'), ('Andrew Mercer Adam', 'armed services')]
First 5 edges attributes: [('', ''), ('', ''), ('', ''), ('', ''), ('', '')]


In [17]:
# Build an undirected graph from the loaded node names and (source, target) pairs.
G = nx.Graph()
G.add_nodes_from(node_names)
G.add_edges_from(edges)

# nx.info() was removed in NetworkX 3.0, so print the same summary through the
# stable Graph API; this works on both old and new NetworkX releases.
n_nodes = G.number_of_nodes()
n_edges = G.number_of_edges()
print("Name:", G.name)
print("Type:", type(G).__name__)
print("Number of nodes:", n_nodes)
print("Number of edges:", n_edges)
if n_nodes > 0:  # Average degree is undefined for an empty graph
    print("Average degree: %8.4f" % (sum(d for _, d in G.degree()) / n_nodes))

Name: 
Type: Graph
Number of nodes: 3609
Number of edges: 10118
Average degree:   5.6071


In [18]:
# Node attribute lookups: five independent, initially empty dictionaries
# (filled in a later cell, keyed by the first column of each node row).
(
    religion_id_dict,
    birth_year_dict,
    death_year_dict,
    gender_id_dict,
    data_source_id_dict,
) = ({} for _ in range(5))

# Edge attribute lookups: two independent, initially empty dictionaries.
first_year_dict, last_year_dict = {}, {}

In [19]:
# Populate each node-attribute dictionary from the node rows: column 0 is the
# key, columns 1-5 supply the values in the order shown below.
for node in nodes:
    name = node[0]
    religion_id_dict[name] = node[1]
    birth_year_dict[name] = node[2]
    death_year_dict[name] = node[3]
    gender_id_dict[name] = node[4]
    data_source_id_dict[name] = node[5]
   

In [20]:
# Walk the two parallel lists together and key each edge's attributes by its
# (source, target) pair.
for edge, attributes in zip(edges, edges_attributes):
    edge_key = (edge[0], edge[1])
    first_year_dict[edge_key] = attributes[0]
    last_year_dict[edge_key] = attributes[1]

In [21]:
# print(first_year_dict)  # List source, target and first_year for all records. This shows the data that will be exported to Gephi.
# print(len(first_year_dict))  # Print a count of all first_year entries (check: 3946)
# print(len(last_year_dict))  # Print a count of all last_year entries (check: 3946)

In [22]:
# Attach every node-attribute dictionary to the graph under its attribute name.
for attribute_name, node_values in (
    ('religion_id', religion_id_dict),
    ('birth_year', birth_year_dict),
    ('death_year', death_year_dict),
    ('gender_id', gender_id_dict),
    ('data_source_id', data_source_id_dict),
):
    nx.set_node_attributes(G, node_values, attribute_name)

# Attach every edge-attribute dictionary to the graph in the same way.
for attribute_name, edge_values in (
    ('first_year', first_year_dict),
    ('last_year', last_year_dict),
):
    nx.set_edge_attributes(G, edge_values, attribute_name)

In [23]:
# for n in G.nodes(): # Loop through every node, in our data "n" will be the name of the person
#     print(n, G.nodes[n]['birth_year']) # Access every node by its name, and then by the attribute "birth_year"

In [24]:
# nx.write_gexf(G, 'all_data_e.gexf')


<img src="quakers_dynamic.png">

<img src="screenshot_hod.png">

<img src="screenshot_hodgkin.png">