<a href="https://colab.research.google.com/github/PranavBansal04/UK-Charities-Network-Analysis/blob/master/UK_Boards_BFS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Building Network using BFS

In this notebook a lot of code snippets for importing and processing the data have been directly used from the previous notebook and only minimal required actions have been performed to make the data ready for building the network. This keeps things clean.

In [1]:
# Mount Google Drive into the Colab runtime so the dataset files stored on Drive can be read.
# The loading cells open paths relative to the working directory ('drive/My Drive/...'),
# which presumably resolves under /content on Colab -- confirm if run elsewhere.
from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [2]:
import json
import pandas as pd
import networkx as nx
# `numpy.core` is a private module path and newer NumPy releases no longer expose
# the `NaN` alias there. Fall back to the public `numpy.nan` so the cell does not
# fail at import time; the name `NaN` stays bound either way.
try:
  from numpy.core.numeric import NaN
except ImportError:
  from numpy import nan as NaN

# Load the trustee extract: a JSON array with one record per (trustee, organisation) pair.
# "utf-8-sig" strips a leading byte-order mark if the file has one.
with open('drive/My Drive/UK_Data/json/publicextract.charity_trustee.json', encoding="utf-8-sig") as f:
  data = json.load(f)
# Sanity check: number of records and the shape of a single record
print(len(data))
print(json.dumps(data[0],indent=4))

942096
{
    "date_of_extract": "2022-01-11T00:00:00",
    "organisation_number": 521013,
    "registered_charity_number": 521013,
    "linked_charity_number": 0,
    "trustee_id": 23760,
    "trustee_name": "AUGHTON PARISH COUNCIL",
    "trustee_is_chair": false,
    "individual_or_organisation": "O",
    "trustee_date_of_appointment": null
}


In [3]:
# Index the flat trustee records two ways:
#   members: trustee_id          -> list of that trustee's records (one per board seat)
#   orgs:    organisation_number -> list of the trustee records on that organisation's board
members, orgs = {}, {}

for trustee in data:
    tid = trustee['trustee_id']
    oid = trustee['organisation_number']
    # setdefault creates the empty list on first sight of a key, then we append to it
    members.setdefault(tid, []).append(trustee)
    orgs.setdefault(oid, []).append(trustee)

print("Total Trustees : ",len(members))
print("Total Organizations: ",len(orgs))

Total Trustees :  851555
Total Organizations:  170190


In [4]:
# Load the charity extract: a JSON array with one record per organisation
# (name, registration status, latest income/expenditure, contact details, ...).
# "utf-8-sig" strips a leading byte-order mark if the file has one.
with open('drive/My Drive/UK_Data/json/publicextract.charity.json', encoding="utf-8-sig") as f:
  charity_data = json.load(f)

# Sanity check: number of records and the shape of a single record
print(len(charity_data))
print(json.dumps(charity_data[3],indent=4))

376413
{
    "date_of_extract": "2022-01-11T00:00:00",
    "organisation_number": 4,
    "registered_charity_number": 200028,
    "linked_charity_number": 2,
    "charity_name": "TOWN LANDS CHARITY FOR THE CHURCH",
    "charity_type": null,
    "charity_registration_status": "Removed",
    "date_of_registration": "1961-10-19T00:00:00",
    "date_of_removal": "1997-09-17T00:00:00",
    "charity_reporting_status": null,
    "latest_acc_fin_period_start_date": null,
    "latest_acc_fin_period_end_date": null,
    "latest_income": null,
    "latest_expenditure": null,
    "charity_contact_address1": null,
    "charity_contact_address2": null,
    "charity_contact_address3": null,
    "charity_contact_address4": null,
    "charity_contact_address5": null,
    "charity_contact_postcode": null,
    "charity_contact_phone": null,
    "charity_contact_email": null,
    "charity_contact_web": null,
    "charity_company_registration_number": null,
    "charity_insolvent": false,
    "charity_in_a

In [5]:
# Index the charity records by organisation number.
# Each value is the complete record (not just the name), so fields such as
# 'charity_name', 'latest_income' or 'latest_expenditure' can be looked up per organisation.
# If an organisation number occurs more than once, the last record wins.
charity = {ch['organisation_number']: ch for ch in charity_data}

# Helper Functions
Now that we have all the data, we can write some functions that will help in building the network from the data.

In [6]:
def trustees_in_charities(orgs_list,threshold):
  """
  Collects the trustees that sit on the boards of the given organisations,
  keeping only those with at least `threshold` board seats.

  Arguments:
    orgs_list: iterable of org ids
    threshold: an integer value specifying the least number of boards a trustee must sit on.
               Counted over the entire dataset (number of records in `members` for that
               trustee), not just over the organisations passed in.

  Returns:
    A list of unique trustee ids (in no particular order). Organisations that have
    no trustee records in `orgs` contribute nothing instead of raising KeyError.

  """

  trustee_ids = set() # a set, because different organisations can share trustees

  for org_num in orgs_list: # iterate over the given org ids
    # Not every organisation in the charity extract has trustee records,
    # so a missing id is treated as an empty board.
    for trustee in orgs.get(org_num, ()):
      tid = trustee["trustee_id"]
      # only keep trustees that sit on boards >= threshold
      if len(members[tid]) >= threshold:
        trustee_ids.add(tid)

  return list(trustee_ids)


#===========================================================================================================================


def charities_of_trustees(trustee_list):
  """
  Collects every organisation on whose board at least one of the given trustees sits.

  Arguments:
    trustee_list: a list of trustee ids (each must be a key of `members`)

  Returns:
    A list of unique organisation ids, in no particular order
  """

  # One record per board seat; a set removes organisations shared by several trustees.
  org_ids = {
    record['organisation_number']
    for tid in trustee_list
    for record in members[tid]
  }
  return list(org_ids)


#===========================================================================================================================


def bfsGraph(org_num,depth=1,threshold=1):
  """
  Collects the organisations and trustees reachable from a base organisation
  with a breadth-first search over the organisation <-> trustee links.

  Arguments:
    org_num: id of base organisation, starting node of the network, starting point for BFS
    depth: an integer value specifying the depth for BFS (how deep it needs to search for connected orgs and trustees)
           - a depth of 1 means that we only want the trustees that are part of the given organisation number
           - every further level adds the other boards of the trustees found so far, and the trustees of those boards
           - a depth of 0 or less returns two empty lists
           - default value is 1
    threshold: an integer value specifying the least number of boards a trustee must sit on
               (passed through to trustees_in_charities)
               - default value is 1

  Returns:
    list 1: a list of all the organisations that are found by BFS and are a part of the network
    list 2: a list of all the trustees that are connected to the base organisation directly or indirectly
    Both lists contain unique ids in no particular order.
  """

  visited_orgs = set()     # every organisation reached so far
  visited_trustees = set() # every trustee reached so far
  frontier_orgs = {org_num} # organisations discovered at the current level, not expanded yet

  for level in range(depth):
    visited_orgs |= frontier_orgs

    # Only the newly discovered organisations are expanded. Boards seen at an earlier
    # level have already contributed their trustees, so the result is unchanged.
    frontier_trustees = set(trustees_in_charities(list(frontier_orgs),threshold)) - visited_trustees
    visited_trustees |= frontier_trustees

    # The last requested level is complete; do not compute a further level
    # that would only be thrown away.
    if level == depth - 1:
      break

    frontier_orgs = set(charities_of_trustees(list(frontier_trustees))) - visited_orgs
    if not frontier_orgs:
      break # nothing new is reachable, deeper levels cannot add anything

  # return the global org and trustee lists
  return list(visited_orgs), list(visited_trustees)

# Build Graph Function
This function will take the lists generated by bfsGraph function as input and build the network using networkx

In [48]:
def buildGraph(org_list,trustee_list,file,num_boards=1):
  """
  Builds the network using networkx and exports all data into a gexf file
  The network contains 2 types of nodes - organisation nodes and trustee nodes
  An edge connects a trustee to each organisation of the network on whose board the trustee sits
  a gexf file can be imported directly into network visualization softwares such as Gephi

  Arguments:
    org_list: list of orgs in the network, will be added as nodes (each id must be a key of `charity`)
    trustee_list: list of trustees in the network, will be added as nodes (each id must be a key of `members`)
    file: name of the output file
    num_boards: an integer specifying the least number of boards a trustee must sit on
                (only in context with the present network and not the entire dataset)
                - trustees below this number are removed from the graph
                - default value is 1

  Returns:
    G: a networkx MultiGraph object that stores all the information of the generated graph
    - an output gexf file is generated with the name provided as input that stores all the graph data

  Node attributes written: "color", "label", "size" for every node, and
  "Total Gross Income" / "Total Gross Expenditure" for organisation nodes.
  """

  G = nx.MultiGraph() # initialize the graph object


  # Node attributes, each keyed by node id

  color_map = {} # stores the color of node, org - '#e636e1', trustee - '#efe0cb'
  label_map = {} # stores the label for the node, name of charity or trustee

  expenditure_map = {} # stores the latest expenditure of the organisation
  income_map = {} # stores the latest income of the organisation

  size_map ={} # stores the size for the node (useful for pyvis visualization)


  # add org nodes to the network
  for org_id in org_list:
    G.add_node(org_id)
    record = charity[org_id]
    color_map[org_id] = '#e636e1' # colour used for charity nodes
    label_map[org_id] = record['charity_name'] # label attribute, name of charity
    size_map[org_id] = 15 # node size

    # Income and expenditure are missing (absent or null) for some organisations.
    # If either one is missing, both are recorded as 0.
    income = record.get('latest_income')
    expenditure = record.get('latest_expenditure')
    if income is None or expenditure is None:
      income, expenditure = 0, 0
    income_map[org_id] = income
    expenditure_map[org_id] = expenditure

  # add trustee nodes to the network
  for trustee_id in trustee_list:
    G.add_node(trustee_id)

    color_map[trustee_id] = '#efe0cb' # colour used for trustee nodes
    label_map[trustee_id] = members[trustee_id][0]['trustee_name'] # label attribute, name of trustee
    size_map[trustee_id] = 7 # node size

  # NOTE(review): organisation ids and trustee ids are used as raw node ids and so share
  # one namespace. Presumably the two id ranges never overlap -- confirm against the data.
  # Board membership is tested against the organisation ids only, so a trustee node
  # can never be mistaken for a board.
  org_ids = set(org_list)

  # add edges to the network
  for trustee_id in trustee_list:
    if trustee_id not in G:
      continue # already removed below (the id appeared more than once in trustee_list)

    # boards of this trustee that are part of the present network
    boards = [
      seat['organisation_number']
      for seat in members[trustee_id]
      if seat['organisation_number'] in org_ids
    ]

    # check if the trustee sits on boards >= num_boards
    if len(boards) >= num_boards:
      G.add_edges_from((trustee_id, org_id) for org_id in boards)
    # if trustee sits on boards less than num_boards then remove the trustee node from the graph
    else:
      G.remove_node(trustee_id)

  # set all node attributes (entries for nodes no longer in G are ignored by networkx)
  nx.set_node_attributes(G, color_map, name="color")
  nx.set_node_attributes(G, label_map, name="label")
  nx.set_node_attributes(G, size_map, name="size")
  nx.set_node_attributes(G, income_map, name="Total Gross Income")
  nx.set_node_attributes(G, expenditure_map, name="Total Gross Expenditure")

  # export the data into a gexf file
  nx.write_gexf(G, file,version='1.2draft')

  # uncomment the following line if you want to export the data as graphml file too
  # nx.write_graphml_lxml(G, "graph.graphml")

  # return the networkx graph object
  return G



*   bfsGraph takes an organisation number, a depth and a threshold value as arguments
*   threshold is the least number of boards a trustee must sit on, counted over the entire dataset; if no value is specified it defaults to 1
*   buildGraph takes an organisation list, a trustee list, an output file name and num_boards as arguments
*   num_boards is the least number of boards a trustee must sit on, counted only within the network being built; it defaults to 1



## Generating the network with the help of above functions
- bfsGraph(org_num,depth,threshold)
- buildGraph(org_list,trustee_list,fileName,num_boards)

In [8]:
# pyvis is an interactive network visualization library
# It is installed at runtime here; the run recorded below resolved to pyvis 0.2.1.
!pip install pyvis

Collecting pyvis
  Downloading pyvis-0.2.1.tar.gz (21 kB)
Collecting jsonpickle>=1.4.1
  Downloading jsonpickle-2.1.0-py2.py3-none-any.whl (38 kB)
Building wheels for collected packages: pyvis
  Building wheel for pyvis (setup.py) ... [?25l[?25hdone
  Created wheel for pyvis: filename=pyvis-0.2.1-py3-none-any.whl size=23688 sha256=93331dbc1ff830885a4eb1de99d231eddf2fdcea70e7ef42871eec4ba6e39efe
  Stored in directory: /root/.cache/pip/wheels/2a/8f/04/6340d46afc74f59cc857a594ca1a2a14a1f4cbd4fd6c2e9306
Successfully built pyvis
Installing collected packages: jsonpickle, pyvis
Successfully installed jsonpickle-2.1.0 pyvis-0.2.1


In [55]:
# ROTHSCHILD FOUNDATION (organisation number 5012258)
# BFS to depth 2, keeping only trustees with at least 2 board seats in the whole dataset (threshold = 2)
a,b = bfsGraph(5012258,2,2)
# The returned graph is kept in G because the pyvis cell that follows visualises it
G = buildGraph(a,b,"roths_depth2_thres2.gexf")

In [56]:
from pyvis.network import Network
# `display` and `HTML` belong to the public IPython.display module;
# importing them from IPython.core.display is deprecated.
from IPython.display import display, HTML

# Render the most recently built networkx graph `G` as an interactive pyvis network
g = Network(height=600,width=800,notebook=True,bgcolor='#222222',font_color="#ffffff")
g.toggle_hide_edges_on_drag(False)
# g.show_buttons(filter_=['physics'])
# g.show_buttons()
g.force_atlas_2based()
g.from_nx(G)
g.show("ex.html")
# Load the generated page from disk explicitly, instead of relying on HTML()
# guessing that the string is a file path
display(HTML(filename='ex.html'))

In [None]:
# ROTHSCHILD FOUNDATION -> Depth = 3, num_boards = 2
# threshold stays at its default of 1; buildGraph instead drops trustees
# with fewer than 2 boards inside this network
boards_list,members_list = bfsGraph(5012258,3)
buildGraph(boards_list,members_list,"roths_depth3_b2.gexf",2)

In [None]:
# WILTSHIRE ARCHAEOLOGICAL AND NATURAL HISTORY SOCIETY
# Two orgs with the same name and the same trustees but different org no. and registered charity number
# total income and expenditure are also different
# org1 - org no. - 309534
#        charity no. - 309534
#        link - https://register-of-charities.charitycommission.gov.uk/charity-search/-/charity-details/309534
# org2 - org no. -  3970185
#        charity no. - 1080096
#        link - https://register-of-charities.charitycommission.gov.uk/charity-search/-/charity-details/3970185

# number of trustee records of each organisation
print(len(orgs[3970185]))
print(len(orgs[309534]))
# `charity` maps an organisation number to its full record, so select the name explicitly
print(charity[3970185]['charity_name'])
print(charity[309534]['charity_name'])

13
13
WILTSHIRE ARCHAEOLOGICAL AND NATURAL HISTORY SOCIETY
WILTSHIRE ARCHAEOLOGICAL AND NATURAL HISTORY SOCIETY


In [None]:
# ROYAL ACADEMY OF ARTS (organisation number 4040548) -> Depth = 2, num_boards = 2
# default threshold of 1 for the BFS; trustees with fewer than 2 boards inside the network are dropped
boards_list,members_list = bfsGraph(4040548,2)
buildGraph(boards_list,members_list,"royal_academy_depth2_b2.gexf",2)

In [None]:
# ROYAL ACADEMY OF ARTS -> Depth = 2, threshold = 2
# only trustees with at least 2 board seats in the whole dataset are followed by the BFS
a,b = bfsGraph(4040548,2,2)
buildGraph(a,b,"royal_academy_depth2_thres2.gexf")

In [None]:
# ROYAL ACADEMY OF ARTS -> Depth = 3, num_boards = 2
# default threshold of 1 for the BFS; trustees with fewer than 2 boards inside the network are dropped
boards_list,members_list = bfsGraph(4040548,3)
buildGraph(boards_list,members_list,"royal_academy_depth3_b2.gexf",2)

In [None]:
# Some analysis: compare the boards of organisations 306972 and 5048464

# `charity` maps an organisation number to its full record, so select the name explicitly;
# the second value is the number of trustee records of that organisation
print(charity[306972]['charity_name'], len(orgs[306972]))
print(charity[5048464]['charity_name'], len(orgs[5048464]))

# trustee ids on each board
a = [record["trustee_id"] for record in orgs[306972]]
b = [record["trustee_id"] for record in orgs[5048464]]

print(sorted(a))
print(sorted(b))
# trustees that sit on only one of the two boards
print(set(a)-set(b))
print(set(b)-set(a))

#Both of these charities have 9 board members in common

READ MEMORIAL GROUND 11
THE PREBENDAL SCHOOL 10
[11249427, 11641760, 11967954, 11967955, 11967956, 11967968, 11967969, 12182006, 12399481, 12401234, 12444568]
[2576867, 11641760, 11967954, 11967955, 11967956, 11967968, 11967969, 12182006, 12399481, 12444568]
{12401234, 11249427}
{2576867}


In [None]:
# ROYAL SOCIETY - https://register-of-charities.charitycommission.gov.uk/charity-search/-/charity-details/207043
# Depth = 2 with the default threshold and num_boards (both 1)
a,b = bfsGraph(207043,2)
buildGraph(a,b,"royal_society_depth2.gexf")

In [None]:
# ROYAL SOCIETY -> Depth = 2, threshold = 2
# only trustees with at least 2 board seats in the whole dataset are followed by the BFS
a,b = bfsGraph(207043,2,2)
buildGraph(a,b,"royal_society_depth2_thres2.gexf")

In [None]:
# ROYAL SOCIETY -> Depth = 2, num_boards = 2
# default threshold of 1 for the BFS; trustees with fewer than 2 boards inside the network are dropped
a,b = bfsGraph(207043,2)
buildGraph(a,b,"royal_society_depth2_b2.gexf",2)

In [None]:
# CANAL & RIVER TRUST (organisation number 5027494) - at the top of arts/science according to total gross expenditure
# Depth = 2 with the default threshold and num_boards (both 1)
a,b = bfsGraph(5027494,2)
buildGraph(a,b,"canal&river_trust_depth2.gexf")

In [None]:
# CANAL & RIVER TRUST -> Depth = 2, threshold = 2
# only trustees with at least 2 board seats in the whole dataset are followed by the BFS
a,b = bfsGraph(5027494,2,2)
buildGraph(a,b,"canal&river_trust_depth2_thres2.gexf")

In [51]:
# CANAL & RIVER TRUST -> Depth = 3 with the default threshold and num_boards (both 1)
a,b = bfsGraph(5027494,3)
# the returned graph is kept in G so that it can be inspected or visualised afterwards
G = buildGraph(a,b,"canal&river_trust_depth3.gexf")

In [None]:
# CANAL & RIVER TRUST -> Depth = 3, num_boards = 2
# default threshold of 1 for the BFS; trustees with fewer than 2 boards inside the network are dropped
a,b = bfsGraph(5027494,3)
buildGraph(a,b,"canal&river_trust_depth3_b2.gexf",2)

In [None]:
# THE ARTS COUNCIL OF ENGLAND - 1036733 => Organisation with the highest income
# Depth = 3, num_boards = 2 (default threshold of 1 for the BFS)
a,b = bfsGraph(1036733,3)
buildGraph(a,b,"arts_council_depth3_b2.gexf",2)

In [None]:
# UNICEF (organisation number 3957500) -> Depth = 3, num_boards = 1
# num_boards is passed explicitly but equals its default value
a,b = bfsGraph(3957500,3)
buildGraph(a,b,"unicef_depth3_b1.gexf",1)