In [1]:
import json
import networkx as nx
import pandas as pd
import numpy as np
import os


In [2]:
# Load the act records, keyed by file name (e.g. 'A-1.md').
# JSON is UTF-8 by specification, so the encoding is pinned here instead of
# relying on the platform's locale default; titles such as
# 'Mi’kmaq Education Act' contain non-ASCII characters.
with open('test.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

In [3]:
# Directory that receives the exported graph file. The original hard-coded
# location remains the default; set the GITLAW_OUTPUT_DIR environment variable
# to redirect the export without editing the notebook.
output_dir = os.environ.get(
    "GITLAW_OUTPUT_DIR",
    "/Users/christopherallison/Documents/Coding/Gephi/Gitlaw/",
)

In [4]:
# Reverse lookup from act title to its key in `data`. Records that share a
# title overwrite one another, so the last record seen wins.
act_names = {
    record.get('title'): file_key
    for file_key, record in data.items()
}

In [5]:
# Show the title -> file-key lookup. The '' key in the output comes from a
# record whose title is an empty string; any such records share that single
# entry (the last one seen wins).
act_names


{'': 'B-3.6.md',
 'Access to Information Act': 'A-1.md',
 'Administrative Tribunals Support Service of Canada Act': 'A-1.5.md',
 'Advance Payments for Crops Act': 'C-49.md',
 'Aeronautics Act': 'A-2.md',
 'Agreement on Internal Trade Implementation Act': 'A-2.4.md',
 'Agricultural Marketing Programs Act': 'A-3.7.md',
 'Agricultural Products Cooperative Marketing Act': 'A-5.md',
 'Agriculture and Agri-Food Administrative Monetary Penalties Act': 'A-8.8.md',
 'Air Canada Public Participation Act': 'A-10.1.md',
 'Air Travellers Security Charge Act': 'A-10.5.md',
 'Airport Transfer (Miscellaneous Matters) Act': 'A-10.4.md',
 'Alternative Fuels Act': 'A-10.7.md',
 'An Act to amend and consolidate the Acts relating to the office of Port Warden for the Harbour of Montreal': 'O-2.6.md',
 'Animal Pedigree Act': 'A-11.2.md',
 'Antarctic Environmental Protection Act': 'A-11.44.md',
 'Anti-Personnel Mines Convention Implementation Act': 'A-11.5.md',
 'Anti-terrorism Act': 'A-11.7.md',
 'Apprentice

In [6]:
# Define links between acts
#
# edges:      one [source title, link target] pair per link occurrence
#             (duplicates are kept on purpose so they can be tallied).
# edge_count: number of occurrences of each pair, keyed "source*target".

edge_count = {}
edges = []

for k, v in data.items():
    # A record with no 'links' entry (or with links set to None) contributes
    # no edges instead of raising TypeError when iterated.
    for link in v.get('links') or []:
        edges.append([v.get('title'), link])

for source, target in edges:
    # Build the composite key once. The "source*target" format is also used
    # when edge_count is read back while building the graph, so it must stay
    # exactly as it is.
    pair_key = "{}*{}".format(source, target)
    edge_count[pair_key] = edge_count.get(pair_key, 0) + 1

In [7]:
# Show the tallies: each key is "source title*link target" and each value is
# the number of times that link occurs in `edges`.
edge_count

{'Mi’kmaq Education Act*Indian Act': 7,
 'Labrador Inuit Land Claims Agreement Act*Labrador Inuit Land Claims Agreement Act': 2,
 'Fertilizers Act*Canadian Food Inspection Agency Act': 3,
 'Proceeds of Crime (Money Laundering) and Terrorist Financing Act*Seized Property Management Act': 3,
 'Merchant Seamen Compensation Act*Marine Liability Act': 1,
 'Visiting Forces Act*Corrections and Conditional Release Act': 1,
 'Hazardous Materials Information Review Act*Public Service Superannuation Act': 1,
 'Canadian Security Intelligence Service Act*Statutory Instruments Act': 1,
 'Garnishment, Attachment and Pension Diversion Act*Special Retirement Arrangements Act': 1,
 'Canada National Parks Act*Criminal Code': 8,
 'Federal Courts Act*Canada Deposit Insurance Corporation Act': 1,
 'Yukon Act*Western Arctic (Inuvialuit) Claims Settlement Act': 1,
 'Canada Evidence Act*Canada Evidence Act': 1,
 'Telecommunications Act*Canada Elections Act': 4,
 'Protecting Air Service Act*Canada Labour Code':

In [None]:
# Some additional work to get the Criminal Code from the Justice Site.

In [18]:
# Full-text page of the Criminal Code (act C-46) on laws-lois.justice.gc.ca;
# this is the URL handed to parse_act.
cc_url = 'http://laws-lois.justice.gc.ca/eng/acts/C-46/FullText.html'

In [20]:
from bs4 import BeautifulSoup
import requests

In [22]:
def parse_act(url, act='C-46.md', key_offset=52, timeout=30):
    """Fetch an act's page and merge its outgoing links into the global ``data``.

    Every anchor whose text contains 'Act' or 'Code' is treated as a reference
    to another act. For each such anchor:

    * its text is appended to ``data[act]['links']`` (the record for ``act``
      is created the first time it is needed), and
    * the record of the referenced act, keyed by ``href[key_offset:]``, is
      created if it is missing and its 'title' is set to the anchor text.

    Parameters
    ----------
    url : str
        Address of the page to download.
    act : str, optional
        Key in ``data`` of the act being parsed. Defaults to 'C-46.md', the
        value that was previously hard-coded.
    key_offset : int, optional
        Number of leading characters stripped from an anchor's href to obtain
        the key of the referenced act. Defaults to the previously hard-coded
        52. NOTE(review): which URL prefix is 52 characters long is not
        visible here -- confirm it matches the hrefs found on the page.
    timeout : float, optional
        Seconds to wait for the server before giving up.

    Returns
    -------
    None
        ``data`` is modified in place.

    Raises
    ------
    requests.RequestException
        On connection problems, timeouts, or an HTTP error status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly rather than parsing an error page as if it were the act.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    for link in soup.find_all('a'):
        text = link.text
        if 'Act' not in text and 'Code' not in text:
            continue

        href = link.get('href')

        # Create the parsed act's own record lazily, then record the link.
        # Previously the link that triggered the creation was dropped, the
        # record's href was taken from that link instead of the page itself,
        # and a record lacking 'links' was wiped wholesale.
        # NOTE(review): a new record still gets an empty title and
        # 'link_count' is never updated here (both as before) -- confirm
        # whether they should be filled in.
        if act not in data:
            data[act] = {
                'href': url,
                'title': "",
                'links': [],
                'link_count': 0,
            }
        data[act].setdefault('links', []).append(text)

        # Anchors with no href, or with an href too short to contain a key,
        # used to create or overwrite a bogus '' record; skip them.
        act_key = str(href)[key_offset:] if href else ''
        if not act_key:
            continue

        if act_key in data:
            data[act_key]['title'] = text
        else:
            data[act_key] = {
                'href': href,
                'title': text,
                'links': [],
                'link_count': 0,
            }


In [23]:
# Download the page at cc_url and merge its links into `data`
# (requires network access).
parse_act(cc_url)

In [25]:
# Directed graph of references between acts.
G = nx.DiGraph()

# One node per record, named by its title (the file key is the fallback only
# when the record has no 'title' entry at all). The record's href and file key
# are stored as node attributes.
for file_key, record in data.items():
    node_name = record.get('title', file_key)
    node_href = record.get('href', 'NA')
    G.add_node(node_name, href=node_href, code=file_key)

# One edge per pair in `edges`, weighted by that pair's tally in `edge_count`.
# NOTE(review): `edges` and `edge_count` are computed in an earlier cell, so
# links added to `data` after that cell ran are not reflected here -- confirm
# this is intended.
for source, target in edges:
    pair_key = "{}*{}".format(source, target)
    G.add_edge(source, target, weight=edge_count.get(pair_key))

In [26]:
# Write the graph as a GEXF file inside the configured output directory.
gexf_path = os.path.join(output_dir, 'gitlaw_v2.gexf')
nx.write_gexf(G, gexf_path)