# Let's go

In [61]:
%matplotlib inline
import pandas
import matplotlib.pyplot as plt

**Setup helper functions and classes**

In [62]:
class Node:
    def __init__(self, id, value):
        self.id = id
        self.value = value
        self.children = []
        self.parent = None


class Hierarchy:
    list = {}

    def __init__(self):
        self.list['ROOT'] = Node('ROOT', 'hierarchy root')


def add_node(hierarchy, node_id, node):
    h = hierarchy
    h.list[node.id] = node
    if node_id in h.list.keys():
        n = h.list[node_id]
        node.parent = n
        n.children.append(node)
    return h


def leaves_of(hierarchy):
    ret = filter(lambda x: x.children == [], hierarchy.list.values())
    return list(ret)


def is_leaf(node_id, hierarchy):
    return hierarchy.list[node_id].children == []

def parents(node_id, hierarchy, levels=None):
    n = hierarchy.list[node_id]
    ret = ()
    while not n.parent == None:
        ret = ret + (n.parent, )
        n = n.parent
    ret = list(reversed(ret))
    if not levels:
        return ret
    else:
        return list(map(ret.__getitem__, levels))


**Read data**

In [63]:
data_frame = pandas.read_csv("/users/simonshapiro/hierarchies/data/NACE2.csv", sep=",")

**Show first few records**

In [64]:
data_frame.head()

Unnamed: 0,Order,Level,Code,Parent,Description,This item includes,This item also includes,Rulings,This item excludes,Reference to ISIC Rev. 4
0,398481,1,A,,"AGRICULTURE, FORESTRY AND FISHING",This section includes the exploitation of vege...,,,,A
1,398482,2,01,A,"Crop and animal production, hunting and relate...","This division includes two basic activities, n...",This division also includes service activities...,,Agricultural activities exclude any subsequent...,01
2,398483,3,01.1,01,Growing of non-perennial crops,This group includes the growing of non-perenni...,,,,011
3,398484,4,01.11,01.1,"Growing of cereals (except rice), leguminous c...",This class includes all forms of growing of ce...,,,"This class excludes:\n- growing of rice, see 0...",0111
4,398485,4,01.12,01.1,Growing of rice,This class includes:\n- growing of rice (inclu...,,,,0112


In [65]:
data_frame.tail()

Unnamed: 0,Order,Level,Code,Parent,Description,This item includes,This item also includes,Rulings,This item excludes,Reference to ISIC Rev. 4
991,399472,4,98.20,98.2,Undifferentiated service-producing activities ...,This class includes the undifferentiated subsi...,,,,9820
992,399473,1,U,,ACTIVITIES OF EXTRATERRITORIAL ORGANISATIONS A...,,,,,U
993,399474,2,99,U,Activities of extraterritorial organisations a...,,,,,99
994,399475,3,99.0,99,Activities of extraterritorial organisations a...,,,,,990
995,399476,4,99.00,99.0,Activities of extraterritorial organisations a...,This class includes:\n- activities of internat...,This class also includes:\n- activities of dip...,,,9900


In [66]:
data_frame.describe()

Unnamed: 0,Order,Level
count,996.0,996.0
mean,398978.5,3.486948
std,287.664735,0.744042
min,398481.0,1.0
25%,398729.75,3.0
50%,398978.5,4.0
75%,399227.25,4.0
max,399476.0,4.0


In [67]:
h_data = data_frame[['Code', 'Parent', 'Description']]

**Build the hierarcy**

In [68]:
h = Hierarchy()
for row in h_data.values:
    node = Node(row[0], row[2])  # Investigate a better approach here
    if pandas.isnull(row[1]): 
        parent = "ROOT"
    else: 
        parent = row[1]
    add_node(node=node, hierarchy=h, node_id=parent, )



In [69]:
for l in leaves_of(h):
    print(l.id, l.value, "-->", l.parent.value) 

01.11 Growing of cereals (except rice), leguminous crops and oil seeds --> Growing of non-perennial crops
01.12 Growing of rice --> Growing of non-perennial crops
01.13 Growing of vegetables and melons, roots and tubers --> Growing of non-perennial crops
01.14 Growing of sugar cane --> Growing of non-perennial crops
01.15 Growing of tobacco --> Growing of non-perennial crops
01.16 Growing of fibre crops --> Growing of non-perennial crops
01.19 Growing of other non-perennial crops --> Growing of non-perennial crops
01.21 Growing of grapes --> Growing of perennial crops
01.22 Growing of tropical and subtropical fruits --> Growing of perennial crops
01.23 Growing of citrus fruits --> Growing of perennial crops
01.24 Growing of pome fruits and stone fruits --> Growing of perennial crops
01.25 Growing of other tree and bush fruits and nuts --> Growing of perennial crops
01.26 Growing of oleaginous fruits --> Growing of perennial crops
01.27 Growing of beverage crops --> Growing of perennial

25.73 Manufacture of tools --> Manufacture of cutlery, tools and general hardware
25.91 Manufacture of steel drums and similar containers --> Manufacture of other fabricated metal products
25.92 Manufacture of light metal packaging  --> Manufacture of other fabricated metal products
25.93 Manufacture of wire products, chain and springs --> Manufacture of other fabricated metal products
25.94 Manufacture of fasteners and screw machine products --> Manufacture of other fabricated metal products
25.99 Manufacture of other fabricated metal products n.e.c. --> Manufacture of other fabricated metal products
26.11 Manufacture of electronic components --> Manufacture of electronic components and boards
26.12 Manufacture of loaded electronic boards --> Manufacture of electronic components and boards
26.20 Manufacture of computers and peripheral equipment --> Manufacture of computers and peripheral equipment
26.30 Manufacture of communication equipment --> Manufacture of communication equipment


In [70]:
list(map(lambda x: (x.id, x.value), h.list["ROOT"].children))

[('A', 'AGRICULTURE, FORESTRY AND FISHING'),
 ('B', 'MINING AND QUARRYING'),
 ('C', 'MANUFACTURING'),
 ('D', 'ELECTRICITY, GAS, STEAM AND AIR CONDITIONING SUPPLY'),
 ('E', 'WATER SUPPLY; SEWERAGE, WASTE MANAGEMENT AND REMEDIATION ACTIVITIES'),
 ('F', 'CONSTRUCTION'),
 ('G', 'WHOLESALE AND RETAIL TRADE; REPAIR OF MOTOR VEHICLES AND MOTORCYCLES'),
 ('H', 'TRANSPORTATION AND STORAGE'),
 ('I', 'ACCOMMODATION AND FOOD SERVICE ACTIVITIES'),
 ('J', 'INFORMATION AND COMMUNICATION'),
 ('K', 'FINANCIAL AND INSURANCE ACTIVITIES'),
 ('L', 'REAL ESTATE ACTIVITIES'),
 ('M', 'PROFESSIONAL, SCIENTIFIC AND TECHNICAL ACTIVITIES'),
 ('N', 'ADMINISTRATIVE AND SUPPORT SERVICE ACTIVITIES'),
 ('O', 'PUBLIC ADMINISTRATION AND DEFENCE; COMPULSORY SOCIAL SECURITY'),
 ('P', 'EDUCATION'),
 ('Q', 'HUMAN HEALTH AND SOCIAL WORK ACTIVITIES'),
 ('R', 'ARTS, ENTERTAINMENT AND RECREATION'),
 ('S', 'OTHER SERVICE ACTIVITIES'),
 ('T',
  'ACTIVITIES OF HOUSEHOLDS AS EMPLOYERS; UNDIFFERENTIATED GOODS- AND SERVICES-PRODUCING

In [81]:
print(h.list["94.91"].value)
list(map(lambda x: x.value,parents("94.91", h, (1, 2, 3))))

Activities of religious organisations


['OTHER SERVICE ACTIVITIES',
 'Activities of membership organisations',
 'Activities of other membership organisations']

**Turn hierarchy into networkx graph**

In [73]:
import networkx as nx
g = nx.Graph()
# add nodes
for n in h.list:
    g.add_node(h.list[n].id)
g.number_of_nodes()

997

In [74]:
len(h.list)

997

In [75]:
len(g.nodes())

997

In [76]:
for n in h.list:
    print(n)
    for c in h.list[n].children:
        g.add_edge(h.list[n].id, c.id)
g.number_of_edges()

ROOT
A
01
01.1
01.11
01.12
01.13
01.14
01.15
01.16
01.19
01.2
01.21
01.22
01.23
01.24
01.25
01.26
01.27
01.28
01.29
01.3
01.30
01.4
01.41
01.42
01.43
01.44
01.45
01.46
01.47
01.49
01.5
01.50
01.6
01.61
01.62
01.63
01.64
01.7
01.70
02
02.1
02.10
02.2
02.20
02.3
02.30
02.4
02.40
03
03.1
03.11
03.12
03.2
03.21
03.22
B
05
05.1
05.10
05.2
05.20
06
06.1
06.10
06.2
06.20
07
07.1
07.10
07.2
07.21
07.29
08
08.1
08.11
08.12
08.9
08.91
08.92
08.93
08.99
09
09.1
09.10
09.9
09.90
C
10
10.1
10.11
10.12
10.13
10.2
10.20
10.3
10.31
10.32
10.39
10.4
10.41
10.42
10.5
10.51
10.52
10.6
10.61
10.62
10.7
10.71
10.72
10.73
10.8
10.81
10.82
10.83
10.84
10.85
10.86
10.89
10.9
10.91
10.92
11
11.0
11.01
11.02
11.03
11.04
11.05
11.06
11.07
12
12.0
12.00
13
13.1
13.10
13.2
13.20
13.3
13.30
13.9
13.91
13.92
13.93
13.94
13.95
13.96
13.99
14
14.1
14.11
14.12
14.13
14.14
14.19
14.2
14.20
14.3
14.31
14.39
15
15.1
15.11
15.12
15.2
15.20
16
16.1
16.10
16.2
16.21
16.22
16.23
16.24
16.29
17
17.1
17.11
17.12
17.2
17.21
17.2

996

In [82]:
import networkx.drawing.nx_pydot as nxu
nxu.write_dot(g, 'herarchy.dot')
ga = nxu.to_agraph(g)
pos=nxu.graphviz_layout(ga, prog='dot')
nx.draw(ga, pos, with_labels=False, arrows=False)


AttributeError: module 'networkx.drawing.nx_pydot' has no attribute 'to_agraph'