# Uni structure cleanup #

In [2]:
import pandas as pd
import json
import pickle

In [3]:
# Load the three inputs used throughout the notebook:
#   org_hierarchy - parent/child organisation code pairs
#                   (columns PARENT_ORG_CODE, CHILD_ORG_CODE are read below)
#   org_key       - per-organisation details, looked up by ORGANISATION_CODE
#   staff_record  - staff table; its ORGANISATION_CODE cells are tested with
#                   `in` further down
# NOTE(review): read_pickle unpickles the file, which can execute arbitrary
# code - only load pickles from a trusted source.
org_hierarchy = pd.read_csv("org_hierarchy.csv")
org_key = pd.read_csv("org_key.csv")
staff_record = pd.read_pickle("kbs_staff_record.pickle")

In [4]:
def get_org_info(org_id):
    """Look up the descriptive fields of one organisation in `org_key`.

    Parameters
    ----------
    org_id : value compared against the ORGANISATION_CODE column.

    Returns
    -------
    tuple
        (full name, short name, lower-cased organisation type, url).
        The url is "" whenever the URL cell is not a string (e.g. missing).

    Raises
    ------
    IndexError
        If no row of `org_key` matches `org_id`.
    """
    # Only the first matching row is used.
    record = org_key[org_key["ORGANISATION_CODE"] == org_id].iloc[0]
    raw_url = record["URL"]
    url = raw_url if isinstance(raw_url, str) else ""
    return (
        record["FULL_NAME"],
        record["SHORT_NAME"],
        record["ORGANISATION_TYPE"].lower(),
        url,
    )

def get_org_people(org_id):
    """Collect the staff attached to one organisation.

    A row of `staff_record` belongs to the organisation when
    `org_id in row["ORGANISATION_CODE"]` is true.
    NOTE(review): this assumes each ORGANISATION_CODE cell is a container of
    codes; if the cells are plain strings the test is a substring match -
    confirm against the pickled data.

    Parameters
    ----------
    org_id : organisation code to look for.

    Returns
    -------
    tuple
        (people_id, full_names, titles) where
        people_id  - list of index labels of the matching rows,
        full_names - list of (FORENAME, SURNAME) tuples,
        titles     - list of JOB_TITLE values,
        all three in the row order of `staff_record`.
    """
    # Filter once and derive all three lists from the same rows. The previous
    # version re-filtered on the hard-coded code 'FREN' for names and titles,
    # so those two lists ignored `org_id` entirely.
    belongs = staff_record["ORGANISATION_CODE"].apply(lambda codes: org_id in codes)
    members = staff_record[belongs]

    people_id = list(members.index.values)
    full_names = [
        (forename, surname)
        for forename, surname in members[["FORENAME", "SURNAME"]].values
    ]
    titles = list(members["JOB_TITLE"].values)
    return people_id, full_names, titles

In [5]:
# Encode the organisation structure as a list of [parent, child] code pairs.
# The leading ['UNIV', 'UNIV'] self-pair marks 'UNIV' as the root of the tree;
# every row of the hierarchy table contributes one further pair.
hierarchy_pairs = org_hierarchy[["PARENT_ORG_CODE", "CHILD_ORG_CODE"]].values
uni_structure = [['UNIV', 'UNIV']] + [[pair[0], pair[1]] for pair in hierarchy_pairs]

# Every distinct organisation code that appears anywhere in the structure.
all_orgs = list(set(code for edge in uni_structure for code in edge))

In [6]:
# pass 1: create nodes dictionary
# One dict per organisation code, keyed by that code. Each node carries the
# descriptive fields from get_org_info() and the ids of the people attached
# directly to it (first element of get_org_people()).
nodes = {}
for parent_code, child_code in uni_structure:
    # Child first, then parent - same first-seen order as before.
    for code in (child_code, parent_code):
        if code in nodes:
            # A parent shows up once per child; its info is the same every
            # time, so skip the repeated table scans.
            continue
        full_name, short_name, org_type, url = get_org_info(code)
        member_ids = get_org_people(code)[0]
        nodes[code] = {
            'name': code,
            'full_name': full_name,
            'short_name': short_name,
            'url': url,
            'type': org_type,
            'people': member_ids,
        }

In [7]:
# pass 2: create trees and parent-child relations
# The nodes are linked in place, so a second run of this cell would append
# every child again. Clear any links left by an earlier run first; on a fresh
# kernel this loop is a no-op.
for node in nodes.values():
    node.pop('children', None)

forest = []
for parent_code, child_code in uni_structure:
    node = nodes[child_code]

    if child_code == parent_code:
        # a self-pair marks the root of a new tree in the forest
        forest.append(node)
    else:
        # attach the node to its parent, creating the 'children' list on
        # first use
        nodes[parent_code].setdefault('children', []).append(node)

In [8]:
# Serialise the first tree of the forest (nested dicts linked through
# 'children') as JSON.
# NOTE(review): open() does not create directories - confirm that dendrogram/
# exists before this cell runs.
with open('dendrogram/uni_structure.json', 'w') as fp:
    json.dump(forest[0], fp)

In [9]:
# Get the exhaustive list of all people within an administration unit
def get_all_people(node):
    """Return the people of `node` followed by those of all its descendants.

    `node` is a dict with optional "people" (list) and "children" (list of
    nodes) entries. The result is a new list: the node's own "people" list is
    copied, never modified.
    """
    gathered = node.get("people", [])[:]
    if node.get("children") is None:
        # leaf: nothing below this node
        return gathered
    for child in node.get("children", []):
        gathered += get_all_people(child)
    return gathered

In [10]:
# Small hand-made tree used to check the helper functions in this notebook.
test = {
    "name": "0",
    "people": [1,2,3],
    "children": [
        {"name": "0.1", "people": [4,5,6]},
        {"name": "0.2", "people": [7,8], "children":[{"name":"0.2.1", "people":[9,10]}]}
    ]
}
# Parenthesised print works as a statement in Python 2 and as a function in
# Python 3, with the same output for a single argument.
print(get_all_people(test))
print(get_all_people(test["children"][1]))

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
[7, 8, 9, 10]


In [11]:
def get_exhaustive_people(node, exhaustive_people=None):
    """Map every node name in the tree to its full list of people.

    For `node` and each of its descendants, the node's "name" is mapped to
    get_all_people() of that node (own people plus all descendants').

    Parameters
    ----------
    node : dict with optional "name", "people" and "children" entries.
    exhaustive_people : dict or None
        Mapping to fill in; a new dict is created when None is given.

    Returns
    -------
    dict
        The filled mapping (the same object that was passed in, if any).
    """
    collected = {} if exhaustive_people is None else exhaustive_people
    collected[node.get("name")] = get_all_people(node)
    for child in node.get("children", []):
        # the recursive call fills the shared dict in place
        get_exhaustive_people(child, collected)
    return collected

# Check on the toy tree: each node name should map to its own people plus
# those of all its descendants.
get_exhaustive_people(test)

{'0': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
 '0.1': [4, 5, 6],
 '0.2': [7, 8, 9, 10],
 '0.2.1': [9, 10]}

In [12]:
# For the real tree: organisation code -> ids of everyone in that
# organisation or in any unit below it.
ex_ppl = get_exhaustive_people(forest[0])

In [13]:
# Sanity check: the number of organisations reached from the root should equal
# the number of distinct organisation codes. Parenthesised print runs
# unchanged on Python 2 and Python 3.
print(len(ex_ppl))
print(len(all_orgs))

448
448


In [14]:
# Organisation codes that the rest of the notebook treats as faculties.
facs = ["FENG", "FOAT", "FMVS", "FMDY", "FSSL", "FLAW", "FSOC", "FMED", "FSCI", "INST"]
# faculty code -> everyone in that faculty, sub-units included
facs_ppl = dict((fac, ex_ppl[fac]) for fac in facs)

In [15]:
# Persist the faculty -> people mapping.
# Pickle streams are bytes, so the file must be opened in binary mode: text
# mode fails on Python 3 and can corrupt the stream on Windows.
with open("facs_ppl.pickle", 'wb') as pf:
    pickle.dump(facs_ppl, pf)

In [16]:
def get_children(root, node_name, acad=False):
    """Return the names of the direct children of the node called `node_name`.

    The tree under `root` is searched depth-first for a node whose "name"
    equals `node_name`.

    Parameters
    ----------
    root : dict with optional "name", "type" and "children" entries.
    node_name : name of the node whose children are wanted.
    acad : bool
        When true, additionally print (as `"NAME",`) every child whose "type"
        contains "office". The returned list is the same either way.

    Returns
    -------
    list
        Child names, or [] when the node has no children or is not found.
        (The not-found case used to fall through and return None, which broke
        callers that do `result += get_children(...)`.)
    """
    if root.get("name", "") == node_name:
        names = []
        for child in root.get("children", []):
            names.append(child.get("name", ""))
            # `or ""` keeps nodes without a "type" from raising TypeError.
            if acad and "office" in (child.get("type") or ""):
                # Parenthesised print is valid on Python 2 and Python 3.
                print("\"%s\"," % child.get("name"))
        return names

    for child in root.get("children", []):
        found = get_children(child, node_name, acad)
        if found:
            return found
    return []

# Quick check on the toy tree: the direct children of node "0.2".
get_children(test, "0.2")

['0.2.1']

In [17]:
# Direct children of every faculty node, in faculty order.
schools = []
for fac in facs:
    schools += get_children(forest[0], fac)

# school code -> everyone in that school, sub-units included
schls_ppl = dict((school, ex_ppl[school]) for school in schools)

# Pickle streams are bytes, so the file must be opened in binary mode: text
# mode fails on Python 3 and can corrupt the stream on Windows.
with open("schls_ppl.pickle", 'wb') as pf:
    pickle.dump(schls_ppl, pf)

In [18]:
# faculty -> names of its direct children.
# NOTE(review): the original comment said "discard non-acad offices", but
# get_children(acad=True) returns every child and only prints the ones whose
# type contains "office" - confirm whether filtering was intended.
fac_school = {}
for fac in facs:
    fac_school[fac] = get_children(forest[0], fac, acad=True)

# Pickle streams are bytes, so the file must be opened in binary mode: text
# mode fails on Python 3 and can corrupt the stream on Windows.
with open("fac_schls.pickle", 'wb') as pf:
    pickle.dump(fac_school, pf)

"ENGF",
"ARTF",
"MVSF",
"MDYF",
"SSLF",
"LAWF",
"SOCF",
"MEDF",
"SCIF",


In [19]:
# len(org_hierarchy["PARENT_ORG_CODE"].values)
# len(list(set(org_hierarchy["PARENT_ORG_CODE"].values)))

# staff_record[staff_record["ORGANISATION_CODE"].apply(lambda x: "UNIV" in x)].index.values

# Back to the tree #

In [21]:
org_key  # NOTE(review): no visible effect - only the last expression of a cell is displayed
# Save the organisation key table as a pickle file.
org_key.to_pickle("org_key_pickle.pickle")
# staff_record
# I need: id, name, parent, description

In [10]:
# Hierarchy rows whose parent is "UNIV", i.e. the units directly under the root.
org_hierarchy[org_hierarchy["PARENT_ORG_CODE"] == "UNIV"]

Unnamed: 0,PARENT_ORG_CODE,CHILD_ORG_CODE
347,UNIV,SUPP
355,UNIV,ACAD
356,UNIV,CORP


In [23]:
# forest[0]
f = {}      # node name -> numeric id
start = 1   # next id to hand out


def assignid(d):
    """Give every node in the tree under `d` a numeric id, recorded in `f`.

    Ids come from the module-level counter `start`. Within one call the node
    itself is numbered first, then all of its direct children, and only then
    does the function recurse into each child. A name that already has an id
    keeps it.
    """
    global start, f

    def claim(name):
        # hand out the next id unless this name already has one
        global start
        if name not in f:
            f[name] = start
            start += 1

    claim(d["name"])
    offspring = d.get("children", [])
    for child in offspring:
        claim(child["name"])
    for child in offspring:
        assignid(child)
# Number every organisation in the full tree, starting from its root node.
assignid(forest[0])

In [25]:
# Number of organisations that received an id.
len(f)

448

In [26]:
# The root is numbered first by assignid, so its id should be 1.
f["UNIV"]

1

In [48]:
# Flatten the hierarchy: one record per parent/child row, holding the numeric
# ids of parent and child, the child's code and a "TYPE: Full name"
# description taken from org_key.
out = []
for _, edge in org_hierarchy.iterrows():
    child_code = edge["CHILD_ORG_CODE"]
    record = {
        "parent": f[edge["PARENT_ORG_CODE"]],
        "id": f[child_code],
        "name": child_code,
    }
    details = org_key[org_key["ORGANISATION_CODE"] == child_code]
    record["description"] = (
        details["ORGANISATION_TYPE"].values[0] + ": " + details["FULL_NAME"].values[0]
    )
    out.append(record)

In [49]:
# Inspect the generated records (parent, id, name, description per hierarchy row).
out

[{'description': 'ADMIN UNIT: Senior Common Room',
  'id': 40,
  'name': 'SENR',
  'parent': 38},
 {'description': 'ADMIN UNIT: Management Information Services',
  'id': 152,
  'name': 'MISD',
  'parent': 151},
 {'description': 'ADMIN UNIT: Disability Services',
  'id': 178,
  'name': 'ADSI',
  'parent': 21},
 {'description': 'ADMIN UNIT: Langford Club',
  'id': 41,
  'name': 'LCLB',
  'parent': 38},
 {'description': 'ADMIN SUBUNIT: Overseas Liaison Office',
  'id': 174,
  'name': 'OSLO',
  'parent': 168},
 {'description': 'ADMIN UNIT: Print Services',
  'id': 142,
  'name': 'PRIN',
  'parent': 122},
 {'description': 'ADMIN SUBUNIT: Stationery Office',
  'id': 125,
  'name': 'STAT',
  'parent': 114},
 {'description': 'ACADEMIC OFFICE: Faculty of Medicine',
  'id': 394,
  'name': 'MEDF',
  'parent': 201},
 {'description': 'ADMIN UNIT: Public Relations Office',
  'id': 162,
  'name': 'INFO',
  'parent': 19},
 {'description': 'ACADEMIC SCHOOL: School of Experimental Psychology',
  'id': 4

## Academic units ##

In [59]:
# forest[0]
# Reset the id registry so the academic subtree is numbered from 1 again.
# `assignid` is already defined in the "Back to the tree" section above. It is
# reused here instead of being redefined word for word, since it works on the
# module-level `f` and `start` that are reset below.
f = {}
start = 1

# Locate the academic branch directly under the root.
for acad_root in forest[0]["children"]:
    if acad_root["name"] == "ACAD":
        break
else:
    # Without this guard a missing ACAD node would go unnoticed and the last
    # child visited would be numbered instead.
    raise ValueError("no 'ACAD' node found directly under the root organisation")

# Number only the organisations inside the academic branch.
assignid(acad_root)

# One record per hierarchy row whose child lies in the academic branch. The
# ACAD row itself is skipped because its parent (UNIV) has no id in `f`.
out = []
for _, edge in org_hierarchy.iterrows():
    child_code = edge["CHILD_ORG_CODE"]
    parent_code = edge["PARENT_ORG_CODE"]
    if child_code not in f:
        continue
    if parent_code == "UNIV":
        continue
    record = {
        "parent": f[parent_code],
        "id": f[child_code],
        "name": child_code,
    }
    details = org_key[org_key["ORGANISATION_CODE"] == child_code]
    record["description"] = (
        details["ORGANISATION_TYPE"].values[0] + ": " + details["FULL_NAME"].values[0]
    )
    out.append(record)

In [60]:
# Inspect the records generated for the academic subtree.
out

[{'description': 'ACADEMIC OFFICE: Faculty of Medicine',
  'id': 202,
  'name': 'MEDF',
  'parent': 9},
 {'description': 'ACADEMIC SCHOOL: School of Experimental Psychology',
  'id': 215,
  'name': 'PSYC',
  'parent': 10},
 {'description': 'ACADEMIC UNIT: Veterinary General',
  'id': 89,
  'name': 'VGEN',
  'parent': 82},
 {'description': 'ACADEMIC UNIT: Oral Medicine, Pathology and Microbiology',
  'id': 98,
  'name': 'OMED',
  'parent': 83},
 {'description': 'ACADEMIC UNIT: Interface Analysis Centre',
  'id': 240,
  'name': 'IFAC',
  'parent': 223},
 {'description': 'ACADEMIC SCHOOL: School of Geographical Sciences',
  'id': 216,
  'name': 'GEOG',
  'parent': 10},
 {'description': 'ACADEMIC SCHOOL: School of Earth Sciences',
  'id': 217,
  'name': 'GELY',
  'parent': 10},
 {'description': 'ACADEMIC DEPARTMENT: Department of English',
  'id': 58,
  'name': 'ENGL',
  'parent': 34},
 {'description': 'ACADEMIC SCHOOL: School for Policy Studies',
  'id': 155,
  'name': 'SPOL',
  'parent':