In [2]:
import copy
import json
import pickle
import sys
sys.setrecursionlimit(10000)
sys.path.insert(1, "../")
import time
import warnings

import pandas as pd

from ete3 import Tree

from utils.plot import plot_tree

In [36]:
DEFAULT_PARAMS = {
    'n_g_min': 10,
    'alpha': 0.6,
    'rho_f_max': 0.25,
    'rho_g_w': 0.6,
    'major_prop_min': 0.8,
    'minor_show_max': 3,
    'global_prop_indicate': {
        1.0: '*',
        0.9: '++',
        0.8: '+'
    }
}

In [38]:
class Partition:
    def __init__(self, refs, root_node, params=DEFAULT_PARAMS):
        self.refs = {
            'tree': refs['tree'],
            'grp_dist': refs['grp_dist']
        }
        self.actives = {'tree': root_node} ## need to create actives first to call on calc_grp_dist()
        self.actives['grp_dist'] = self.calc_grp_dist()
        self.count = len(root_node.get_leaves())
        self.is_candidate = True
        self.params = params
        self.set_name_partition()

        self.tree_up = root_node.up.name if root_node.up else None
        self.tree_up_dist = root_node.dist if root_node.up else 0


    def validate(self):
        ref_root_node = self.refs['tree'].search_nodes(name=self.actives['tree'].name)[0]
        ref_root_anc_node = ref_root_node.up if not ref_root_node.is_root() else None
        self.actives['tree'] = ref_root_node.detach()
        self.set_name_partition()
        self.update_ref_grp_dist()
        self.is_candidate = False

        ## strip remaining ref_tree of naked internal nodes
        if ref_root_anc_node:
            for leaf in ref_root_anc_node.iter_leaves():
                curr = leaf
                while curr.is_leaf() and not curr.is_root() and not hasattr(curr, 'grp'):
                    prev = curr
                    curr = curr.up
                    prev.detach()


    def is_root(self):
        return self.actives['tree'].is_root()


    def get_leaves(self, grp_name=None, count_only=False, n_g_min_aware=False):
        sf_leaves = sorted(
            list(
                filter(
                    lambda leaf: (not grp_name or leaf.grp == grp_name) and \
                                (not n_g_min_aware or self.refs['grp_dist'][leaf.grp]['count'] >= self.params['n_g_min']), \
                                self.actives['tree'].get_leaves()
                )
            ),
            key=lambda leaf: leaf.depth,
            reverse=True
        )

        return (len(sf_leaves) if count_only else sf_leaves)


    def calc_grp_dist(self):
        grp_dist = {}
        for leaf in self.actives['tree'].iter_leaves():
            grp = leaf.grp
            depth = leaf.depth
            if grp in grp_dist:
                grp_dist[grp]['count'] += 1
                if depth > leaf.depth:
                    grp_dist[grp]['depth'] = leaf.depth
            else:
                grp_dist[grp] = { 'count': 1, 'depth': leaf.depth }

        return grp_dist


    ## get majority grp (for naming)
    def get_majority_grp(self):
        sorted_grp_dist = self.sort_grp_dist(sort_by='size')
        return sorted_grp_dist[0]


    ## get sorted grp dist
    def sort_grp_dist(self, ref=False, sort_by='depth', n_g_min_aware=False):
        grp_dist = self.refs['grp_dist'] if ref else self.actives['grp_dist']
        pre_sort = [
            {
                'name': k,
                'count': v['count'],
                'depth': v['depth']
            } for k, v in grp_dist.items()
        ]

        if n_g_min_aware:
            pre_sort = list(filter(lambda grp: grp['count'] >= self.params['n_g_min'], pre_sort))

        sorted_by_size = sorted(pre_sort, key=lambda grp: grp['count'], reverse=sort_by == 'size')
        if sort_by == 'size':
            return sorted_by_size
        else:
            sorted_by_depth = sorted(sorted_by_size, key=lambda grp: grp['depth'], reverse=True)
            return sorted_by_depth


    ## get labelling name
    def set_name_partition(self):

        def modify_grp_name(grp):
            global_prop = grp['count']/tree_grp_dist[grp['name']]['count']
            for thres in self.params['global_prop_indicate'].keys():
                if global_prop >= thres:
                    return (grp['name'] + self.params['global_prop_indicate'][thres])

            return grp['name']

        sorted_grp_dist = self.sort_grp_dist(sort_by='size')

        is_single_majority = sorted_grp_dist[0]['count']/self.count >= self.params['major_prop_min']
        majority_grps = []
        minority_grps = []
        for grp in sorted_grp_dist:
            grp_prop = grp['count']/self.count
            if grp_prop >= self.params['major_prop_min']:
                majority_grps.append(grp)
            elif grp_prop < (1 - self.params['major_prop_min']):
                minority_grps.append(grp)
            else:
                if is_single_majority:
                    minority_grps.append(grp)
                else:
                    majority_grps.append(grp)

        majority_label = '/'.join([modify_grp_name(grp) for grp in majority_grps]) + \
                        ('/' if not is_single_majority and len(majority_grps) == 1 else '')
        minority_label = '(%s%s%s)' % ('/'.join([modify_grp_name(grp) for grp in minority_grps[:self.params['minor_show_max']]]), \
                        ('/...' if len(minority_grps) > self.params['minor_show_max'] else ''), \
                        ('[n=%d]' % len(minority_grps) if len(minority_grps) > self.params['minor_show_max'] else ''))

        self.name = (majority_label + (minority_label if len(minority_grps) else ''))


    def update_ref_grp_dist(self):
        for k, v in self.actives['grp_dist'].items():
            self.refs['grp_dist'][k]['count'] -= v['count']
            if self.refs['grp_dist'][k]['count'] == 0:
                del self.refs['grp_dist'][k]


    def calc_grp_obj(self, grp_name):
        n_g = self.actives['grp_dist'][grp_name]['count']
        n_c = self.get_leaves(count_only=True, n_g_min_aware=True)
        N_g = self.refs['grp_dist'][grp_name]['count']
        rho_c = n_g/n_c
        rho_g = n_g/N_g

        g_thres = n_g >= self.params['n_g_min']
        f_thres = (n_c - n_g) <= n_c*self.params['rho_f_max']*(1+rho_g*self.params['rho_g_w'])
        obj = self.params['alpha']*rho_c + (1 - self.params['alpha'])*rho_g

#         print(g_thres, n_g, self.params['n_g_min'])
#         print(f_thres, (n_c - n_g)/n_c, self.params['rho_f_max']*(1+rho_g*self.params['rho_g_w']), obj)

        return (g_thres and f_thres, obj)


    def find_partition(self, grp_name, node=None, starting_leaf=None):

        ## get farthest leaf in grp of interest
        root_node = node if node else self.refs['tree']
        grp_leaves = root_node.search_nodes(grp=grp_name)
        grp_mrca = root_node.get_common_ancestor(grp_leaves)
        depth_sorted_leaves = sorted(grp_leaves, key=lambda leaf: leaf.depth, reverse=True)

        ## start from deepest leaf, if no valid partition can be found then move onto to next deepest leaf
        iter_n = 10
        for i in range(min(iter_n, len(depth_sorted_leaves))):
            target_leaf = starting_leaf if starting_leaf else depth_sorted_leaves[i]

            ## evaluate obj as the path to the MRCA is traversed
            curr = target_leaf.up
            max_obj = -999
            max_obj_partition = None

            while True:
                curr_partition = Partition(self.refs, curr)
                pass_obj = curr_partition.calc_grp_obj(grp_name)

                if pass_obj[0] and pass_obj[1] >= max_obj:
                    max_obj_partition = curr_partition
                    max_obj_partition.obj = pass_obj[1]
                    max_obj = pass_obj[1]

                if curr.name == grp_mrca.name:
                    break
                else:
                    curr = curr.up

            if starting_leaf or max_obj_partition:
                return max_obj_partition

        return None


    def check_foreign_partition(self, focus_grp):

        fgn_sorted_grps = filter(lambda grp: grp['name'] != focus_grp,
                                 self.sort_grp_dist(sort_by='depth', n_g_min_aware=True))
        for grp in fgn_sorted_grps:

            mono_leaves = map(lambda mono: get_top_leaf(mono),
                                 self.actives['tree'].get_monophyletic(values=[grp['name']], target_attr='grp'))

            for leaf in mono_leaves:
                fgn_candidate = self.find_partition(grp['name'],
                                                    node=self.actives['tree'],
                                                    starting_leaf=leaf)

                if fgn_candidate:
                    return (fgn_candidate, grp['name'])

## Functions

In [184]:
## function to reduce PANGO lvl
def lin_reduce(lin, lvl=3):
    return '.'.join(lin.split('.')[:lvl])

def get_top_leaf(tree):
    if tree.is_leaf():
        return tree

    for node in tree.iter_descendants('levelorder'):
        if node.is_leaf():
            return node

## Load data

In [58]:
tree_path = "/home/nclow23/downloads/04092023_final_pruned_no_usher_chronumental_consensus_2021_tree.tree"

label_path = "/home/nclow23/downloads/04092023_taxonium_viewing_metadata.tsv"

# Load tree
tree = Tree(tree_path, format=1)

# Load metadata
grping_df = pd.read_csv(label_path, sep="\t")

# neil IFNDEF 04.09
grping_df["seqName"] = grping_df[" strain"]

grping_df["new"] = grping_df.scorpio_call.fillna("Other").apply(
    lambda x: x.replace(" (", "_").replace("-", "_").replace(")", "").replace(" ", "_") # ugly
)

print(grping_df.shape)
# neil ENDIF 04.09

grping = dict(zip(grping_df.seqName.values, [lin_reduce(x) for x in grping_df.new.values]))

(293385, 6)


In [59]:
tree_copy = tree.copy('newick-extended')
tree_grp_dist = {}

for node in tree_copy.traverse():
    depth = tree_copy.get_distance(node, topology_only=True)
    if node.is_leaf():
        leaf_grp = grping[node.name]
        node.add_features(grp=leaf_grp, depth=depth)
        if leaf_grp in tree_grp_dist:
            tree_grp_dist[leaf_grp]['count'] += 1
            if tree_grp_dist[leaf_grp]['depth'] > depth: tree_grp_dist[leaf_grp]['depth']  = depth
        else:
            tree_grp_dist[leaf_grp] = {
                'count': 1,
                'depth': depth
            }
    else:
        node.add_features(depth=depth)

print('Taxa count: %d' % len(tree_copy.get_leaves()))

Taxa count: 291791


In [61]:
refs = {
    'tree': tree_copy.copy('newick-extended'),
    'grp_dist': copy.deepcopy(tree_grp_dist)
}
ref_partition = Partition(refs, tree_copy.copy('newick-extended'))

In [64]:
## main program starts here
partitions = []

start_time = time.time()
while True:
    print('Calculating most updated grp distribution...')
    depth_sorted_grps = ref_partition.sort_grp_dist(ref=True, sort_by='depth', n_g_min_aware=True)

    remaining_grp_exists = len(depth_sorted_grps)
    for grp in depth_sorted_grps:

        partition_stored = False
        focus_grp = grp['name']

        print(focus_grp)

        partition_candidate = ref_partition.find_partition(focus_grp)

        while partition_candidate:

            print('focus_grp: ', focus_grp)
            fgn_candidate = partition_candidate.check_foreign_partition(focus_grp)

            if fgn_candidate:
                partition_candidate, focus_grp = fgn_candidate

                continue

            else:
                partition_candidate.validate()
                partitions.append(partition_candidate)
                partition_stored = True

                print('\n%d' % (len(partitions) + 1))
                print(partition_candidate.name)
                print(partition_candidate.actives['tree'].name)
                print(partition_candidate.actives['grp_dist'])

                break

        if partition_stored:
            break ## break out to update global group distribution

    else:
        if remaining_grp_exists:
            continue
        else:
            break

print(time.time() - start_time)

Calculating most updated grp distribution...
Delta_AY.4.2_like
focus_grp:  Delta_AY.4.2_like

2
Delta_AY.4.2_like*(Delta_AY.4_like/Delta_B.1.617/Other)
node_153081
{'Delta_AY.4.2_like': {'count': 4575, 'depth': '130.0'}, 'Delta_AY.4_like': {'count': 63, 'depth': '115.0'}, 'Delta_B.1.617': {'count': 13, 'depth': '128.0'}, 'Other': {'count': 3, 'depth': '153.0'}}
Calculating most updated grp distribution...
Alpha_B.1.1
focus_grp:  Alpha_B.1.1
focus_grp:  Other

3
Other(Alpha_B.1.1)
node_267105
{'Alpha_B.1.1': {'count': 1, 'depth': '133.0'}, 'Other': {'count': 29, 'depth': '133.0'}}
Calculating most updated grp distribution...
Alpha_B.1.1
focus_grp:  Alpha_B.1.1
focus_grp:  Other

4
Other
node_283778
{'Other': {'count': 26, 'depth': '70.0'}}
Calculating most updated grp distribution...
Alpha_B.1.1
focus_grp:  Alpha_B.1.1
focus_grp:  B.1.1

5
B.1.1++(Alpha_B.1.1)
node_251669
{'B.1.1': {'count': 349, 'depth': '116.0'}, 'Alpha_B.1.1': {'count': 7, 'depth': '124.0'}}
Calculating most updated 

In [86]:
dir(p)

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__slotnames__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 'actives',
 'calc_grp_dist',
 'calc_grp_obj',
 'check_foreign_partition',
 'count',
 'find_partition',
 'get_leaves',
 'get_majority_grp',
 'is_candidate',
 'is_root',
 'name',
 'obj',
 'params',
 'refs',
 'set_name_partition',
 'sort_grp_dist',
 'tree_up',
 'tree_up_dist',
 'update_ref_grp_dist',
 'validate']

In [65]:
## rename partitions to ensure uniqueness
partition_names = []

count = 0
print('#partitions: %d\n' % len(partitions))
for i, p in enumerate(partitions):
    count += p.count
    name_count = partition_names.count(p.name)
    partition_names.append(p.name)
    if name_count:
        p.name = '%s_%d' % (p.name, name_count)
    print(i+1)
    print('%s\n%s' % (p.name, str(p.actives['grp_dist'])))
    print('\n')

print('#leaves: %d' % count)

#partitions: 32

1
Delta_AY.4.2_like*(Delta_AY.4_like/Delta_B.1.617/Other)
{'Delta_AY.4.2_like': {'count': 4575, 'depth': '130.0'}, 'Delta_AY.4_like': {'count': 63, 'depth': '115.0'}, 'Delta_B.1.617': {'count': 13, 'depth': '128.0'}, 'Other': {'count': 3, 'depth': '153.0'}}


2
Other(Alpha_B.1.1)
{'Alpha_B.1.1': {'count': 1, 'depth': '133.0'}, 'Other': {'count': 29, 'depth': '133.0'}}


3
Other
{'Other': {'count': 26, 'depth': '70.0'}}


4
B.1.1++(Alpha_B.1.1)
{'B.1.1': {'count': 349, 'depth': '116.0'}, 'Alpha_B.1.1': {'count': 7, 'depth': '124.0'}}


5
B.1.1
{'B.1.1': {'count': 11, 'depth': '105.0'}}


6
Alpha_B.1.1++(Other/B.1.1)
{'Other': {'count': 252, 'depth': '68.0'}, 'Alpha_B.1.1': {'count': 69788, 'depth': '67.0'}, 'B.1.1': {'count': 8, 'depth': '119.0'}}


7
Omicron_BA.5_like*
{'Omicron_BA.5_like': {'count': 13, 'depth': '59.0'}}


8
Delta_B.1.617(Delta_AY.4_like)
{'Delta_B.1.617': {'count': 14, 'depth': '121.0'}, 'Delta_AY.4_like': {'count': 2, 'depth': '120.0'}}


9
Delta_B.

In [272]:
## need empty partition as root partition
root_partition_node = Tree(name='root', format=1)

i_nodes = {}
p_nodes = {}
counts = {}
for p in partitions:
    temp_t = Tree(name=p.name, format=1)
    temp_t.dist = p.tree_up_dist
    temp_t.add_features(p=p)
    p_nodes[p] = temp_t
    counts[p.name] = p.count

for i, (p, n) in enumerate(p_nodes.items()):
    # neil IFNDEF 04.09
    print(f"Step {i}/{len(p_nodes)}: {p.name}")
    try:
    # neil ENDIF 04.09
        curr_up = tree_copy.search_nodes(name=p.tree_up)[0]
        curr_p_tree = n
        anc_found = False
        while not anc_found:
            for pp, nn in p_nodes.items():
                if curr_up.is_root():
                    root_partition_node.add_child(curr_p_tree)
                    anc_found = True
                    print("curr_up is root")
                    break
                elif curr_up.name in i_nodes:
                    i_nodes[curr_up.name] = i_nodes[curr_up.name].add_child(curr_p_tree)
                    anc_found = True
                    print("curr_up in i_nodes")
                    break
                # neil IFNDEF 04.09
                # if I comment this out, I get a tree, if I don't I get nothing
                # Not sure why though?
                # elif pp.actives['tree'].search_nodes(name=curr_up.name):
                #     # print(curr_up.name)
                #     nn.add_child(curr_p_tree)
                #     # anc_found = True
                #     print("curr_up found in pp.actives.tree")
                #     # break
                # neil ENDIF 04.09

            if not anc_found:
                anc_part = Tree(name=curr_up.name, format=1)
                anc_part.add_child(curr_p_tree)
                curr_p_tree = anc_part
                i_nodes[curr_p_tree.name] = curr_p_tree

            if not curr_up.is_root():
                curr_up = curr_up.up
    # neil IFNDEF 04.09: add a warning for errors
    except IndexError as err:
        warnings.warn(f"Could not process partition with name {p.name}: {err=}", UserWarning)
    # neil ENDIF 04.09


Step 0/32: Delta_AY.4.2_like*(Delta_AY.4_like/Delta_B.1.617/Other)
curr_up is root
Step 1/32: Other(Alpha_B.1.1)
curr_up in i_nodes
Step 2/32: Other
curr_up in i_nodes
Step 3/32: B.1.1++(Alpha_B.1.1)
curr_up in i_nodes
Step 4/32: B.1.1
curr_up in i_nodes
Step 5/32: Alpha_B.1.1++(Other/B.1.1)
curr_up in i_nodes
Step 6/32: Omicron_BA.5_like*
curr_up in i_nodes
Step 7/32: Delta_B.1.617(Delta_AY.4_like)
curr_up in i_nodes
Step 8/32: Delta_B.1.617
curr_up in i_nodes
Step 9/32: Delta_AY.4_like++(Delta_B.1.617/Other)
curr_up in i_nodes
Step 10/32: Delta_AY.4_like
curr_up in i_nodes
Step 11/32: Delta_AY.4_like(Delta_B.1.617)
curr_up in i_nodes
Step 12/32: Omicron_Unassigned(Omicron_BA.1_like)
curr_up in i_nodes
Step 13/32: Omicron_BA.1_like++(Probable_Omicron_BA.1_like/Omicron_Unassigned+/Probable_Omicron_Unassigned/...[n=4])
curr_up in i_nodes
Step 14/32: Omicron_BA.1_like/(Probable_Omicron_BA.1_like/Probable_Omicron_Unassigned/Omicron_Unassigned)
curr_up in i_nodes
Step 15/32: Omicron_BA.1_l


Could not process partition with name Other++(Probable_Omicron_Unassigned/Omicron_BA.1_like/Probable_Omicron_BA.1_like/...[n=10]): err=IndexError('list index out of range')



In [274]:
partition_dict = {p.name: p for p in partitions}

for leaf in root_partition_node.get_leaves():
    # neil IFNDEF 04.09: add p feature
    leaf.add_features(p=partition_dict[leaf.name])
    leaf.name = f'{leaf.name}|N={counts[leaf.name]}'
    # neil ENDIF 04.09

In [275]:
fig = plot_tree(root_partition_node.write(format=5), style="plotly")

fig.write_html("root_partition_node.html")

In [223]:
with open("root_partition_node.nwk", "w") as f:
    f.write(root_partition_node.write())

In [210]:
print(root_partition_node.get_ascii(show_internal=False, compact=True))


                                                                                                                                                                                                                                                            /- /- /- /- /- /- /- /- /- /- /- /- /- /- /- /- /-Delta_AY.4.2_like*(Delta_AY.4_like/Delta_B.1.617/Other)|N=4654
                                                                                                                                                                                                                           /- /- /- /- /- /- /- /- /- /- /-|                                 \- /- /- /- /- /- /- /- /- /- /- /- /- /- /- /- /- /- /- /- /- /- /- /-Delta_B.1.617|N=30
                                                                                                                                                                                       /- /- /- /- /- /- /- /- /- /- /- /-|                                 \- /- /

## Label all nodes by partition

In [224]:
node_partition = {}
for p in partitions:
    for node in p.actives['tree'].traverse():
        node_partition[node.name] = p.name

In [247]:
pd.Series(node_partition).to_frame(
    name="partition"
).reset_index().rename(
    columns={"index": "node"}
).to_csv(
    "node_partition.tsv", sep="\t",
    index=False
)

In [248]:
pd.read_csv("node_partition.tsv", sep="\t")

Unnamed: 0,node,partition
0,node_153081,Delta_AY.4.2_like*(Delta_AY.4_like/Delta_B.1.6...
1,node_153082,Delta_AY.4.2_like*(Delta_AY.4_like/Delta_B.1.6...
2,node_156902,Delta_AY.4.2_like*(Delta_AY.4_like/Delta_B.1.6...
3,node_153083,Delta_AY.4.2_like*(Delta_AY.4_like/Delta_B.1.6...
4,node_156898,Delta_AY.4.2_like*(Delta_AY.4_like/Delta_B.1.6...
...,...,...
583575,sequence_231508,Other++(Probable_Omicron_Unassigned/Omicron_BA...
583576,node_8104,Other++(Probable_Omicron_Unassigned/Omicron_BA...
583577,sequence_229222,Other++(Probable_Omicron_Unassigned/Omicron_BA...
583578,sequence_227984,Other++(Probable_Omicron_Unassigned/Omicron_BA...


## Deconstruct partition-tree into nodes/links
* WTF is part_tree?

In [281]:
part_nodes = [
    {
        'id': node.name,
        'num': node.p.count if node.is_leaf() else None, # neil 04.09
        'type': 3,
        'divs': [
            {
                'label': k,
                'num': v['count']
            } for k, v in node.p.actives['grp_dist'].items()
        ] if node.is_leaf() else None # neil 04.09
    } for node in root_partition_node.traverse()
]

In [282]:
part_links = [
    {
        'id': '%s_%s' % (node.up.name if node.up else '', node.name),
        'source': node.up.name if node.up else '',
        'target': node.name,
        'len': node.dist,
    } for node in root_partition_node.traverse()
]
part_links = list(filter(lambda link: link['source'], part_links))

### Get group distribution

In [250]:
with open('partition_summary.tsv', 'w+') as outfile:
    for p in partitions:
        grp_dist = p.actives['grp_dist']
        total_count = sum([v['count'] for v in grp_dist.values()])
        outfile.write('%s (root: %s)\n' % (p.name, p.actives['tree'].name))
        for grp, v in grp_dist.items():
            outfile.write('%s (n=%d (%.2f%%))\n' % (grp, v['count'], (v['count']*100/total_count)))
        outfile.write('\n')

In [252]:
pd.read_csv("partition_summary.tsv", sep="\t")

Unnamed: 0,Delta_AY.4.2_like*(Delta_AY.4_like/Delta_B.1.617/Other) (root: node_153081)
0,Delta_AY.4.2_like (n=4575 (98.30%))
1,Delta_AY.4_like (n=63 (1.35%))
2,Delta_B.1.617 (n=13 (0.28%))
3,Other (n=3 (0.06%))
4,Other(Alpha_B.1.1) (root: node_267105)
...,...
107,Omicron_BA.3_like (n=1 (0.00%))
108,Alpha_B.1.1 (n=3 (0.01%))
109,Eta_B.1.525_like (n=1 (0.00%))
110,Iota_B.1.526_like (n=9 (0.04%))


### Write to file

In [283]:
with open('nodes.json', 'w+') as outfile:
    outfile.write(json.dumps(part_nodes))

In [265]:
with open('links.json', 'w+') as outfile:
    outfile.write(json.dumps(part_links))

In [284]:
nodes_links = {
    'nodes': part_nodes,
    'links': part_links
}
with open('nodes_links.json', 'w+') as outfile:
    outfile.write(json.dumps(nodes_links))

## Save partitions

In [301]:
for i, p in enumerate(partitions):
    print(f"Partition {i+1}/{len(partitions)}")
    with open(f"partitions/partition{i}.pickle", "wb") as f:
        pickle.dump(p, f)

Partition 1/32
Partition 2/32
Partition 3/32
Partition 4/32
Partition 5/32
Partition 6/32
Partition 7/32
Partition 8/32
Partition 9/32
Partition 10/32
Partition 11/32
Partition 12/32
Partition 13/32
Partition 14/32
Partition 15/32
Partition 16/32
Partition 17/32
Partition 18/32
Partition 19/32
Partition 20/32
Partition 21/32
Partition 22/32
Partition 23/32
Partition 24/32
Partition 25/32
Partition 26/32
Partition 27/32
Partition 28/32
Partition 29/32
Partition 30/32
Partition 31/32
Partition 32/32
