In [1]:
# !pip install pydot
# !pip install keras
# !pip install networkx
# !pip install scipy
# !pip install tensorflow-macos

In [1]:
import math

from keras.applications import *
import networkx as nx
import numpy as np
import scipy
from itertools import combinations
from tensorflow import keras
import tensorflow as tf
from typing import Tuple, List, Dict

In [18]:
class Partitioner:
    def __init__(self, model: keras.Model):
        self.model = model
        self.Stack = []
        self.visited = {}
        # The "depth"/level that a certain layer is at
        self.layer_level = {}
        # The layers at a certain depth/level, where the index of the array is the level
        self.levels = []

    # Constructs model using shape of start layer as the input (doesn't include start layer in the model)
    def _construct_model(self, start, end, part_name="part_begin"):
        inpt = keras.Input(tensor=self.model.get_layer(start).output, name=part_name)
        outpt = self.model.get_layer(end).output
        part = keras.Model(inputs=inpt, outputs=outpt)
        return part

    def construct_models(self, model: keras.Model, num_nodes: int, num_classes: int, node_capacity: int, G_c: nx.Graph):
        partitioner = Partitioner(model)
        part_pts = partitioner.find_partitions()
        transfers = partitioner.find_partition_transfer_size(part_pts)

        partition_mems = partitioner.find_partition_memory(part_pts)
        partitions, node_arrangement = partition_and_place(num_nodes, node_capacity, G_c, num_classes, part_pts, transfers, partition_mems)

        constructed_models = []
        i = 0

        # Ignore the dispatcher "partition"
        for p in partitions[1:]:
            # Model input
            if p[0] == 0:
                start_layer = part_pts[0]
            else:
                # _construct_model() uses an exclusive start layer but inclusive end layer
                start_layer = part_pts[p[0]-1]

            # Model output
            if p[1] == len(part_pts):
                end_layer = part_pts[-1]
            else:
                # End layer of partition in graph is exclusive, so need to subtract one from end layer index
                # to use with _construct_model(), which has inclusive end layer
                end_layer = part_pts[p[1]-1]

            print(f"Partition {i}: ({start_layer}, {end_layer})")

            model = self._construct_model(start_layer, end_layer, part_name=f"part_{i}")
            constructed_models.append(model)
            print("Partition constructed")
            i += 1

        return node_arrangement, constructed_models


    # A recursive function used by longest_path. See below
    # link for details
    # https:#www.geeksforgeeks.org/topological-sorting/
    def topological_sort_util(self, v: str):
        self.visited[v] = True

        # Recur for all the vertices adjacent to this vertex
        # list<AdjListNode>::iterator i
        for i in self.get_next(v):
            if not self.visited[i]:
                self.topological_sort_util(i)

        # Push current vertex to stack which stores topological
        # sort
        self.Stack.append(v)

    # The function to find longest distances from a given vertex.
    # It uses recursive topologicalSortUtil() to get topological
    # sorting.
    def longest_path(self, s: str) -> List[List[str]]:
        for l in self.model.layers:
            self.visited[l.name] = False
            self.layer_level[l.name] = -1 # Equal to -infty

        # Call the recursive helper function to store Topological
        # Sort starting from all vertices one by one
        for l in self.model.layers:
            if not self.visited[l.name]:
                self.topological_sort_util(l.name)

        # Initialize distances to all vertices as infinite and
        # distance to source as 0
        self.layer_level[s] = 0

        # Process vertices in topological order
        while len(self.Stack) > 0:

            # Get the next vertex from topological order
            u = self.Stack.pop()

            # Update distances of all adjacent vertices
            # list<AdjListNode>::iterator i
            if self.layer_level[u] != -1:
                for i in self.get_next(u):
                    if self.layer_level[i] < self.layer_level[u] + 1:
                        self.layer_level[i] = self.layer_level[u] + 1 # Each edge weighted 1

        # Create array of calculated longest distances to layer
        layers_at_level = [[]] * len(self.layer_level)
        for l in self.model.layers:
            if len(layers_at_level[self.layer_level[l.name]]) == 0:
                layers_at_level[self.layer_level[l.name]] = []

            layers_at_level[self.layer_level[l.name]].append(l.name)

        return layers_at_level

    def find_singletons(self):
        # Model only has 1 input, which is input_names[0]
        name = self.model.input_names[0]
        # Finding the longest path from the start to every other layer
        self.levels = self.longest_path(name)
        singletons = []
        for l in range(len(self.levels)):
            if len(self.levels[l]) == 1:
                singletons.append(self.levels[l][0])
        return singletons

    def find_all_paths_util(self, u, d, visited, path, all_paths):
        # If the distance of the current path is greater than the longest path (the "level") to the destination node, we know the destination node can't be a partition point
        if self.layer_level[u] > self.layer_level[d]:
            return False
        # Mark the current node as visited and store in path
        visited[u] = True
        path.append(u)

        # If current vertex is same as destination, then print
        # current path[] (because we've found a path from u to d)
        if u == d:
            exists = False
            # See if path already exists in list of paths
            for p in all_paths:
                if p == path:
                    exists = True
                    break

            if not exists:
                all_paths.append(path.copy())
        else:
            # If current vertex is not destination
            # Recur for all the vertices adjacent to this vertex
            for i in self.get_next(u):
                if not visited[i]:
                    ret = self.find_all_paths_util(i, d, visited, path, all_paths)
                    if not ret:
                        return False

        # Remove current vertex from path[] and mark it as unvisited
        path.pop()
        visited[u] = False
        return True

    # Finds all paths from 's' to 'd.' Returns false if a there exists a path from s that has a greater "level" than d, otherwise returns true
    def find_all_paths(self, s, d) -> bool:
        # Mark all the vertices as not visited
        visited = {}
        for l in self.model.layers:
            visited[l.name] = False

        # Create an array to store paths
        path = []
        all_paths = []

        # Call the recursive helper function to find all paths
        return self.find_all_paths_util(s, d, visited, path, all_paths)

    def partitions_util(self, prev, singleton_nodes, partitions):
        # Reached the end of the model and found all the partitions
        if len(singleton_nodes) == 0:
            return partitions
        p = False
        i = -1 # So first i starts at 0
        # Starting from the previous partition point, we iterate through all the subsequent singleton nodes to find the next partition point
        while not p:
            i += 1
            p = self.find_all_paths(prev, singleton_nodes[i])

        partitions.append(singleton_nodes[i])
        return self.partitions_util(singleton_nodes[i], singleton_nodes[i + 1:], partitions)

    def find_partitions(self) -> List[str]:
        inpt = self.model.input_names[0]
        return self.partitions_util(inpt, self.find_singletons(), [])

    def keras_model_memory_usage_in_bytes(self, model, batch_size: int):
        """
        Return the estimated memory usage of a given Keras model in bytes.
        This includes the model weights and layers, but excludes the dataset.

        The model shapes are multiplied by the batch size, but the weights are not.

        Args:
            model: A Keras model.
            batch_size: The batch size you intend to run the model with. If you
                have already specified the batch size in the model itself, then
                pass `1` as the argument here.
        Returns:
            An estimate of the Keras model's memory usage in bytes.

        """
        default_dtype = tf.keras.backend.floatx()
        shapes_mem_count = 0
        internal_model_mem_count = 0
        for layer in model.layers:
            if isinstance(layer, tf.keras.Model):
                internal_model_mem_count += self.keras_model_memory_usage_in_bytes(
                    layer, batch_size=batch_size
                )
            single_layer_mem = tf.as_dtype(layer.dtype or default_dtype).size
            out_shape = layer.output_shape
            if isinstance(out_shape, list):
                out_shape = out_shape[0]
            for s in out_shape:
                if s is None:
                    continue
                single_layer_mem *= s
            shapes_mem_count += single_layer_mem

        trainable_count = sum(
            [tf.keras.backend.count_params(p) for p in model.trainable_weights]
        )
        non_trainable_count = sum(
            [tf.keras.backend.count_params(p) for p in model.non_trainable_weights]
        )

        total_memory = (
                batch_size * shapes_mem_count
                + internal_model_mem_count
                + trainable_count
                + non_trainable_count
        )
        return total_memory

    def keras_layer_memory(self, layer_name, batch_size: int):
        default_dtype = tf.keras.backend.floatx()
        shapes_mem_count = 0
        internal_model_mem_count = 0

        if isinstance(layer_name, tf.keras.Model):
            internal_model_mem_count += self.keras_model_memory_usage_in_bytes(
                layer_name, batch_size=batch_size
            )
        single_layer_mem = tf.as_dtype(layer_name.dtype or default_dtype).size
        out_shape = layer_name.output_shape
        if isinstance(out_shape, list):
            out_shape = out_shape[0]
        for s in out_shape:
            if s is None:
                continue
            single_layer_mem *= s
        shapes_mem_count += single_layer_mem

        trainable_count = sum(
            [tf.keras.backend.count_params(p) for p in layer_name.trainable_weights]
        )
        non_trainable_count = sum(
            [tf.keras.backend.count_params(p) for p in layer_name.non_trainable_weights]
        )

        total_memory = (
                batch_size * shapes_mem_count
                + internal_model_mem_count
                + trainable_count
                + non_trainable_count
        )
        return total_memory

    def find_partition_memory(self, partition_points):
        part_mems = []
        #Each index represents the memory between that part pt and the next one
        for i in range(1, len(partition_points)):
            # Going backwards along layers within partition to find total memory usage
            start = self.layer_level[partition_points[i]]
            end = self.layer_level[partition_points[i - 1]]
            mem = 0
            for j in range(start, end, -1):
                for l in self.levels[j]:
                    layer_mem = self.keras_layer_memory(self.model.get_layer(l), 1)
                    mem += layer_mem
            part_mems.append(mem)
        # Nothing used after last partition pt, which is output layer
        part_mems.append(0)
        return part_mems

    # Returns transfer size of partition in Mbits
    def find_partition_transfer_size(self, partition_points) -> List[int]:
        transfer_sizes = []
        input_size = 1
        # Iterate through all elements of shape tuple except first one (which is batch size)
        for s in self.model.input.get_shape()[1:]:
            input_size *= s
        # Compression ratio is ~1.44 (according to https://www.researchgate.net/publication/264417607_Fixed-Rate_Compressed_Floating-Point_Arrays)
        zfp_comp_ratio = 1.44
        # input_size gives us number of bits, need to convert to bytes
        input_size_bytes = (input_size * 8) / zfp_comp_ratio
        # Assuming all elements are floats, each float uses 8 bytes
        input_size_mbits = (input_size_bytes * 8) / (1024 ** 2)

        # Put input size as first element of transfer size array
        transfer_sizes.append(input_size_mbits)

        for i in range(len(partition_points)):
            num_outbound = len(self.model.get_layer(partition_points[i]).outbound_nodes)

            # Iterate through all elements of shape tuple except first one (which is batch size)
            output_size = 1
            for s in self.model.get_layer(partition_points[i]).get_output_at(0).get_shape()[1:]:
                output_size *= s

            # Assuming all elements are floats, each float uses 8 bytes
            output_size_bytes = (output_size * 8) / zfp_comp_ratio
            output_size_mbits = (output_size_bytes * 8) / (1024 ** 2)
            # All outputs of the layer are the same size, the total size will be (output size * num_output_nodes)
            transfer_size = num_outbound * output_size_mbits
            transfer_sizes.append(transfer_size)

        return transfer_sizes

In [None]:
# model = ResNet50()
# p = Partitioner(model)
# #print(dir(model.get_layer("conv3_block3_out")))
# #print(model.get_layer("conv3_block3_out").output_shape)
# a = p._construct_model("conv3_block3_out", "avg_pool")
# keras.utils.plot_model(a, to_file = "partition.png")
# converter = tf.lite.TFLiteConverter.from_keras_model(a)
# converter.optimizations = [tf.lite.Optimize.DEFAULT]
# converter.target_spec.supported_types = [tf.float16]
# tflite_model = converter.convert()
# print(a.summary())

In [4]:
def k_path_color_coding(graph: nx.Graph, k: int):
    # Creates speedup of algorithm
    a = 1.3
    for i in range(int(10*(math.e ** k))):
        rng = np.random.default_rng()
        coloring = rng.integers(1, (a*k)+1, len(graph.nodes()))
        j = 0
        for v in graph.nodes():
            graph.nodes()[v]["color"] = coloring[j]
            j += 1
        g = {}
        for v in graph.nodes():
            g[v] = {}
            g[v][frozenset([graph.nodes()[v]['color']])] = {}
            g[v][frozenset([graph.nodes()[v]['color']])]['hasPath'] = True
            g[v][frozenset([graph.nodes()[v]['color']])]['path'] = [v]
            for c in range(1, k+1):
                if c != graph.nodes()[v]['color']:
                    g[v][frozenset([c])] = {}
                    g[v][frozenset([c])]['hasPath'] = False
        K = range(1, k+1)
        for s in range(1, k):
            possible_S = list(combinations(K, s))
            for u in graph.nodes():
                for v in nx.neighbors(graph, u):
                    for S in possible_S:
                        Sset = frozenset(S)
                        if Sset in g[u] and g[u][Sset]['hasPath'] == True:
                            if graph.nodes()[v]['color'] not in Sset:
                                newSet = list(S).copy()
                                newSet.append(graph.nodes()[v]['color'])
                                g[v][frozenset(newSet)] = {}
                                g[v][frozenset(newSet)]['hasPath'] = True
                                newPath = g[u][frozenset(S)]['path'].copy()
                                newPath.append(v)
                                g[v][frozenset(newSet)]['path'] = newPath

        for u in graph.nodes():
            if frozenset(K) in g[u]:
                if g[u][frozenset(K)]['hasPath']:
                    return g[u][frozenset(K)]['path']

    return False

In [5]:
def modified_k_path_color_coding(graph: nx.Graph, k: int, start, end):
    # Creates speedup of algorithm
    a = 1.3
    for i in range(int(10*(math.e ** k))):
        rng = np.random.default_rng()
        coloring = rng.integers(1, (a*k)+1, len(graph.nodes()))
        j = 0
        for v in graph.nodes():
            graph.nodes()[v]["color"] = coloring[j]
            j += 1
        g = {}
        for v in graph.nodes():
            g[v] = {}
            g[v][frozenset([graph.nodes()[v]['color']])] = {}
            g[v][frozenset([graph.nodes()[v]['color']])]['hasPath'] = True
            g[v][frozenset([graph.nodes()[v]['color']])]['path'] = [v]
            for c in range(1, k+1):
                if c != graph.nodes()[v]['color']:
                    g[v][frozenset([c])] = {}
                    g[v][frozenset([c])]['hasPath'] = False
        K = range(1, k+1)
        for s in range(1, k):
            possible_S = list(combinations(K, s))
            for u in graph.nodes():
                if start is not None and s == 1 and u != start:
                    continue
                for v in nx.neighbors(graph, u):
                    if v is not None and v == end and s != k-1:
                        continue
                    for S in possible_S:
                        Sset = frozenset(S)
                        if Sset in g[u] and g[u][Sset]['hasPath'] == True:
                            if graph.nodes[v]['color'] not in Sset:
                                newSet = list(S).copy()
                                newSet.append(graph.nodes()[v]['color'])
                                g[v][frozenset(newSet)] = {}
                                g[v][frozenset(newSet)]['hasPath'] = True
                                newPath = g[u][frozenset(S)]['path'].copy()
                                newPath.append(v)
                                g[v][frozenset(newSet)]['path'] = newPath

        if end is not None:
            if frozenset(K) in g[end]:
                if g[end][frozenset(K)]['hasPath']:
                    return g[end][frozenset(K)]['path']

        else:
            for u in graph.nodes():
                if frozenset(K) in g[u]:
                    if g[u][frozenset(K)]['hasPath']:
                        return g[u][frozenset(K)]['path']

    return False

In [6]:
def threshold(X: int, edges, classes: Dict[Tuple, int], t: int):
    for e in edges:
        name = e[2]['name']
        if e[2]['weight'] < t:
            classes[name] = X-1
        else:
            classes[name] = X

In [7]:
def subgraph_k_path(G: nx.Graph, X: int, k: int):
    # For the binary search we want the edge in reverse order
    edge_list = sorted(G.edges(data=True), key=lambda x: x[2]['weight'], reverse=True)

    low = 0
    high = len(edge_list)
    classes = {}
    best_path = []
    while low < high:
        median = (low+high) // 2
        med_weight = edge_list[median][2]['weight']
        threshold(X, edge_list, classes, med_weight)
        x_edges = [(e[0], e[1]) for e in edge_list if classes[e[2]['name']] == X]
        G_x = G.edge_subgraph(x_edges).copy()
        result = k_path_color_coding(G_x, k)
        if not result:
            low = median + 1
        else:
            high = median
            best_path = result

    G.remove_nodes_from(best_path)
    return best_path

In [8]:
def modified_subgraph_k_path(G: nx.Graph, X: int, k: int, s, u):
    # For the binary search we want the edge in reverse order
    edge_list = sorted(G.edges(data=True), key=lambda x: x[2]['weight'], reverse=True)

    low = 0
    high = len(edge_list)
    classes = {}
    best_path = []
    while low < high:
        median = (low+high) // 2
        med_weight = edge_list[median][2]['weight']
        threshold(X, edge_list, classes, med_weight)
        x_edges = [(e[0], e[1]) for e in edge_list if classes[e[2]['name']] == X]
        G_x = G.edge_subgraph(x_edges).copy()
        if s is not None and s not in G_x:
            low = median + 1
            continue
        if u is not None and u not in G_x:
            low = median + 1
            continue
        result = modified_k_path_color_coding(G_x, k, s, u)
        if result == False or len(result) == 0:
            low = median + 1
        else:
            high = median
            best_path = result

    G.remove_nodes_from(best_path)
    return best_path

In [9]:
def find_subarrays(S, X):
    x = np.array(S)
    a = x == X
    inds = [i for i in range(len(S))]
    splits = np.split(inds, np.where(np.diff(a)!=0)[0]+1)
    subs = [s for s in splits if S[s[0]] == X]
    return subs

In [10]:
def classify(transfer_sizes: List[int], chosen_sizes: List[int], num_bins):
    bins = np.histogram_bin_edges(transfer_sizes, bins=num_bins)
    # Returns the class that each transfer size belongs to
    classes = np.digitize(chosen_sizes, bins)
    return classes

In [11]:
def create_partition_graph(node_capacity: int, partitions: List[str], transfer_sizes, partition_mems):
    partitions_dag = nx.DiGraph()
    for i in range(len(partitions)):
        for j in range(i+1, len(partitions)+1):
            mem = sum(partition_mems[i:j-1])
            # Partition has to fit into node
            if mem < node_capacity:
                node_name = f"{i}-{j}"
                # End layer of partition is exclusive
                partitions_dag.add_node(node_name, partition=(i, j))

    for n1 in partitions_dag.nodes(data=True):
        for n2 in partitions_dag.nodes(data=True):
            n1_name = n1[0]
            n2_name = n2[0]
            uEnd = n1[1]['partition'][1]
            vStart = n2[1]['partition'][0]
            if uEnd == vStart:
                w = transfer_sizes[uEnd]
                partitions_dag.add_edge(n1_name, n2_name, weight=w)
    return partitions_dag, transfer_sizes

path_from = {}
def min_cost_path(G, v):
    # Node is leaf node
    if len(G[v]) == 0:
        return [v], 0

    # Not actually the last layer, its the layer after the last
    partition_last_layer = G.nodes()[v]['partition'][1]
    if partition_last_layer not in path_from:
        min_path = []
        min_cost = math.inf
        for c in G[v]:
            path, cost = min_cost_path(G, c)
            if cost < min_cost:
                min_cost = cost
                min_path = path

        path_from[partition_last_layer] = (min_path, min_cost)

    min_path, min_cost = path_from[partition_last_layer]

    # The child that resulted in the min cost path
    chosen_node = min_path[0]
    # Path starting at v and going to a leaf
    new_path = [v]
    new_path.extend(min_path)
    new_cost = G[v][chosen_node]['weight'] + min_cost
    return new_path, new_cost

def partition(G, transfer_sizes: List, num_nodes: int, num_bins: int):
    roots = []
    for n in G.nodes():
        if G.in_degree(n) == 0:
            roots.append(n)

    min_path = []
    min_cost = math.inf
    for r in roots:
        path, cost = min_cost_path(G, r)
        if len(path) > num_nodes:
            continue
        if cost < min_cost:
            min_cost = cost
            min_path = path

    # The dispatcher "partition" transfer size always has to be the first
    chosen_transfer_sizes = [transfer_sizes[0]]
    for p in range(len(min_path)-1):
        ts = G[min_path[p]][min_path[p+1]]['weight']
        chosen_transfer_sizes.append(ts)

    # The dispatcher "partition" always has to be the first
    chosen_partitions = [(0, 0)]
    for m in min_path:
        chosen_partitions.append(G.nodes()[m]['partition'])

    transfer_size_classes = classify(transfer_sizes, chosen_transfer_sizes, num_bins)
    return chosen_partitions, transfer_size_classes, chosen_transfer_sizes

In [12]:
def partition_and_place(num_nodes: int, node_capacity: int, comm_graph: nx.Graph, num_classes, partitions, transfers, partition_mems):
    G_p, transfer_sizes = create_partition_graph(node_capacity, partitions, transfers, partition_mems)
    Q, S, transfer_size_weights = partition(G_p, transfer_sizes, num_nodes, num_classes)
    # Q only has the dispatcher
    if len(Q) == 1:
        raise MemoryError("Can't partition with specified number of nodes and capacity")
    # Q only has the dispatcher and one partition
    if len(Q) == 2:
        raise NotImplementedError("Only one partition necessary")

    G_c = comm_graph.copy()
    N = k_path_matching(G_c, Q, S, num_classes)
    # Rare case, usually if there's too many bandwidth classes
    if None in N:
        raise NotImplementedError("Couldn't find matching")
    return Q, N

In [13]:
def k_path_matching(g: nx.Graph, Q: List[str], S: List[int], C: int):
    original_graph = g.copy()
    N = [None] * len(Q)
    for X in range(C, 0, -1):
        x_paths = find_subarrays(S, X)
        x_paths = sorted(x_paths, key=lambda x: len(x), reverse=True)
        for j in range(len(x_paths)):
            start_idx = x_paths[j][0]
            end_idx = start_idx + len(x_paths[j])
            start_v = N[start_idx]
            end_v = N[end_idx]
            if start_v is not None and start_v not in g:
                nodes_to_add = list(g.nodes())
                nodes_to_add.append(start_v)
                g = original_graph.subgraph(nodes_to_add).copy()
            if end_v is not None and end_v not in g:
                nodes_to_add = list(g.nodes())
                nodes_to_add.append(end_v)
                g = original_graph.subgraph(nodes_to_add).copy()

            path = modified_subgraph_k_path(g, X, len(x_paths[j]) + 1, start_v, end_v)
            N[start_idx:start_idx+len(path)] = path

    return N

In [14]:
# Node capacities in MB
caps = [64, 128, 256, 512]
# Number of nodes
node_nums = [5, 10, 15, 20, 50]
#node_nums = [50]
# Num classes: [2, 5, 8, 11, 14, 17, 20]
class_nums = [i for i in range(2, 21, 3)]

def distance_to_bandwidth(d):
    # Network with average bandwidth = 6.5 Mbps
    a = 283230
    return math.log2(1 + a / (d ** 2))

def get_bottleneck(transfer_sizes, G_c, arrangement):
    bottleneck = 0
    for t in range(len(transfer_sizes)):
        latency = transfer_sizes[t] / G_c[arrangement[t]][arrangement[t+1]]['weight']
        if latency > bottleneck:
            bottleneck = latency

    return bottleneck

def generate_comm_graph(num_nodes: int):
    rng = np.random.default_rng()
    # Set of arrays of len 2
    node_pos = (rng.random((num_nodes, 2)) * 149) + 1
    comm_graph = nx.complete_graph(num_nodes)
    nodes_list = list(comm_graph.nodes())
    for n in range(len(nodes_list)):
        comm_graph.nodes()[nodes_list[n]]['pos'] = node_pos[n]
    for j in comm_graph.edges():
        u = j[0]
        v = j[1]
        dist = scipy.spatial.distance.euclidean(comm_graph.nodes[u]["pos"], comm_graph.nodes[v]["pos"])
        w = distance_to_bandwidth(dist)
        comm_graph[u][v]["weight"] = w
        comm_graph[u][v]['name'] = f"{u}-{v}"

    return comm_graph

def test_graph_configs(model, model_name):
    partitioner = Partitioner(model)
    partitions = partitioner.find_partitions()
    transfers = partitioner.find_partition_transfer_size(partitions)

    partition_mems = partitioner.find_partition_memory(partitions)

    all_data = {}
    # Average of many trials for accuracy
    num_trials = 50
    for i in range(num_trials):
        print(f"Trial #{i+1}")
        for num_nodes in node_nums:
            for c in caps:
                # Convert to MB
                cap = c * (1024 ** 2)
                for num_classes in class_nums:
                    try:
                        #print(f"{model_name}-{c}-{num_nodes}-{num_classes}")
                        comm_graph = generate_comm_graph(num_nodes)
                        transfer_sizes, arrangement = partition_and_place(num_nodes, cap, comm_graph, num_classes, partitions, transfers, partition_mems)
                        bottleneck = get_bottleneck(transfer_sizes, comm_graph, arrangement)
                    except NotImplementedError as e:
                        bottleneck = 0
                    except MemoryError as e:
                        bottleneck = math.inf

                    key = f"{model_name}-{c}-{num_nodes}-{num_classes}"
                    if i == 0:
                        old_avg = 0
                    else:
                        old_avg = all_data[key]
                    new_avg = old_avg + ((bottleneck - old_avg)/(i+1))
                    all_data[key] = new_avg
    return all_data

In [15]:
# The models we're using for the test
#model_names = ['ResNet50', 'InceptionResNetV2', 'EfficientNetB1', 'MobileNetV2']

In [16]:
# model =
# model_name =
# data = test_graph_configs(model, model_name)
# for k in data:
#     cols = k.split("-")
#     key_fmt = "\t".join(cols)
#     val = data[k]
#     result = f"{key_fmt}\t{val}"
#     print(result)

In [17]:
def check_optimality(g: nx.Graph, N: List[int], T: List[float]):
    # Assumes that this link becomes the bottleneck (could end up with a case of a small transfer size
    # w/ rly small bandwidth that has higher latency than this edge)
    max_edge = max(g.edges(data=True), key=lambda x: x[2]['weight'])
    t = T.index(max(T))
    if (N[t], N[t+1]) == (max_edge[0], max_edge[1]):
        bottleneck = get_bottleneck(T, g, N)
        min_bottleneck = T[t] / g[N[t]][N[t+1]]['weight']
        if bottleneck == min_bottleneck:
            return True

    return False

In [18]:
# Compare to optimality of joint optimization?
# num_trials = 1000
# graph_size = 50
# model = MobileNetV2()
# capacity = 64 * (1024 ** 2)
# bw_classes = 20
#
# partitioner = Partitioner(model)
# partitions = partitioner.find_partitions()
# transfers = partitioner.find_partition_transfer_size(partitions)[0]
#
# partition_mems = partitioner.find_partition_memory(partitions)
# num_true = 0
# for i in range(num_trials):
#     if i % 100 == 0:
#         print(f"Trial {i}")
#     comm_graph = generate_comm_graph(graph_size)
#     arrangement, transfer_sizes = partition_and_place(model, graph_size, capacity, comm_graph, bw_classes, partitions, transfers, partition_mems)
#     if check_optimality(comm_graph, arrangement, transfer_sizes):
#         num_true += 1
#
# print(f"{num_true} out of {num_trials}")

In [19]:
graph_size = 50
test_model = ResNet50()
capacity = 64 * (1024 ** 2)
bw_classes = 20

comm_graph = generate_comm_graph(graph_size)
partitioner = Partitioner(test_model)
# First node will be dispatcher node
nodes, partitioned_models = partitioner.construct_models(test_model, graph_size, bw_classes, capacity, comm_graph)
print(nodes)

[(0, 0), (0, 7), (7, 16), (16, 34), (34, 41)]
Partition 0: (input_1, pool1_pool)
Partition constructed
Partition 1: (pool1_pool, conv3_block2_add)
Partition constructed
Partition 2: (conv3_block2_add, conv5_block1_add)
Partition constructed
Partition 3: (conv5_block1_add, predictions)
Partition constructed
[10, 28, 42, 5, 9]


In [20]:
from keras.applications.resnet import preprocess_input, decode_predictions

input_shape = (224, 224, 3)
# Random pixels, just to get a sample output
x = np.random.random_sample(input_shape).astype(np.float32)
dims_exp = np.expand_dims(x, axis=0)
arr = preprocess_input(dims_exp)

pred = test_model.predict(arr)
print("Single device inference: ", decode_predictions(pred, top=1)[0])

out = arr
for i in range(len(partitioned_models)):
    print(f"Inferencing with part {i}")
    part = partitioned_models[i]
    #part.summary()
    out = part.predict(out)

print("Distributed inference: ", decode_predictions(out, top=1)[0])

2023-02-08 16:13:52.097193: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Single device inference:  [('n06359193', 'web_site', 0.067092866)]
Inferencing with part 0
Inferencing with part 1
Inferencing with part 2
Inferencing with part 3
Distributed inference:  [('n06359193', 'web_site', 0.067092866)]
