# Google Hashcode 2019 Final Round: Compiling Google

In [1]:
import numpy as np
import io
import os
import pathlib
import networkx as nx
import matplotlib.pyplot as plt

In [2]:
config InlineBackend.figure_format = 'retina'

## Utility Functions

In [3]:
def open_input(filename):
    # read file
    path=str(pathlib.Path().absolute())+"/dataset/"+filename
    input_file = io.open(path, mode='r')
    lines = input_file.readlines() #lista di righe del file sorgente
    input_file.close()

    # parsing first line
    [num_file,num_target,num_server] = np.fromstring(lines[0], dtype=int, sep=' ')

    # conversion of read lines from list of string to list of lists 
    lines = [i.strip("[]\n").split(" ") for i in lines]

    # blank dicts
    time = {}
    dep = {}
    target = {}

    
    # build time & dep dicts
    row=1

    while row <= num_file*2:
        time.update({lines[row][0] : [lines[row][1], lines[row][2]]})
        row_dep=row+1

        if lines[row_dep][0]== "0":
            dep.update({lines[row][0]: []})
        else:
            num_dep=int(lines[row_dep][0])
            dep.update({lines[row][0]: lines[row_dep][-num_dep:]})

        row+=2

    # build target dict     
    row=1+num_file*2

    while row <= num_target+(num_file*2):
        target.update({lines[row][0] : [lines[row][1], lines[row][2]]})
        row+=1;

    # conversion dicts lists from string to int
    time = dict((k,list(map(int,v))) for k,v in time.items())
    target = dict((k,list(map(int,v))) for k,v in target.items())

    # generate file list
    file=list(time.keys())

    return num_file, num_target, num_server, file, time, dep, target

In [4]:
#creates the matrix of execution sequences
def create_sequence(num_server, target, dep): 
    
    # INPUT
    # num_serv: number of servers
    # target: dictionary of target's files
    # dep: dictionary of file's dependencies
    
    # OUTPUT
    # matrix: matrix of execution sequences
    
    
    # number of files target
    num_target = len(target)
    # list of target files
    target_keys = list(target.keys())
    # declaration of emtpy matrix
    matrix = []
    
   
    matrix_dim = min(num_server,num_target)
    # insert a target file for each matrix' row 
    for i in range(matrix_dim):
        row = []
        row.append(target_keys[i])
        matrix.append(row)
    
    # declaration of empty list of nodes. Contains nodes that must be inserted in a matrix' row
    nodes = []
    # declaration of empty list of files. Contains files that have already been inserted in matrix
    file_list = []
    
    # continues filling the matrix with dependencies of file targets
    for i in range(matrix_dim):
        nodes.append(matrix[i][0])
        
        while len(nodes) != 0:
            dep_list = dep[nodes[0]]
            
            for f in dep_list:
                # check whether the dependency file is already being compiled on a server or not
                if not(f in file_list):
                    matrix[i].insert(0,f)
                    file_list.append(f)
                    nodes.append(f)
                    
            nodes.pop(0)  
        
    return matrix

In [5]:
# creates list of target files that do not generates dependencies
def find_target(target, dep):
    no_dep_tar = []
    
    for key_tar in target:
        no_dep = True
        
        for key_dep in dep:
            if key_tar in dep[key_dep]:
                no_dep = False
                break
                
        if no_dep:
            no_dep_tar.append(key_tar)
    return no_dep_tar

In [6]:
def build_graph(time, dep, sequence):
    # Define the directed graph of the files
    graph = nx.DiGraph()

    # Add the nodes of the graph
    for i,s in enumerate(sequence):
        graph.add_nodes_from(s, server=i+1)

    # Add the edges of the graph
    for s in sequence:    
        # add an edge between files on the same server
        for i in range(len(s)-1):
            w = time[s[i]][0]
            graph.add_edge(s[i], s[i+1], weight=w, n_weight=-w, color="black")
        # add dipendencies edges for each file on a server
        for i in range(len(s)):       
            dep_files = dep[s[i]]
            # check that the files has dipendencies
            if dep_files: 
                for f in dep_files:
                    # check that the file is on an other server
                    if f not in s:
                        w = time[f][0]+time[f][1]
                        graph.add_edge(f, s[i], weight=w, n_weight=-w, color="black")

    # Add two fake nodes
    graph.add_node("s", server=-1)
    graph.add_node("t", server=-1)

    for node in list(graph.nodes):
        if (node!="s" and node!="t"):
            # if node doesn't have in-edges, add edge s -> node
            if not graph.in_edges(node):
                graph.add_edge("s", node, weight=0, n_weight=0, color="black")
            # if node doesn't have out-edges, add edge node -> t
            elif not graph.out_edges(node):
                w = time[node][0]
                graph.add_edge(node, "t", weight=w, n_weight=-w, color="black")    
    
    return graph;

In [7]:
def get_longest_path(graph):
    # Get the longest path using negative weights and Bellman-Ford algorithm
    distance, path = nx.algorithms.shortest_paths.weighted.single_source_bellman_ford(graph, "s", weight="n_weight")
    # change the color of the edge on the path
    for key,value in path.items():
        if len(value)>1:
            for i in range(len(value)-1):
                graph[value[i]][value[i+1]]["color"] = "red"
    
    return graph, distance, path;

In [8]:
def draw_graph(graph):
    # Draw the given graph
    fig = plt.figure(figsize=(10,6))
    ax = plt.axes()
    # graph layout
    pos = nx.planar_layout(graph)
    # draw nodes
    cmap = plt.get_cmap('Pastel1')
    # https://matplotlib.org/3.1.0/tutorials/colors/colormaps.html <----- visit here for others color maps
    colors = [cmap(i) for i in np.linspace(0, 1, len(sequence)+1)]
    for i,server in enumerate(sequence):
        nx.draw_networkx_nodes(graph, pos, ax=ax, nodelist=server, node_color=[colors[i]], alpha=1, edgecolors="black")
    nx.draw_networkx_nodes(graph, pos, ax=ax, nodelist=["s","t"], node_color=[colors[len(sequence)]], alpha=1, edgecolors="black")
    nx.draw_networkx_labels(graph, pos, ax=ax);
    # draw edges
    color = nx.get_edge_attributes(graph,'color')
    nx.draw_networkx_edges(graph, pos, ax=ax, width=1.0, alpha=0.8, edge_color=color.values())
    edge_labels = nx.get_edge_attributes(graph,'weight')
    nx.draw_networkx_edge_labels(graph, pos, ax=ax, edge_labels=edge_labels, label_pos=0.5)

    plt.show()
    
    return graph

In [9]:
def get_score(distance, target, time):
    # Compute the score of the solution
    score = 0
    for file in target:
        # check if the file target is in the given solution
        if file in distance:
            d = target[file][0]
            x = -distance[file]+time[file][0]
            # check if the file is compiled before the deadline
            if x<=d:
                g = target[file][1]
                score = score + d-x + g
        
    return score;

In [10]:
def write_file(distance, graph):
    
    # INPUT
    # distance: dictionary of distances for current execution sequence
    # graph: graph of current execution sequence
    
    # OUTPUT
    # creates/overwrites file in path "./output/submission_file.txt"
    

    # convert distances in positive values
    distance = dict((k,abs(v)) for k,v in distance.items())
    #convert distance dictionary to list
    distance_list = sorted(distance.items(), key=lambda x: x[1])
    
    # checking wheater output's folder exists
    path_dir=str(pathlib.Path().absolute())+ "/output"
    if not os.path.exists(path_dir):
        os.mkdir(path_dir)  
    
    #opening file
    path = path_dir + '/submission_file.txt'
    file_object = open(path, 'w')
    
    # writing file
    file_object.write(str(len(distance_list)-2)+ '\n')
    print(str(len(distance_list)-2))
    
    for i in distance_list:
        if(i[0] != 's' and i[0] != 't'):
            file = i[0]
            time = i[1]
            server = graph.nodes[file]['server']
            file_object.write(file + " " + str(server) + '\n')
            print(file + " " + str(time) + " " + str(server))

    file_object.close()

In [11]:
def build_dep_graph(dep, target):
    
    # Define the directed dependencies graph
    dep_graph = nx.DiGraph()
    
    # Add the nodes of the graph
    dep_graph.add_nodes_from(list(dep.keys()), target=False, priority=0)
    
    # Add the edges of the graph
    for file, dep_list in dep.items():
        for d in dep_list:
            dep_graph.add_edge(d, file)
    
    # Set target attribute
    for t in target:
        dep_graph.nodes[t]['target'] = True
        
#         pred = my_ancestors(dep_graph, t) 
        dep_graph.nodes[t]['priority'] = target[t][1] / target[t][0] 
                
    return dep_graph

In [12]:
def draw_dep_graph(dep_graph):
    
    # Draw the dependencies graph
    fig = plt.figure(figsize=(10,6))
    ax = plt.axes()
    # graph layout
    pos = nx.planar_layout(dep_graph)
    # draw nodes
    cmap = plt.get_cmap('Pastel1')
    nx.draw_networkx_nodes(dep_graph, pos, ax=ax, nodelist=[n for n in dep_graph if n not in target], node_color=[cmap(1)], alpha=1, edgecolors="black")
    nx.draw_networkx_nodes(dep_graph, pos, ax=ax, nodelist=[n for n in dep_graph if n in target], node_color=[cmap(0)], alpha=1, edgecolors="black")
    nx.draw_networkx_labels(dep_graph, pos, ax=ax);
    # draw edges
    nx.draw_networkx_edges(dep_graph, pos, ax=ax, width=1.0, alpha=0.8, edge_color="black")

    plt.show()

In [13]:
def priority_mapping(node):
    return -dep_graph.nodes[node]['priority']

In [14]:
def update_priority(node, temp_graph):
    pred = my_ancestors(temp_graph, node)
    temp_graph.nodes[node]['priority'] = target[node][1] - target[node][0] - (len(pred)-1)

In [15]:
def place_files(pred, server_time, sequence):
    
#     e se sto fatto lo facciamo per tutti i predecessori ? 
#     e se non riusciamo a soddisfare una dedline rendiamo il file "normale" ? (attributo = False)
#         -> possibile inculata: lo sappiamo solo alla fine....
    
    min_ind = init = server_time.index(min(server_time)) 
    
    x 
    
    for i, file in enumerate(pred):
        if i == 0 or i == len(pred)-1:
            server_time[init] = server_time[init] + time[file][0]
            sequence[init].append(file)
        else:
            min_ind = server_time.index(min(server_time))
            if server_time[min_ind] + time[file][0] + time[file][1] <= server_time[init] + time[file][0]:
                server_time[min_ind] = server_time[min_ind] + time[file][0]
                sequence[min_ind].append(file)
            else:
                server_time[init] = server_time[init] + time[file][0]
                sequence[init].append(file)


In [16]:
def my_ancestors(G, source):
 
    if not G.has_node(source):
        raise nx.NetworkXError("The node %s is not in the graph." % source)
    anc = list(n for n, d in nx.shortest_path_length(G, target=source).items())
    anc.remove(source)
    return anc

In [17]:
def compile_greedy(dep_graph, sequence, server_time):
    
    temp_graph = dep_graph
    toggle = True
    
    while toggle:
#         for t in temp_graph:
#             if temp_graph.nodes[t]['target']:
#                 update_priority(t, temp_graph)
        
        sorted_files = list(nx.lexicographical_topological_sort(temp_graph, key = priority_mapping))
        
        toggle = False
        
        for f in sorted_files:
            if temp_graph.nodes[f]['target']:
                first_target = f
                toggle = True
                break
        
        if toggle:
            pred = my_ancestors(temp_graph, first_target)            
            pred.reverse()
            pred.append(first_target)
            
            for p in pred:
                file.remove(p)

            place_files(pred, server_time, sequence)
            temp_graph.remove_nodes_from([n for n in temp_graph if n in set(pred)])

# Main Algorithm

In [59]:
# num_file, num_target, num_server, file, time, dep, target = open_input("a_example.in")
# num_file, num_target, num_server, file, time, dep, target = open_input("b_narrow.in")
# num_file, num_target, num_server, file, time, dep, target = open_input("c_urgent.in")
# num_file, num_target, num_server, file, time, dep, target = open_input("d_typical.in")
# num_file, num_target, num_server, file, time, dep, target = open_input("e_intriguing.in")
num_file, num_target, num_server, file, time, dep, target = open_input("f_big.in")

In [60]:
dep_graph = build_dep_graph(dep, target)
# draw_dep_graph(dep_graph)

In [61]:
server_time=[0]*num_server
sequence = [[] for _ in range(num_server)]
compile_greedy(dep_graph, sequence, server_time)

In [62]:
graph = build_graph(time, dep, sequence)
# graph = draw_graph(graph)

In [63]:
graph, distance, path = get_longest_path(graph)
# graph = draw_graph(graph)

In [64]:
score = get_score(distance, target, time)
score

0

In [65]:
write_file(distance, graph)

9334
s699228 0 1
s544374 0 2
s1243400 0 3
s1485217 0 4
s1207961 0 5
s477990 0 6
s1440120 0 7
s798526 0 8
s1801971 0 9
s1100523 0 10
s505569 0 11
s1266365 0 12
s1922228 0 13
s441957 0 14
s1656674 0 15
s890051 0 16
s783147 0 17
s113071 0 18
s1215008 0 19
s1127074 0 20
s1104687 0 21
s967331 0 22
s1928314 0 23
s1455265 0 24
s48957 0 25
s596999 0 26
s87289 0 27
s1762157 0 28
s506592 0 29
s1024256 0 30
s351297 0 31
s76530 0 32
s1729679 0 33
s1368119 0 34
s1908465 0 35
s148315 0 36
s1078358 0 37
s528022 0 38
s1166159 0 39
s1898329 0 40
s1033746 0 41
s559126 0 42
s1205539 0 43
s1259435 0 44
s1751718 0 45
s1685068 0 46
s1508137 0 47
s1256177 0 48
s70913 0 49
s868545 0 50
s402274 0 51
s1199397 0 52
s1824654 0 53
s758024 0 54
s1136483 0 55
s1543145 0 56
s1626087 0 57
s455838 0 58
s261150 0 59
s1439030 0 60
s1757807 0 61
s145031 0 62
s1552995 0 63
s1290339 0 64
s756470 0 65
s820284 0 66
s259814 0 67
s232683 0 68
s1578569 0 69
s168353 0 70
s1006779 0 71
s776235 0 72
s1632035 0 73
s1170446 0 74
s776

s947432 17 31
s207103 17 32
s319545 17 33
s865560 17 34
s1366849 17 35
s1771219 17 36
s694620 17 37
s92643 17 38
s1718325 17 39
s1928809 17 40
s235079 17 41
s1885381 17 42
s814776 17 43
s1871998 17 44
s1746713 17 45
s1712731 17 46
s827894 17 47
s1016951 17 48
s619354 17 49
s1921823 17 50
s93572 17 51
s40743 17 52
s830078 17 53
s1610329 17 54
s1118487 17 55
s148827 17 56
s100988 17 57
s308228 17 58
s1404164 17 59
s1036409 17 60
s996529 17 61
s27938 17 62
s839911 17 63
s1995177 17 64
s201552 17 65
s267116 17 66
s902511 17 67
s627476 17 68
s836756 17 69
s915528 17 70
s1606723 17 71
s1333065 17 72
s1780789 17 73
s1136301 17 74
s527809 17 75
s66677 17 76
s1753485 17 77
s923448 17 78
s1734274 17 79
s374209 17 80
s1072555 17 86
s947309 17 88
s617776 17 89
s1499059 17 91
s1467954 17 92
s1691473 17 95
s109663 17 100
s499264 18 1
s566045 18 2
s1124271 18 3
s1818469 18 4
s1380616 18 5
s1853391 18 6
s1327819 18 7
s1179701 18 8
s1099910 18 9
s1426962 18 10
s1779379 18 11
s118537 18 12
s572113 18 13

s1773440 46 7
s1279664 46 8
s1037540 46 9
s1714142 46 10
s1614167 46 11
s178087 46 12
s843632 46 13
s1809609 46 14
s7705 46 15
s1246530 46 16
s1970137 46 17
s112052 46 18
s367080 46 19
s698721 46 20
s523682 46 21
s1814446 46 22
s598911 46 23
s1115919 46 24
s507521 46 25
s863814 46 26
s4858 46 27
s987523 46 28
s688937 46 29
s1583595 46 30
s51745 46 31
s1016900 46 32
s80747 46 33
s371851 46 34
s997895 46 36
s1768941 46 38
s602695 46 39
s603837 46 46
s1958807 47 1
s1840083 47 2
s1091696 47 3
s50532 47 4
s975556 47 5
s99871 47 6
s1085131 47 7
s1664303 47 8
s1258478 47 9
s1914919 47 10
s999303 47 11
s1385538 47 12
s634704 47 13
s1834355 47 14
s1053137 47 15
s284011 47 16
s1702031 47 17
s516653 47 18
s1005825 47 19
s1727760 47 20
s11444 47 21
s1831985 47 22
s1094887 47 23
s156569 47 24
s396679 47 25
s470812 47 26
s1638759 47 27
s1923017 47 28
s1094143 47 29
s1078666 47 30
s723003 47 31
s1832611 47 32
s19535 47 33
s1600462 47 34
s771176 47 36
s841317 47 38
s1509750 47 39
s114053 48 1
s712325 

c1867624 5038040 2
s1296206 5093894 70
c1043908 5095040 85
s224986 5132936 66
c622484 5155540 24
c116610 5155540 49
c863345 5209062 3
c159136 5212232 50
c694698 5212232 44
c1034726 5212232 33
c409985 5212232 10
c1735841 5212232 73
c574056 5221830 23
c41037 5221830 75
c39428 5221830 76
c1723358 5221830 26
c1365068 5221830 91
c368451 5221830 8
c511650 5221830 64
s103070 5248029 76
s386223 5248030 76
c1997373 5248031 76
s374503 5297643 88
c964067 5297644 88
s1431915 5300464 52
c1838563 5300465 52
c896606 5330162 40
c786036 5330162 66
c1829255 5330162 42
s1698763 5391204 57
c1946425 5391205 57
c1189096 5405615 73
c1794871 5432027 99
c1868090 5432027 45
c1402663 5432027 53
c1858610 5432027 82
c370784 5432027 19
c243410 5449838 78
c1985198 5452007 76
c285529 5563571 83
c175810 5582238 61
c1278272 5582238 59
c431417 5648747 51
c1306060 5650034 23
s247528 5666680 91
c284363 5666681 91
c1040944 5726294 64
c406729 5726294 77
c156462 5726294 31
c288126 5726294 67
c1154625 5726294 43
c1975886 5726

c856447 23581800 52
c1863377 23581800 29
c1927492 23581800 27
c1766644 23590438 55
c1938260 23591941 79
c1979100 23601495 77
c1798944 23601606 15
c1016917 23608825 59
c994841 23649124 79
c16777 23659746 87
c1775454 23663799 23
c397500 23669275 53
c983615 23675553 88
c1889058 23689186 28
c28809 23718821 63
s104370 23746828 75
c1602022 23746829 75
c1568312 24116433 76
c1165594 24116433 1
c413169 24116433 36
c919454 24116433 64
c1541496 24116433 72
c1118344 24116433 83
c1800194 24116433 54
c32231 24116433 32
c1464796 24116433 92
c612690 24116433 67
c1467562 24116433 39
c166111 24116433 78
c648828 24116433 94
c72810 24116433 7
c708602 24116433 80
c280084 24116433 29
c643488 24116433 75
c244097 24116433 98
c453388 24116433 12
c686564 24116433 58
c1797616 24116433 70
c1937468 24116433 97
c1108901 24116433 33
c837569 24116433 17
c1566369 24116433 4
c1660679 24116433 96
c973316 24116433 37
c1035886 24116433 82
c1072357 24116433 35
c105638 24116433 61
c748724 24116433 34
c1822796 24116433 30
c1

c35115 38008390 20
c701902 38008390 60
c1830056 38008390 28
c401776 38008390 61
c1268873 38017997 69
c1352941 38022412 43
c1824264 38028482 83
c1639845 38035039 37
c711034 38059468 87
c622991 38060326 58
c1189589 38138462 96
c1727122 38144514 67
c955614 38146763 42
c385949 38150169 24
c1546404 38186107 96
c1339626 38256409 52
c671600 38299024 75
c116724 38308758 83
c796354 38318635 96
c1425727 38324324 96
s1939167 38370119 73
c130331 38370120 73
c1525688 38373207 61
c697053 38563865 19
c579910 38563865 48
c850483 38563865 75
c183546 38563865 95
c1383204 38563865 29
c1414445 38563865 56
c520277 38563865 99
c1704619 38563865 46
c374354 38563865 93
c978124 38563865 57
c763235 38563865 66
c1769857 38563865 91
c817292 38563865 10
c1489338 38563865 79
c1512071 38563865 27
c1342979 38563865 41
c694285 38563865 62
c1719733 38563865 20
c766663 38563865 14
c539518 38563865 22
c439567 38563865 30
c150397 38563865 85
c791791 38563865 38
c1861813 38563865 8
c273271 38563865 33
c814108 38563865 54
c

# DOUBTS
* Ci sono dei file non target che non servono a nessun file target:


In [25]:
file

[]