In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from sklearn.preprocessing import MinMaxScaler
import sys
sys.path.append("..")  # Add parent directory to the system path
import config

# Set to True when the notebook runs on Google Colab with the data on a mounted Drive.
COLAB = False

if COLAB:
    # Imported here because the module is only needed (and used) on the Colab branch.
    from google.colab import drive
    drive.mount('/content/drive')
    curr_path = config.drive_path
else:
    curr_path = os.getcwd()

# Both data directories are resolved relative to the parent of `curr_path`.
data_path = os.path.join(curr_path, "../", config.data_path)
proc_data_path = os.path.join(curr_path, "../", config.processed_data_path)

for directory in (data_path, proc_data_path):
    # exist_ok=True replaces the separate exists() check, so there is no window
    # between checking for the directory and creating it.
    os.makedirs(directory, exist_ok=True)

# Input files read by the cells below.
application_data_file = os.path.join(data_path, config.application_data_filename)
router_data_file = os.path.join(data_path, config.router_data_filename)


In [12]:
import re
from io import StringIO

# Text that may trail a record; everything from it onwards is discarded.
CHANGE_MARKER = "======== CHANGE"

lines = []

with open(application_data_file, 'r') as file:
    for line in file:
        # Keep only the records whose text, after the first character, starts with 'MARK_'.
        if not line[1:].startswith('MARK_'):
            continue

        # Remove the line terminator explicitly. The previous code relied on
        # `str.find` returning -1 (which is truthy) to slice off the last
        # character, which loses a data character when the final line of the
        # file has no trailing newline.
        line = line.rstrip('\n')

        marker_position = line.find(CHANGE_MARKER)
        if marker_position != -1:
            line = line[:marker_position]

        lines.append(line)

data_string = '\n'.join(lines)

application_data = pd.read_csv(StringIO(data_string), sep=' ', header=None)

# Discard the even-positioned columns 0..10 and column 5; the four columns
# that remain are named just below.
application_data = application_data.drop(columns=[0, 2, 4, 5, 6, 8, 10])
application_data.columns = ['iteration', 'node', 'rank', 'time']

# The iteration field is text; keep only the first run of digits found in it.
application_data['iteration'] = application_data['iteration'].apply(lambda x: re.findall(r'(\d+)', x)[0])

application_data = application_data.astype({
    'iteration': int,
    'node': int,
    'rank': int,
    'time': float,
})

application_data

Unnamed: 0,iteration,node,rank,time
0,0,39,10,1.665091e+06
1,0,68,15,1.665421e+06
2,0,62,23,1.665421e+06
3,0,37,18,1.665726e+06
4,0,29,11,1.665733e+06
...,...,...,...,...
271995,3999,48,7,1.224125e+10
271996,3999,52,11,1.224125e+10
271997,3999,27,31,1.224125e+10
271998,3999,67,14,1.224125e+10


In [4]:
# Distinct node ids, in order of first appearance in the application data.
application_nodes = application_data['node'].unique()

# Highest iteration number recorded by each node (groups kept in order of
# first appearance), reduced to the list of distinct values.
last_iteration_by_node = application_data.groupby('node', sort=False)['iteration'].max()
iterations = list(last_iteration_by_node.unique())

iterations

[3999]

In [5]:
# The first 36 distinct node ids (in order of first appearance) form the
# `nodes_milc` group; every remaining id goes to `nodes_lammps`.
milc_node_count = 36
nodes_milc = application_nodes[:milc_node_count]
nodes_lammps = application_nodes[milc_node_count:]

for node_group in (nodes_milc, nodes_lammps):
    print(len(node_group))

# Sanity check: the two groups together cover every node exactly once.
assert sorted(application_nodes) == sorted([*nodes_milc, *nodes_lammps])

36
32


In [6]:
# Show the node ids of each group in ascending order (lammps first, then milc).
for node_group in (nodes_lammps, nodes_milc):
    print(sorted(node_group))

[2, 7, 9, 10, 11, 14, 16, 18, 21, 22, 23, 24, 25, 27, 28, 31, 33, 42, 45, 48, 49, 52, 53, 54, 55, 56, 57, 58, 60, 64, 67, 69]
[0, 1, 3, 4, 5, 8, 12, 15, 17, 19, 20, 26, 29, 30, 32, 34, 35, 37, 38, 39, 40, 41, 43, 44, 46, 47, 50, 51, 61, 62, 63, 65, 66, 68, 70, 71]


In [7]:
# Node group treated as "active" by the rest of the notebook; here the
# `nodes_milc` group is selected.
active_nodes = nodes_milc

In [10]:
# This filter is needed for the milc+lammps run: keep only the records of the
# active nodes, up to and including `max_iteration`.
max_iteration = 3999
on_active_node = application_data['node'].isin(active_nodes)
within_iteration_limit = application_data['iteration'] <= max_iteration
application_data = application_data[on_active_node & within_iteration_limit]
application_data

Unnamed: 0,iteration,node,rank,time
72,0,22,3,3.297528e+06
73,0,23,19,3.297865e+06
74,0,21,0,3.297867e+06
75,0,42,4,3.298159e+06
76,0,48,7,3.298478e+06
...,...,...,...,...
271995,3999,48,7,1.224125e+10
271996,3999,52,11,1.224125e+10
271997,3999,27,31,1.224125e+10
271998,3999,67,14,1.224125e+10


In [18]:
# Largest time value recorded by any node of the `nodes_milc` group.
milc_rows = application_data['node'].isin(nodes_milc)
max_milc = application_data.loc[milc_rows, 'time'].max()

# Records of the `nodes_lammps` group whose time is later than that value.
lammps_rows = application_data['node'].isin(nodes_lammps)
application_data[lammps_rows & (application_data['time'] > max_milc)]

Unnamed: 0,iteration,node,rank,time
208160,2005,64,1,6.570684e+09
208161,2005,56,17,6.570685e+09
208162,2005,60,5,6.570686e+09
208163,2005,58,21,6.570686e+09
208164,2005,54,13,6.570687e+09
...,...,...,...,...
271995,3999,48,7,1.224125e+10
271996,3999,52,11,1.224125e+10
271997,3999,27,31,1.224125e+10
271998,3999,67,14,1.224125e+10


In [10]:
router_data = pd.read_csv(router_data_file, sep=' ')

# 'vc-occupancy' and 'downstream-credits' each pack four ':'-separated values
# in one text column; expand each into four columns suffixed -0 .. -3.
# (A previous stand-alone split whose result was discarded has been removed.)
for packed_column in ('vc-occupancy', 'downstream-credits'):
    expanded_names = [f'{packed_column}-{position}' for position in range(4)]
    router_data[expanded_names] = router_data[packed_column].str.split(':', expand=True)

# Drop the packed originals and the 'Unnamed: 0' column read from the file.
# Reassigning (instead of inplace=True) keeps the step explicit and chainable.
router_data = router_data.drop(columns=['Unnamed: 0', 'vc-occupancy', 'downstream-credits'])

# Fix the dtypes; the expanded columns are text after the split and must be
# converted to integers.
router_data = router_data.astype({
    'router-id': 'int',
    'time-stamp': 'int',
    'port-id': 'int',
    'qos-level': 'int',
    'bw-consumed': 'float',
    'qos-status': 'int',
    'qos-data': 'int',
    'busy-time': 'float',
    'qos-green-total': 'int',
    'qos-green-sent': 'int',
    'qos-yellow-total': 'int',
    'qos-yellow-sent': 'int',
    'qos-red-total': 'int',
    'qos-red-sent': 'int',
    'vc-occupancy-0': 'int',
    'vc-occupancy-1': 'int',
    'vc-occupancy-2': 'int',
    'vc-occupancy-3': 'int',
    'downstream-credits-0': 'int',
    'downstream-credits-1': 'int',
    'downstream-credits-2': 'int',
    'downstream-credits-3': 'int',
})

router_data

Unnamed: 0,router-id,time-stamp,port-id,qos-level,bw-consumed,qos-status,qos-data,busy-time,qos-green-total,qos-green-sent,...,qos-red-total,qos-red-sent,vc-occupancy-0,vc-occupancy-1,vc-occupancy-2,vc-occupancy-3,downstream-credits-0,downstream-credits-1,downstream-credits-2,downstream-credits-3
0,22,250000,0,0,91.030129,1,1282875,6573.097557,98,0,...,0,146,0,0,0,0,4096,12288,12288,16384
1,22,250000,1,0,87.825386,1,1237711,5506.449145,118,0,...,0,113,0,8192,0,0,16384,8192,16384,12288
2,22,250000,2,0,91.343834,1,1287296,6400.633618,90,0,...,0,77,0,8192,4096,0,16384,4096,12288,16384
3,22,250000,3,0,79.994755,1,1127355,12533.295846,154,0,...,0,0,0,4096,0,0,16384,4096,16384,16384
4,22,250000,4,0,81.233255,1,1144809,5179.314219,160,0,...,0,0,0,0,0,0,16384,4096,12288,16384
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12339175,31,12241250000,2,0,28.060377,1,395451,0.000000,105,0,...,0,0,0,0,0,0,16384,16384,16384,16384
12339176,31,12241250000,3,0,26.448993,1,372742,2279.486904,90,0,...,0,0,0,0,0,0,16384,16384,16384,16384
12339177,31,12241250000,4,0,35.414171,1,499087,0.000000,109,0,...,0,0,0,0,0,0,16384,16384,16384,16384
12339178,31,12241250000,5,0,0.000000,1,0,0.000000,0,0,...,0,0,0,0,0,0,32768,32768,32768,32768


In [11]:
# Create the merged dataframe: aggregate the network data of every router port over each application iteration.

# CSV file in which the merged dataframe is stored.
merged_df_path = proc_data_path + '/merged_df_allPorts_LAMMPS_4000.csv'
# When True, the merged dataframe is recomputed even if the CSV file already exists.
FORCE_DF_COMPUTATION = True

def get_data_to_add(router_data_filtered, iteration_time, application_iteration, router_id, router_port, compute_node):
    """Summarise a slice of router samples into one flat record.

    Parameters
    ----------
    router_data_filtered : pandas.DataFrame
        Router samples to aggregate. Must contain 'time-stamp', the nine
        scalar metric columns listed below and the columns
        'vc-occupancy-0..3' and 'downstream-credits-0..3'.
    iteration_time, application_iteration, router_id, router_port, compute_node
        Stored unchanged under the keys 'iteration-duration', 'iteration',
        'router-id', 'port-id' and 'compute-node'.

    Returns
    -------
    dict
        The five identification keys, followed by the avg / q25 / q75 / min /
        max of every metric column and by the '-last' value (taken from the
        row with the greatest 'time-stamp') of each vc-occupancy and
        downstream-credits column. Key order is the same as before, so the
        column order of a dataframe built from these records is unchanged.

    Notes
    -----
    'downstream-credits-0-avg' used to be computed with quantile(0.25); it is
    now the mean, like every other '-avg' entry.
    NOTE(review): an empty `router_data_filtered` is not handled here; the
    idxmax() lookup is expected to fail on it - confirm callers never pass one.
    """
    scalar_metrics = (
        'bw-consumed', 'qos-data', 'busy-time',
        'qos-green-total', 'qos-green-sent',
        'qos-yellow-total', 'qos-yellow-sent',
        'qos-red-total', 'qos-red-sent',
    )
    channel_metrics = ('vc-occupancy', 'downstream-credits')
    channels = range(4)
    # (key suffix, aggregation) pairs, in the order they appear in the record.
    statistics = (
        ('avg', lambda series: series.mean()),
        ('q25', lambda series: series.quantile(0.25)),
        ('q75', lambda series: series.quantile(0.75)),
        ('min', lambda series: series.min()),
        ('max', lambda series: series.max()),
    )

    # Index label of the most recent sample; used for the '-last' entries.
    id_max_timestamp = router_data_filtered['time-stamp'].idxmax()

    data_to_add = {
        'iteration-duration': iteration_time,
        'iteration': application_iteration,
        'router-id': router_id,
        'port-id': router_port,
        'compute-node': compute_node,
    }

    # Scalar metrics: all five statistics of one metric are grouped together.
    for metric in scalar_metrics:
        samples = router_data_filtered[metric]
        for suffix, aggregate in statistics:
            data_to_add[f'{metric}-{suffix}'] = aggregate(samples)

    # Per-channel metrics: one statistic at a time across the four channels.
    for metric in channel_metrics:
        for suffix, aggregate in statistics:
            for channel in channels:
                column = f'{metric}-{channel}'
                data_to_add[f'{column}-{suffix}'] = aggregate(router_data_filtered[column])

    # Value of every per-channel column at the most recent sample.
    for metric in channel_metrics:
        for channel in channels:
            column = f'{metric}-{channel}'
            data_to_add[f'{column}-last'] = router_data_filtered.loc[id_max_timestamp, column]

    return data_to_add

def _router_window(router_id, router_port, lower_bound, upper_bound):
    """Return the `router_data` rows of one router port whose time-stamp lies in [lower_bound, upper_bound]."""
    return router_data.loc[(router_data['router-id'] == router_id) &
                           (router_data['port-id'] == router_port) &
                           (router_data['time-stamp'] >= lower_bound) &
                           (router_data['time-stamp'] <= upper_bound)]


# NOTE(review): the cached branch defines `merged_df`, while the computed
# branch leaves the complete result in `merged_df_` (and only the STEP 1 rows
# in `merged_df`). Names are kept as they were; confirm which one later cells use.
if os.path.exists(merged_df_path) and not FORCE_DF_COMPUTATION:
    merged_df = pd.read_csv(merged_df_path)
else:
    # ------------------------------------------------------------------
    # STEP 1: one record per application record (the node's own port 5/6),
    # plus, once per (iteration, router), the records of the shared ports.
    # ------------------------------------------------------------------
    data_to_add_list = []
    # (application_iteration, router_id) pairs whose shared ports were already
    # added; a set keeps the membership test constant-time.
    processed_routers = set()

    total_records = len(application_data)
    last_reported_percent = -1

    for i, application_data_record in enumerate(application_data.itertuples()):
        # Report each whole percent exactly once. The previous float test
        # (`progress % 1 == 0`) silently skipped some percentages.
        percent = i * 100 // total_records
        if percent != last_reported_percent:
            print("STEP 1: ", float(percent), '%', " - ", len(data_to_add_list), "entries processed")
            last_reported_percent = percent

        application_iteration = application_data_record.iteration
        application_node = application_data_record.node
        application_rank = application_data_record.rank
        application_time = application_data_record.time

        # Two nodes share a router: the even node uses port 5, the odd node port 6.
        router_id = int(application_node // 2)
        router_port = 5 if application_node % 2 == 0 else 6

        # The iteration spans from the same node/rank's previous record to this one.
        if application_iteration == 0:
            timestamp_lower_bound = 0
        else:
            timestamp_lower_bound = application_data.loc[
                (application_data['iteration'] == application_iteration - 1) &
                (application_data['node'] == application_node) &
                (application_data['rank'] == application_rank), 'time'].values[0]
        timestamp_upper_bound = application_time
        iteration_time = timestamp_upper_bound - timestamp_lower_bound

        router_data_filtered = _router_window(router_id, router_port,
                                              timestamp_lower_bound, timestamp_upper_bound)

        # 1 if the node is active (running the workload), 0 otherwise.
        node_is_active = 1 if application_node in active_nodes else 0

        data_to_add_list.append(
            get_data_to_add(router_data_filtered, iteration_time, application_iteration,
                            router_id, router_port, node_is_active))

        ### ports 0-4  +  neighbour node port (5 or 6): ###

        # Router data for the shared ports has to be added only once per
        # (iteration, router).
        if (application_iteration, router_id) in processed_routers:
            continue
        processed_routers.add((application_iteration, router_id))

        neighbour_node_id = application_node + 1 if router_port == 5 else application_node - 1
        neighbour_node_port = 5 if router_port == 6 else 6

        shared_ports = [0, 1, 2, 3, 4]
        # An inactive neighbour has no application records of its own, so its
        # port is covered here.
        if neighbour_node_id not in active_nodes:
            shared_ports.append(neighbour_node_port)

        # Time window for the shared ports, derived from the records of both
        # nodes attached to this router. It does not depend on the port, so it
        # is computed once, outside the port loop.
        on_this_router = ((application_data['node'] == application_node) |
                          (application_data['node'] == neighbour_node_id))
        if application_iteration == 0:
            shared_lower_bound = 0
            shared_lower_bound_avg = 0
        else:
            lb_df = application_data.loc[
                (application_data['iteration'] == application_iteration - 1) & on_this_router]
            shared_lower_bound = lb_df['time'].min()
            shared_lower_bound_avg = lb_df['time'].mean()

        ub_df = application_data.loc[
            (application_data['iteration'] == application_iteration) & on_this_router]
        shared_upper_bound = ub_df['time'].max()
        shared_upper_bound_avg = ub_df['time'].mean()

        iteration_time = shared_upper_bound_avg - shared_lower_bound_avg

        for shared_port in shared_ports:
            # FIX: the window is now the min/max over both nodes. Previously the
            # min/max were computed but never used, and the filter silently
            # reused the single-node bounds left over from earlier in this record.
            router_data_filtered = _router_window(router_id, shared_port,
                                                  shared_lower_bound, shared_upper_bound)

            # Ports 0-4 are not compute-node ports, and the neighbour port is
            # only included when the neighbour is inactive, so the flag is 0.
            data_to_add_list.append(
                get_data_to_add(router_data_filtered, iteration_time, application_iteration,
                                router_id, shared_port, 0))

    merged_df = pd.DataFrame.from_dict(data_to_add_list)

    # ------------------------------------------------------------------
    # STEP 2: add the data of ports 0-6 for the routers whose two connected
    # nodes are BOTH inactive. Each iteration is bounded by the iteration
    # times of the nearest active node.
    # ------------------------------------------------------------------
    sorted_active_nodes = np.array(sorted(active_nodes))
    data_to_add_list = []

    # Pairs (even node, odd node) sharing a router where neither node is
    # active; these routers were not visited in STEP 1.
    inactive_nodes_couples = [(n1, n1 + 1) for n1 in range(0, 71, 2)
                              if n1 not in active_nodes and n1 + 1 not in active_nodes]

    for i, (n1, n2) in enumerate(inactive_nodes_couples):
        progress = int(i / len(inactive_nodes_couples) * 100)
        print("STEP 2: ", progress, '%')

        # Active node with the closest id to either node of the pair.
        nearest_neighbour_to_n1 = sorted_active_nodes[np.abs(sorted_active_nodes - n1).argmin()]
        nearest_neighbour_to_n2 = sorted_active_nodes[np.abs(sorted_active_nodes - n2).argmin()]
        if np.abs(nearest_neighbour_to_n1 - n1) < np.abs(nearest_neighbour_to_n2 - n2):
            nearest_neighbour_node_id = nearest_neighbour_to_n1
        else:
            nearest_neighbour_node_id = nearest_neighbour_to_n2

        router_id = int(n1 // 2)

        # The iteration windows depend only on the reference node, not on the
        # port, so they are computed once per router and reused for all ports.
        on_reference_node = application_data['node'] == nearest_neighbour_node_id
        iteration_windows = []
        for application_iteration in range(0, max_iteration + 1):
            if application_iteration == 0:
                timestamp_lower_bound = 0
            else:
                lb_df = application_data.loc[
                    (application_data['iteration'] == application_iteration - 1) & on_reference_node]
                timestamp_lower_bound = lb_df['time'].min()

            ub_df = application_data.loc[
                (application_data['iteration'] == application_iteration) & on_reference_node]
            timestamp_upper_bound = ub_df['time'].max()

            iteration_windows.append((timestamp_lower_bound, timestamp_upper_bound))

        for router_port in [0, 1, 2, 3, 4, 5, 6]:
            for application_iteration, (timestamp_lower_bound, timestamp_upper_bound) in enumerate(iteration_windows):
                iteration_duration = timestamp_upper_bound - timestamp_lower_bound

                router_data_filtered = _router_window(router_id, router_port,
                                                      timestamp_lower_bound, timestamp_upper_bound)

                # Both nodes of this router are inactive, so the flag is 0.
                # (Previously a stale value left over from STEP 1 was passed.)
                data_to_add_list.append(
                    get_data_to_add(router_data_filtered, iteration_duration, application_iteration,
                                    router_id, router_port, 0))

    add_to_merged_df = pd.DataFrame.from_dict(data_to_add_list)

    merged_df_ = pd.concat([merged_df, add_to_merged_df], ignore_index=True)

    def compute_node(row):
        """Return 1 when the row's port (5 or 6) belongs to an active node, else 0."""
        # Port 5 maps to node 2 * router-id, port 6 to node 2 * router-id + 1.
        if row['port-id'] in [5, 6] and row['port-id'] - 5 + row['router-id'] * 2 in active_nodes:
            return 1
        else:
            return 0

    # Recompute the flag uniformly for every row of the combined dataframe.
    merged_df_['compute-node'] = merged_df_.apply(compute_node, axis=1)

    merged_df_.to_csv(merged_df_path, index=False)


STEP 1:  0.0 %  -  0 entries processed
STEP 1:  1.0 %  -  7000 entries processed
STEP 1:  2.0 %  -  14000 entries processed
STEP 1:  3.0 %  -  21000 entries processed
STEP 1:  4.0 %  -  28000 entries processed
STEP 1:  5.0 %  -  35000 entries processed
STEP 1:  6.0 %  -  42000 entries processed
STEP 1:  8.0 %  -  56000 entries processed
STEP 1:  9.0 %  -  63000 entries processed
STEP 1:  10.0 %  -  70000 entries processed
STEP 1:  11.0 %  -  77000 entries processed
STEP 1:  12.0 %  -  84000 entries processed
STEP 1:  13.0 %  -  91000 entries processed
STEP 1:  15.0 %  -  105000 entries processed
STEP 1:  16.0 %  -  112000 entries processed
STEP 1:  17.0 %  -  119000 entries processed
STEP 1:  18.0 %  -  126000 entries processed
STEP 1:  19.0 %  -  133000 entries processed
STEP 1:  20.0 %  -  140000 entries processed
STEP 1:  21.0 %  -  147000 entries processed
STEP 1:  22.0 %  -  154000 entries processed
STEP 1:  23.0 %  -  161000 entries processed
STEP 1:  24.0 %  -  168000 entries pr