# Apollo Analytical Model Explorer

Build and simulate the application of Apollo models against an exhaustive dataset.

In [20]:

def construct_model_from_flush(data, flush_key):
    # Grab the table
    td = data[flush_key]
    
    # Filter the refined data
    td['region_name'] = pd.Categorical(td['region_name'])
    td['region_name_id'] = td['region_name'].cat.codes
    name_swap = td[['region_name', 'region_name_id']]\
            .groupby(['region_name', 'region_name_id'], as_index=False, sort=True)\
            .first()
    grp_td = td.groupby(
        ['region_name', 'region_name_id', 'num_elements', 'policy_index'],
        as_index=False).agg({'time_avg':min})
    
    region_names = td['region_name'].unique().tolist()

    drop_fields = ['region_name', 'region_name_id', 'policy_index', 'time_avg']

    feature_names = [f for f in grp_td.columns if f not in drop_fields]
    model_count = 0
    
    # Set up the SKL pipeline
    # Build a model for each region
    all_skl_models = {}
    all_types_rule = {}
    all_rules_json = {}
    all_least_json = {}
    all_timed_json = {}
    all_sizes_data = {}
    overall_start = time.time()

    one_big_tree = False

    print("Training...")
    for region in region_names:
        model_count += 1
    
        if one_big_tree:
            rd = grp_td
            region = "__ANY_REGION__"
        else:
            rd = grp_td[grp_td['region_name'] == region]

        if (rd.shape[0] < 1): 
            continue

        y = rd["policy_index"].astype(int)
        x = rd.drop(drop_fields, axis="columns").values.astype(float)

        #example = DecisionTreeClassifier(
        #         class_weight=None, criterion='gini', max_depth=6,
        #         max_features=x.shape[1], max_leaf_nodes=None,
        #         min_impurity_decrease=1e-07, min_samples_leaf=1,
        #         min_samples_split=2, min_weight_fraction_leaf=0.0,
        #         presort=False, random_state=None, splitter='best'))]

        clf = DecisionTreeClassifier(
                 class_weight=None, criterion='gini', max_depth=1,
                 min_samples_leaf=1, min_samples_split=2)

        # Conduct some model evaluation:
        x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=1) # 75% training and 25% test

        pipe = [('estimator', clf)]
        model = Pipeline(pipe)

        model.fit(x, y)

        trained_model = model.named_steps['estimator']
        y_pred = trained_model.predict(x_test)

        # Does not work for small splits:
        #scores = cross_val_score(model, x, y, cv=5)

        all_types_rule[region] = "DecisionTree"
        all_rules_json[region] = tree_to_data(trained_model, feature_names, name_swap, y)
        all_least_json[region] = -1
        all_timed_json[region] = True
        all_sizes_data[region] = str(x.shape)
        all_skl_models[region] = trained_model
        
        print("model[\"" + str(region) + "\"].x_shape" + "%-12s" % str(x.shape) \
                + "%22s" % ("Acc%: " + "%6s" % ("%3.2f" % (100.0 * metrics.accuracy_score(y_test, y_pred)))))

        if one_big_tree:
            #print("")
            #print(json.dumps(tree_to_simple_str(trained_model, feature_names, name_swap, y), sort_keys=False, indent=4, ensure_ascii=True))
            #print("")
            break


    #
    # Now we're done building models.
    #
    if one_big_tree == False:
        model_def = {
                "guid": 0,
                "driver": {
                    "rules": all_rules_json,
                    "least": all_least_json,
                    "timed": all_timed_json,
                    },
                "region_names": list(region_names),
                "region_sizes": all_sizes_data,
                "region_types": all_types_rule,
                "features": {
                    "count": len(feature_names),
                    "names": feature_names,
                    },
                }
    else:
        model_def = {
                "guid": 0,
                "driver": {
                    "rules": all_rules_json,
                    "least": all_least_json,
                    "timed": all_timed_json,
                    },
                "region_names": "__ANY_REGION__",
                "region_sizes": all_sizes_data,
                "region_types": all_types_rule,
                "features": {
                    "count": len(feature_names),
                    "names": feature_names,
                    },
                }

    # Add in a default model (Static, OMP defaults) for any unnamed region:
    if one_big_tree == False:
        model_def["region_names"].append("__ANY_REGION__")
        model_def["region_sizes"]["__ANY_REGION__"] = "(0, 0)"
        model_def["region_types"]["__ANY_REGION__"] = "Static"
        model_def["driver"]["rules"]["__ANY_REGION__"] = "0"
        model_def["driver"]["least"]["__ANY_REGION__"] = "-1"
        model_def["driver"]["timed"]["__ANY_REGION__"] = True

    #model_as_json = json.dumps(model_def, sort_keys=False, indent=4, ensure_ascii=True) + "\n"              
    return all_skl_models



def project_model_over_trace(data):
    # compile a model
    print("Constructing models:")
    all_models = construct_model_from_flush(data, 'apollo.flush')
    print("Done.")
    
    # go through the trace and evaluate the model
    #
    print("\n\nTrace:")
    for row in data['apollo.trace'].itertuples():
        step          = int(row.step)
        region_name   = str(row.region_name)
        policy_index  = int(row.policy_index)
        num_threads   = int(row.num_threads)
        num_elements  = int(row.num_elements)
        time_exec     = float(row.time_exec)
        print("region_name: %s num_elements: %d" % (region_name, num_elements))
        if (row.Index > 25):
            print("...")
            break
    #
    return


def plot_apollo_vs_normal(data):
    # Grab the data:
    apollo16 = data['apollo.steps']
    normal16 = data['normal.steps']
    fig, ax = plt.subplots()
    # Refine the data:
    apollo16 = apollo16[apollo16['policy_index']==7].groupby(['policy_index'])
    normal16 = normal16[normal16['num_threads']==16]
    normal16 = normal16[normal16['schedule']=='static'].groupby(['num_threads'])
    apollo_labels = ["apollo." + str(x) for x in apollo16['policy_index'].unique()]
    normal_labels = ["normal." + str(x) for x in normal16['num_threads'].unique()]
    # Plot the refined data:
    apollo16.plot(title="Cleverleaf Step Exec Time(sec) for OMP @ 16 x Static",
                  x='step', y='step_exec_time', ax=ax, linestyle='-')
    normal16.plot(x='step', y='step_exec_time', ax=ax, linestyle=':')
    # Customize plot display:
    ax.legend((apollo_labels + normal_labels), loc='upper right')
    ax.tick_params(axis='y', which='minor', left=True)
    ax.tick_params(axis='x', which='minor', bottom=True)
    plt.grid(True)
    plt.xlabel('Cleverleaf Step')
    plt.ylabel('Time (seconds)')
    plt.show()
    return

#----------

%run './common_routines.ipynb'

data = dict()
data['path'] = '/g/g17/wood67/src/apollo/data/intel/001.node.001.rank'
data['apollo.tracefile'] = 'trace.policy.0.csv'
data['apollo.flushfile'] = 'flush.apollo.logged.csv'
data['apollo.stepsfile'] = 'steps.apollo.silent.csv'
data['normal.stepsfile'] = 'steps.normal.silent.csv'
data = load_csv_data(data)

project_model_over_trace(data)
plot_apollo_vs_normal(data)

print("\nDone.")
return

#
# NOTE: Commented out until ipywidgets works on Quartz
#
# Set up the UI:
#trace_progress = widgets.IntProgress(
#    value=0, min=0, max=10, step=1,
#    description='Processing:', orientation='horizontal',
#    bar_style='') # 'success', 'info', 'warning', 'danger' or ''
#trace_summary = widgets.Output()
#trace_detail  = widgets.Output()
#trace_accordion = widgets.Accordion(children=[trace_summary, trace_detail])
#trace_accordion.set_title(0, 'Summary')
#trace_accordion.set_title(1, 'Detail')
#trace_box = widgets.Box(children=[trace_progress, trace_accordion])
#
#plot_progress = widgets.IntProgress(
#    value=0, min=0, max=10, step=1,
#    description='Processing:', orientation='horizontal',
#    bar_style='') # 'success', 'info', 'warning', 'danger' or ''
#plot_summary = widgets.Output()
#plot_detail  = widgets.Output()
#plot_accordion = widgets.Accordion(children=[plot_summary, plot_detail])
#plot_accordion.set_title(0, 'Summary')
#plot_accordion.set_title(1, 'Detail')
#plot_box = widgets.Box(children=[plot_progress, plot_accordion])
#
#tab_nest = widgets.Tab(children=[trace_box, plot_box])
#tab_nest.set_title(0, 'Trace')
#tab_nest.set_title(1, 'Plot')
#tab_nest.selectedIndex = None
#
#display(tabNest)

#trace_thread = threading.Thread(
#    target=project_model_over_trace,
#    args=(data, trace_progress, trace_summary, trace_detail))

#plot_thread = threading.Thread(
#    target=plot_apollo_vs_normal,
#    args=(data))

#trace_thread.start()
#plot_thread.start()





                \     _ \   _ \  |     |      _ \  
               _ \   |   | |   | |     |     |   | 
              ___ \  ___/  |   | |     |     |   | 
            _/    _\_|    \___/ _____|_____|\___/  
                                                   
           -  -  -  --  --  ---  --= --== ==*# ###>
      
Data source:
	/g/g17/wood67/src/apollo/data/intel/001.node.001.rank

Loading:
       data[apollo.trace]   (78.05 MB)   trace.policy.0.csv
       data[apollo.flush]   (81.86 MB)   flush.apollo.logged.csv
       data[apollo.steps]   (88.87 kB)   steps.apollo.silent.csv
       data[normal.steps]   (80.13 kB)   steps.normal.silent.csv

Constructing models:
Training...
model["cleverleaf-apollo-release(0xe014bd)"].x_shape(340, 1)              Acc%:   5.88
model["cleverleaf-apollo-release(0xdf9f08)"].x_shape(340, 1)              Acc%:   5.88
model["cleverleaf-apollo-release(0xe4e3ae)"].x_shape(620, 1)              Acc%:   7.74
model["cleverleaf-apollo-release(0xe4136e)"].x_shape

<Figure size 640x480 with 1 Axes>


Done.
