In [1]:
import sys
import matplotlib.pyplot as plt
import pandas as pd
from IPython.display import display
from IPython.display import HTML

import thicket as th
from thicket.model_extrap import ExtrapInterface

# Widen the notebook container so wide tables fit on screen.
display(HTML("<style>.container { width:80% !important; }</style>"))

# Show every column when pandas renders a data frame.
pd.set_option("display.max_columns", None)

# Read the Caliper profiles found under the LULESH data directory into a
# single thicket object.
data = "../../data/LULESH/exclusive/weak"
thicket = th.Thicket.from_caliperreader(data)


<IPython.core.display.Javascript object>

In [2]:
# Uncomment to inspect the metadata attached to the loaded thicket object.
#thicket.metadata

Instantiate an Extra-P interface to create performance models and more.

In [3]:
# Interface object through which all Extra-P modeling calls in this notebook are made.
extrap_interface = ExtrapInterface()

Show the Extra-P modelers that are available for modeling.

In [4]:
# Print the modeler names that can be passed as `modeler=` to create_models().
extrap_interface.print_modelers()

Available Extra-P Modeler: ['refining', 'basic', 'multi-parameter', 'default']


Display the available options for a specific modeler, e.g., the default modeler.

In [5]:
# Print name, type and description of each option accepted by the "default" modeler.
extrap_interface.print_modeler_options("default")

Modeler Options
--------------
allow_log_terms	 BOOL	 Allows models with logarithmic terms
use_crossvalidation	 BOOL	 Enables cross-validation
compare_with_RSS	 BOOL	 If enabled the models are compared using their residual sum of squares (RSS) instead of their symmetric mean absolute percentage error (SMAPE)
poly_exponents	 STR	 Set of polynomial exponents. Use comma separated list.
log_exponents	 STR	 Set of logarithmic exponents. Use comma separated list.
retain_default_exponents	 BOOL	 If set the default exponents are added to the given ones.
force_combination_exponents	 BOOL	 If set the exact combination of exponents is forced.
allow_negative_exponents	 BOOL	 If set adds neagtive exponents for strong scaling.



To create performance models, use the create_models() function of the Extra-P interface class as shown in the example below. You have to pass the thicket object, the parameters and metrics you want to model, and a name for the model configuration you are going to create. Extra-P will internally use the name you provided as a unique identifier for your modeling experiments. This will come in handy when you experiment with different modeling parameters, metrics, and modeler configurations.

In [6]:
# Model the exclusive average time per rank as a function of the job size.
# No modeler is named, so the interface's default choice applies; the results
# are stored under the configuration name "config1".
extrap_interface.create_models(
    thicket,
    parameters=["jobsize"],
    metrics=["Avg time/rank (exc)"],
    model_name="config1",
)

  tht.statsframe.dataframe.at[


If you want to explore Extra-P's modeling capabilities, you can try a different modeler and see how the models it returns differ. To change the model generator that Extra-P uses, specify the name of the modeler as shown below. Here we use the refining modeler instead, which automatically adjusts the search space of the models for the user. Therefore, there is no need to specify the exponents Extra-P should use for modeling.
Furthermore, this example shows how to create models for several parameters and metrics.
To create models for several parameters or metrics simply provide them as a list of string values.

There are several additional parameters to this method. For example, one can use the parameter `use_median=True` to switch between using the median and mean values of the measured performance metrics values of a measurement point (application configuration) for modeling.

The `add_stats=True` parameter lets you specify whether Extra-P's statistical values are added to the dataframe object of thicket. These values are internally used by Extra-P to decide which of the found hypotheses will be the best model for a certain node/kernel and metric.

In [7]:
# Second configuration: two metrics at once, fitted with the "refining"
# modeler on the median of the measurements, stored as "config2".
extrap_interface.create_models(
    thicket,
    parameters=["jobsize"],
    metrics=[
        "Avg time/rank (exc)",
        "Avg time/rank",
    ],
    use_median=True,
    modeler="refining",
    model_name="config2",
)

  tht.statsframe.dataframe.at[
  tht.statsframe.dataframe.at[


For advanced users, you can specify which options Extra-P should use when creating the models. This will influence the type of models you will get.

Define the modeler options that you want to use for modeling in the form of a dictionary and pass them to the create_models() function of the Extra-P interface.

Use the previously shown `print_modeler_options("default")` function to display which options are available for a specific modeler.

Make sure that the options you are setting are available for the modeler that you specified via `modeler=""`. If not, the Extra-P interface will let you know and continue using the default options.

In [8]:
# Options handed to the modeler. The two keys starting with "#" configure the
# nested single-parameter modeler; its own options form a nested dictionary.
modeler_options = {
    "allow_log_terms": True,
    "use_crossvalidation": True,
    "compare_with_RSS": False,
    "poly_exponents": [0, 1, 2, 3, 4, 5],
    "log_exponents": [0, 1, 2],
    "retain_default_exponents": False,
    "force_combination_exponents": True,
    "allow_negative_exponents": False,
    "allow_combinations_of_sums_and_products": False,
    "#single_parameter_modeler": "default",
    "#single_parameter_options": {
        "poly_exponents": "0,1,2,3,4,5",
        "log_exponents": "0,1,2",
    },
}

# Third configuration: two model parameters with the "multi-parameter"
# modeler, stored as "config3".
extrap_interface.create_models(
    thicket,
    parameters=[
        "jobsize",
        "problem_size",
    ],
    metrics=[
        "Avg time/rank (exc)",
        # "Avg time/rank"
    ],
    use_median=True,
    modeler="multi-parameter",
    model_name="config3",
    modeler_options=modeler_options,
)



In [9]:
# Lift the column-width limit so the model expressions are not truncated.
pd.options.display.max_colwidth = None
thicket.statsframe.dataframe

Unnamed: 0_level_0,config1,config1,config1,config1,config1,config1,config1,config2,config2,config2,config2,config2,config2,config2,config2,config2,config2,config2,config2,config2,config3,config3,config3,config3,config3,config3,config3
Unnamed: 0_level_1,name,Avg time/rank (exc)_extrap-model,Avg time/rank (exc)_RSS_extrap-model,Avg time/rank (exc)_rRSS_extrap-model,Avg time/rank (exc)_SMAPE_extrap-model,Avg time/rank (exc)_AR2_extrap-model,Avg time/rank (exc)_RE_extrap-model,name,Avg time/rank (exc)_extrap-model,Avg time/rank (exc)_RSS_extrap-model,Avg time/rank (exc)_rRSS_extrap-model,Avg time/rank (exc)_SMAPE_extrap-model,Avg time/rank (exc)_AR2_extrap-model,Avg time/rank (exc)_RE_extrap-model,Avg time/rank_extrap-model,Avg time/rank_RSS_extrap-model,Avg time/rank_rRSS_extrap-model,Avg time/rank_SMAPE_extrap-model,Avg time/rank_AR2_extrap-model,Avg time/rank_RE_extrap-model,name,Avg time/rank (exc)_extrap-model,Avg time/rank (exc)_RSS_extrap-model,Avg time/rank (exc)_rRSS_extrap-model,Avg time/rank (exc)_SMAPE_extrap-model,Avg time/rank (exc)_AR2_extrap-model,Avg time/rank (exc)_RE_extrap-model
node,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2
"{'name': 'MPI_Barrier', 'type': 'function'}",MPI_Barrier,$-1.039\times10^{−3}+4.453\times10^{−4}\cdot \log_2{p}^{1}$,1.447227e-06,8.545707,48.405904,0.617853,0.685289,MPI_Barrier,$1.699\times10^{−3}$,4.733862e-06,107.024198,49.294964,1.0,0.181734,$1.699\times10^{−3}$,4.733862e-06,107.024198,49.294964,1.0,0.181734,MPI_Barrier,$7.904\times10^{−4}+2.268\times10^{−6}\cdot q^{2}$,8.9e-05,222.102533,49.255743,0.548831,1.33145
"{'name': 'MPI_Comm_dup', 'type': 'function'}",MPI_Comm_dup,$2.033\times10^{−5}+1.605\times10^{−7}\cdot p^{1/2}\cdot \log_2{p}^{2}$,5.055366e-08,8.842384,69.719797,-0.113187,0.837009,MPI_Comm_dup,$1.052\times10^{−4}$,5.676683e-08,23.131486,90.627582,1.0,2.895062,$1.052\times10^{−4}$,5.676683e-08,23.131486,90.627582,1.0,2.895062,MPI_Comm_dup,$3.079\times10^{−5}+6.233\times10^{−7}\cdot p^{1}$,0.0,35.46995,55.196981,0.411859,0.805145
"{'name': 'MPI_Finalize', 'type': 'function'}",MPI_Finalize,$10^{−6}$,0.0,0.0,0.0,1.0,0.0,MPI_Finalize,$10^{−6}$,0.0,0.0,0.0,1.0,0.0,$10^{−6}$,0.0,0.0,0.0,1.0,0.0,MPI_Finalize,$10^{−6}$,0.0,0.0,0.0,1.0,0.0
"{'name': 'MPI_Finalized', 'type': 'function'}",MPI_Finalized,$10^{−6}$,0.0,0.0,0.0,1.0,0.0,MPI_Finalized,$10^{−6}$,0.0,0.0,0.0,1.0,0.0,$10^{−6}$,0.0,0.0,0.0,1.0,0.0,MPI_Finalized,$10^{−6}$,0.0,0.0,0.0,1.0,0.0
"{'name': 'MPI_Initialized', 'type': 'function'}",MPI_Initialized,$1.833\times10^{−6}$,8.333333e-13,0.729167,17.050298,1.0,0.083333,MPI_Initialized,$1.833\times10^{−6}$,8.333333e-13,0.729167,17.050298,1.0,0.083333,$1.833\times10^{−6}$,8.333333e-13,0.729167,17.050298,1.0,0.083333,MPI_Initialized,$1.700\times10^{−6}$,0.0,4.8825,26.906907,1.0,0.15
"{'name': 'main', 'type': 'function'}",main,$8.209\times10^{−3}+3.719\times10^{−4}\cdot \log_2{p}^{1}$,6.126005e-07,0.006114,3.256699,0.743588,0.027427,main,$8.209\times10^{−3}+3.719\times10^{−4}\cdot \log_2{p}^{1}$,6.126005e-07,0.006114,3.256699,0.743588,0.027427,$-5.414\times10^{1}+1.611\times10^{1}\cdot \log_2{p}^{3/2}$,872.8205,0.039998,8.163793,0.983814,0.068315,main,$-2.131\times10^{−3}+4.580\times10^{−4}\cdot \log_2{p}^{1}+3.576\times10^{−7}\cdot q^{3}$,2.4e-05,3.269151,19.389186,0.99679,0.171968
"{'name': 'CommRecv', 'type': 'function'}",CommRecv,$-9.937\times10^{−3}+7.143\times10^{−3}\cdot p^{1/3}$,3.033116e-06,0.007269,3.287004,0.995762,0.027221,CommRecv,$-9.937\times10^{−3}+7.143\times10^{−3}\cdot p^{1/3}$,3.033116e-06,0.007269,3.287004,0.995762,0.027221,$-1.819\times10^{−2}+1.273\times10^{−2}\cdot p^{4/11}$,4.15255e-08,6.6e-05,0.306846,0.999988,0.002553,CommRecv,$3.848\times10^{−4}+9.362\times10^{−7}\cdot \log_2{p}^{2}\cdot q^{1}\cdot \log_2{q}^{2}$,0.000139,0.349657,8.330406,0.993607,0.079487
"{'name': 'MPI_Irecv', 'type': 'function'}",MPI_Irecv,$-1.201\times10^{−2}+8.223\times10^{−3}\cdot p^{1/3}$,1.678652e-06,0.003539,2.657153,0.998228,0.021927,MPI_Irecv,$-1.201\times10^{−2}+8.223\times10^{−3}\cdot p^{1/3}$,1.678652e-06,0.003539,2.657153,0.998228,0.021927,$-1.201\times10^{−2}+8.223\times10^{−3}\cdot p^{1/3}$,1.678652e-06,0.003539,2.657153,0.998228,0.021927,MPI_Irecv,$1.265\times10^{−2}+4.327\times10^{−8}\cdot p^{1}\cdot q^{2}\cdot \log_2{q}^{1}$,0.005274,108.652539,41.748858,0.924135,0.865611
"{'name': 'CommSBN', 'type': 'function'}",CommSBN,$1.236\times10^{−3}+3.694\times10^{−4}\cdot p^{1/3}\cdot \log_2{p}^{1}$,4.971435e-07,0.014465,3.581461,0.997715,0.030795,CommSBN,$3.977\times10^{−5}+1.247\times10^{−3}\cdot p^{1/2}$,1.033456e-06,0.034629,5.222877,0.995251,0.045825,$2.385\times10^{−2}$,0.0004133922,3.885888,27.695147,1.0,0.025838,CommSBN,$-6.990\times10^{−4}+1.774\times10^{−10}\cdot \log_2{p}^{2}\cdot q^{4}\cdot \log_2{q}^{2}$,0.069561,10.316746,64.551492,0.994203,0.420139
"{'name': 'MPI_Wait', 'type': 'function'}",MPI_Wait,$6.756\times10^{−2}$,0.005576674,12.403994,41.220236,1.0,0.150209,MPI_Wait,$6.756\times10^{−2}$,0.005576674,12.403994,41.220236,1.0,0.150209,$6.756\times10^{−2}$,0.005576674,12.403994,41.220236,1.0,0.150209,MPI_Wait,$-2.836\times10^{−1}+2.767\times10^{−2}\cdot \log_2{q}^{2}$,5.235625,155.918069,75.919453,0.162298,1.541739


In [None]:
# NOTE(review): `t_ens` is not defined in the cells shown in this notebook (the
# loaded object is named `thicket`); adjust before uncommenting.
#t_ens.statsframe.dataframe["Avg time/rank (exc)_extrap-model"].iloc[0].mdl.hypothesis.function

In [None]:
# Fetch the model object fitted for the first node in the statsframe.
# The thicket loaded at the top of this notebook is named `thicket`; `t_ens`
# is never defined here. The statsframe columns are two-level
# (model configuration name, column name), so the cell is addressed with a
# tuple key. "config1" is the single-parameter (jobsize) configuration, which
# matches the one-parameter scaling function used below.
model_obj = thicket.statsframe.dataframe.at[
    thicket.statsframe.dataframe.index[0],
    ("config1", "Avg time/rank (exc)_extrap-model"),
]

# Clear the current figure before the model draws itself.
plt.clf()
fig, ax = model_obj.display(
    show_mean=True,
    show_median=True,
    show_min_max=True,
    RSS=True,
    AR2=True,
    show_opt_scaling=True,
    opt_scaling_func="1.5*p**1*log2(p)**1",
)

# Plot graph
plt.show()


In [None]:
# NOTE(review): `mdl` is not defined in any cell shown in this notebook, so this
# cell would raise NameError unless it is defined elsewhere — presumably a
# leftover from an earlier modeling API; confirm which object provides to_html().
# The column-width override only applies inside this `with` block.
with pd.option_context("display.max_colwidth", 1):
    display(HTML(mdl.to_html(show_mean=True, show_median=True, 
                            show_min_max=True, RSS=True, 
                            AR2=True, show_opt_scaling=False)))


In [None]:
# NOTE(review): neither `mdl` nor `t_ens` is defined in the cells shown in this
# notebook (the loaded object is named `thicket`); confirm the intended objects
# before running this cell.
mdl.componentize_statsframe()

# Keep a handle on the statsframe table and display it as the cell output.
xp_comp_df = t_ens.statsframe.dataframe

xp_comp_df

In [None]:
# NOTE(review): neither `mdl` nor `t_ens` is defined in the cells shown in this
# notebook (the loaded object is named `thicket`); confirm the intended objects
# before running this cell.
# The commented-out call passes two values per evaluation target, the active
# call one value per target.
#mdl.complexity_statsframe(eval_targets=[[512,70],[1024,80]])
mdl.complexity_statsframe(eval_targets=[[512],[1024]])

# Keep a handle on the statsframe table and display it as the cell output.
xp_comp_df = t_ens.statsframe.dataframe

xp_comp_df

In [None]:
# sort by growth rank to identify bottlenecks
# NOTE(review): depends on `xp_comp_df` from the preceding cell; the column name
# is presumably the one added by complexity_statsframe() for target 512 — verify
# it against the actual statsframe columns.
#x = xp_comp_df.sort_values(by=["Avg time/rank (exc)_extrap-model_complexity_(512,70)"])
x = xp_comp_df.sort_values(by=["Avg time/rank (exc)_extrap-model_complexity_(512)"])
x

In [None]:
# Group the statsframe rows by the value in the complexity column for target
# 512, then print each group key followed by the row labels in that group.
# Two-parameter variant:
# x = xp_comp_df.groupby("Avg time/rank (exc)_extrap-model_complexity_(512,70)")
x = xp_comp_df.groupby("Avg time/rank (exc)_extrap-model_complexity_(512)")
for key, value in x.groups.items():
    print(key, value, sep="\n")
