In [3]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go

In [None]:
def df_from_csv(csv_file, num_measures=5):
    """Summarise a colon-separated counter dump as one row per event.

    The file is read without a header row as the columns ``CPU``, ``Value``,
    ``Unit`` and ``Event Name``. Its rows are cut into ``num_measures``
    consecutive chunks and, inside every chunk, the ``Value`` entries of each
    event are summed.

    Parameters
    ----------
    csv_file : str, path or file-like
        Anything ``pandas.read_csv`` accepts.
    num_measures : int, default 5
        Number of consecutive chunks (measures) stored in the file.

    Returns
    -------
    pandas.DataFrame
        Columns ``Event Name``, ``Measure_1`` ... ``Measure_<num_measures>``
        and ``Avg`` (row-wise mean of the measure columns). There is one row
        per event, in order of first appearance in the file. An event that is
        missing from a chunk gets ``NaN`` for that measure.

    Raises
    ------
    ValueError
        If ``num_measures`` is smaller than 1.
    """
    if num_measures < 1:
        raise ValueError("num_measures must be a positive integer")

    # Read csv with the following name of columns
    raw = pd.read_csv(csv_file, header=None, sep=":", names=["CPU", "Value", "Unit", "Event Name"])

    # Events measured, in order of first appearance
    events = raw["Event Name"].unique()

    # Chunk sizes laid out like np.array_split: the first `extra` chunks get
    # one additional row. Slicing by position avoids handing the DataFrame to
    # np.array_split, which goes through the deprecated DataFrame.swapaxes.
    base, extra = divmod(len(raw), num_measures)
    sizes = [base + 1] * extra + [base] * (num_measures - extra)

    # Sum of every event inside each chunk, keyed by event name so the values
    # stay attached to the right event whatever the row order is.
    totals = {}
    start = 0
    for i, size in enumerate(sizes, start=1):
        chunk = raw.iloc[start:start + size]
        start += size
        totals["Measure_" + str(i)] = (
            chunk.groupby("Event Name", sort=False)["Value"].sum().reindex(events)
        )

    headers = list(totals)
    df = pd.DataFrame(totals, index=events)

    # Add the events names and go back to a plain 0..n-1 index
    df.insert(0, 'Event Name', events)
    df = df.reset_index(drop=True)

    # Average over the measure columns only: including the string column
    # 'Event Name' makes DataFrame.mean raise on pandas >= 2.0.
    df['Avg'] = df[headers].mean(axis=1)
    return df

In [None]:
# Options used:
# -------------
# SINGLE-THREAD. inter = intra = 1
# Measured during the training phase (fit()).
# The program was run 5 times, with 30 epochs in each run.

# Load one summary table per csv file, in the order the files are listed.
st_frames = []
for csv_file in (
    "/home/jlpadillas01/TFG/tests/tensorflow/mnist/out/single_thread/mnist_train_papi.csv",
    "/home/jlpadillas01/TFG/tests/tensorflow/mnist/out/single_thread/mnist_train_callback.csv",
    "/home/jlpadillas01/TFG/tests/tensorflow/mnist/out/single_thread/mnist_train_each_epoch.csv",
    "/home/jlpadillas01/TFG/tests/tensorflow/mnist/out/single_thread/mnist_train_each_batch.csv",
):
    st_frames.append(df_from_csv(csv_file))

# Bind every table to its own name; the order matches the file list above.
(
    st_mnist_train_papi,
    st_mnist_train_callback,
    st_mnist_train_each_epoch,
    st_mnist_train_each_batch,
) = st_frames

In [None]:
# Display the table that df_from_csv built from the single-thread mnist_train_papi.csv file.
st_mnist_train_papi

In [None]:
# Display the table that df_from_csv built from the single-thread mnist_train_callback.csv file.
st_mnist_train_callback

In [None]:
# Display the table that df_from_csv built from the single-thread mnist_train_each_epoch.csv file.
st_mnist_train_each_epoch

In [None]:
# Display the table that df_from_csv built from the single-thread mnist_train_each_batch.csv file.
st_mnist_train_each_batch

In [None]:
# Put the 'Avg' column of every single-thread table side by side,
# next to the event names taken from the papi table.
headers = [
    'Event Name',
    'mnist_train_papi',
    'mnist_train_callback',
    'mnist_train_each_epoch',
    'mnist_train_each_batch',
]
events = pd.Series(st_mnist_train_papi["Event Name"].unique())
data = [events]
data += [
    frame['Avg']
    for frame in (
        st_mnist_train_papi,
        st_mnist_train_callback,
        st_mnist_train_each_epoch,
        st_mnist_train_each_batch,
    )
]
# Columns are aligned on the row index of the input series.
df5 = pd.concat(data, axis=1, keys=headers)
df5

In [None]:
# ------------------------------------------------------------------------------------------------------------------------

In [None]:
# Options used:
# -------------
# MULTI-THREAD. inter = intra = default
# Measured during the training phase (fit()).
# The program was run 5 times, with 30 epochs in each run.

# Load one summary table per csv file, in the order the files are listed.
mt_frames = []
for csv_file in (
    "/home/jlpadillas01/TFG/tests/tensorflow/mnist/out/multi_thread/mnist_train_papi.csv",
    "/home/jlpadillas01/TFG/tests/tensorflow/mnist/out/multi_thread/mnist_train_callback.csv",
    "/home/jlpadillas01/TFG/tests/tensorflow/mnist/out/multi_thread/mnist_train_each_epoch.csv",
    "/home/jlpadillas01/TFG/tests/tensorflow/mnist/out/multi_thread/mnist_train_each_batch.csv",
):
    mt_frames.append(df_from_csv(csv_file))

# Bind every table to its own name; the order matches the file list above.
(
    mt_mnist_train_papi,
    mt_mnist_train_callback,
    mt_mnist_train_each_epoch,
    mt_mnist_train_each_batch,
) = mt_frames

In [None]:
# Display the table that df_from_csv built from the multi-thread mnist_train_papi.csv file.
mt_mnist_train_papi

In [None]:
# Display the table that df_from_csv built from the multi-thread mnist_train_callback.csv file.
mt_mnist_train_callback

In [None]:
# Display the table that df_from_csv built from the multi-thread mnist_train_each_epoch.csv file.
mt_mnist_train_each_epoch

In [None]:
# Display the table that df_from_csv built from the multi-thread mnist_train_each_batch.csv file.
mt_mnist_train_each_batch

In [None]:
# Put the 'Avg' column of every multi-thread table side by side,
# next to the event names taken from the papi table.
headers = [
    'Event Name',
    'mnist_train_papi',
    'mnist_train_callback',
    'mnist_train_each_epoch',
    'mnist_train_each_batch',
]
events = pd.Series(mt_mnist_train_papi["Event Name"].unique())
data = [events]
data += [
    frame['Avg']
    for frame in (
        mt_mnist_train_papi,
        mt_mnist_train_callback,
        mt_mnist_train_each_epoch,
        mt_mnist_train_each_batch,
    )
]
# Columns are aligned on the row index of the input series.
df10 = pd.concat(data, axis=1, keys=headers)
df10

In [None]:
# ------------------------------------------------------------------------------------------------------------------------

In [None]:
def df_from_csv_for_plot(csv_file, num_measures=5, measure=0):
    """Pivot one measure of a counter dump into an iteration-by-event table.

    The file is read without a header row as the columns ``CPU``, ``Value``,
    ``Unit`` and ``Event Name`` and cut into ``num_measures`` consecutive
    chunks. Only chunk ``measure`` is used. Its rows are numbered in blocks of
    ``len(events) * len(cpus)`` rows (iteration 1, 2, ...) and then pivoted.

    Parameters
    ----------
    csv_file : str, path or file-like
        Anything ``pandas.read_csv`` accepts.
    num_measures : int, default 5
        Number of consecutive chunks (measures) stored in the file.
    measure : int, default 0
        Zero-based position of the chunk to pivot.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``# Iter.`` (starting at 1), one column per event name.
        Cells hold the ``pivot_table`` default aggregation (mean) of ``Value``
        and missing cells are filled with 0.

    Raises
    ------
    IndexError
        If ``measure`` is not in ``range(num_measures)``.
    ValueError
        If the chunk is empty or its length is not a multiple of
        ``len(events) * len(cpus)``.
    """
    if not 0 <= measure < num_measures:
        raise IndexError("measure must be in range(num_measures)")

    # Read csv with the following name of columns
    raw = pd.read_csv(csv_file, header=None, sep=":", names=["CPU", "Value", "Unit", "Event Name"])

    # Boundaries of the requested chunk, laid out like np.array_split (the
    # first `extra` chunks get one additional row). Slicing by position avoids
    # handing the DataFrame to np.array_split, which goes through the
    # deprecated DataFrame.swapaxes.
    base, extra = divmod(len(raw), num_measures)
    start = measure * base + min(measure, extra)
    stop = start + base + (1 if measure < extra else 0)
    df = raw.iloc[start:stop]

    # Get the events and cpus measured
    events = df["Event Name"].unique()
    cpus = df["CPU"].unique()

    # Every iteration (batch or epoch) contributes one row per (event, cpu) pair
    rows_per_iter = len(events) * len(cpus)
    if rows_per_iter == 0 or len(df) % rows_per_iter:
        raise ValueError(
            "chunk length is not a positive multiple of len(events) * len(cpus)"
        )
    num_iters = len(df) // rows_per_iter

    # Iteration number of every row, built in one vectorised step instead of
    # growing a Python list by concatenation (which is quadratic).
    df.insert(0, "# Iter.", np.repeat(np.arange(1, num_iters + 1), rows_per_iter))

    # "Rotate" the table
    df = df.pivot_table(index=["# Iter."], columns=["Event Name"], values=["Value"]).fillna(0)

    # Drop the first multiindex level ("Value")
    df.columns = df.columns.droplevel()

    # Remove name of columns
    df.columns.name = None

    return df

In [None]:
csv_file = "/home/jlpadillas01/TFG/tests/tensorflow/mnist/out/single_thread/mnist_train_each_epoch.csv"
df = df_from_csv_for_plot(csv_file)

# The index of the frame supplies the x coordinates shared by every trace.
x_values = df.index.to_list()

# One lines+markers trace per column of the frame.
fig = go.Figure()
for event in df.columns:
    y_values = df[event].tolist()
    fig.add_trace(go.Scatter(name=event, mode='lines+markers', x=x_values, y=y_values))

# Both axes start at zero and carry a title.
fig.update_xaxes(rangemode="tozero", title_text="Number of epoch")
fig.update_yaxes(rangemode="tozero", title_text="Value")

# Figure title plus a boxed legend.
fig.update_layout(
    title='MyPaPi measure by iterations',
    legend={
        "traceorder": "normal",
        "font": {"family": "sans-serif", "size": 12, "color": "black"},
        "bgcolor": "white",
        "bordercolor": "Black",
        "borderwidth": 2,
    },
)

# Render the figure and also save it as a standalone HTML file.
fig.show()
fig.write_html("st_mnist_train_each_epoch.html")

In [None]:
csv_file = "/home/jlpadillas01/TFG/tests/tensorflow/mnist/out/multi_thread/mnist_train_each_epoch.csv"
df = df_from_csv_for_plot(csv_file)

# The index of the frame supplies the x coordinates shared by every trace.
x_values = df.index.to_list()

# One lines+markers trace per column of the frame.
fig = go.Figure()
for event in df.columns:
    y_values = df[event].tolist()
    fig.add_trace(go.Scatter(name=event, mode='lines+markers', x=x_values, y=y_values))

# Both axes start at zero and carry a title.
fig.update_xaxes(rangemode="tozero", title_text="Number of epoch")
fig.update_yaxes(rangemode="tozero", title_text="Value")

# Figure title plus a boxed legend.
fig.update_layout(
    title='MyPaPi measure by iterations',
    legend={
        "traceorder": "normal",
        "font": {"family": "sans-serif", "size": 12, "color": "black"},
        "bgcolor": "white",
        "bordercolor": "Black",
        "borderwidth": 2,
    },
)

# Render the figure and also save it as a standalone HTML file.
fig.show()
fig.write_html("mt_mnist_train_each_epoch.html")

In [4]:
csv_file = "/home/jlpadillas01/TFG/tests/tensorflow/mnist/out/single_thread/mnist_train_each_batch.csv"

# Read csv with the following name of columns (the file has no header row)
df = pd.read_csv(csv_file, header=None, sep=":", names=["CPU", "Value", "Unit", "Event Name"])

# Cut the frame into num_measures * num_epochs consecutive slices.
num_measures = 5
num_epochs = 30
num_chunks = num_measures * num_epochs

# Slice boundaries laid out like np.array_split (the first `extra` slices get
# one additional row). Slicing by position avoids handing the DataFrame to
# np.array_split, which goes through the deprecated DataFrame.swapaxes.
base, extra = divmod(len(df), num_chunks)
bounds = np.cumsum([0] + [base + 1] * extra + [base] * (num_chunks - extra))
arrs = [df.iloc[lo:hi] for lo, hi in zip(bounds[:-1], bounds[1:])]

# Using the first slice of the split. Change `epoch` to pick another slice.
epoch = 0
df = arrs[epoch]

# Each slice has 464_130 rows, which are: 9 events * 1_719 batches * 30 cpus

# Get the events and cpus measured
events = df["Event Name"].unique()
cpus = df["CPU"].unique()

# Every iteration (batch) contributes one row per (event, cpu) pair
rows_per_iter = len(events) * len(cpus)
iterations = range(1, len(df.index) // rows_per_iter + 1)

# Iteration number of every row, built in one vectorised step instead of
# growing a Python list by concatenation (which is quadratic).
data = np.repeat(np.arange(1, len(iterations) + 1), rows_per_iter)
df.insert(0, "# Iter.", data)

# "Rotate" the table
df = df.pivot_table(index=["# Iter."], columns=["Event Name"], values=["Value"]).fillna(0)

# Drop the first multiindex level ("Value")
df.columns = df.columns.droplevel()

# Remove name of columns
df.columns.name = None

Unnamed: 0_level_0,L1-DCACHE-LOADS,L1-DCACHE-STORES,PERF_COUNT_HW_BRANCH_INSTRUCTIONS,PERF_COUNT_HW_INSTRUCTIONS,fp_arith_inst_retired.128b_packed_single,fp_arith_inst_retired.256b_packed_single,fp_arith_inst_retired.512b_packed_single,fp_arith_inst_retired.scalar_double,fp_arith_inst_retired.scalar_single
# Iter.,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,1.079207e+07,5.779265e+06,7.708564e+06,3.737336e+07,9.733333,2.200000,79316.666667,396.233333,101.766667
2,4.096540e+04,8.527300e+03,9.273033e+03,1.170452e+05,9.733333,2.200000,79316.666667,159.300000,100.000000
3,4.147757e+04,8.888533e+03,9.551200e+03,1.186752e+05,9.733333,2.200000,79316.666667,159.100000,100.000000
4,4.201623e+04,8.904733e+03,1.008367e+04,1.214560e+05,9.733333,2.200000,79316.666667,159.100000,100.000000
5,4.196670e+04,8.867900e+03,1.002843e+04,1.212305e+05,9.733333,2.200000,79316.666667,159.100000,100.000000
...,...,...,...,...,...,...,...,...,...
1715,4.268887e+04,9.087467e+03,1.056667e+04,1.245601e+05,9.733333,2.200000,79316.666667,159.366667,100.000000
1716,4.268310e+04,9.083333e+03,1.056623e+04,1.245550e+05,9.733333,2.200000,79316.666667,159.366667,100.000000
1717,4.269513e+04,9.091433e+03,1.057447e+04,1.245942e+05,9.733333,2.200000,79316.666667,159.366667,100.000000
1718,4.270133e+04,9.079600e+03,1.062213e+04,1.246533e+05,9.733333,2.200000,79316.666667,159.366667,100.000000


In [5]:
# Plot the per-batch table held in `df` (index: iteration number, one column per event).
fig = go.Figure()

# Add one lines+markers trace per event, all sharing the index as x values
x_values = df.index.to_list()
for event in df.columns:
    y_values = df[event].tolist()
    fig.add_trace(go.Scatter(x=x_values, y=y_values, mode='lines+markers', name=event))

# Both axes start at zero. The x axis counts batches here, not epochs:
# the data comes from mnist_train_each_batch.csv.
fig.update_xaxes(rangemode="tozero", title_text="Number of batch")
fig.update_yaxes(rangemode="tozero", title_text="Value")

# Figure title plus a boxed legend
fig.update_layout(
    title='MyPaPi measure by iterations',
    legend=dict(
        traceorder="normal",
        font=dict(family="sans-serif", size=12, color="black"),
        bgcolor="white",
        bordercolor="Black",
        borderwidth=2
    )
)

# Render the figure and also save it as a standalone HTML file
fig.show()
fig.write_html("st_mnist_train_each_batch.html")

In [None]:

# eje_y = df[(df['Event Name'] == 'fp_arith_inst_retired.512b_packed_single')]['Value'].tolist()
# Map every event name to the number of computations that a single count of
# that event represents. Valid on node c6-111.
computations_dict = dict([
    ("fp_arith_inst_retired.128b_packed_double", 2),
    ("fp_arith_inst_retired.128b_packed_single", 4),
    ("fp_arith_inst_retired.256b_packed_double", 4),
    ("fp_arith_inst_retired.256b_packed_single", 8),
    ("fp_arith_inst_retired.512b_packed_double", 8),
    ("fp_arith_inst_retired.512b_packed_single", 16),
    ("fp_arith_inst_retired.scalar_double", 1),
    ("fp_arith_inst_retired.scalar_single", 1),
    ("fp_assist.any", 1),
])
