# **CoNN EVALUATION**

## SETUP

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import math
import statistics
sns.set(style="ticks")
sns.set_palette("colorblind")

from IPython.display import Markdown, display
def printmd(string):
    """Render *string* as Markdown in the notebook output.

    The text is wrapped in an IPython ``Markdown`` object and handed to
    ``display``; nothing is returned.
    """
    rendered = Markdown(string)
    display(rendered)

In [None]:
try:
    from itertools import zip_longest
except ImportError:
    from itertools import izip_longest as zip_longest

def get_length_of_longest_list(lists):
  """Return the length of the longest sequence in *lists*.

  An empty *lists* (or one holding only empty sequences) yields 0.
  """
  return max((len(seq) for seq in lists), default=0)

def column_wise_mean(lists):
  """Average ragged rows column by column, skipping rows that are too short.

  For every column index up to the longest row, only the rows that actually
  have a value at that index contribute to the mean.

  Returns a list with one mean per column.
  """
  width = get_length_of_longest_list(lists)
  means = []
  for col in range(width):
    # Accumulate by hand, starting from 0.0, so the result is always a float
    # and is summed strictly left to right.
    total = 0.0
    contributors = 0
    for row in lists:
      if col < len(row):
        total += row[col]
        contributors += 1
    means.append(total / contributors)
  return means

def column_wise_mean_with_zero_fill(lists):
  """Average ragged rows column by column, treating missing values as 0.

  Shorter rows are padded with 0, so every column sum is divided by the
  total number of rows.

  Returns a list with one mean per column (empty if *lists* is empty).
  """
  means = []
  for column in zip_longest(*lists, fillvalue=0):
    means.append(sum(column) / len(lists))
  return means

def get_mean_list(df_column):
  """Parse a column of comma-separated numbers and return its column-wise mean.

  Each cell is converted to ``str``, split on ``','`` and every token is
  converted to ``float``. The resulting rows are averaged with
  ``column_wise_mean``, i.e. short rows do not contribute to later columns.
  """
  parsed_rows = [
    [float(token) for token in str(cell).split(',')]
    for cell in df_column
  ]
  return column_wise_mean(parsed_rows)

def get_mean_list_with_zero_fill(df_column):
  """Parse a column of comma-separated numbers and return its zero-filled mean.

  Each cell is converted to ``str``, split on ``','`` and every token is
  converted to ``float``. The resulting rows are averaged with
  ``column_wise_mean_with_zero_fill``, i.e. short rows count as 0 in the
  columns they lack.
  """
  parsed_rows = [
    [float(token) for token in str(cell).split(',')]
    for cell in df_column
  ]
  return column_wise_mean_with_zero_fill(parsed_rows)

def get_last_item_mean(df_column):
  """Return the mean of the last value of every row in *df_column*.

  Each cell is converted to ``str``, split on ``','`` and every token is
  converted to ``float``; the final value of each row is then averaged over
  all rows.
  """
  parsed_rows = [
    [float(token) for token in str(cell).split(',')]
    for cell in df_column
  ]

  total = 0
  for row in parsed_rows:
    total += row[-1]

  return total / len(parsed_rows)

def get_last_item_median(df_column):
  """Return the median of the last value of every row in *df_column*.

  Each cell is converted to ``str``, split on ``','`` and every token is
  converted to ``float``; the median is taken over the final value of each
  row with ``statistics.median``.
  """
  parsed_rows = [
    [float(token) for token in str(cell).split(',')]
    for cell in df_column
  ]
  final_values = [row[-1] for row in parsed_rows]
  return statistics.median(final_values)

def get_median_of_row_sums(df_column):
  """Return the median over rows of the sum of each row's values.

  Each cell is converted to ``str``, split on ``','`` and every token is
  converted to ``float``; the per-row sums are passed to
  ``statistics.median``.
  """
  row_totals = [
    sum([float(token) for token in str(cell).split(',')])
    for cell in df_column
  ]
  return statistics.median(row_totals)

def get_length_of_longest_list2(lists):
    """Return the (upper) median length of the sequences in *lists*.

    Despite its name, this does not return the longest length: the lengths
    are sorted and the element at index ``len(lists) // 2`` is returned, so
    at least half of the sequences are at least this long.

    Returns 0 when *lists* is empty, matching ``get_length_of_longest_list``,
    instead of failing with an IndexError.
    """
    sizes = sorted(len(seq) for seq in lists)
    if not sizes:
        return 0
    return sizes[len(sizes) // 2]

def column_wise_mean2(lists):
    """Average ragged rows column by column, keeping only well-supported columns.

    Columns are considered up to the length reported by
    ``get_length_of_longest_list2``. A column's mean is computed from the rows
    that have a value at that index, and it is only emitted when more than
    half of all rows contributed.

    Returns the list of emitted means.
    """
    column_limit = get_length_of_longest_list2(lists)
    means = []
    for col in range(column_limit):
        # Accumulate by hand, starting from 0.0, so summation order and float
        # type stay fixed.
        total = 0.0
        contributors = 0
        for row in lists:
            if col < len(row):
                total += row[col]
                contributors += 1
        # Strict majority of rows must have a value in this column.
        if 2 * contributors > len(lists):
            means.append(total / contributors)
    return means

def get_mean_list2(df_column):
    """Parse a column of comma-separated numbers and average it with ``column_wise_mean2``.

    Each cell is converted to ``str``, split on ``','`` and every token is
    converted to ``float`` before the rows are passed on.
    """
    parsed_rows = [
        [float(token) for token in str(cell).split(',')]
        for cell in df_column
    ]
    return column_wise_mean2(parsed_rows)

In [None]:
def printResults(df):
  """Print summary metrics for one group of runs as Markdown.

  Reads the columns ``ConstructionAccuracy``, ``ConstructionTotalParameters``,
  ``ConstructionPrunedParameters``, ``ConstructionTrainableParameters``,
  ``ConstructionStepEpochs`` and ``TestAccuracy`` from *df* and prints:

  * the median of the final construction accuracy,
  * the natural log of the construction effort (sum over steps of mean
    trainable parameters times mean step epochs, short rows zero-filled),
    or an N/A line when the effort is not positive,
  * the median final total parameters minus the median final pruned
    parameters,
  * the median test accuracy.
  """
  construction_accuracy_median = get_last_item_median(df["ConstructionAccuracy"])

  # Difference of the two medians over runs, each taken at the last step.
  total_params_median = (
    get_last_item_median(df["ConstructionTotalParameters"])
    - get_last_item_median(df["ConstructionPrunedParameters"])
  )

  # These are per-step means over runs (not medians); the pruned-parameter
  # list that used to be computed here was never used and has been dropped.
  trainable_params_mean = np.array(get_mean_list_with_zero_fill(df["ConstructionTrainableParameters"]))
  step_epochs_mean = np.array(get_mean_list_with_zero_fill(df["ConstructionStepEpochs"]))
  construction_effort = np.sum(trainable_params_mean * step_epochs_mean)

  printmd("Construction Accuracy: " + str(construction_accuracy_median))
  if construction_effort > 0:
    # log() is only defined for positive effort.
    printmd("Construction Effort: " + str(math.log(construction_effort)))
  else:
    printmd("Construction Effort: N/A (Invalid value)")
  printmd("Total Parameters Median: " + str(total_params_median))
  printmd("Test Accuracy Median: " + str(df["TestAccuracy"].median()))

In [None]:
def _relative_change_percent(new_value, baseline):
  """Return the change from *baseline* to *new_value* in percent, or None if *baseline* is 0."""
  if baseline == 0:
    return None
  return 100 * (new_value - baseline) / abs(baseline)


def _construction_summary(df):
  """Return ``(effective_params_median, construction_effort)`` for the runs in *df*.

  The first value is the median final total parameters minus the median final
  pruned parameters; the second is the sum over steps of mean trainable
  parameters times mean step epochs (short rows zero-filled).
  """
  effective_params_median = (
    get_last_item_median(df["ConstructionTotalParameters"])
    - get_last_item_median(df["ConstructionPrunedParameters"])
  )
  trainable_params_mean = np.array(get_mean_list_with_zero_fill(df["ConstructionTrainableParameters"]))
  step_epochs_mean = np.array(get_mean_list_with_zero_fill(df["ConstructionStepEpochs"]))
  construction_effort = np.sum(trainable_params_mean * step_epochs_mean)
  return effective_params_median, construction_effort


def printComparisons(df_prune_false, df_prune_true):
  """Print the relative change (in percent) from unpruned to pruned runs.

  *df_prune_false* is the baseline group, *df_prune_true* the pruned group.
  Three Markdown lines are printed: the change in median test accuracy, in
  effective parameter count, and in construction effort. A metric whose
  baseline is 0 is reported as N/A instead of dividing by zero; the
  construction effort is additionally reported as N/A unless both efforts are
  positive, the same validity rule ``printResults`` applies.
  """
  total_params_median, construction_effort = _construction_summary(df_prune_false)
  total_params_prune_median, prune_construction_effort = _construction_summary(df_prune_true)

  test_acc_diff = _relative_change_percent(
    df_prune_true["TestAccuracy"].median(),
    df_prune_false["TestAccuracy"].median(),
  )
  total_params_diff = _relative_change_percent(total_params_prune_median, total_params_median)

  # The original check (`construction_effort is not None`) was always true,
  # so the N/A branch could never run and the division happened regardless.
  if construction_effort > 0 and prune_construction_effort > 0:
    construction_effort_diff = _relative_change_percent(prune_construction_effort, construction_effort)
  else:
    construction_effort_diff = None

  if test_acc_diff is not None:
    printmd("Test Accuracy Change: " + str(test_acc_diff) + "%")
  else:
    printmd("Test Accuracy Change: N/A (Invalid value)")

  if total_params_diff is not None:
    printmd("Total Parameters Change: " + str(total_params_diff) + "%")
  else:
    printmd("Total Parameters Change: N/A (Invalid value)")

  if construction_effort_diff is not None:
    printmd("Construction Effort Change: " + str(construction_effort_diff) + "%")
  else:
    printmd("Construction Effort: N/A (Invalid value)")


## READ DATA

In [None]:
# Load the per-run results; the file is read with ';' as field separator.
df_results = pd.read_csv('Logs.csv', sep=';')

# List every algorithm together with the hyperparameter sets it appears with.
algorithm_names = df_results['AlgorithmType'].unique().tolist()
for algorithm_name in algorithm_names:
    # Print the current algorithm (the original printed the whole list on
    # every iteration, so the hyperparameter lines could not be attributed).
    print(algorithm_name)
    df_algo = df_results.loc[df_results['AlgorithmType'] == algorithm_name]
    print(df_algo['Hyperparameters'].unique().tolist())

# Datasets present in the log; later cells iterate over this list.
dataset_names = df_results['DatasetType'].unique().tolist()
print(dataset_names)

In [None]:
# Load the comparison runs; the file is read with ';' as field separator.
df_comparisons = pd.read_csv('Comparisons.csv', sep=';')

## RESULT METRICS

In [None]:
# Walk algorithm -> dataset -> hyperparameter set and print the metrics for
# the unpruned runs, the pruned runs, and (when both exist) their comparison.
for algorithm_name in algorithm_names:
  printmd("# **" + algorithm_name + "**")
  algorithm_mask = df_results['AlgorithmType'] == algorithm_name
  df_algorithm = df_results.loc[algorithm_mask]
  hyperparam_sets = df_algorithm['Hyperparameters'].unique().tolist()

  for dataset_name in dataset_names:
    printmd("## **" + dataset_name + "**")
    dataset_mask = df_algorithm['DatasetType'] == dataset_name
    df_dataset = df_algorithm.loc[dataset_mask]

    for hyperparam_set in hyperparam_sets:
      df = df_dataset.loc[df_dataset['Hyperparameters'] == hyperparam_set]

      # Nothing was run for this combination.
      if df.empty:
        continue

      printmd("### " + hyperparam_set)

      df_prune_false = df.loc[df['PruningActive'] == False]
      df_prune_true = df.loc[df['PruningActive'] == True]
      has_unpruned = not df_prune_false.empty
      has_pruned = not df_prune_true.empty

      if has_unpruned:
        printmd("**Without Pruning:**")
        printResults(df_prune_false)

      if has_pruned:
        printmd("**With Pruning:**")
        printResults(df_prune_true)

      # A comparison needs both groups.
      if has_unpruned and has_pruned:
        printmd("**Pruning Comparisons:**")
        printComparisons(df_prune_false, df_prune_true)

    print("\n")

  print("\n\n")

## RESULT DIAGRAMS

### PERFORMANCE OVER CONSTRUCTION STEPS

In [None]:
import re  # only needed for the optional compact legend label mentioned below

# For every algorithm and dataset, draw mean construction accuracy over
# construction steps: one figure for runs without pruning, one for runs with
# pruning, with one line per hyperparameter set.
for algorithm_name in algorithm_names:
  printmd("# **" + algorithm_name + "**")
  df_algorithm = df_results.loc[df_results['AlgorithmType'] == algorithm_name]
  hyperparam_sets = df_algorithm['Hyperparameters'].unique().tolist()

  for dataset_name in dataset_names:
    printmd("## **" + dataset_name + "**")
    df_dataset = df_algorithm.loc[df_algorithm['DatasetType'] == dataset_name]

    # Resolve the display name once per dataset, before any plotting. It used
    # to be set only inside the hyperparameter loop after the empty-check, so
    # a dataset without rows reused the previous dataset's name (overwriting
    # that PDF) or hit an undefined variable on the first iteration.
    dataset_name_actual = {"Curves": "Moons", "Compound": "Classification"}.get(dataset_name, dataset_name)

    for pruning_active, title_suffix, file_suffix in (
      (False, "", "_NoPrune"),
      (True, " (Pruning)", "_Prune"),
    ):
      fig = plt.figure(figsize=(8, 5))
      ax = fig.add_subplot(1, 1, 1)

      for hyperparam_set in hyperparam_sets:
        df = df_dataset.loc[df_dataset['Hyperparameters'] == hyperparam_set]
        df_prune = df.loc[df['PruningActive'] == pruning_active]

        # Covers both "no rows for this hyperparameter set" and "no rows with
        # this pruning setting".
        if df_prune.empty:
          continue

        # Prepend the origin so every curve starts at step 0 / accuracy 0.
        x = [0] + get_mean_list(df_prune["ConstructionStep"])
        y = [0.0] + get_mean_list(df_prune["ConstructionAccuracy"])
        # A more compact label would be re.sub('[^0-9,]', '', hyperparam_set).
        ax.plot(x, y, label=hyperparam_set)
        ax.set_title(dataset_name_actual + " - Network Construction" + title_suffix, fontsize = 18)
        ax.set_xlabel("Construction Step")
        ax.set_ylim([0, 1])
        ax.set_ylabel("Accuracy")

      ax.legend(loc='lower right')
      # NOTE(review): the file name does not contain the algorithm name, so
      # with several algorithms later ones overwrite earlier PDFs - confirm
      # whether that is intended.
      fig.savefig("Step_" + dataset_name_actual + file_suffix + '.pdf')

    print("\n")

  print("\n\n")

### PERFORMANCE OVER CONSTRUCTION PARAMETERS

In [None]:
# For every algorithm and dataset, draw mean construction accuracy over the
# mean total parameter count: one figure for runs without pruning, one for
# runs with pruning, with one line per hyperparameter set.
for algorithm_name in algorithm_names:
  printmd("# **" + algorithm_name + "**")
  df_algorithm = df_results.loc[df_results['AlgorithmType'] == algorithm_name]
  hyperparam_sets = df_algorithm['Hyperparameters'].unique().tolist()

  for dataset_name in dataset_names:
    printmd("## **" + dataset_name + "**")
    df_dataset = df_algorithm.loc[df_algorithm['DatasetType'] == dataset_name]

    # Resolve the display name once per dataset, before any plotting. It used
    # to be set only inside the hyperparameter loop after the empty-check, so
    # a dataset without rows reused the previous dataset's name (overwriting
    # that PDF) or hit an undefined variable on the first iteration.
    dataset_name_actual = {"Curves": "Moons", "Compound": "Classification"}.get(dataset_name, dataset_name)

    for pruning_active, title_suffix, file_suffix in (
      (False, "", "_NoPrune"),
      (True, " (Pruning)", "_Prune"),
    ):
      fig = plt.figure(figsize=(8, 5))
      ax = fig.add_subplot(1, 1, 1)

      for hyperparam_set in hyperparam_sets:
        df = df_dataset.loc[df_dataset['Hyperparameters'] == hyperparam_set]
        df_prune = df.loc[df['PruningActive'] == pruning_active]

        # Covers both "no rows for this hyperparameter set" and "no rows with
        # this pruning setting".
        if df_prune.empty:
          continue

        # Prepend a starting point of 1 parameter / accuracy 0 to every curve.
        x = [1] + get_mean_list(df_prune["ConstructionTotalParameters"])
        y = [0.0] + get_mean_list(df_prune["ConstructionAccuracy"])
        # Label every line; the no-pruning plot used to omit the label, which
        # left its legend empty.
        ax.plot(x, y, label=hyperparam_set)
        ax.set_title(dataset_name_actual + " - Network Construction" + title_suffix, fontsize = 18)
        ax.set_xlim([0, 200])
        ax.set_xlabel("Network Parameters")
        ax.set_ylim([0, 1])
        ax.set_ylabel("Accuracy")

      ax.legend(loc='lower right')
      # NOTE(review): the file name does not contain the algorithm name, so
      # with several algorithms later ones overwrite earlier PDFs - confirm
      # whether that is intended.
      fig.savefig("Param_" + dataset_name_actual + file_suffix + '.pdf')

    print("\n")

  print("\n\n")

# COMPARISONS

In [None]:
# Per dataset, compare the algorithms in Comparisons.csv: mean construction
# accuracy over mean total parameter count, unpruned runs only.
algorithm_names = df_comparisons['AlgorithmType'].unique().tolist()
dataset_names = df_comparisons['DatasetType'].unique().tolist()

for dataset_name in dataset_names:
  printmd("## **" + dataset_name + "**")
  df_dataset = df_comparisons.loc[df_comparisons['DatasetType'] == dataset_name]

  fig = plt.figure(figsize=(8, 5))
  ax = fig.add_subplot(1, 1, 1)

  # Display name used in the plot title.
  dataset_name_actual = {"Curves": "Moons", "Compound": "Classification"}.get(dataset_name, dataset_name)

  for algorithm_name in algorithm_names:
    df_algorithm = df_dataset.loc[df_dataset['AlgorithmType'] == algorithm_name]
    df_prune = df_algorithm.loc[df_algorithm['PruningActive'] == False]

    # An algorithm without unpruned runs on this dataset has nothing to plot;
    # passing the empty column on would fail inside get_mean_list2.
    if df_prune.empty:
      continue

    # Display name used in the legend.
    algorithm_name_actual = "CCG-DLNN" if algorithm_name == "CCG_DLNN" else algorithm_name

    # Prepend a starting point of 1 parameter / accuracy 0 to every curve.
    x = [1] + get_mean_list2(df_prune["ConstructionTotalParameters"])
    y = [0.0] + get_mean_list2(df_prune["ConstructionAccuracy"])
    ax.plot(x, y, label=algorithm_name_actual)
    ax.set_title(dataset_name_actual + " - Network Construction", fontsize = 18)
    ax.set_xlabel("Network Parameters")
    ax.set_ylim([0, 1])
    ax.set_ylabel("Accuracy")

  ax.legend(loc='lower right')
  # NOTE(review): the file name uses the raw dataset name while the title uses
  # the display name; for datasets without a display-name mapping this is the
  # same file name the per-algorithm parameter plots write - confirm intended.
  fig.savefig("Param_" + dataset_name + "_NoPrune" + '.pdf')

  print("\n\n")

In [None]:
# Per dataset, compare the algorithms in Comparisons.csv: mean construction
# accuracy over construction steps, unpruned runs only.
algorithm_names = df_comparisons['AlgorithmType'].unique().tolist()
dataset_names = df_comparisons['DatasetType'].unique().tolist()

for dataset_name in dataset_names:
  printmd("## **" + dataset_name + "**")
  df_dataset = df_comparisons.loc[df_comparisons['DatasetType'] == dataset_name]

  fig = plt.figure(figsize=(8, 5))
  ax = fig.add_subplot(1, 1, 1)

  # Display name used in the plot title.
  dataset_name_actual = {"Curves": "Moons", "Compound": "Classification"}.get(dataset_name, dataset_name)

  for algorithm_name in algorithm_names:
    df_algorithm = df_dataset.loc[df_dataset['AlgorithmType'] == algorithm_name]
    df_prune = df_algorithm.loc[df_algorithm['PruningActive'] == False]

    # An algorithm without unpruned runs on this dataset has nothing to plot;
    # passing the empty column on would fail inside get_mean_list2.
    if df_prune.empty:
      continue

    # Same legend name as the parameter-based comparison plot.
    algorithm_name_actual = "CCG-DLNN" if algorithm_name == "CCG_DLNN" else algorithm_name

    # Prepend the origin so every curve starts at step 0 / accuracy 0.
    x = [0] + get_mean_list2(df_prune["ConstructionStep"])
    y = [0.0] + get_mean_list2(df_prune["ConstructionAccuracy"])
    ax.plot(x, y, label=algorithm_name_actual)
    ax.set_title(dataset_name_actual + " - Network Construction", fontsize = 18)
    ax.set_xlabel("Construction Step")
    ax.set_ylim([0, 1])
    ax.set_ylabel("Accuracy")

  ax.legend(loc='lower right')
  # This is the step-based plot, so it is saved with the "Step_" prefix; it
  # used to be written to "Param_..._NoPrune.pdf", silently overwriting the
  # parameter-based comparison figure.
  # NOTE(review): for datasets without a display-name mapping this is the same
  # file name the per-algorithm step plots write - confirm intended.
  fig.savefig("Step_" + dataset_name + "_NoPrune" + '.pdf')

  print("\n\n")