# Benchmark configuration
This notebook creates the list of parameters with which verticox+ is benchmarked.

1. Number of parties vs number of iterations
2. Number of records
3. Number of features

Number 1 and 2 are benchmarked to compare the results to the original verticox paper. Number 3 is included because the both the original algorithm and the n-party protocol affect the scalability of the algorithm with respect to the number of features.

## Parameters

We are going to vary the following parameters:
- number of parties
- number of iterations
- number of records
- number of features

In [1]:
from collections import namedtuple

#defaults
DEFAULT_N_PARTIES = 3
DEFAULT_N_ITERATIONS = 1000
DEFAULT_N_RECORDS = 100
DEFAULT_N_FEATURES = 6



Parameters = namedtuple("parameters", ["parties", "iterations", "records", "features", "category"],
                       defaults=[DEFAULT_N_PARTIES, DEFAULT_N_ITERATIONS, DEFAULT_N_RECORDS,
                        DEFAULT_N_FEATURES, None])

## 1. Number of parties vs number of iterations

In [2]:
num_parties = [2,3,4,5]
num_iterations = [100, 500, 1000, 15000, 2000]

variations1 = []


for p in num_parties:
    for i in num_iterations:
        variations1.append(Parameters(parties=p, iterations=i, category="parties_vs_iterations"))
        
variations1

[parameters(parties=2, iterations=100, records=100, features=6, category='parties_vs_iterations'),
 parameters(parties=2, iterations=500, records=100, features=6, category='parties_vs_iterations'),
 parameters(parties=2, iterations=1000, records=100, features=6, category='parties_vs_iterations'),
 parameters(parties=2, iterations=15000, records=100, features=6, category='parties_vs_iterations'),
 parameters(parties=2, iterations=2000, records=100, features=6, category='parties_vs_iterations'),
 parameters(parties=3, iterations=100, records=100, features=6, category='parties_vs_iterations'),
 parameters(parties=3, iterations=500, records=100, features=6, category='parties_vs_iterations'),
 parameters(parties=3, iterations=1000, records=100, features=6, category='parties_vs_iterations'),
 parameters(parties=3, iterations=15000, records=100, features=6, category='parties_vs_iterations'),
 parameters(parties=3, iterations=2000, records=100, features=6, category='parties_vs_iterations'),
 p

## 2. Number of records

In [3]:
n_records = [50, 100, 500, 1000, 2000, 3000, 4000, 5000]

variations2 = []

for r in n_records:
    variations2.append(Parameters(records=r, category="records"))
    
variations2

[parameters(parties=3, iterations=1000, records=50, features=6, category='records'),
 parameters(parties=3, iterations=1000, records=100, features=6, category='records'),
 parameters(parties=3, iterations=1000, records=500, features=6, category='records'),
 parameters(parties=3, iterations=1000, records=1000, features=6, category='records'),
 parameters(parties=3, iterations=1000, records=2000, features=6, category='records'),
 parameters(parties=3, iterations=1000, records=3000, features=6, category='records'),
 parameters(parties=3, iterations=1000, records=4000, features=6, category='records'),
 parameters(parties=3, iterations=1000, records=5000, features=6, category='records')]

## 3. Number of features

In [4]:
n_features=[4, 8, 12, 16, 20]
variations3 = []

for f in n_features:
    variations3.append(Parameters(features=f, category="features"))
    
variations3

[parameters(parties=3, iterations=1000, records=100, features=4, category='features'),
 parameters(parties=3, iterations=1000, records=100, features=8, category='features'),
 parameters(parties=3, iterations=1000, records=100, features=12, category='features'),
 parameters(parties=3, iterations=1000, records=100, features=16, category='features'),
 parameters(parties=3, iterations=1000, records=100, features=20, category='features')]

In [5]:
variations = variations1 + variations2 + variations3

variations

[parameters(parties=2, iterations=100, records=100, features=6, category='parties_vs_iterations'),
 parameters(parties=2, iterations=500, records=100, features=6, category='parties_vs_iterations'),
 parameters(parties=2, iterations=1000, records=100, features=6, category='parties_vs_iterations'),
 parameters(parties=2, iterations=15000, records=100, features=6, category='parties_vs_iterations'),
 parameters(parties=2, iterations=2000, records=100, features=6, category='parties_vs_iterations'),
 parameters(parties=3, iterations=100, records=100, features=6, category='parties_vs_iterations'),
 parameters(parties=3, iterations=500, records=100, features=6, category='parties_vs_iterations'),
 parameters(parties=3, iterations=1000, records=100, features=6, category='parties_vs_iterations'),
 parameters(parties=3, iterations=15000, records=100, features=6, category='parties_vs_iterations'),
 parameters(parties=3, iterations=2000, records=100, features=6, category='parties_vs_iterations'),
 p

In [6]:
from csv import DictWriter

In [7]:
path = "benchmark/benchmark_parameters.csv"

In [8]:
variations_dict = (v._asdict() for v in variations)

with open(path, "w") as f:
    writer = DictWriter(f, Parameters._fields)
    
    writer.writeheader()
    
    writer.writerows(variations_dict)