## Import data

In [16]:
%matplotlib inline
%reload_ext autoreload
from rtxlib.rtx_run import setup_database, db
from analysis_lib.one_sample_tests import KolmogorovSmirnov
from factorial_anova_analysis import get_raw_data

# Name of the experiment whose raw results are analysed below
# (presumably an Elasticsearch index name -- TODO confirm against get_raw_data).
index = "rtxgaussian-600-300-baseline-5iter" 

# NOTE(review): the meaning of the positional arguments is not visible here.
# Judging by the recorded outputs, `False` reuses a locally cached pickle and
# 5000 is a per-configuration sample limit -- confirm in factorial_anova_analysis.
results = get_raw_data(index, False, 5000)

data retrieved from file rtxgaussian-600-300-baseline-5iter-5000.pickle


In [18]:
import pprint

# Imported here so this cell does not depend on `printmd`, which is only
# defined in a later cell (a fresh "Restart & Run All" would hit a NameError).
from IPython.display import Markdown, display

pp = pprint.PrettyPrinter(indent=4)

# Upper bound on how many results are kept per configuration.
max_results_per_config = 5000

# Both dicts are keyed by str(configuration dict):
#   knobs[key] -> list of the configuration dicts (r[1]) that produced `key`
#   data[key]  -> list of the matching result payloads (r[0])
knobs = {}
data = {}

# The single configuration we want to keep from `results`.
# NOTE: matching below uses dict equality, i.e. exact float equality, so the
# values here must be exactly the floats stored with the results.
gaussian_best_conf = {
    're_route_every_ticks': 63,
    'freshness_cut_off_value': 359,
    'max_speed_and_length_factor': 2.3643285696975598,
    'exploration_percentage': 0.19508109229406423,
    'freshness_update_factor': 15,
    'route_random_sigma': 0.11615606716272785,
    'average_edge_duration_factor': 1.2081011415096372
}

for r in results:
    knob_str = str(r[1])
    if r[1] == gaussian_best_conf:
        kept_configs = knobs.setdefault(knob_str, [])
        if len(kept_configs) < max_results_per_config:
            kept_configs.append(r[1])
        kept_samples = data.setdefault(knob_str, [])
        if len(kept_samples) < max_results_per_config:
            kept_samples.append(r[0])

summary = "**Found " + str(len(knobs)) + " different configurations in " + str(len(results)) + " data **"
display(Markdown("<span style='color:{}'>{}</span>".format("green", summary)))

for k in knobs:
    print(str(len(knobs[k])) + " of these:")
    # Print the stored configuration dict itself instead of eval()-ing its
    # string key; every list in `knobs` holds at least one entry.
    pp.pprint(knobs[k][0])
    print("\n")

<span style='color:green'>**Found 1 different configurations in 25000 data **</span>

5000 of these:
{   u'average_edge_duration_factor': 1.2081011415096372,
    u'exploration_percentage': 0.19508109229406423,
    u'freshness_cut_off_value': 359,
    u'freshness_update_factor': 15,
    u'max_speed_and_length_factor': 2.36432856969756,
    u're_route_every_ticks': 63,
    u'route_random_sigma': 0.11615606716272785}




## T-tests with variable sample sizes

In [19]:
from analysis_lib.two_sample_tests import Ttest
from analysis_lib.two_sample_tests import TtestPower
from complaints_generator import generate_complaints
from scipy.stats import binom_test
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc("savefig", dpi=150)
import random 
from IPython.display import Markdown, display

def printmd(string, color=None):
    """Render `string` as Markdown in the cell output.

    :param string: Markdown source to display.
    :param color: optional CSS colour (e.g. "green"); when given, the text is
        wrapped in a ``<span style='color:...'>`` element. When omitted the
        text is rendered unstyled instead of emitting an invalid
        ``color:None`` style.
    """
    if color is None:
        markdown_source = string
    else:
        markdown_source = "<span style='color:{}'>{}</span>".format(color, string)
    display(Markdown(markdown_source))
    
# ANSI escape sequences for coloured / styled terminal text, keyed by name.
# "END" resets the terminal back to its default style.
color = dict(
    PURPLE='\033[95m',
    CYAN='\033[96m',
    DARKCYAN='\033[36m',
    BLUE='\033[94m',
    GREEN='\033[92m',
    YELLOW='\033[93m',
    RED='\033[91m',
    BOLD='\033[1m',
    UNDERLINE='\033[4m',
    END='\033[0m',
)
    
# Fixed seed so the random sub-samples drawn below are reproducible.
random.seed(123456)

y_key = 'overhead'
alpha = 0.05
necessary_power = 0.80

fake_run_id = "123456"

# Sub-sample sizes tested per comparison: min_sample_size, +step, ... up to
# max_sample_size (inclusive), capped by the amount of data actually available.
min_sample_size = 100
max_sample_size = 5000
step = 100

# Configurations are compared in sorted-key order; the first one is the
# initial "best" and is replaced whenever a challenger is significantly better.
knob_values = sorted(knobs.keys())
best_knob = knob_values[0]
other_knob_values = knob_values[1:]

# NOTE: sample_sizes accumulates across *all* comparisons, so once more than
# one comparison runs it is not index-aligned with pvalues[k] / powers[k].
sample_sizes = []
pvalues = {}
powers = {}

for i in knob_values:
    pvalues[i] = []
    powers[i] = []

count = 0
for other_knob_value in other_knob_values:
    # random.sample raises ValueError when asked for more items than exist,
    # so never test a size larger than the smaller of the two data sets.
    largest_size = min(max_sample_size, len(data[best_knob]), len(data[other_knob_value]))
    tested_sizes = range(min_sample_size, largest_size + 1, step)
    if not tested_sizes:
        print("Skipping comparison: fewer than " + str(min_sample_size) + " samples available.")
        continue

    # Grow the sample until the difference is both significant and
    # sufficiently powered, or until the largest size has been tested.
    for sample_size in tested_sizes:
        sample_sizes.append(sample_size)

        analysis_data = {}
        # Sample order matters for reproducibility: current best first, then challenger.
        analysis_data[0] = random.sample(data[best_knob], sample_size)
        analysis_data[1] = random.sample(data[other_knob_value], sample_size)

        t = Ttest(fake_run_id, y_key, alpha).start(analysis_data, {})
        pvalues[other_knob_value].append(t["pvalue"])
        # Halved to obtain a one-sided p value (assumes Ttest reports a
        # two-sided one -- TODO confirm); the side is taken from the sign
        # of the observed statistic.
        one_sided_p_value = t["pvalue"]/2

        # Negative statistic: the current best has the smaller overhead.
        different = "smaller" if t["statistic"] < 0 else "larger"
        t_p = TtestPower(fake_run_id, y_key, t["effect_size"], alpha, different).start(analysis_data, {})
        powers[other_knob_value].append(t_p["power"])

        significant = one_sided_p_value < alpha and t_p["power"] >= necessary_power

        # Report once per comparison: at the first significant size, or at the
        # last tested size when significance was never reached.
        if significant or sample_size == tested_sizes[-1]:
            if significant:
                printmd("**SIGNIFICANT DIFFERENCE**", "green")
            else:
                printmd("**NO SIGNIFICANT DIFFERENCE**", "red")
            print("sample size: " + str(sample_size))
            print("================")
            print("Overhead in ")
            # Print the stored configuration dicts rather than eval()-ing the keys.
            pp.pprint(knobs[best_knob][0])
            print(" is " + different + " than in ")
            pp.pprint(knobs[other_knob_value][0])
            print("================")
            print("mean difference: " + str(t["mean_diff"]))
            print("p value: " + str(one_sided_p_value))
            print("effect size: " + str(t["effect_size"]))
            print("power: " + str(t_p["power"]))
            print("\n")

        if significant:
            count += 1
            # The challenger has the lower overhead -> it becomes the new best.
            if t["statistic"] >= 0:
                best_knob = other_knob_value
            break

print(str(count) + " out of " + str(len(other_knob_values)) + " comparisons found significant difference.")
print("******************")
print("BEST CONFIGURATION")
print("******************")
pp.pprint(knobs[best_knob][0])


0 out of 0 comparisons found significant diffference.
******************
BEST CONFIGURATION
******************
{   u'average_edge_duration_factor': 1.2081011415096372,
    u'exploration_percentage': 0.19508109229406423,
    u'freshness_cut_off_value': 359,
    u'freshness_update_factor': 15,
    u'max_speed_and_length_factor': 2.36432856969756,
    u're_route_every_ticks': 63,
    u'route_random_sigma': 0.11615606716272785}


In [20]:
# Load the second experiment. NOTE: this rebinds `results`, so earlier cells
# that read `results` will see this data set if they are re-run afterwards.
results = get_raw_data("rtxgaussian-600-300-3-5iter", True, 5000)

# Group by str(configuration): b_knobs keeps the configuration dicts (r[1]),
# b_data the matching result payloads (r[0]); at most 5000 of each per key.
b_knobs = {}
b_data = {}
for r in results:
    knob_str = str(r[1])
    configs_for_key = b_knobs.setdefault(knob_str, [])
    if len(configs_for_key) < 5000:
        configs_for_key.append(r[1])
    samples_for_key = b_data.setdefault(knob_str, [])
    if len(samples_for_key) < 5000:
        samples_for_key.append(r[0])

print("len(knobs):" + str(len(b_knobs)))

# One line with the configuration key, one with how many entries were kept.
for k in b_knobs:
    print(k)
    print(len(b_knobs[k]))

print("=========")

[36m> OEDA configuration: Using elasticsearch database.[39m
data saved to file rtxgaussian-600-300-3-5iter-5000.pickle
len(knobs):2
{u'freshness_update_factor': 12}
5000
{u'freshness_update_factor': 20}
5000


In [21]:
# Add the best baseline configuration (and its samples) to the b_* dicts so it
# takes part in the comparison below.
# NOTE: relies on `best_knob` still being a key of `knobs`/`data`; the next
# cell rebinds `best_knob`, so re-running this cell afterwards may raise KeyError.
for merged, baseline in ((b_knobs, knobs), (b_data, data)):
    merged[best_knob] = baseline[best_knob]

print("len(knobs):" + str(len(b_knobs)))

# One line with the configuration key, one with how many entries it holds.
for k in b_knobs:
    print(k)
    print(len(b_knobs[k]))

print("=========")

len(knobs):3
{u'freshness_update_factor': 12}
5000
{u'freshness_update_factor': 20}
5000
{u're_route_every_ticks': 63, u'freshness_cut_off_value': 359, u'max_speed_and_length_factor': 2.36432856969756, u'exploration_percentage': 0.19508109229406423, u'freshness_update_factor': 15, u'route_random_sigma': 0.11615606716272785, u'average_edge_duration_factor': 1.2081011415096372}
5000


In [22]:
y_key = 'overhead'
alpha = 0.05
necessary_power = 0.80

fake_run_id = "123456"

# Sub-sample sizes tested per comparison: min_sample_size, +step, ... up to
# max_sample_size (inclusive), capped by the amount of data actually available.
min_sample_size = 100
max_sample_size = 5000
step = 100

# Configurations are compared in sorted-key order; the first one is the
# initial "best" and is replaced whenever a challenger is significantly better.
knob_values = sorted(b_knobs.keys())
best_knob = knob_values[0]
other_knob_values = knob_values[1:]

# NOTE: sample_sizes accumulates across *all* comparisons, so once more than
# one comparison runs it is not index-aligned with pvalues[k] / powers[k].
sample_sizes = []
pvalues = {}
powers = {}

for i in knob_values:
    pvalues[i] = []
    powers[i] = []

# NOTE: `random` is not re-seeded in this cell, so the sub-samples drawn here
# depend on the generator state left behind by whatever ran before.
count = 0
for other_knob_value in other_knob_values:
    # random.sample raises ValueError when asked for more items than exist,
    # so never test a size larger than the smaller of the two data sets.
    largest_size = min(max_sample_size, len(b_data[best_knob]), len(b_data[other_knob_value]))
    tested_sizes = range(min_sample_size, largest_size + 1, step)
    if not tested_sizes:
        print("Skipping comparison: fewer than " + str(min_sample_size) + " samples available.")
        continue

    # Grow the sample until the difference is both significant and
    # sufficiently powered, or until the largest size has been tested.
    for sample_size in tested_sizes:
        sample_sizes.append(sample_size)

        analysis_data = {}
        # Sample order matters for reproducibility: current best first, then challenger.
        analysis_data[0] = random.sample(b_data[best_knob], sample_size)
        analysis_data[1] = random.sample(b_data[other_knob_value], sample_size)

        t = Ttest(fake_run_id, y_key, alpha).start(analysis_data, {})
        pvalues[other_knob_value].append(t["pvalue"])
        # Halved to obtain a one-sided p value (assumes Ttest reports a
        # two-sided one -- TODO confirm); the side is taken from the sign
        # of the observed statistic.
        one_sided_p_value = t["pvalue"]/2

        # Negative statistic: the current best has the smaller overhead.
        different = "smaller" if t["statistic"] < 0 else "larger"
        t_p = TtestPower(fake_run_id, y_key, t["effect_size"], alpha, different).start(analysis_data, {})
        powers[other_knob_value].append(t_p["power"])

        significant = one_sided_p_value < alpha and t_p["power"] >= necessary_power

        # Report once per comparison: at the first significant size, or at the
        # last tested size when significance was never reached.
        if significant or sample_size == tested_sizes[-1]:
            if significant:
                printmd("**SIGNIFICANT DIFFERENCE**", "green")
            else:
                printmd("**NO SIGNIFICANT DIFFERENCE**", "red")
            print("sample size: " + str(sample_size))
            print("================")
            print("Overhead in ")
            # Print the stored configuration dicts rather than eval()-ing the keys.
            pp.pprint(b_knobs[best_knob][0])
            print(" is " + different + " than in ")
            pp.pprint(b_knobs[other_knob_value][0])
            print("================")
            print("mean difference: " + str(t["mean_diff"]))
            print("p value: " + str(one_sided_p_value))
            print("effect size: " + str(t["effect_size"]))
            print("power: " + str(t_p["power"]))
            print("\n")

        if significant:
            count += 1
            # The challenger has the lower overhead -> it becomes the new best.
            if t["statistic"] >= 0:
                best_knob = other_knob_value
            break

print(str(count) + " out of " + str(len(other_knob_values)) + " comparisons found significant difference.")
print("******************")
print("BEST CONFIGURATION")
print("******************")
pp.pprint(b_knobs[best_knob][0])

<span style='color:red'>**NO SIGNIFICANT DIFFERENCE**</span>

sample size: 5000
Overhead in 
{   u'freshness_update_factor': 12}
 is smaller than in 
{   u'freshness_update_factor': 20}
mean difference: -0.00837385664225
p value: 0.28312265119
effect size: -0.0114733295313
power: 0.142033920934




<span style='color:green'>**SIGNIFICANT DIFFERENCE**</span>

sample size: 1400
Overhead in 
{   u'freshness_update_factor': 12}
 is smaller than in 
{   u'average_edge_duration_factor': 1.2081011415096372,
    u'exploration_percentage': 0.19508109229406423,
    u'freshness_cut_off_value': 359,
    u'freshness_update_factor': 15,
    u'max_speed_and_length_factor': 2.36432856969756,
    u're_route_every_ticks': 63,
    u'route_random_sigma': 0.11615606716272785}
mean difference: -0.0705744445022
p value: 0.00477844591512
effect size: -0.0980545017249
power: 0.828638167959


1 out of 2 comparisons found significant difference.
******************
BEST CONFIGURATION
******************
{   u'freshness_update_factor': 12}
