## Import data

In [None]:
%matplotlib inline
%reload_ext autoreload
from rtxlib.rtx_run import setup_database, db
from analysis_lib.one_sample_tests import KolmogorovSmirnov
from factorial_anova_analysis import get_data_and_knobs

# Knobs of the routing strategy that take part in the factorial analysis,
# keyed by knob name. Each value is a ([a, b], c) pair -- presumably the two
# levels (or range bounds) to explore and a default value; TODO confirm
# against get_data_and_knobs / FactorialAnova, which consume this mapping.
strategy_knobs = dict(
    route_random_sigma=([0.0, 0.3], 0.3),
    exploration_percentage=([0.0, 0.3], 0.3),
    max_speed_and_length_factor=([1.0, 2.5], 1.5),
    average_edge_duration_factor=([1.0, 2.5], 1.5),
    freshness_update_factor=([5, 20], 15),
    freshness_cut_off_value=([100, 700], 600),
    re_route_every_ticks=([10, 70], 60),
)

# Name of the experiment to analyse.
index = "rtxfactorial1" # cars: 750, smart cars: 750, no pendulum (= rtx50)

# Pass `index` instead of repeating the literal so the name above and the data
# actually loaded cannot drift apart. The meaning of the remaining positional
# arguments (False, 10000) is defined by get_data_and_knobs, which is not
# shown in this notebook.
data, knobs, exp_count = get_data_and_knobs(index, strategy_knobs, False, 10000)

# Parenthesised single-argument print behaves the same on Python 2 and 3.
print("exp_count: " + str(exp_count))

data retrieved from file rtxfactorial1.pickle
~~~~~~~~~
rtxfactorial1
~~~~~~~~~


## Factorial ANOVA with variable sample sizes

In [None]:
from analysis_lib.factorial_tests import FactorialAnova

# Accumulators filled in by the sample-size loop further down:
# `sample_sizes` collects every sample size that was tried, and
# `probabilities` is rebound to the "PR(>F)" column of the latest ANOVA table.
probabilities = dict()
sample_sizes = list()

# Settings handed to FactorialAnova / used to filter its result.
y_key = "overhead"   # second argument passed to FactorialAnova
fake_id = "123456"   # first argument passed to FactorialAnova
alpha = 0.01         # terms with PR(>F) below this count as significant

def all_knobs_considered(exp, knob_names=None):
    """Return True if every knob name occurs in at least one label of ``exp.index``.

    A knob counts as "considered" when its name is a substring of any index
    label, so a single interaction label that mentions several knobs covers
    all of them.

    Args:
        exp: Object whose ``index`` attribute iterates over string labels
            (the caller passes the significant rows of the ANOVA table).
        knob_names: Iterable of knob names to look for. Defaults to the keys
            of the module-level ``strategy_knobs``.

    Returns:
        bool: True when no knob name is left unmatched, False otherwise.
    """
    if knob_names is None:
        knob_names = strategy_knobs.keys()

    # Work on a private set: the previous version removed items from the list
    # it was iterating, which silently skipped the knob following each match
    # (and fails outright on Python 3, where dict.keys() has no remove()).
    remaining = set(knob_names)
    for label in exp.index:
        remaining -= {name for name in remaining if name in label}
        if not remaining:
            break
    return not remaining

import random  # random.sample is used below but was never imported

# Rerun the factorial ANOVA on growing random sub-samples and stop at the
# first sample size for which every knob appears in a significant term.
for sample_size in range(10, 100, 10):
    print("--------")
    print("Sample size: " + str(sample_size))
    sample_sizes.append(sample_size)

    # For every data key, draw the positions of the observations to keep.
    # The population is taken from the data itself instead of a hard-coded
    # 10000 so the cell keeps working if fewer observations were loaded.
    inds = {k: random.sample(range(len(data[k])), sample_size) for k in data}

    print(inds)

    # Index into the per-key sequence (data[k][i]); indexing the dict itself
    # with a sample position would look up a non-existent key.
    analysis_data = {k: [data[k][i] for i in inds[k]] for k in data}

    # Reuse the same positions for the knobs so every sampled observation
    # stays paired with its knob values. Assumes `knobs` has the same keys as
    # `data` and parallel sequences -- TODO confirm against get_data_and_knobs.
    analysis_knobs = {k: [knobs[k][i] for i in inds[k]] for k in knobs}

    aov_table = FactorialAnova(
        fake_id, y_key, list(strategy_knobs), exp_count
    ).start(analysis_data, analysis_knobs)

    # Most significant terms first, then keep those below the alpha threshold.
    aov_table = aov_table.sort_values(by='PR(>F)', ascending=True)
    probabilities = aov_table["PR(>F)"]
    significant_probs = probabilities[probabilities < alpha]

    print("Evaluated {} out of which {} were found significant.".format(
        len(probabilities), len(significant_probs)))

    if all_knobs_considered(significant_probs):
        print("=============")
        print("...stopping here since all knobs are considered. Significant probabilities:")
        print(significant_probs)
        print("=============")
        break
    