In [1]:
import os

# Step up to the parent directory (presumably the project root, so that the
# `src.*` imports in the next cell resolve — confirm against the repo layout).
# NOTE: the move is relative to the current working directory, so re-running
# this cell without restarting the kernel moves up one more level.
os.chdir(os.pardir)

In [28]:
import numpy as np
from scipy.spatial import distance_matrix
import scipy.stats as st


from src.common.scales import scales
from src.common.utils import generate_population
from src.common.fitness import (griewank, schwefel, ackley, rastrigin)
from src.elegant_fuzzy_genetic_algorithms.helpers.all_params_wrapper import AllEFGAParamsParallelWrapper
from src.elegant_fuzzy_genetic_algorithms.priority_diff_simulation import simulation_priorities

# Fix the state of NumPy's global RNG so the random draws made later in the
# notebook (e.g. via np.random.uniform) are reproducible on a fresh run.
np.random.seed(seed=1)

Since more than 90% of the run time of fuzzy genetic algorithms is spent on fuzzy-logic inference, the idea is to approximate the inference result with a nearest-neighbor lookup over precomputed values.

## Priority estimation (EFGA)

### Getting the error confidence interval

In [29]:
# Inference wrapper used below through `infer_priority(c1=..., c2=...)`.
# NOTE(review): the two arguments presumably set the number of fuzzy terms for
# the input parameters and for the priority output — confirm in the wrapper.
priority_inferencer = AllEFGAParamsParallelWrapper(
    n_terms_params=3,
    n_terms_priority=7,
)

In [30]:
# Both parameters are sampled on the same evenly spaced 60-point grid over
# [0, 1]; each name gets its own independent array.
c1_range, c2_range = (np.linspace(0, 1, 60) for _ in range(2))

In [31]:
# Cartesian product of the two grids as an (n_c1 * n_c2, 2) array of
# (c1, c2) rows; c1 varies slowest, c2 fastest.
c1_grid, c2_grid = np.meshgrid(c1_range, c2_range, indexing='ij')
params_combinations = np.stack((c1_grid, c2_grid), axis=-1).reshape(-1, 2)

# Run the (expensive) inference once for every grid point; these values act
# as the lookup table for the nearest-neighbour approximation.
priorities = priority_inferencer.infer_priority(
    c1=params_combinations[:, 0],
    c2=params_combinations[:, 1],
)

In [32]:
# 200 random (c1, c2) query points drawn uniformly from [0, 1) x [0, 1),
# used to measure the approximation error off the grid.
entries = np.random.uniform(low=0, high=1, size=(200, 2))

In [33]:
# Nearest-neighbour estimate: for every query point pick the precomputed
# priority of the closest grid point.
grid_distances = distance_matrix(entries, params_combinations)
nearest_grid_idx = grid_distances.argmin(axis=1)
priorities_est = priorities[nearest_grid_idx]

# Reference values: run the real inference on the same query points.
query_c1, query_c2 = entries[:, 0], entries[:, 1]
priorities_actual = priority_inferencer.infer_priority(c1=query_c1, c2=query_c2)

In [34]:
# Absolute error of the nearest-neighbour estimate for each query point.
diff = np.absolute(priorities_est - priorities_actual)

In [35]:
# 95% Student-t confidence interval for the mean absolute estimation error.
# The confidence level is passed positionally: SciPy 1.9 renamed the keyword
# from `alpha` to `confidence` and later releases dropped `alpha`, so the
# positional form is the one that works on both old and new versions.
conf_int = st.t.interval(0.95, df=len(diff) - 1, loc=np.mean(diff), scale=st.sem(diff))

We now know the 95% confidence interval of the mean priority estimation error. If we show that it lies below the 95% confidence interval of the difference between the current priority and the next best one, then using this estimation is appropriate.

### Estimating priority difference

In [49]:
# Run `simulation_priorities` once per benchmark fitness function and collect
# whatever each run returns, in the order the functions are listed.
priorities = []

for fn in (griewank, schwefel, rastrigin, ackley):
    fn_name = fn.__name__
    # `scales` is keyed by the function's name; entry 0 is passed as the
    # population scale and entry 1 as the mutation scale.
    fn_scales = scales[fn_name]
    priorities_ = simulation_priorities(
        N=100,
        epochs=200,
        fitness_fn=fn,
        population_scale=fn_scales[0],
        mutation_scale=fn_scales[1],
        seed=1,
    )
    priorities.append(priorities_)

100%|██████████| 200/200 [00:19<00:00, 10.38it/s]
100%|██████████| 200/200 [00:19<00:00, 10.13it/s]
100%|██████████| 200/200 [00:19<00:00, 10.04it/s]
100%|██████████| 200/200 [00:20<00:00,  9.64it/s]


In [50]:
# Stack the per-function results into one array and sort it along axis 1.
priorities_mtr = np.sort(np.array(priorities), axis=1)

# Gaps between neighbouring values along the sorted axis.
priority_difference_mtr = np.diff(priorities_mtr, axis=1)

# Pool the gaps of all functions into a single flat sample.
priority_difference = priority_difference_mtr.reshape(-1)

In [51]:
# 95% Student-t confidence interval for the mean gap between neighbouring
# sorted priorities. The confidence level is passed positionally: SciPy 1.9
# renamed the keyword from `alpha` to `confidence` and later releases dropped
# `alpha`, so the positional form works on both old and new versions.
conf_int_actual_diff = st.t.interval(
    0.95,
    df=len(priority_difference) - 1,
    loc=np.mean(priority_difference),
    scale=st.sem(priority_difference),
)

In [52]:
# Display the CI of the mean priority difference next to the CI of the mean
# estimation error so the two can be compared directly.
conf_int_actual_diff, conf_int

((0.026625617917013247, 0.027913588650583764),
 (0.005057507916810315, 0.00619624680288484))

In [53]:
# NOTE(review): `priority_difference_mtr` already holds differences between
# neighbouring sorted priorities, so taking np.diff of its first two columns
# yields the difference between the first two *gaps*, not the gap between the
# first and second priority (that would be `priority_difference_mtr[:, 0]`).
# Left unchanged so the recorded output stays valid — confirm the intent.
first_second_diff = np.abs(np.diff(priority_difference_mtr[:, :2], axis=1).ravel())

# 95% Student-t confidence interval for the mean of `first_second_diff`. The
# confidence level is passed positionally: SciPy 1.9 renamed the keyword from
# `alpha` to `confidence` and later releases dropped `alpha`, so the
# positional form works on both old and new versions.
conf_int_first_second = st.t.interval(
    0.95,
    df=len(first_second_diff) - 1,
    loc=np.mean(first_second_diff),
    scale=st.sem(first_second_diff),
)

In [54]:
# Display the 95% CI computed from `first_second_diff`.
conf_int_first_second

(0.05903070832699521, 0.09724286275938558)

### Conclusion

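Given that the actual difference between the first and second entries, as well as the difference between neighbouring priorities in general, is much bigger than the actual error of the approximation (comparing the interval bounds above, even in the worst case the error is more than 4 times smaller than the general difference and almost 10 times smaller than the first-second difference), we can successfully use approximation techniques.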

## Partner age estimation (Gendered selection)