In [1]:
import os

# Switch the working directory to the project checkout; later cells import `src.*`
# and write to a relative `results/` path, which presumably rely on this.
# The location can be overridden through the THESIS_IMPL_DIR environment variable;
# when it is unset the original author's checkout path is used.
os.chdir(os.environ.get('THESIS_IMPL_DIR', '/home/oleksandr/UT/Thesis/Implementations/'))

In [2]:
from sklearn.model_selection import train_test_split
import faiss
import numpy as np
from scipy.special import softmax
from scipy import stats as st
from tqdm import tqdm
import pandas as pd

In [9]:
from src.gendered_selection.age_diff_estimator import Simulation
from src.gendered_selection.faster_fuzzy_logic.generalized_partition_inferrer import GeneralizedInferrer
from src.gendered_selection.conf.gendered_selection_config import Config
from src.common.utils import (mutate_single_point, crossover, mutation)
from src.common.fitness import griewank
from src.common.scales import scales
from src.common.approximation_helpers import generate_search_space


In [6]:
# Maps the number of partitions (3, 5, 7) to the 95% t-confidence interval of the
# mean gap between neighbouring sorted values recorded in the simulation history.
resulting_intervals = {}

for n_terms in [3, 5, 7]:
    sim = Simulation(conf=Config(), fitness_fn=griewank, mutation=mutation, crossover=crossover)
    history = sim.run(n_partitions=n_terms)

    # Sort every history entry, take consecutive differences and pool them into one
    # flat array. Per the original note these are the differences between
    # closest-age females (assumes each history entry is a collection of ages — TODO confirm).
    diffs = np.hstack([np.diff(np.sort(np.array(i))) for i in history]).ravel()

    # The confidence level is passed positionally on purpose: SciPy renamed the
    # keyword from `alpha` to `confidence`, and `alpha=` is rejected by current
    # releases, while the positional form works on both old and new versions.
    conf_interval = st.t.interval(0.95, df=len(diffs)-1, loc=np.mean(diffs), scale=st.sem(diffs))

    resulting_intervals[n_terms] = conf_interval
resulting_intervals


  5%|▌         | 1/20 [00:00<00:02,  6.45it/s]

[-0.0653952  -0.16006199 -0.16041788 -0.17327834 -0.17606861 -0.18080592
 -0.18246453 -0.21136536 -0.22002375 -0.24133642]


100%|██████████| 20/20 [00:03<00:00,  5.01it/s]
  5%|▌         | 1/20 [00:00<00:03,  6.31it/s]

[-0.1600197  -0.2100762  -0.22492183 -0.27886807 -0.28678765 -0.29921657
 -0.30629065 -0.30843108 -0.31442553 -0.319319  ]


100%|██████████| 20/20 [00:03<00:00,  5.05it/s]
  5%|▌         | 1/20 [00:00<00:03,  5.28it/s]

[-0.13968207 -0.20768556 -0.22476372 -0.23327528 -0.2367383  -0.25329415
 -0.2542283  -0.25863205 -0.27369513 -0.27822504]


100%|██████████| 20/20 [00:04<00:00,  4.19it/s]


{3: (0.006257315780936058, 0.007359649944097279),
 5: (0.006972866431465529, 0.007862308281009919),
 7: (0.0064121030740478995, 0.007362704658306925)}

In [10]:
# Maps the number of partitions (3, 5, 7) to the 95% t-confidence interval of the
# mean absolute error made when the inferrer output is approximated by a weighted
# combination of its 10 nearest training points.
resulting_intervals_estimation_error = {}
for n_terms in [3, 5, 7]:
    inferrer = GeneralizedInferrer(n_partitions=n_terms)
    X = generate_search_space(n_splits=100, ranges=[(0, 1), (0, 10)])
    # Reference values: the inferrer is evaluated on every row of the search space.
    y = np.array([inferrer.infer_partner_age(*row) for row in tqdm(X)])

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.1, random_state=42)
    # Exact (brute-force) L2 index over the training inputs.
    param_index = faiss.IndexFlatL2(X_train.shape[1])
    param_index.add(X_train)

    # D holds the distances to, and I the training-row indices of, the 10 nearest neighbours.
    D, I = param_index.search(X_test, k=10)
    # NOTE(review): softmax(D) assigns *larger* weights to *farther* neighbours;
    # softmax(-D) would favour the closest ones. Left unchanged so the reported
    # numbers stay comparable — confirm against the approximator used in `src`.
    y_pred = (y_train[I] * softmax(D, axis=1)).sum(axis=1)
    errors = np.abs(y_pred.ravel() - y_test.ravel())

    # The confidence level is passed positionally on purpose: SciPy renamed the
    # keyword from `alpha` to `confidence`, and `alpha=` is rejected by current
    # releases, while the positional form works on both old and new versions.
    error_confint = st.t.interval(0.95, df=len(errors)-1, loc=np.mean(errors), scale=st.sem(errors))
    resulting_intervals_estimation_error[n_terms] = error_confint

100%|██████████| 10000/10000 [00:43<00:00, 228.62it/s]
100%|██████████| 10000/10000 [00:48<00:00, 208.12it/s]
100%|██████████| 10000/10000 [00:50<00:00, 197.22it/s]


In [11]:
# Summary table: one row per number of terms, pairing the two 95% confidence
# intervals computed in the earlier cells.
records = [
    {
        'n_terms': n_terms,
        'age_difference_95%_confint': resulting_intervals[n_terms],
        'age_estimation_error_95%_confint': resulting_intervals_estimation_error[n_terms],
    }
    for n_terms in [3, 5, 7]
]

df = pd.DataFrame(records)
df

Unnamed: 0,n_terms,age_difference_95%_confint,age_estimation_error_95%_confint
0,3,"(0.006257315780936058, 0.007359649944097279)","(0.0024410575272460072, 0.002988461517415478)"
1,5,"(0.006972866431465529, 0.007862308281009919)","(0.0029407168015879344, 0.0035231845494672308)"
2,7,"(0.0064121030740478995, 0.007362704658306925)","(0.0031198455079784236, 0.0037750803351908457)"


In [12]:
# Persist the summary table. The output folder is created first so the export
# does not fail when `results/` is missing from the working directory.
os.makedirs('results', exist_ok=True)
df.to_csv('results/gfga_proof_confints.csv')