In [2]:
import pandas as pd
import numpy as np
from collections import Counter
import time
from PMTK.sampling.preferences_sampler import *
from PMTK.pref.preferences import *
from PMTK.utility.utility_solver import *
from PMTK.sampling.subset_samplers import *
from PMTK.utility.subset_finder import *
from PMTK.sampling.gibbs import *
from PMTK.data.film_dataset import *
from PMTK.sampling.decider import *
from PMTK.utility import *

In [3]:
def build_approx_theta(prf, init_theta = None):
    """Greedily grow a theta (a list of subsets) until ``prf`` has no connivent left.

    The function repeatedly asks ``get_connivent(theta, prf)`` for a connivent
    that the current theta does not resolve, then walks the candidates returned
    by ``get_candidate_iterator`` level by level and adds every candidate that
    ``check_connivence_resolution`` accepts.  As soon as one level has produced
    at least one resolving candidate, later levels are skipped for that
    connivent.

    Once no connivent remains, a second pass revisits every connivent seen so
    far and adds all resolving candidates whose level ``k`` does not exceed
    ``additivity(theta)``.

    Parameters
    ----------
    prf :
        Preference relation handed unchanged to ``get_connivent``.
    init_theta : list, optional
        Starting subsets.  ``None`` or an empty list means ``[EMPTY_SET]``.
        The list is copied, so the caller's object is never modified.

    Returns
    -------
    list
        The starting subsets followed by every subset added, in insertion order.
    """
    connivents = []
    # Copy so that appending below never leaks into the caller's list.
    theta = list(init_theta) if init_theta else [EMPTY_SET]

    c = get_connivent(theta, prf)
    while c:
        if c not in connivents:
            connivents.append(c)
        cit = get_candidate_iterator(c)
        # True once any candidate of an already-visited level resolved `c`.
        # (The flag used to be overwritten by every candidate, so only the
        # last candidate of a level decided whether to stop.)
        resolved = False
        for k in cit:
            if resolved:
                break
            for group in cit[k]:
                for t in group:
                    if check_connivence_resolution(c, t):
                        resolved = True
                        if t not in theta:
                            theta.append(t)
        c = get_connivent(theta, prf)

    # Second pass: complete theta with every resolving candidate up to the
    # additivity reached so far.
    # NOTE(review): the early `break` assumes the levels of `cit` are iterated
    # in increasing order of k -- confirm against get_candidate_iterator.
    a = additivity(theta)
    for c_i in connivents:
        cit = get_candidate_iterator(c_i)
        for k in cit:
            if k > a:
                break
            for group in cit[k]:
                for t in group:
                    if t not in theta and check_connivence_resolution(c_i, t):
                        theta.append(t)

    return theta

In [9]:
def predict_from_mult_thetas(thetas,p, subsets):
    """Predict a relation over ``subsets`` on which every theta's model agrees.

    One model is built per theta with ``utility_polyhedron(p.items, theta, p)``.
    For each unordered pair of subsets, ``ordinal_dominance`` is evaluated under
    every model and the pair is recorded only when the verdict is unanimous:

    * all ``"SUP"`` -> ``add_preference(first, second)``
    * all ``"INF"`` -> ``add_preference(second, first)``
    * all ``"EQ"``  -> ``add_indifference(first, second)``

    Any other mix of verdicts leaves the pair out of the result.

    Parameters
    ----------
    thetas : iterable
        Thetas to build models from.  When it is empty nothing can be
        predicted and an empty relation is returned.
    p :
        Known preferences; must expose ``items``.
    subsets : sequence
        Subsets to compare pairwise (indexed by position).

    Returns
    -------
    Preferences
        A new ``Preferences(p.items)`` holding the unanimous verdicts.
    """
    mdls = [utility_polyhedron(p.items, t, p) for t in thetas]
    prf = Preferences(p.items)
    if not mdls:
        # all() over an empty list is vacuously True; without this guard every
        # pair would be recorded as a strict preference backed by no model.
        return prf

    for first_idx, first in enumerate(subsets):
        for second_idx in range(first_idx + 1, len(subsets)):
            second = subsets[second_idx]
            verdicts = [ordinal_dominance(first, second, mdl) for mdl in mdls]
            if all(v == "SUP" for v in verdicts):
                prf.add_preference(first, second)
            elif all(v == "INF" for v in verdicts):
                prf.add_preference(second, first)
            elif all(v == "EQ" for v in verdicts):
                prf.add_indifference(first, second)
    return prf

def predict_from_theta(theta, p, subsets):
    """Predict a preference relation over ``subsets`` from a single theta.

    A model is built with ``utility_polyhedron(p.items, theta, p)`` and handed,
    together with ``p.items`` and ``subsets``, to ``ordinal_peferences``; that
    call's result is returned unchanged.
    """
    polyhedron = utility_polyhedron(p.items, theta, p)
    return ordinal_peferences(p.items, subsets, polyhedron)

In [10]:
def ACR(prf, decider):
    """Count how often ``decider`` agrees with the strict preferences of ``prf``.

    Parameters
    ----------
    prf :
        Object exposing ``preferred``, a sized iterable of ``(x, y)`` pairs.
    decider : callable
        Maps one element of a pair to a comparable score.

    Returns
    -------
    tuple
        ``(C, W, T)`` where ``C`` counts pairs with
        ``decider(x) > decider(y)``, ``W`` counts pairs with
        ``decider(x) < decider(y)`` and ``T`` is ``len(prf.preferred)``.
        Pairs with equal scores count towards ``T`` only.
    """
    C = 0
    W = 0
    T = len(prf.preferred)
    for x, y in prf.preferred:
        # Score each side exactly once: the two comparisons must see the same
        # values even when `decider` is stateful or noisy, and repeated calls
        # are wasted work otherwise.
        score_x = decider(x)
        score_y = decider(y)
        if score_x > score_y:
            C += 1
        elif score_x < score_y:
            W += 1
    return C, W, T

def intersect_per_union(prf_1, prf_2):
    """Return ``len(prf_1.intersection(prf_2)) / len(prf_1 + prf_2)``.

    Both arguments must support ``intersection`` and ``+``, and the results of
    both operations must support ``len``.  A combined relation of length zero
    raises ``ZeroDivisionError``.
    """
    shared = prf_1.intersection(prf_2)
    combined = prf_1 + prf_2
    shared_count = len(shared)
    combined_count = len(combined)
    return shared_count / combined_count

## Testing on synthetic data

In [None]:
# --- Experiment configuration ------------------------------------------------
n_items = 6
items = np.arange(n_items)
ground_truth = Tierlist_Decider(items, p=0.3, alpha = 0.3)

budget = 24
n_subsets = 10

# Column-oriented result table: four rows (one per theta strategy) are appended
# for every (repetition, budget step, test sample).
data = {
    "budget":[],
    "theta":[],
    "n_theta_min":[],
    "n_preferences":[],
    "additivity":[],
    "size":[],
    "sizes_sum":[],
    "C":[],
    "W":[],
    "T":[],
}

contradictions_data = {
    "theta_1":[],
    "theta_2":[],
    "union_per_intersection":[]
}


def _record_result(rows, strategy, theta, step, n_kernels, n_prefs, counts):
    """Append one result row for ``strategy`` to the column dict ``rows``."""
    correct, wrong, total = counts
    rows["budget"].append(step)
    rows["theta"].append(strategy)
    rows["n_theta_min"].append(n_kernels)
    rows["n_preferences"].append(n_prefs)
    rows["additivity"].append(additivity(theta))
    rows["size"].append(len(theta))
    rows["sizes_sum"].append(sum(len(subset) for subset in theta))
    rows["C"].append(correct)
    rows["W"].append(wrong)
    rows["T"].append(total)


for repetition in range(5):
    # Start every repetition with a fresh query memory.  With a single shared
    # decider, `b` would no longer equal the number of queries behind `prf`,
    # and 5 * 24 unseen subsets would be requested although 6 items admit at
    # most 2**6 distinct subsets, so the rejection loop below could never end.
    # NOTE(review): assumes `decider.saved` keeps every subset evaluated so
    # far -- confirm against Objective_Function.
    decider = Objective_Function(items, ground_truth)

    for b in range(budget):
        # Draw a subset that has not been queried yet in this repetition.
        s = sample_subset(items)
        while s in decider.saved:
            s = sample_subset(items)

        decider(s)
        prf = decider.relation()
        if len(prf) == 0:
            continue

        for s_rep in range(5):
            print(f"repetition = {repetition}, b = {b}, srep = {s_rep}")

            test_subsets = sample_subsets(items, n_subsets=n_subsets)

            t_heuristic = build_approx_theta(prf, [EMPTY_SET])
            t_mins = get_kernels_opt(prf,t_heuristic)
            # NOTE(review): `random` is not imported explicitly in this
            # notebook; it presumably arrives through a PMTK star import.
            t_random = random.choice(t_mins)
            t_union = union(t_mins)

            prf_t_heuristic = predict_from_theta(t_heuristic, prf, test_subsets)
            prf_t_random = predict_from_theta(t_random, prf, test_subsets)
            prf_t_union = predict_from_theta(t_union, prf, test_subsets)
            prf_t_mins = predict_from_mult_thetas(t_mins, prf, test_subsets)

            # (strategy label, theta used for the size statistics, prediction).
            # "all_thetas" reports the statistics of the first kernel.
            evaluated = [
                ("heuristic", t_heuristic, prf_t_heuristic),
                ("random", t_random, prf_t_random),
                ("union", t_union, prf_t_union),
                ("all_thetas", t_mins[0], prf_t_mins),
            ]
            scores = [ACR(predicted, ground_truth) for _, _, predicted in evaluated]

            for (strategy, theta, _), counts in zip(evaluated, scores):
                _record_result(
                    data, strategy, theta, b, len(t_mins), len(prf), counts
                )

            # Rewritten after every sample so partial results survive an
            # interrupted run.
            df = pd.DataFrame(data)
            df.to_csv("theta_comparisons_4.csv")
        
        
        
    

repetition = 0, b = 1, srep = 0
repetition = 0, b = 1, srep = 1
repetition = 0, b = 1, srep = 2
repetition = 0, b = 1, srep = 3
repetition = 0, b = 1, srep = 4
repetition = 0, b = 2, srep = 0
repetition = 0, b = 2, srep = 1
repetition = 0, b = 2, srep = 2
repetition = 0, b = 2, srep = 3
repetition = 0, b = 2, srep = 4
repetition = 0, b = 3, srep = 0
repetition = 0, b = 3, srep = 1
repetition = 0, b = 3, srep = 2
repetition = 0, b = 3, srep = 3
repetition = 0, b = 3, srep = 4
repetition = 0, b = 4, srep = 0
repetition = 0, b = 4, srep = 1
repetition = 0, b = 4, srep = 2
repetition = 0, b = 4, srep = 3
repetition = 0, b = 4, srep = 4
repetition = 0, b = 5, srep = 0
repetition = 0, b = 5, srep = 1
repetition = 0, b = 5, srep = 2
repetition = 0, b = 5, srep = 3
repetition = 0, b = 5, srep = 4
repetition = 0, b = 6, srep = 0
repetition = 0, b = 6, srep = 1
repetition = 0, b = 6, srep = 2
repetition = 0, b = 6, srep = 3
repetition = 0, b = 6, srep = 4
repetition = 0, b = 7, srep = 0
repetiti