In [1]:
import pandas as pd
import numpy as np
from collections import Counter
import time
from PMTK.sampling.preferences_sampler import *
from PMTK.pref.preferences import *
from PMTK.utility.utility_solver import *
from PMTK.sampling.subset_samplers import *
from PMTK.utility.subset_finder import *
from PMTK.sampling.gibbs import *
from PMTK.data.film_dataset import *
from PMTK.sampling.decider import *
from PMTK.utility import *
from itertools import combinations
from sklearn import svm

def build_approx_theta(prf, init_theta = None):
    """Grow a list of subsets (theta) until `get_connivent` reports no connivent.

    Phase 1 repeatedly asks `get_connivent(theta, prf)` for a connivent and
    walks the mapping returned by `get_candidate_iterator` key by key,
    appending every candidate subset accepted by
    `check_connivence_resolution`. It stops at the first key that
    contributed at least one new subset.

    Phase 2 revisits every connivent met in phase 1 and appends all accepted
    candidates stored under keys not greater than `additivity(theta)`.

    Parameters
    ----------
    prf : preference relation handed unchanged to `get_connivent`.
    init_theta : optional iterable of subsets to start from. When missing or
        empty, the search starts from `[EMPTY_SET]`. The argument itself is
        never modified; a copy is extended and returned.

    Returns
    -------
    list
        The extended theta.

    Raises
    ------
    IndexError
        If a connivent has no candidate keys at all.
    """
    # Work on a copy: appending to the caller's list would leak state between calls.
    theta = list(init_theta) if init_theta else [EMPTY_SET]
    connivents = []

    c = get_connivent(theta, prf)
    while c:
        if c not in connivents:
            connivents.append(c)
        cit = get_candidate_iterator(c)
        if not cit.keys():
            # The original code failed with IndexError here (indexing an empty
            # sorted key list); keep that failure mode, but make it explicit.
            raise IndexError("no candidate subsets available for the current connivent")

        resolved = False
        for k in cit:
            if resolved:
                # A previous key already produced a resolving subset.
                break
            for i in cit[k]:
                for t in set(i):
                    if t not in theta and check_connivence_resolution(c, t):
                        theta.append(t)
                        resolved = True
        # NOTE(review): if nothing was appended above, theta is unchanged when
        # get_connivent is queried again; confirm that the loop always makes
        # progress (get_connivent is not visible from this notebook).
        c = get_connivent(theta, prf)

    a = additivity(theta)
    for c_i in connivents:
        cit = get_candidate_iterator(c_i)
        for k in cit:
            # Relies on the keys being iterated in increasing order.
            if k > a:
                break
            for i in cit[k]:
                for t in i:
                    if t not in theta and check_connivence_resolution(c_i, t):
                        theta.append(t)

    return theta

# Small end-to-end run on 6 items: build a decider, query it on 10 sampled
# subsets, read back the resulting preference relation and build a theta for it.
# The names defined here (items, decider, prf, theta, ...) are module-level
# notebook state.
n_items = 6
items = np.arange(n_items)
ground_truth = Tierlist_Decider(items, p=0.3, alpha = 0.3)
decider = Objective_Function(items, ground_truth)

subsets = sample_subsets(items, n_subsets=10)
# Query the decider on every sampled subset; presumably each call is recorded
# by the decider so that relation() below can compare them — TODO confirm.
for s in subsets:
    decider(s)
    
prf = decider.relation()
theta = build_approx_theta(prf)

In [2]:
def train_cardinal_model(prf ,theta):
    """Solve the utility model for `theta` and return its non-zero utilities.

    Builds `utility_polyhedron(prf.items, theta, prf)`, maximises the negated
    `slack_sum` of that model, solves it, and collects the solution value of
    every entry of `model.utilities` whose value differs from zero.

    Returns a dict mapping each such key of `model.utilities` to its value.
    """
    model = utility_polyhedron(prf.items, theta, prf)
    model.maximize(-model.slack_sum)
    model.solve()

    weights = {}
    for key in model.utilities:
        value = model.utilities[key].solution_value
        if value != 0:
            weights[key] = value
    return weights

def evaluate_cardinal_model(cardinal_model, subset):
    """Return the total weight of the model entries fully contained in `subset`.

    `cardinal_model` maps groups of elements to weights. A group contributes
    its weight when every one of its elements is in `subset`; an empty group
    therefore always contributes. The result is 0 when nothing matches.
    """
    total = 0
    for group, weight in cardinal_model.items():
        # Skip the group as soon as one of its members is missing.
        if any(member not in subset for member in group):
            continue
        total += weight
    return total
        
        
def predict_cardinal(cardinal_model, items, subsets):
    """Compare every pair of `subsets` by their value under `cardinal_model`.

    Each subset is scored once with `evaluate_cardinal_model`. For every pair
    (earlier subset first), a strict preference is added towards the
    higher-scoring subset, or an indifference when both scores are equal.

    Parameters
    ----------
    cardinal_model : mapping accepted by `evaluate_cardinal_model`.
    items : passed to `Preferences(items)`.
    subsets : indexable sequence of subsets to compare.

    Returns
    -------
    The `Preferences` object holding the predicted relation.
    """
    prf = Preferences(items)
    # Score each subset once instead of once per pair it takes part in.
    scores = [evaluate_cardinal_model(cardinal_model, s) for s in subsets]
    for i_1 in range(len(subsets)):
        s_1, e_1 = subsets[i_1], scores[i_1]
        for i_2 in range(i_1 + 1, len(subsets)):
            s_2, e_2 = subsets[i_2], scores[i_2]
            if e_1 > e_2:
                prf.add_preference(s_1, s_2)
            elif e_2 > e_1:
                prf.add_preference(s_2, s_1)
            elif e_1 == e_2:
                # Explicit equality test: incomparable scores (e.g. NaN) add nothing.
                prf.add_indifference(s_1, s_2)
    return prf


def train_svm_model(prf, theta, clf = None):
    """Fit a classifier on ordered pairs taken from `prf.preferred`.

    Every pair (x, y) of `prf.preferred` yields two training rows built from
    `vectorize_subset(., theta)`: the concatenation x|y labelled 1 and the
    swapped concatenation y|x labelled 0.

    Parameters
    ----------
    prf : object exposing `preferred`, an iterable of (x, y) pairs.
    theta : passed to `vectorize_subset` for both members of each pair.
    clf : optional estimator exposing `fit(X, y)`; a fresh `svm.SVC()` is
        created when it is None.

    Returns
    -------
    The fitted estimator.
    """
    # Compare with None explicitly: truthiness of an estimator object is not a
    # reliable "was it provided" test (objects defining __len__ can be falsy
    # or fail the check).
    if clf is None:
        clf = svm.SVC()

    rows = []
    for x, y in prf.preferred:
        v_x = list(vectorize_subset(x, theta))
        v_y = list(vectorize_subset(y, theta))
        rows.append(np.array(v_x + v_y + [1]))
        rows.append(np.array(v_y + v_x + [0]))

    data = np.array(rows)
    # The last column holds the label, everything before it is the feature vector.
    features, labels = data[:, :-1], data[:, -1]
    clf.fit(features, labels)
    return clf

def predict_svm(svm_model,items, subsets, theta):
    """Predict strict preferences between all pairs of `subsets` with `svm_model`.

    For each pair (x, y), with x appearing before y in `subsets`, the model is
    queried on the concatenated vectors x|y and y|x. A preference is recorded
    only when the two answers agree: (1, 0) means x over y, (0, 1) means y
    over x. Any other combination adds nothing.

    Returns the `Preferences(items)` object holding the recorded preferences.
    """
    predicted = Preferences(items)
    for x, y in combinations(subsets, 2):
        features_x = list(vectorize_subset(x, theta))
        features_y = list(vectorize_subset(y, theta))
        forward = np.array(features_x + features_y).reshape((1,-1))
        backward = np.array(features_y + features_x).reshape((1,-1))
        label_forward = svm_model.predict(forward)[0]
        label_backward = svm_model.predict(backward)[0]
        if label_forward == 1 and label_backward == 0:
            predicted.add_preference(x,y)
        elif label_forward == 0 and label_backward == 1:
            predicted.add_preference(y,x)
    return predicted

In [3]:
def build_approx_theta(prf, init_theta = None):
    """Grow a list of subsets (theta) until `get_connivent` reports no connivent.

    NOTE: this cell re-defines `build_approx_theta`, which the first cell of
    the notebook also defines; once this cell has run, this definition is the
    one in effect.

    Phase 1 repeatedly asks `get_connivent(theta, prf)` for a connivent and
    walks the mapping returned by `get_candidate_iterator` key by key,
    appending every candidate subset accepted by
    `check_connivence_resolution`. It stops at the first key that
    contributed at least one new subset.

    Phase 2 revisits every connivent met in phase 1 and appends all accepted
    candidates stored under keys not greater than `additivity(theta)`.

    Parameters
    ----------
    prf : preference relation handed unchanged to `get_connivent`.
    init_theta : optional iterable of subsets to start from. When missing or
        empty, the search starts from `[EMPTY_SET]`. The argument itself is
        never modified; a copy is extended and returned.

    Returns
    -------
    list
        The extended theta.

    Raises
    ------
    IndexError
        If a connivent has no candidate keys at all.
    """
    # Work on a copy: appending to the caller's list would leak state between calls.
    theta = list(init_theta) if init_theta else [EMPTY_SET]
    connivents = []

    c = get_connivent(theta, prf)
    while c:
        if c not in connivents:
            connivents.append(c)
        cit = get_candidate_iterator(c)
        if not cit.keys():
            # The original code failed with IndexError here (indexing an empty
            # sorted key list); keep that failure mode, but make it explicit.
            raise IndexError("no candidate subsets available for the current connivent")

        resolved = False
        for k in cit:
            if resolved:
                # A previous key already produced a resolving subset.
                break
            for i in cit[k]:
                for t in set(i):
                    if t not in theta and check_connivence_resolution(c, t):
                        theta.append(t)
                        resolved = True
        # NOTE(review): if nothing was appended above, theta is unchanged when
        # get_connivent is queried again; confirm that the loop always makes
        # progress (get_connivent is not visible from this notebook).
        c = get_connivent(theta, prf)

    a = additivity(theta)
    for c_i in connivents:
        cit = get_candidate_iterator(c_i)
        for k in cit:
            # Relies on the keys being iterated in increasing order.
            if k > a:
                break
            for i in cit[k]:
                for t in i:
                    if t not in theta and check_connivence_resolution(c_i, t):
                        theta.append(t)

    return theta

def predict_from_mult_thetas(thetas,p, subsets):
    """Predict a relation on `subsets` that holds unanimously across `thetas`.

    One model is built per theta with `utility_polyhedron(p.items, theta, p)`.
    For every pair (s_1, s_2), s_1 appearing first in `subsets`, all models
    are asked through `ordinal_dominance`. A result is recorded only when all
    answers agree: all "SUP" means s_1 over s_2, all "INF" means s_2 over
    s_1, and all "EQ" means indifference.

    Parameters
    ----------
    thetas : iterable of thetas; when it is empty nothing can be unanimous
        and an empty relation is returned.
    p : preference relation exposing `items`.
    subsets : indexable sequence of subsets to compare.

    Returns
    -------
    The `Preferences(p.items)` object holding the unanimous predictions.
    """
    mdls = [utility_polyhedron(p.items, t, p) for t in thetas]
    prf = Preferences(p.items)
    if not mdls:
        # all(...) over zero answers is vacuously True and would otherwise
        # record every pair as a strict preference.
        return prf

    for i_1 in range(len(subsets)):
        s_1 = subsets[i_1]
        for i_2 in range(i_1 + 1, len(subsets)):
            s_2 = subsets[i_2]
            answers = [ordinal_dominance(s_1, s_2, mdl) for mdl in mdls]
            if all(a == "SUP" for a in answers):
                prf.add_preference(s_1, s_2)
            elif all(a == "INF" for a in answers):
                prf.add_preference(s_2, s_1)
            elif all(a == "EQ" for a in answers):
                prf.add_indifference(s_1,s_2)
    return prf

def predict_from_theta(theta, p, subsets):
    """Return `ordinal_peferences` over `subsets` for the model built from `theta`.

    The model is `utility_polyhedron(p.items, theta, p)`; the result of
    `ordinal_peferences(p.items, subsets, model)` is returned unchanged.
    """
    return ordinal_peferences(p.items, subsets, utility_polyhedron(p.items, theta, p))

def ACR(prf, decider):
    """Count how many strict preferences of `prf` agree with `decider`.

    Parameters
    ----------
    prf : object exposing `preferred`, a sized iterable of (x, y) pairs
        meaning "x is preferred to y".
    decider : callable returning a comparable value for a subset.

    Returns
    -------
    tuple (C, W, T)
        C: pairs with decider(x) > decider(y) (confirmed).
        W: pairs with decider(x) < decider(y) (contradicted).
        T: total number of pairs in `prf.preferred`. Pairs the decider values
           equally count in T only.
    """
    correct = 0
    wrong = 0
    for x, y in prf.preferred:
        # Evaluate each side exactly once so both comparisons see the same values.
        value_x = decider(x)
        value_y = decider(y)
        if value_x > value_y:
            correct += 1
        elif value_x < value_y:
            wrong += 1
    return correct, wrong, len(prf.preferred)

def intersect_per_union(prf_1, prf_2):
    """Return len(prf_1.intersection(prf_2)) divided by len(prf_1 + prf_2).

    Both arguments must support `intersection`, `+` and `len` on the results.
    Raises ZeroDivisionError when `prf_1 + prf_2` is empty.
    """
    shared_count = len(prf_1.intersection(prf_2))
    combined_count = len(prf_1 + prf_2)
    return shared_count / combined_count

In [9]:
# Experiment: for a growing number of queried subsets (the "budget"), compare
# several ways of predicting preferences on freshly sampled test subsets and
# log, for each method, the counts returned by ACR against the ground truth.
n_items = 7
items = np.arange(n_items)

budget = 35            # number of subsets queried per repetition
n_subsets = 10         # number of test subsets sampled per test round
n_repetitions = 20     # independent ground truths
n_test_rounds = 10     # test rounds per budget step
min_preferences = 10   # skip budget steps whose relation is smaller than this
output_csv = "theta_definitions_comparisons_with_cardinals_4.csv"

data = {
    "budget":[],
    "theta_definition":[],
    "theta_operator":[],
    "n_theta_min":[],
    "n_preferences":[],
    "additivity":[],
    "size":[],
    "sizes_sum":[],
    "time":[],
    "C":[],
    "W":[],
    "T":[],
}


def _record_row(table, budget_step, definition, operator, elapsed,
                n_theta_min, n_preferences, theta, counts):
    """Append one result row to the column lists of `table`.

    `theta` is the theta whose additivity, size and summed element sizes are
    logged; `counts` is the (C, W, T) triple returned by ACR.
    """
    n_correct, n_wrong, n_total = counts
    table["budget"].append(budget_step)
    table["theta_definition"].append(definition)
    table["theta_operator"].append(operator)
    table["time"].append(elapsed)
    table["n_theta_min"].append(n_theta_min)
    table["n_preferences"].append(n_preferences)
    table["additivity"].append(additivity(theta))
    table["size"].append(len(theta))
    table["sizes_sum"].append(sum(len(i) for i in theta))
    table["C"].append(n_correct)
    table["W"].append(n_wrong)
    table["T"].append(n_total)


# NOTE(review): `random` is used below but is not imported explicitly in this
# notebook; it presumably arrives through one of the star imports — confirm.
for repetition in range(n_repetitions):
    ground_truth = Tierlist_Decider(items, p=0.2, alpha = 0.2)
    decider = Objective_Function(items, ground_truth)

    for b in range(budget):
        # Draw a subset that has not been queried yet in this repetition.
        s = sample_subset(items)
        while s in decider.saved:
            s = sample_subset(items)

        decider(s)
        prf = decider.relation()
        if len(prf) < min_preferences:
            continue

        t_heuristic = build_approx_theta(prf, [EMPTY_SET])

        ti_lex_3 = time.time()
        t_lex3 = get_kernels_lex3(prf, get_all_k_sets(items, len(items)))
        ti_lex_3 = time.time() - ti_lex_3

        t_lex3_rnd = random.choice(t_lex3)
        t_lex3_union = union(t_lex3)

        print(f"Lex3 took {ti_lex_3:.2f}")

        # Both models depend only on prf and t_lex3_union, which are fixed for
        # this budget step, so they are trained once instead of once per round.
        svm_tlex3 = train_svm_model(prf, t_lex3_union, clf = None)
        card_tlex3 = train_cardinal_model(prf ,t_lex3_union)

        for s_rep in range(n_test_rounds):
            print(f"repetition = {repetition}, b = {b}, srep = {s_rep}, npref = {len(prf)}")
            test_subsets = sample_subsets(items, n_subsets=n_subsets)

            # Predictions on the test subsets, minus what is already in prf.
            prf_t_heuristic = predict_from_theta(t_heuristic, prf, test_subsets) - prf

            prf_tlex3_union = predict_from_theta(t_lex3_union, prf, test_subsets) - prf
            prf_tlex3_rnd = predict_from_theta(t_lex3_rnd, prf, test_subsets) - prf
            prf_tlex3_all = predict_from_mult_thetas(t_lex3, prf, test_subsets) - prf

            prf_svm_tlex3 = predict_svm(svm_tlex3,prf.items, test_subsets, t_lex3_union) - prf
            prf_card_tlex3 = predict_cardinal(card_tlex3, items, test_subsets) - prf

            # NOTE(review): the heuristic counts are computed but never written
            # to `data`; either record them or drop the heuristic predictions.
            C_t_heuristic, W_t_heuristic, T_t_heuristic = ACR(prf_t_heuristic, ground_truth)

            acr_tlex3_union = ACR(prf_tlex3_union, ground_truth)
            acr_tlex3_rnd = ACR(prf_tlex3_rnd, ground_truth)
            acr_tlex3_all = ACR(prf_tlex3_all, ground_truth)
            acr_svm_tlex3 = ACR(prf_svm_tlex3, ground_truth)
            acr_card_tlex3 = ACR(prf_card_tlex3, ground_truth)

            n_kernels = len(t_lex3)
            n_prefs = len(prf)

            #### LEX 3
            _record_row(data, b, "LEX3", "UNION", ti_lex_3, n_kernels, n_prefs,
                        t_lex3_union, acr_tlex3_union)
            _record_row(data, b, "LEX3", "RND", ti_lex_3, n_kernels, n_prefs,
                        t_lex3_rnd, acr_tlex3_rnd)
            _record_row(data, b, "LEX3", "ALL", ti_lex_3, n_kernels, n_prefs,
                        t_lex3[0], acr_tlex3_all)
            # NOTE(review): the SVM and CARD models are trained on t_lex3_union,
            # yet the logged theta statistics are those of t_lex3[0] (kept as in
            # the original rows) — confirm which theta should be reported.
            _record_row(data, b, "SVM", "SVM", ti_lex_3, n_kernels, n_prefs,
                        t_lex3[0], acr_svm_tlex3)
            _record_row(data, b, "CARD", "CARD", ti_lex_3, n_kernels, n_prefs,
                        t_lex3[0], acr_card_tlex3)

        # Checkpoint once per budget step; rebuilding the frame and rewriting
        # the file after every test round grows quadratically with the log.
        df = pd.DataFrame(data)
        df.to_csv(output_csv)
        
        
        
    
        

Lex3 took 0.28
repetition = 0, b = 5, srep = 0, npref = 11
repetition = 0, b = 5, srep = 1, npref = 11
repetition = 0, b = 5, srep = 2, npref = 11
repetition = 0, b = 5, srep = 3, npref = 11
repetition = 0, b = 5, srep = 4, npref = 11
repetition = 0, b = 5, srep = 5, npref = 11
repetition = 0, b = 5, srep = 6, npref = 11
repetition = 0, b = 5, srep = 7, npref = 11
repetition = 0, b = 5, srep = 8, npref = 11
repetition = 0, b = 5, srep = 9, npref = 11
Lex3 took 0.28
repetition = 0, b = 6, srep = 0, npref = 17
repetition = 0, b = 6, srep = 1, npref = 17
repetition = 0, b = 6, srep = 2, npref = 17
repetition = 0, b = 6, srep = 3, npref = 17
repetition = 0, b = 6, srep = 4, npref = 17
repetition = 0, b = 6, srep = 5, npref = 17
repetition = 0, b = 6, srep = 6, npref = 17
repetition = 0, b = 6, srep = 7, npref = 17
repetition = 0, b = 6, srep = 8, npref = 17
repetition = 0, b = 6, srep = 9, npref = 17
Lex3 took 0.28
repetition = 0, b = 7, srep = 0, npref = 21
repetition = 0, b = 7, srep = 1