In [5]:
%load_ext autoreload

In [165]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import itertools
from PMTK.random.preferences_sampler import sample_preferences_from_order, sample_preferences_from_complete_order
from PMTK.utils import *
from PMTK.random.subset_samplers import sample_subsets
from PMTK.utility.additive_utility import AdditiveUtility
from PMTK.preferences import *
from PMTK.utility.utility_fitter import Utility_Fitter
from PMTK.utility.connivence_solver import Connivence_Solver
from PMTK.utility.extension_solver import *
from PMTK.utility.kernel_finder import *
from PMTK.utility.model_solver import *
from tqdm.notebook import tqdm
from itertools import chain
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
%autoreload 2

In [166]:
def pareto_dominate(x,y, epsilon = 1e-2):
    """Compare two objective vectors component-wise (larger is better).

    Parameters
    ----------
    x, y : array-like
        Objective vectors of the same length. Lists and tuples are accepted
        and converted with ``np.asarray``.
    epsilon : float, optional
        Accepted for interface compatibility; the comparison below does not
        use it.
        NOTE(review): presumably meant as a tolerance - confirm before wiring
        it in, since that would change existing results.

    Returns
    -------
    int
        ``1`` if every component of ``x`` is >= the matching component of
        ``y``, ``-1`` if every component of ``y`` is >= that of ``x``, and
        ``0`` otherwise. Identical vectors hit the first test and yield ``1``.
    """
    # Coerce so that plain Python sequences support the element-wise subtraction.
    x = np.asarray(x)
    y = np.asarray(y)
    if ((x - y) >= 0).all():
        return 1
    if ((y - x) >= 0).all():
        return -1
    return 0

def is_pareto_efficient_simple(costs):
    """Return a boolean mask marking the Pareto-efficient rows of ``costs``.

    ``costs`` is a 2-D array with one row per point; lower values are better.
    Each still-surviving point is used in turn as a reference: among the
    survivors, only rows that are strictly lower than the reference in at
    least one column are kept, and the reference itself is then re-admitted.
    """
    n_points = costs.shape[0]
    keep = np.ones(n_points, dtype = bool)
    for idx in range(n_points):
        if not keep[idx]:
            # Already eliminated by an earlier reference point.
            continue
        reference = costs[idx]
        survivors = costs[keep]
        # A survivor stays only if it beats the reference somewhere.
        keep[keep] = (survivors < reference).any(axis=1)
        # The reference never beats itself, so restore it explicitly.
        keep[idx] = True
    return keep

class Random_Configuration_Problem:
    """Randomly generated bi-objective problem over subsets of components.

    Each component gets a random integer cost and each subset returned by
    ``get_all_k_sets(components, additivity)`` gets a random non-negative
    utility. Calling the instance on a subset returns ``[utility, -cost]``.
    """

    def __init__(self, components, additivity = 1, seed = None):
        """Draw the random costs and utilities.

        Parameters
        ----------
        components : sequence
            The available components; ``costs`` is aligned with its order.
        additivity : int, optional
            Forwarded to ``get_all_k_sets`` to choose which subsets carry a
            utility term.
        seed : int or None, optional
            When given, the draws come from a private
            ``np.random.RandomState(seed)`` so the instance is reproducible.
            When ``None`` (default) the global NumPy generator is used,
            exactly as before this parameter existed.
        """
        self.components = components
        # RandomState exposes the same randint/normal API as the np.random module.
        rng = np.random if seed is None else np.random.RandomState(seed)
        self.costs = rng.randint(1, 100, size = (len(self.components), ))
        self.utilities = {
            subset: np.abs(rng.normal(0, 10) * 10)
            for subset in get_all_k_sets(self.components, additivity)
        }

    def __str__(self):
        """Return a multi-line listing of the costs and every utility term."""
        parts = ["Model: \n", f"Costs: {self.costs} \n"]
        parts.extend(f"{u} : {self.utilities[u]} \n" for u in self.utilities)
        return "".join(parts)

    def __repr__(self):
        return self.__str__()

    def __call__(self, x):
        """Evaluate subset ``x`` and return ``np.array([utility, -cost])``.

        The utility is the sum of every stored term whose members are all in
        ``x``; the cost is the sum of the costs of the members of ``x``. The
        empty subset returns ``[-inf, -inf]``.
        """
        if len(x) == 0:
            return np.array([-np.inf, -np.inf])
        utility = sum(
            value
            for subset, value in self.utilities.items()
            if all(item in x for item in subset)
        )
        # Materialise the component order once instead of once per member of x.
        component_list = list(self.components)
        cost = sum(self.costs[component_list.index(item)] for item in x)
        return np.array([utility, -cost])
    
class MO_Objective_Function:
    """Memoising wrapper around a multi-objective function ``f``.

    Every distinct argument is evaluated once; results are kept in ``saved``
    and ``budget`` counts how many real evaluations were made.
    """

    def __init__(self, items, f):
        self.f = f
        self.items = items
        self.budget = 0          # number of cache misses, i.e. calls made to f
        self.saved = {}          # argument -> objective vector returned by f
        self.epsilon = 1e-6      # forwarded to pareto_dominate in relation()

    def relation(self):
        """Build a ``Preferences`` object from the evaluated points.

        For every ordered pair of distinct saved arguments, the dominating
        one (according to ``pareto_dominate``) is added as preferred.
        """
        preferences = Preferences(self.items)
        for i, value_i in self.saved.items():
            for j, value_j in self.saved.items():
                if i == j:
                    continue
                # Evaluate the comparison once and branch on its sign.
                dominance = pareto_dominate(value_i, value_j, self.epsilon)
                if dominance > 0:
                    preferences.add_preference(i, j)
                elif dominance < 0:
                    preferences.add_preference(j, i)
        return preferences

    def pareto_front(self):
        """Return the saved arguments whose objective vectors are efficient.

        Returns a 1-D object array of the original keys.
        """
        keys = list(self.saved.keys())
        # Saved values are negated before being handed over as "costs".
        costs = - np.array(list(self.saved.values()))
        # Fill an object array element by element: np.array(keys) would try to
        # stack tuples of different lengths, which recent NumPy rejects.
        elements = np.empty(len(keys), dtype=object)
        for position, key in enumerate(keys):
            elements[position] = key
        return elements[is_pareto_efficient_simple(costs)]

    def evaluated_elements(self):
        """Return how many distinct arguments have been evaluated."""
        return len(self.saved)

    def __call__(self, x):
        """Return ``f(x)``, computing and caching it on first use."""
        if x not in self.saved:
            self.saved[x] = self.f(x)
            self.budget += 1
        return self.saved[x]
    
def get_unifying_model(prf, init_mdl):
    """Run the ``Tree`` search from ``init_mdl`` and merge what it finds.

    Prints the initial model, builds a ``Tree`` over ``prf.items`` and
    ``prf``, opens its head node, and returns ``union`` applied to the
    tree's ``found_theta``.
    """
    print("Init model", init_mdl)
    search_tree = Tree(prf.items, prf, init_mdl)
    search_tree.head.open_node()
    found = search_tree.found_theta
    return union(found)

In [167]:
class Preference_Model:
    """Base class that turns a preference relation into labelled pair rows.

    Rows are dictionaries with keys ``"x"``, ``"y"`` and ``"order"`` (plus any
    extra keyword data) accumulated in ``self.data``.
    """

    def __init__(self, item, **kwargs):
        self.item = item
        # `fit` stores the relation under `preferences`; initialise it here so
        # the attribute exists before fitting. `preference` is kept for any
        # code that still reads the old name.
        self.preferences = None
        self.preference = None
        self.data = []

    def update_model(self, x, y, order, **kwargs):
        """Append one row describing the pair ``(x, y)`` with label ``order``."""
        D = {"x":x,"y":y, "order":order}
        D.update(kwargs)
        self.data.append(D)

    def __str__(self):
        return str(pd.DataFrame(self.data))

    def fit(self, preferences):
        """Record a labelled row, in both directions, for every ordered pair.

        Labels are ``">"``/``"<"`` when ``is_preferred`` is non-zero, ``"="``
        when ``is_indifferent`` holds, and ``"?"`` otherwise. Rows are
        appended to ``self.data``; nothing is cleared, so repeated calls
        accumulate.
        """
        self.preferences = preferences
        for x in self.preferences.subsets:
            for y in self.preferences.subsets:
                if x==y:
                    continue
                # NOTE(review): any non-zero result is read as "x preferred to
                # y". If is_preferred can return a negative value for the
                # reverse direction this labels the pair backwards - confirm.
                if self.preferences.is_preferred(x,y) != 0:
                    self.update_model(x,y,">")
                    self.update_model(y,x,"<")
                elif self.preferences.is_indifferent(x,y):
                    self.update_model(x,y,"=")
                    self.update_model(y,x,"=")
                else:
                    self.update_model(x,y,"?")
                    self.update_model(y,x,"?")

    def predict(self, x, y):
        """Placeholder; subclasses provide the actual prediction."""
        pass
    
    
class Clf_cp_mdl(Preference_Model):
    """Preference model that classifies a pair of subsets as >, < or =.

    The classifier class is supplied with the ``clf`` keyword and instantiated
    in ``train``. A pair is encoded as the vectorisation of both subsets on
    ``model`` followed by their cost difference.
    """

    def __init__(self, item, **kwargs):
        super().__init__(item, **kwargs)
        assert "clf" in kwargs, "A Classification Preference model needs a classifier to be specified with the keyword clf"
        self.clf = kwargs["clf"]
        # Integer class label used for each order symbol; "?" has no label.
        self.class_dict = {
            ">":0,
            "<":1,
            "=":2,
        }
        self.df = None
        self.X = None
        self.y = None
        self.clf_mdl = None
        self.model = None
        self.cost_vector = None
        self.items = None

    def _pair_features(self, x, y, singletons):
        """Build the feature vector of the pair ``(x, y)``."""
        v_x = vectorize_subset(x, self.model)
        v_y = vectorize_subset(y, self.model)
        cost_dif = self.cost_vector @ (vectorize_subset(x, singletons) - vectorize_subset(y, singletons))
        return np.array(list(v_x) + list(v_y) + [cost_dif])

    def train(self,preferences, model, cost_vector, **kwargs):
        """Fit the classifier on the comparable pairs of ``preferences``.

        Parameters
        ----------
        preferences : object
            Passed to ``fit``; its ``items`` attribute is also read.
        model : object
            Second argument given to ``vectorize_subset`` for both subsets.
        cost_vector : array-like
            Multiplied with the singleton-vectorisation difference of a pair.
        **kwargs
            Forwarded to the classifier constructor.
        """
        self.fit(preferences)
        self.df = pd.DataFrame(self.data)
        self.model = model
        self.cost_vector = cost_vector
        self.items = preferences.items
        singletons = get_all_k_sets(preferences.items, 1)
        rows = []
        classes = []
        for x, y, o in zip(self.df["x"], self.df["y"], self.df["order"]):
            if o == "?":
                # Pairs with no recorded order carry no label to learn from.
                continue
            rows.append(self._pair_features(x, y, singletons))
            classes.append(self.class_dict[o])
        self.X = np.array(rows)
        self.y = classes
        self.clf_mdl = self.clf(**kwargs)
        self.clf_mdl.fit(self.X,self.y)

    def predict(self, x, y):
        """Return the predicted class label for the pair ``(x, y)``."""
        singletons = get_all_k_sets(self.items, 1)
        line = self._pair_features(x, y, singletons).reshape((1,-1))
        return self.clf_mdl.predict(line)[0]

    def score(self, subsets, preferences):
        """Return ``intersection_size / union_size`` over ordered pairs.

        A pair enters the union when the prediction differs from 3 or the
        pair is not incomparable in ``preferences``; it enters the
        intersection when the predicted label matches the true relation.
        Returns 0.0 when no pair enters the union (for example with fewer
        than two distinct subsets). The four counters are printed.
        """
        union_size = 0
        intersection_size = 0
        predicted_mdl = 0
        true_pref = 0
        for x in subsets:
            for y in subsets:
                if x == y:
                    continue
                # Query the classifier once per pair and reuse the label.
                prediction = self.predict(x,y)
                # NOTE(review): labels learned in train() come from class_dict
                # (0, 1, 2), so 3 looks unreachable and this is always true.
                has_prediction = prediction != 3
                comparable = not preferences.is_incomparable(x,y)
                if has_prediction:
                    predicted_mdl += 1
                if comparable:
                    true_pref += 1
                if has_prediction or comparable:
                    union_size += 1
                if preferences.is_preferred(x,y) == 1 and prediction == 0:
                    intersection_size += 1
                elif preferences.is_preferred(y,x) == 1 and prediction == 1:
                    intersection_size += 1
                elif preferences.is_indifferent(y,x) and prediction == 2:
                    intersection_size += 1
        print("Intersection size:", intersection_size)
        print("Union size", union_size)
        print("Predicted mdl: ", predicted_mdl)
        print("true pref:", true_pref)
        if union_size == 0:
            # Nothing was compared; avoid dividing by zero.
            return 0.0
        return intersection_size/union_size

In [168]:
# Problem setup: ten components identified by the integers 0..9.
n_items = 10
it = np.arange(n_items)
# NOTE(review): the problem is drawn from NumPy's global generator and no seed
# is set in this notebook, so the costs shown below change on every run.
RCP = Random_Configuration_Problem(it, additivity=6)
# Two separate caching wrappers around the same objective: one accumulates
# the training evaluations, the other the evaluations used for scoring.
train_f = MO_Objective_Function(it, RCP)
ground_t = MO_Objective_Function(it, RCP)
RCP.costs

array([14, 16, 99, 15, 45, 99, 45, 35, 20, 93])

In [169]:
# Baseline feature model handed to the classifier: get_all_k_sets with k = 1.
mdl = get_all_k_sets(it, 1)

# Evaluate every sampled training subset with both wrappers so that the
# ground truth also knows the training points.
train_sets = sample_subsets(it, n_subsets=200)
for s in train_sets:
    train_f(s)
    ground_t(s)

print("Building function ended")
# relation() compares every ordered pair of evaluated subsets, so build it
# once and reuse it for both the report and the training call.
train_relation = train_f.relation()
print("subsets for training: ", len(train_relation.subsets))
PM = Clf_cp_mdl(it, clf=RandomForestClassifier)
PM.train(train_relation, mdl, RCP.costs)

Building function ended
subsets for training:  199


In [170]:
# Sample 30 evaluation subsets and add their objective values to ground_t,
# whose cache therefore holds both the training and the test subsets.
test_set = sample_subsets(it, n_subsets=30)
for s in test_set:
    ground_t(s)

In [171]:
# Score the random-forest model on the test subsets against the relation
# built from every point ground_t has evaluated (intersection / union).
PM.score(test_set, ground_t.relation())

Intersection size: 36
Union size 870
Predicted mdl:  870
true pref: 94


0.041379310344827586

In [172]:
# Size of the training relation as reported by the Preferences object's len().
len(train_f.relation())

2916

In [163]:
%%capture
# Search for a model explaining the training relation, starting from the k = 1 sets.
# NOTE(review): solver messages still appear below despite the capture magic -
# presumably they are written outside Python's stdout; confirm.
theta = get_unifying_model(train_f.relation(), get_all_k_sets(it, 1))

Long-step dual simplex will be used
Long-step dual simplex will be used
Long-step dual simplex will be used
Long-step dual simplex will be used
Long-step dual simplex will be used
Long-step dual simplex will be used
Long-step dual simplex will be used
Long-step dual simplex will be used
Long-step dual simplex will be used
Long-step dual simplex will be used
Long-step dual simplex will be used
Long-step dual simplex will be used
Long-step dual simplex will be used
Long-step dual simplex will be used
Long-step dual simplex will be used
Long-step dual simplex will be used
Long-step dual simplex will be used
Long-step dual simplex will be used
Long-step dual simplex will be used
Long-step dual simplex will be used
Long-step dual simplex will be used
Long-step dual simplex will be used
Long-step dual simplex will be used
Long-step dual simplex will be used
Long-step dual simplex will be used
Long-step dual simplex will be used
Long-step dual simplex will be used
Long-step dual simplex will 

In [164]:
# Display the model returned by get_unifying_model.
theta

[(0,), (1,), (2,), (3,), (4,), (5,), (6,), (7,), (8,), (0, 3), (6, 8), (7, 9)]

In [154]:
# Second experiment: an SVC trained on features built from theta.
# This rebinds PM, so the random-forest model trained earlier is no longer
# reachable under that name.
PM = Clf_cp_mdl(it, clf=svm.SVC)
PM.train(train_f.relation(), theta , RCP.costs)

In [155]:
# Score the SVC model the same way as the random-forest model.
# NOTE(review): the recorded execution counts of this cell and the previous
# ones (154-164) are lower than those of the cells above (165-172), so the
# outputs shown may come from different random draws - rerun top to bottom
# before comparing the two scores.
PM.score(test_set, ground_t.relation())

Intersection size: 55
Union size 870
Predicted mdl:  870
true pref: 124


0.06321839080459771