In [2]:
import pandas as pd
import numpy as np
from collections import Counter
from PMTK.sampling.preferences_sampler import *
from PMTK.pref.preferences import *
from PMTK.utility.utility_solver import *
from PMTK.sampling.subset_samplers import *
from PMTK.utility.model_solver import *
from PMTK.utility.subset_finder import *
from PMTK.sampling.gibbs import *

class Film_Dataset:
    
    def __init__(self, n_films, n_users):
        self.n_films = n_films
        self.n_users = n_users
        
        df = pd.read_csv("ratings.csv")
        df = df.groupby("userId").count().reset_index()
        count_user = {u:m for u,m in zip(df["userId"], df["movieId"])}
        
        df = pd.read_csv("ratings.csv")
        df = df.groupby("movieId").count().reset_index()
        
        count_films = {u:m for u,m in zip(df["userId"], df["movieId"])}
        
        count_user = {i:j for i,j in sorted(count_user.items(), key = lambda x:x[1], reverse= True)}
        count_films = {i:j for i,j in sorted(count_films.items(), key = lambda x:x[1], reverse= True)}
        df = pd.read_csv("ratings.csv")
        self.users = list(count_user.keys())[:n_users+1]
        self.films = list(count_films.keys())[:n_films+1]
        rates_matrix = np.zeros((len(self.films), len(self.users)))
        for i_m, m in enumerate(self.films):
            for i_u, u in enumerate(self.users):
                d = df[(df.movieId == m) & (df.userId == u)]
                if d.shape[0] == 0:
                    r = -1
                else:
                    r = d["rating"].values[0]
                rates_matrix[i_m, i_u] = r
        self.rates_matrix = rates_matrix
        self.__remove_empty()
        
    def get_users(self):
        return self.users
    
    def binarize_vector(self,v):
        L = []
        for i in v:
            L.append(1 if i >= 2.5 else 0)
            L.append(1 if i < 2.5 else 0)
        return np.array(L)
    
    def get_subset(self, v):
        t = np.array(v)
        return tuple(np.where(t == 1)[0])
    
    def get_preferences_items(self, user):
        u_rates = self.rates_matrix[:, user]
        others = np.hstack([self.rates_matrix[:, :user], self.rates_matrix[:, user+1:]])
        v = np.array([self.binarize_vector(v) for v in others])
        D = {}
        for k,r in zip(v, u_rates):
            D[tuple(k)] = D.get(tuple(k), []) + [r]
        D2 = {}
        for i in D:
            L = D[i]
            if all(i == -1 for i in L):
                continue
            elif all(i != -1 for i in L):
                D2[self.get_subset(i)] = np.mean(L)
            else:
                D2[self.get_subset(i)] = np.mean([i for i in L if i != -1])
                
        items = list(np.arange(max(max(i) for i in D2.keys())))
        print(items)
        prf = Preferences(items)
        for s_1 in D2:
            for s_2 in D2:
                if all(i in s_1 for i in s_2) and all(i in s_2 for i in s_1):
                    continue
                if D2[s_1] > D2[s_2]:
                    prf.add_preference(s_1, s_2)
                elif D2[s_2] > D2[s_1]:
                    prf.add_preference(s_2, s_1)
        return prf, D2
                
    def __remove_empty(self):
        v = np.where(self.rates_matrix.sum(axis = 1) > -self.n_users)[0]
        rates = self.rates_matrix[v, :]

In [3]:
fd = Film_Dataset(100,4)
prf, D2 = fd.get_preferences_items(2)
#print(D2[(0, 2, 5, 6, 9, 10, 12, 15, 17)])
prf.items

[0, 1, 2, 3, 4, 5, 6]


[0, 1, 2, 3, 4, 5, 6]

In [4]:
theta = [EMPTY_SET]
theta_mins, stats = get_min_thetas(prf, theta)

Calling unsat on 
It gave the solution:  1
 === Call with theta=  [] ===
Theta: []
Theta min: []
Connivent is [((0, 2, 5), (1, 2, 5))]
Calling unsat on 
It gave the solution:  1
 === Call with theta=  [(0,)] ===
Theta: [(0,)]
Theta min: []
Connivent is [((1, 2, 5), (1, 3, 5))]
Calling unsat on 
It gave the solution:  1
 === Call with theta=  [(0,), (2,)] ===
Theta: [(0,), (2,)]
Theta min: []
Connivent is [((1, 2, 5), (1, 3, 5)), ((0, 3, 5), (0, 2, 5))]
Calling unsat on 
It gave the solution:  1
 === Call with theta=  [(0,), (2,), (0, 3)] ===
Theta: [(0,), (2,), (0, 3)]
Theta min: []
Connivent is [((1, 3, 5), (0, 3, 4)), ((1, 2, 5), (1, 3, 5)), ((0, 3, 5), (0, 2, 5)), ((0, 2, 5), (1, 2, 5))]
Calling unsat on 
It gave the solution:  1
 === Call with theta=  [(0,), (2,), (0, 3), (5,)] ===
Theta: [(0,), (2,), (0, 3), (5,)]
Theta min: []
Connivent is [((0, 2, 5), (0, 2, 5, 6))]
Calling unsat on 
It gave the solution:  1
 === Call with theta=  [(0,), (2,), (0, 3), (5,), (6,)] ===
Theta: [(0,

In [5]:
theta = union(theta_mins)
theta 

[(0,), (2,), (5,), (3, 5), (1, 5)]

In [6]:
mdl = utility_polyhedron(prf.items, theta, prf)
ordinal_peferences(prf.items, prf.subsets, mdl)

x=(0, 2, 4, 6), y = (0, 3, 4), e1 = -2.0, e2 = 1.0
x=(0, 2, 4, 6), y = (1, 3, 4), e1 = -4.0, e2 = 1.0
x=(0, 2, 4, 6), y = (1, 3, 5), e1 = -1.0, e2 = 1.0
x=(0, 2, 4, 6), y = (0, 3, 5, 6), e1 = 0.0, e2 = 1.0
x=(0, 2, 4, 6), y = (0, 2, 5, 6), e1 = 1.0, e2 = 1.0
x=(0, 2, 4, 6), y = (0, 2, 5), e1 = 1.0, e2 = 1.0
x=(0, 2, 4, 6), y = (0, 3, 5), e1 = 0.0, e2 = 1.0
x=(0, 2, 4, 6), y = (1, 2, 4, 6), e1 = -2.0, e2 = 1.0
x=(0, 2, 4, 6), y = (1, 2, 5), e1 = 0.0, e2 = 1.0
x=(0, 2, 4, 6), y = (1, 2, 5, 6), e1 = 0.0, e2 = 1.0
x=(0, 3, 4), y = (1, 3, 4), e1 = -2.0, e2 = 1.0
x=(0, 3, 4), y = (1, 3, 5), e1 = 1.0, e2 = -1.0
x=(0, 3, 4), y = (0, 3, 5, 6), e1 = 1.0, e2 = -2.0
x=(0, 3, 4), y = (0, 2, 5, 6), e1 = 1.0, e2 = -2.0
x=(0, 3, 4), y = (0, 2, 5), e1 = 1.0, e2 = -2.0
x=(0, 3, 4), y = (0, 3, 5), e1 = 1.0, e2 = -2.0
x=(0, 3, 4), y = (1, 2, 4, 6), e1 = 1.0, e2 = 1.0
x=(0, 3, 4), y = (1, 2, 5), e1 = 1.0, e2 = -1.0
x=(0, 3, 4), y = (1, 2, 5, 6), e1 = 1.0, e2 = -1.0
x=(1, 3, 4), y = (1, 3, 5), e1 = 1.0, e2 

(0, 2, 4, 6) > (0, 3, 4) 
(0, 2, 4, 6) > (1, 3, 4) 
(0, 2, 4, 6) > (1, 3, 5) 
(0, 2, 4, 6) > (1, 2, 4, 6) 
(0, 3, 4) > (1, 3, 4) 
(1, 3, 5) > (0, 3, 4) 
(0, 3, 5, 6) > (0, 3, 4) 
(0, 2, 5, 6) > (0, 3, 4) 
(0, 2, 5) > (0, 3, 4) 
(0, 3, 5) > (0, 3, 4) 
(1, 2, 5) > (0, 3, 4) 
(1, 2, 5, 6) > (0, 3, 4) 
(1, 3, 5) > (1, 3, 4) 
(0, 3, 5, 6) > (1, 3, 4) 
(0, 2, 5, 6) > (1, 3, 4) 
(0, 2, 5) > (1, 3, 4) 
(0, 3, 5) > (1, 3, 4) 
(1, 2, 4, 6) > (1, 3, 4) 
(1, 2, 5) > (1, 3, 4) 
(1, 2, 5, 6) > (1, 3, 4) 
(0, 3, 5, 6) > (1, 3, 5) 
(0, 2, 5, 6) > (1, 3, 5) 
(0, 2, 5) > (1, 3, 5) 
(0, 3, 5) > (1, 3, 5) 
(1, 3, 5) > (1, 2, 4, 6) 
(0, 3, 5, 6) > (1, 2, 4, 6) 
(0, 2, 5, 6) > (1, 2, 4, 6) 
(0, 2, 5, 6) > (1, 2, 5) 
(0, 2, 5, 6) > (1, 2, 5, 6) 
(0, 2, 5) > (1, 2, 4, 6) 
(0, 2, 5) > (1, 2, 5) 
(0, 2, 5) > (1, 2, 5, 6) 
(0, 3, 5) > (1, 2, 4, 6) 
(1, 2, 5) > (1, 2, 4, 6) 
(1, 2, 5, 6) > (1, 2, 4, 6) 
(0, 3, 5, 6) = (0, 3, 5) 
(0, 2, 5, 6) = (0, 2, 5) 
(1, 2, 5) = (1, 2, 5, 6) 