In [1]:
import pandas as pd
import numpy as np
from collections import Counter
from PMTK.sampling.preferences_sampler import *
from PMTK.pref.preferences import *
from PMTK.utility.utility_solver import *
from PMTK.sampling.subset_samplers import *
from PMTK.utility.model_solver import *
from PMTK.utility.subset_finder import *
from PMTK.sampling.gibbs import *
from PMTK.data.film_dataset import *
from PMTK.sampling.decider import *
from PMTK.utility.heuristics import * 
from surprise import SVD
from surprise import Dataset
from surprise.model_selection import cross_validate


In [2]:
# Problem size used to build the film dataset below.
n_users = 10
n_films = 50
# Film_Dataset, build_approx_theta and EMPTY_SET are not defined in this
# notebook; presumably they come from the PMTK star imports above.
# NOTE(review): the extra "+ 1" user looks like a slot for a targeted user — confirm.
fd = Film_Dataset(n_films,n_users + 1)
prf = fd.get_preferences_items(n_users + 1)
# Heuristic theta built from the preferences, seeded with [EMPTY_SET]
# (exact semantics live in PMTK — confirm there).
t_heuristic = build_approx_theta(prf, [EMPTY_SET])

In [3]:
# Feed the preferences and the heuristic theta to get_kernels_lex3
# (not defined in this notebook; presumably a PMTK helper — confirm what it returns).
t_mins = get_kernels_lex3(prf,t_heuristic)

In [17]:
class Simple_DM:
    """Minimal decision maker: stores rated alternatives and derives strict preferences.

    Attributes:
        alternatives: list of rated alternatives (tuples of item indices when
            added through ``rate_binary_alternative``).
        rates: list parallel to ``alternatives``; ``rates[k]`` is the rate of
            ``alternatives[k]``.
        items: the item universe handed to ``Preferences`` in ``preferences()``.
    """

    def __init__(self, items):
        self.alternatives = []
        self.rates = []
        self.items = items

    def rate_alternative(self, alt, rate):
        """Record ``rate`` for ``alt``, replacing any rate previously stored for it.

        A re-rated alternative is moved to the end of the lists.
        """
        if alt in self.alternatives:
            i = self.alternatives.index(alt)
            # Delete by position. list.remove(value) would drop the FIRST equal
            # rate, which may belong to another alternative and would leave the
            # two parallel lists misaligned.
            del self.alternatives[i]
            del self.rates[i]
        self.alternatives.append(alt)
        self.rates.append(rate)

    def rate_binary_alternative(self, b_alternative, rate):
        """Rate an alternative given as a 0/1 indicator vector.

        The vector is converted to the tuple of positions whose value is 1 and
        then handled exactly like ``rate_alternative``.
        """
        # np.asarray lets plain lists work too; comparing a list to 1 directly
        # would yield a scalar False and silently produce an empty alternative.
        alt = tuple(np.where(np.asarray(b_alternative) == 1)[0])
        self.rate_alternative(alt, rate)

    def preferences(self):
        """Build a ``Preferences`` object with one entry per strictly ordered pair.

        For every ordered pair of rated alternatives whose first rate is
        strictly greater than the second, ``add_preference(first, second)`` is
        called. Equal rates produce no entry.
        NOTE(review): ``Preferences`` is not defined in this file (presumably a
        PMTK class); the meaning of the argument order should be confirmed there.
        """
        prf = Preferences(self.items)
        rated = list(zip(self.alternatives, self.rates))
        for alt_1, r_1 in rated:
            for alt_2, r_2 in rated:
                if r_1 > r_2:
                    prf.add_preference(alt_1, alt_2)
        return prf

In [5]:
def rates_to_binary(rates_matrix, targeted_user, rate_encoder):
    """Encode the other users' ratings as binary features for the targeted user.

    Args:
        rates_matrix: 2-D numpy array indexed as [row, user column]; the value
            -1 in the targeted column marks a row the targeted user did not rate.
        targeted_user: column index of the targeted user (negative indices are
            supported).
        rate_encoder: callable mapping one rating to a sequence of bits, or to
            None to contribute nothing for that rating.

    Returns:
        (encoded_mat, rates): the encoded feature rows and the targeted user's
        ratings, both restricted to rows where the targeted rating is not -1.
    """
    # np.delete drops exactly the targeted column, including for negative
    # indices, where slicing "[:t]" and "[t+1:]" would duplicate columns.
    X = np.delete(rates_matrix, targeted_user, axis=1)
    rates = rates_matrix[:, targeted_user]

    encoded_rows = []
    for row in X:
        bits = []
        for value in row:
            # Call the encoder once per rating and reuse its result.
            code = rate_encoder(value)
            if code is not None:
                bits.extend(code)
        encoded_rows.append(np.array(bits))
    encoded_mat = np.array(encoded_rows)

    # Keep only the rows actually rated by the targeted user.
    pos = np.where(rates != -1)[0]
    rates = rates[pos]
    encoded_mat = encoded_mat[pos, :]
    return encoded_mat, rates

In [6]:
def rates_enc(x):
    """Map a single rating to a two-bit code.

    -1 and ratings within [2, 3] give [0, 0]; ratings above 3 give [1, 1];
    any other rating below 2 gives [1, 0]. A value that satisfies none of the
    comparisons (e.g. NaN) gives None.
    """
    # -1 must be tested before the "below 2" branch, which it would also match.
    if x == -1 or 2 <= x <= 3:
        return [0, 0]
    if x > 3:
        return [1, 1]
    return [1, 0] if x < 2 else None

In [7]:
def get_rates_matrix(f_path, n_users, n_films, targeted_user):
    """Build a films x users rating matrix from the most active users and most rated films.

    NOTE: a later cell in this notebook redefines ``get_rates_matrix`` with a
    different signature, which shadows this definition once that cell is run.

    Args:
        f_path: path to a CSV with ``userId``, ``movieId`` and ``rating`` columns.
        n_users: number of users to keep (those with the most ratings).
        n_films: number of films to keep (those with the most ratings).
        targeted_user: unused here; kept so existing calls stay valid.

    Returns:
        Float array of shape (len(films), len(users)); entry [f, u] is the
        first rating found for that film/user pair, or -1 when there is none.
    """
    df = pd.read_csv(f_path)

    def top_ids(counts, limit):
        # sorted() is stable, so ties keep the ascending-id order of the groupby.
        pairs = zip(counts.index.tolist(), counts.tolist())
        ranked = sorted(pairs, key=lambda pair: pair[1], reverse=True)
        return [key for key, _ in ranked[:limit]]

    users_id = top_ids(df.groupby("userId")["movieId"].count(), n_users)
    # Key films by movieId. Keying by the count would collapse films that share
    # a count and return counts instead of film ids.
    films_id = top_ids(df.groupby("movieId")["userId"].count(), n_films)

    film_row = {film: row for row, film in enumerate(films_id)}
    user_col = {user: col for col, user in enumerate(users_id)}

    # Start from the "missing" marker and fill in the known ratings in one pass.
    rates_matrix = np.full((len(films_id), len(users_id)), -1.0)
    wanted = df[df.movieId.isin(films_id) & df.userId.isin(users_id)]
    # When a pair appears several times, the first row in file order wins.
    wanted = wanted.drop_duplicates(subset=["movieId", "userId"], keep="first")
    for film, user, rating in zip(wanted["movieId"].tolist(),
                                  wanted["userId"].tolist(),
                                  wanted["rating"].tolist()):
        rates_matrix[film_row[film], user_col[user]] = rating

    return rates_matrix

In [8]:
def get_rates_matrix(f_path, targeted_user, bound_films = 50, bound_users = 5):
    """Build a films x users rating matrix centred on one targeted user.

    The targeted user is the one at position ``targeted_user`` when users are
    ranked by their number of strictly positive ratings (descending). Films are
    the ``bound_films`` most rated films among those the targeted user rated.
    Users are the ``bound_users`` users with the most ratings on those films,
    excluding the targeted user, who is placed first (column 0).

    Args:
        f_path: path to a CSV with ``userId``, ``movieId`` and ``rating`` columns.
        targeted_user: rank of the targeted user in the ordering described above.
        bound_films: maximum number of films (rows).
        bound_users: maximum number of users besides the targeted one.

    Returns:
        Float array of shape (len(films), len(users)); entry [f, u] is the
        first rating found for that film/user pair, or -1 when there is none.

    Raises:
        IndexError: if ``targeted_user`` is out of range of the ranked users.

    Side effects:
        Prints the selected user ids, then the selected film ids.
    """
    df = pd.read_csv(f_path)

    def ranked_ids(counts):
        # sorted() is stable, so ties keep the ascending-id order of the groupby.
        pairs = zip(counts.index.tolist(), counts.tolist())
        return [key for key, _ in sorted(pairs, key=lambda pair: pair[1], reverse=True)]

    # Targeted user: ranked by number of ratings > 0.
    positive = df[df.rating > 0]
    targeted_id = ranked_ids(positive.groupby("userId")["movieId"].count())[targeted_user]

    # Films: most rated (by anyone) among the films the targeted user rated.
    target_df = df[df.userId == targeted_id]
    on_target_films = df[df.movieId.isin(target_df.movieId)]
    films_id = ranked_ids(on_target_films.groupby("movieId")["userId"].count())[:bound_films]

    # Users: most ratings on the selected films; the target always goes first.
    on_films = df[df.movieId.isin(films_id)]
    ranked_users = ranked_ids(on_films.groupby("userId")["movieId"].count())
    users_id = ranked_users[:bound_users]
    if targeted_id in users_id:
        # Take one extra so that bound_users other users remain after removal.
        users_id = ranked_users[:bound_users + 1]
        users_id.remove(targeted_id)
    users_id = [targeted_id] + users_id

    print(users_id)
    print(films_id)

    film_row = {film: row for row, film in enumerate(films_id)}
    user_col = {user: col for col, user in enumerate(users_id)}

    # Start from the "missing" marker and fill in the known ratings in one pass,
    # instead of filtering the whole frame once per matrix cell.
    rates_matrix = np.full((len(films_id), len(users_id)), -1.0)
    wanted = df[df.movieId.isin(films_id) & df.userId.isin(users_id)]
    # When a pair appears several times, the first row in file order wins.
    wanted = wanted.drop_duplicates(subset=["movieId", "userId"], keep="first")
    for film, user, rating in zip(wanted["movieId"].tolist(),
                                  wanted["userId"].tolist(),
                                  wanted["rating"].tolist()):
        rates_matrix[film_row[film], user_col[user]] = rating

    return rates_matrix

In [9]:
# Uses the most recently defined get_rates_matrix (the one taking
# bound_films / bound_users): targeted_user=1 with the default bounds.
# The call also prints the selected user ids and film ids.
rates_mat = get_rates_matrix("data/ratings.csv", 1)

[599, 414, 68, 274, 483, 590]
[356, 318, 296, 593, 2571, 260, 480, 110, 589, 2959, 1, 1196, 50, 2858, 47, 780, 150, 1198, 4993, 1210, 858, 457, 592, 2028, 5952, 7153, 588, 608, 2762, 380, 32, 364, 377, 1270, 3578, 4306, 1580, 648, 344, 4226, 367, 6539, 58559, 595, 1214, 1036, 165, 500, 1265, 79132]


In [10]:
# Display the rating matrix (rows: films, columns: users; -1 marks a missing rating).
rates_mat

array([[ 3.5,  5. ,  3.5,  4.5,  5. ,  5. ],
       [ 4. ,  5. ,  3. ,  4.5,  5. ,  4.5],
       [ 5. ,  5. ,  2. ,  5. ,  5. ,  4.5],
       [ 3. ,  4. ,  3.5,  4. , -1. ,  3.5],
       [ 5. ,  5. ,  4.5,  4. ,  4.5,  4. ],
       [ 5. ,  5. ,  5. ,  3. ,  4.5,  5. ],
       [ 4. ,  4. ,  3.5,  3.5,  4. ,  4. ],
       [ 3.5,  5. ,  2.5,  4.5,  4. ,  4. ],
       [ 4.5,  5. ,  3.5,  4.5,  4. ,  4.5],
       [ 5. ,  5. ,  2.5,  5. ,  4.5,  3.5],
       [ 3. ,  4. ,  2.5,  4. ,  4. ,  4. ],
       [ 5. ,  5. ,  5. ,  4.5,  4.5,  5. ],
       [ 3.5,  5. ,  3. ,  4. ,  4.5,  4.5],
       [ 5. ,  5. ,  5. ,  5. ,  4. ,  3. ],
       [ 4. ,  4. ,  4. ,  4. ,  3.5,  3. ],
       [ 3.5,  4. ,  4. ,  3.5,  3. ,  4. ],
       [ 2.5,  4. ,  3. , -1. ,  2. ,  4.5],
       [ 3.5,  5. ,  5. ,  4. ,  4. ,  5. ],
       [ 4. ,  5. ,  4.5,  3. ,  4.5,  4.5],
       [ 5. ,  5. ,  5. ,  4. ,  4.5,  4.5],
       [ 4. ,  5. ,  4. ,  3.5,  4. ,  5. ],
       [ 3. ,  5. ,  4.5,  3.5,  2.5,  4. ],
       [ 3

In [13]:
# Column 0 holds the targeted user (get_rates_matrix puts the target first);
# the remaining columns are encoded to bits with rates_enc.
b_mat, rates = rates_to_binary(rates_mat, 0, rates_enc)

In [18]:
# The items of the decision maker are the column indices of the binary matrix.
SDM = Simple_DM(np.arange(b_mat.shape[1]))

In [19]:
# Register each binary row as an alternative, rated with the targeted user's rating.
for b_v, r in zip(b_mat, rates):
    SDM.rate_binary_alternative(b_v, r)

In [20]:
# Build the pairwise preferences implied by the recorded rates.
# NOTE(review): the recorded run of this cell ended with
# "NameError: name 'TypeException' is not defined"; that name does not appear
# in this notebook, so it is presumably raised inside PMTK — confirm and fix there.
SDM.preferences()

NameError: name 'TypeException' is not defined