In [1]:
import json
from tqdm import tqdm

def load_Yelp_data(path):
    """Load users, businesses and reviews from the Yelp JSON-lines dump.

    Args:
        path: Directory holding the three ``yelp_academic_dataset_*.json``
            files. A directory is accepted with or without a trailing
            separator; any other value is used as a plain string prefix,
            exactly as before.

    Returns:
        A tuple ``(edge_index, final_user_set, final_item_set, edge_feature)``:
        * edge_index: user_id -> list of reviewed business_ids, one entry per
          review line (repeat reviews repeat the business).
        * final_user_set: user_id -> the user record without its ``user_id``.
        * final_item_set: business_id -> the record without ``business_id``.
        * edge_feature: ``"<user_id>|<business_id>"`` -> dict with ``date``,
          ``text``, ``rate`` and ``rate_level``. A later review of the same
          pair overwrites the earlier one.
    """
    # Local import so this cell does not rely on an import made further down.
    import os

    base = os.fspath(path)

    def iter_records(filename):
        # Join properly when `path` is a directory; otherwise keep the
        # original prefix concatenation so existing callers are unaffected.
        if os.path.isdir(base):
            full_path = os.path.join(base, filename)
        else:
            full_path = base + filename
        # Stream the file instead of readlines(): the review file has millions
        # of lines and does not need to be held in memory as a list.
        with open(full_path, 'r', encoding='utf-8') as f:
            for line in tqdm(f, desc=filename):
                if line.strip():  # tolerate blank lines
                    yield json.loads(line)

    final_user_set = {}
    for record in iter_records('yelp_academic_dataset_user.json'):
        user_id = record.pop('user_id')
        final_user_set[user_id] = record

    final_item_set = {}
    for record in iter_records('yelp_academic_dataset_business.json'):
        business_id = record.pop('business_id')
        final_item_set[business_id] = record

    edge_index = {}
    edge_feature = {}
    for review in iter_records('yelp_academic_dataset_review.json'):
        current_business = review['business_id']
        current_user = review['user_id']
        # Total number of funny/useful/cool votes the review received.
        cool_level = int(review['funny']) + int(review['useful']) + int(review['cool'])
        edge_feature[str(current_user) + '|' + str(current_business)] = {
            'date': review['date'],
            'text': review['text'],
            'rate': review['stars'],
            'rate_level': cool_level,
        }
        edge_index.setdefault(current_user, []).append(current_business)

    return edge_index, final_user_set, final_item_set, edge_feature

In [2]:
# Parse the user, business and review files under 'yelp_dataset/'.
# The three progress bars below correspond to those three files, in that order.
edge_index, final_user_feature_dic, final_item_feature_dic, edge_feature = load_Yelp_data('yelp_dataset/')

100%|█████████████████████████████████████████████████████████████████████| 2189457/2189457 [00:28<00:00, 77921.36it/s]
100%|███████████████████████████████████████████████████████████████████████| 160585/160585 [00:03<00:00, 50583.18it/s]
100%|█████████████████████████████████████████████████████████████████████| 8635403/8635403 [05:19<00:00, 27055.85it/s]


In [3]:
import ast
def attribute_analysis(attr):
    """Render a business ``attributes`` mapping as plain English sentences.

    Args:
        attr: Mapping of attribute name -> value. A value is either a scalar
            (usually a string such as ``"True"``) or a nested mapping, given
            as a dict or as the string form of a Python dict literal.

    Returns:
        One string. Scalar attributes become ``"<key> is <value>. "``; nested
        ones become ``"The business contains <key> where "`` followed by
        ``"<sub_key> is <sub_value>. "`` for every entry.
    """
    final_str = ''
    for key, value in attr.items():
        nested = None
        if isinstance(value, dict):
            nested = value
        elif isinstance(value, str) and (value[:1] == '{' or value[-1:] == '}'):
            # Same trigger as before (starts with '{' or ends with '}'), but a
            # string that does not parse to a dict now falls back to the plain
            # sentence instead of raising. Slicing keeps '' from raising too.
            try:
                parsed = ast.literal_eval(value)
            except (ValueError, SyntaxError):
                parsed = None
            if isinstance(parsed, dict):
                # The former JSON dump/load round-trip is dropped: for dicts of
                # scalars it did not change the formatted text.
                nested = parsed

        if nested is None:
            final_str += '{} is {}. '.format(key, value)
        else:
            final_str += 'The business contains {} where '.format(key)
            for sub_key, sub_value in nested.items():
                final_str += '{} is {}. '.format(sub_key, sub_value)
    return final_str

def feature_transformation(final_feature):
    """Turn raw Yelp user or business records into model-ready features.

    Args:
        final_feature: Mapping of id -> raw record. A record containing a
            ``'fans'`` key is treated as a user, anything else as a business.

    Returns:
        Mapping of id -> feature. Users get a list of 19 values:
        ``[review_count, useful, funny, cool, elite_period, friends_count]``
        followed by the values of the record's last 13 keys. Businesses get a
        single descriptive text string.
    """
    final_feature_dic = {}
    for key_name in tqdm(list(final_feature.keys())):
        feature = final_feature[key_name]
        if 'fans' in feature:  # User feature
            # Span between first and last elite year; parsed once, blanks skipped.
            elite_years = [int(year) for year in feature['elite'].split(',') if year.strip()]
            elite_period = max(elite_years) - min(elite_years) if elite_years else 0

            # ''.split(', ') yields [''], which used to count as one friend.
            # NOTE(review): the literal 'None' is also treated as "no friends";
            # confirm that this is how the dump encodes an empty friend list.
            friends_raw = feature['friends']
            if friends_raw.strip() in ('', 'None'):
                friends_count = 0
            else:
                friends_count = len(friends_raw.split(', '))

            # Relies on key order: takes whatever the record's last 13 keys are.
            # NOTE(review): presumably fans, average_stars and the compliment_*
            # counters; verify against the dataset schema.
            valid_user_feature = [feature[i] for i in list(feature.keys())[-13:]]
            temp_feature = [feature['review_count'], feature['useful'], feature['funny'],
                            feature['cool'], elite_period, friends_count]
            final_feature_dic[key_name] = temp_feature + valid_user_feature
        else:
            attributes = feature['attributes']
            categories = 'The business offers {}. '.format(feature['categories'])
            current_address_information = 'The business {} located at {},{},{},{}. '.format(
                feature['name'], feature['address'], feature['city'],
                feature['state'], feature['postal_code'])
            if attributes is not None:
                attributes_information = attribute_analysis(attributes)
            else:
                attributes_information = ''
            final_feature_dic[key_name] = categories + current_address_information + attributes_information
    return final_feature_dic

In [4]:
# Build the per-business feature for every loaded business record. These records
# are expected to take the text-description branch of feature_transformation.
final_item_feature = feature_transformation(final_item_feature_dic)

100%|███████████████████████████████████████████████████████████████████████| 160585/160585 [00:08<00:00, 19261.71it/s]


In [5]:
import torch
import math
import torch.nn as nn
import numpy as np
import torch.nn.functional as F
from torch.nn import Linear, Conv1d
from torch_geometric.nn import GCNConv, RGCNConv, global_sort_pool, global_add_pool
from torch_geometric.utils import dropout_adj
def scaled_dot_product(q, k, v, mask=None):
    """Scaled dot-product attention.

    Args:
        q, k, v: Query, key and value tensors whose last two dimensions are
            ``[SeqLen, Dims]``; leading dimensions are treated as batch/head.
        mask: Optional mask in which zero/False marks positions that must not
            be attended to. A 2-D mask is interpreted as a key-padding mask
            ``[Batch, SeqLen]`` (the form ``IGMC.forward`` builds) and is
            broadcast over heads and query positions. A 3-D mask
            ``[Batch, SeqLen, SeqLen]`` is broadcast over heads when the
            logits are 4-D. Any other shape must already broadcast against
            the attention logits.

    Returns:
        ``(values, attention)``: the attended values and the attention weights.
    """
    d_k = q.size()[-1]
    attn_logits = torch.matmul(q, k.transpose(-2, -1))
    attn_logits = attn_logits / math.sqrt(d_k)
    if mask is not None:
        if mask.dim() == 2:
            # [Batch, SeqLen] -> [Batch, 1, ..., 1, SeqLen]
            padded_shape = (mask.size(0),) + (1,) * (attn_logits.dim() - 2) + (mask.size(1),)
            mask = mask.reshape(padded_shape)
        elif mask.dim() == 3 and attn_logits.dim() == 4:
            mask = mask.unsqueeze(1)
        # Fix: the condition used to be `attn_logits == 0`, which ignored the
        # mask and only hit logits that happened to be exactly zero.
        # A large finite value (not -inf) keeps fully masked rows NaN-free.
        attn_logits = attn_logits.masked_fill(mask == 0, -9e15)
    attention = F.softmax(attn_logits, dim=-1)
    values = torch.matmul(attention, v)
    return values, attention


class MultiheadAttention(nn.Module):
    """Multi-head self-attention with a fused Q/K/V projection.

    Args:
        input_dim: Size of the last dimension of the input.
        embed_dim: Total size of the attention output (all heads together);
            must be divisible by ``num_heads``.
        num_heads: Number of attention heads.
    """

    def __init__(self, input_dim, embed_dim, num_heads):
        super().__init__()
        assert embed_dim % num_heads == 0, "Embedding dimension must be 0 modulo number of heads."

        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.head_dim = embed_dim // num_heads

        # Stack all weight matrices 1...h together for efficiency
        # Note that in many implementations you see "bias=False" which is optional
        self.qkv_proj = nn.Linear(input_dim, 3*embed_dim)
        self.o_proj = nn.Linear(embed_dim, embed_dim)

        self._reset_parameters()

    def _reset_parameters(self):
        """Xavier-uniform weights and zero biases for both projections."""
        # Original Transformer initialization, see PyTorch documentation
        nn.init.xavier_uniform_(self.qkv_proj.weight)
        self.qkv_proj.bias.data.fill_(0)
        nn.init.xavier_uniform_(self.o_proj.weight)
        self.o_proj.bias.data.fill_(0)

    def forward(self, x, mask=None, return_attention=False):
        """Apply self-attention to ``x`` of shape ``[Batch, SeqLen, input_dim]``.

        Args:
            x: Input tensor.
            mask: Optional mask forwarded unchanged to ``scaled_dot_product``.
            return_attention: If True, also return the attention weights.

        Returns:
            The output of shape ``[Batch, SeqLen, embed_dim]``, or
            ``(output, attention)`` when ``return_attention`` is True.
        """
        batch_size, seq_length, _ = x.size()
        qkv = self.qkv_proj(x)

        # Separate Q, K, V from linear output
        qkv = qkv.reshape(batch_size, seq_length, self.num_heads, 3*self.head_dim)
        qkv = qkv.permute(0, 2, 1, 3) # [Batch, Head, SeqLen, Dims]
        q, k, v = qkv.chunk(3, dim=-1)

        # Determine value outputs
        values, attention = scaled_dot_product(q, k, v, mask=mask)
        values = values.permute(0, 2, 1, 3) # [Batch, SeqLen, Head, Dims]
        # Fix: merge the heads to self.embed_dim. The input's last dimension
        # was used here, which only worked when input_dim == embed_dim.
        values = values.reshape(batch_size, seq_length, self.embed_dim)
        o = self.o_proj(values)

        if return_attention:
            return o, attention
        else:
            return o
class EncoderBlock(nn.Module):
    """Post-norm Transformer encoder block: self-attention, then a 2-layer MLP.

    Both sub-layers are wrapped as ``LayerNorm(x + Dropout(sublayer(x)))``.
    """

    def __init__(self, input_dim, num_heads, dim_feedforward, dropout=0.3):
        """
        Inputs:
            input_dim - Dimensionality of the input (and of the output)
            num_heads - Number of heads used by the attention sub-layer
            dim_feedforward - Width of the hidden layer inside the MLP
            dropout - Probability shared by every dropout layer in the block
        """
        super().__init__()

        # Self-attention keeps the feature width unchanged.
        self.self_attn = MultiheadAttention(input_dim, input_dim, num_heads)

        # Position-wise MLP; layer order (and thus state-dict indices) is fixed.
        feedforward_layers = [
            nn.Linear(input_dim, dim_feedforward),
            nn.Dropout(dropout),
            nn.ReLU(inplace=True),
            nn.Linear(dim_feedforward, input_dim),
        ]
        self.linear_net = nn.Sequential(*feedforward_layers)

        # Normalisation and dropout applied around the two sub-layers.
        self.norm1 = nn.LayerNorm(input_dim)
        self.norm2 = nn.LayerNorm(input_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, mask=None):
        """Run the block on ``x``; ``mask`` is handed to the attention layer."""
        # Residual + norm around self-attention.
        attended = self.norm1(x + self.dropout(self.self_attn(x, mask=mask)))
        # Residual + norm around the MLP.
        return self.norm2(attended + self.dropout(self.linear_net(attended)))

class PositionalEncoding(nn.Module):
    """Adds fixed sinusoidal positional encodings to a ``[Batch, SeqLen, Dim]`` input."""

    def __init__(self, d_model, max_len=5000):
        """
        Inputs
            d_model - Hidden dimensionality of the input (even or odd).
            max_len - Maximum length of a sequence to expect.
        """
        super().__init__()

        # Create matrix of [SeqLen, HiddenDim] representing the positional encoding for max_len inputs
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        # Fix: for odd d_model there is one cosine column fewer than sine
        # columns, so trim div_term to avoid a shape mismatch.
        pe[:, 1::2] = torch.cos(position * div_term[: d_model // 2])
        pe = pe.unsqueeze(0)

        # register_buffer => Tensor which is not a parameter, but should be part of the modules state.
        # Used for tensors that need to be on the same device as the module.
        # persistent=False tells PyTorch to not add the buffer to the state dict (e.g. when we save the model)
        self.register_buffer('pe', pe, persistent=False)

    def forward(self, x):
        """Return ``x`` plus the encodings for its first ``x.size(1)`` positions."""
        x = x + self.pe[:, :x.size(1)]
        return x
    
class IGMC(torch.nn.Module):
    """Inductive Graph-based Matrix Completion model with a temporal user module.

    Relational graph convolutions are applied to the sub-graph, the states of
    the nodes flagged as user (``x[:, 0] == 1``) and item (``x[:, 1] == 1``) are
    read out, concatenated with side features and a per-user summary of the
    items the user interacted with, and passed through a small MLP.

    Args:
        device: Device stored on the module as ``self.device``.
        gconv: Graph convolution class, called as
            ``gconv(in_dim, out_dim, num_relations, num_bases)``.
        latent_dim: Output width of each convolution layer.
        num_relations, num_bases: Forwarded to ``gconv``.
        regression: If True the head outputs one value, otherwise 5 log-probabilities.
        adj_dropout: Edge dropout probability (only active in training mode).
        force_undirected: Forwarded to ``dropout_adj``.
        n_side_features: Width of ``data.side_feature`` rows.
        multiply_by: Scale applied to the regression output.
        model_type: ``'lstm'`` or ``'attention'``; selects the temporal module.
        num_layers: Number of encoder blocks (required for ``'attention'``).
        num_heads: Attention heads per encoder block.

    Raises:
        ValueError: If no temporal module can be built for the given
            ``model_type`` / ``num_layers``. Previously this left
            ``time_module`` unset and failed later inside ``forward``.
    """
    def __init__(self, device, gconv=RGCNConv, latent_dim=[32, 32, 32, 32], 
                 num_relations=5, num_bases=2, regression=False, adj_dropout=0.2, 
                 force_undirected=False, n_side_features=0, 
                 multiply_by=1, model_type = 'lstm', num_layers = None, num_heads = 8):
        super(IGMC, self).__init__()
        self.regression = regression
        self.adj_dropout = adj_dropout
        self.force_undirected = force_undirected
        self.model_type = model_type
        self.n_side_features = n_side_features
        self.device = device
        if model_type == 'lstm':
            # NOTE(review): LSTM_variable_input is not defined in the visible
            # part of this notebook; it has to be provided elsewhere.
            self.time_module = LSTM_variable_input(embedding_dim=n_side_features, hidden_dim=n_side_features)
        elif model_type == 'attention' and num_layers is not None:
            self.num_layers = num_layers
            # Encoder blocks keep the side-feature width for input and MLP alike.
            self.time_module = torch.nn.ModuleList([
                EncoderBlock(input_dim=n_side_features, dim_feedforward=n_side_features, num_heads=num_heads)
                for _ in range(num_layers)
            ])
        else:
            raise ValueError(
                "Unsupported temporal module configuration: model_type={!r}, num_layers={!r} "
                "(use 'lstm', or 'attention' together with num_layers)".format(model_type, num_layers))

        self.multiply_by = multiply_by
        self.convs = torch.nn.ModuleList()
        # Node input features are 4-dimensional.
        self.convs.append(gconv(4, latent_dim[0], num_relations, num_bases))
        for i in range(0, len(latent_dim)-1):
            self.convs.append(gconv(latent_dim[i], latent_dim[i+1], num_relations, num_bases))

        # Input = user readout + item readout (2*sum(latent_dim)), item side
        # features and user time feature (2*n_side_features), plus the first
        # 19 side-feature columns of the user rows.
        self.lin1 = Linear(2*sum(latent_dim)+n_side_features*2+19, n_side_features)
        self.lin1_2 = Linear(n_side_features, 128)

        if self.regression:
            self.lin2 = Linear(128, 1)
        else:
            self.lin2 = Linear(128, 5)

    def reset_parameters(self):
        """Re-initialise the convolutions and all three linear layers."""
        for conv in self.convs:
            conv.reset_parameters()
        self.lin1.reset_parameters()
        self.lin1_2.reset_parameters()  # fix: this layer used to be skipped
        self.lin2.reset_parameters()

    def forward(self, data):
        """Score the user/item pairs contained in ``data``.

        Args:
            data: Batch object providing ``x``, ``edge_index``, ``edge_type``,
                ``time_val`` (one value per edge) and ``side_feature`` (one
                row per node).

        Returns:
            ``(prediction, user_feature, item_feature)``. ``prediction`` is the
            scaled first output column when ``regression`` is set, otherwise
            log-softmax scores over 5 classes.
        """
        x, edge_index, edge_type, timestamp = data.x, data.edge_index, data.edge_type, data.time_val
        if self.adj_dropout > 0:
            # NOTE(review): when edges are dropped (training mode), positions in
            # the reduced edge_index no longer line up with `timestamp`, which
            # is indexed per original edge below. Confirm and align if needed.
            edge_index, edge_type = dropout_adj(
                edge_index, edge_type, p=self.adj_dropout, 
                force_undirected=self.force_undirected, num_nodes=len(x), 
                training=self.training
            )

        concat_states = []
        for conv in self.convs:
            x = torch.tanh(conv(x, edge_index, edge_type))
            concat_states.append(x)

        concat_states = torch.cat(concat_states, 1)
        users = data.x[:, 0] == 1
        items = data.x[:, 1] == 1

        user_index = users.nonzero(as_tuple=True)[0]

        final_feature = data.side_feature
        Item_list_all = []
        len_all = []
        for user in user_index:
            # Edges leaving the user; fall back to incoming edges if there are none.
            current_item_idx= (edge_index[0] == user).nonzero(as_tuple=True)[0]
            current_item = torch.index_select(edge_index[1],0, current_item_idx)
            if current_item_idx.shape[0] == 0:
                print('exception occur')
                current_item_idx = (edge_index[1] == user).nonzero(as_tuple=True)[0]
                current_item = torch.index_select(edge_index[0],0, current_item_idx)
            current_time = torch.index_select(timestamp, 0, current_item_idx)
            _, time_indices = torch.sort(current_time)
            # Fix: order the neighbours chronologically. The former dict-based
            # re-sort applied the inverse of the sort permutation, which is
            # only correct when that permutation is its own inverse.
            # NOTE(review): checkpoints trained before this fix saw the old
            # ordering; re-validate (or retrain) the temporal module.
            chronological_items = torch.index_select(current_item, 0, time_indices)
            current_items = torch.index_select(final_feature, 0, chronological_items)
            Item_list_all.append(current_items)
            len_all.append(current_item.shape[0])

        final_time_feature = torch.nn.utils.rnn.pad_sequence(Item_list_all, batch_first=True)

        if self.model_type == 'attention':
            feature_device = final_time_feature.device
            pos_encode = PositionalEncoding(self.n_side_features, max_len=max(len_all)).to(feature_device)
            final_time_feature = pos_encode(final_time_feature)

            # Padding mask, built once (it used to be rebuilt for every layer
            # and moved to a notebook-global `device`).
            lengths = torch.tensor(len_all, device=feature_device)
            positions = torch.arange(final_time_feature.size(1), device=feature_device)
            mask = positions[None, :] < lengths[:, None]

            for layer in self.time_module:
                final_time_feature = layer(final_time_feature, mask=mask)

            # Mean pooling for the transformers's output
            input_mask_expanded = mask.unsqueeze(-1).expand(final_time_feature.size()).float()
            sum_embeddings = torch.sum(final_time_feature * input_mask_expanded, 1)
            sum_mask = torch.clamp(input_mask_expanded.sum(1), min=1e-9)
            time_feature = sum_embeddings / sum_mask
        else:
            time_feature = self.time_module(final_time_feature,len_all)

        user_feature = torch.cat([concat_states[users], final_feature[users][:,:19], time_feature], 1)
        item_feature = torch.cat([concat_states[items], final_feature[items]], 1)

        x = torch.cat([user_feature, item_feature], 1)

        x = F.relu(self.lin1(x))
        x = F.dropout(x, p=0.5, training=self.training)
        x = F.relu(self.lin1_2(x))
        x = F.dropout(x, p=0.4, training=self.training)
        x = self.lin2(x)
        if self.regression:
            return x[:, 0] * self.multiply_by, user_feature, item_feature
        else:
            return F.log_softmax(x, dim=-1), user_feature, item_feature

In [6]:
# Prefer the first CUDA GPU and fall back to the CPU. On GPU, cuDNN is put in
# deterministic mode with auto-tuning disabled.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
if device.type == 'cuda':
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

In [7]:
# Build the IGMC network with an attention-based temporal module (one encoder
# block) and a 5-way classification head (regression=False).
# n_side_features=768 is the width of the side-feature rows the model expects.
# NOTE(review): these settings presumably mirror the ones the checkpoint loaded
# below was trained with; confirm against the training run.
model = IGMC(
    device= device,
    latent_dim=[32, 32, 32, 32], 
    num_relations=5, 
    num_bases=4, 
    regression=False, 
    adj_dropout=0.3, 
    force_undirected=False, 
    n_side_features=768, 
    multiply_by=1,
    model_type = 'attention',
    num_layers = 1,
)

In [8]:
import torch
import os
def load_model(res_dir,epoch,model):
    """Load ``model_checkpoint<epoch>.pth`` from ``res_dir`` into ``model``.

    Args:
        res_dir: Directory containing the checkpoint file.
        epoch: Value interpolated into the checkpoint file name.
        model: Module whose ``load_state_dict`` receives the stored weights.

    Returns:
        The same ``model`` object, updated in place.
    """
    model_name = os.path.join(res_dir, 'model_checkpoint{}.pth'.format(epoch))
    # map_location='cpu' lets a checkpoint saved on a GPU load on a CPU-only
    # machine; load_state_dict then copies the values onto whichever device
    # the model's parameters already live on.
    # NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
    state_dict = torch.load(model_name, map_location='cpu')
    model.load_state_dict(state_dict)
    return model

In [9]:
# Load Data/model_checkpoint60.pth into `model`. load_model returns the object
# it was given, so PGN_model and model refer to the same network.
PGN_model = load_model('Data', '60', model)

In [10]:
import json
# Precomputed artefacts. Files are opened with context managers so that the
# handles are closed right after parsing.
# new_item_feature: key -> vector, searched with Faiss for the text request.
with open("Data/new_item_feature.json") as fh:
    new_item_feature = json.load(fh)
# user_embedding: every stored value is nested two levels deep; keep the inner vector.
with open("Data/user_embedding.json") as fh:
    user_embedding = {k: v[0][0] for k, v in json.load(fh).items()}
# item_embedding keeps its nesting; it is unwrapped where it is used.
with open("Data/item_embedding.json") as fh:
    item_embedding = json.load(fh)

In [11]:
import faiss
import json
import torch
from tqdm import tqdm
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModel

# Id lookup tables; context managers close the files after parsing.
# NOTE(review): presumably raw Yelp user/business id -> integer node id; confirm.
with open("Data/u_dict.json") as fh:
    u_dict = json.load(fh)
with open("Data/v_dict.json") as fh:
    v_dict = json.load(fh)

# Sentence encoder used to embed the free-text request.
model_name = "sentence-transformers/distilbert-base-nli-mean-tokens"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model_encoder = AutoModel.from_pretrained(model_name).to(device)

class Generate_dataset(Dataset):
    """Dataset that tokenizes one text per item.

    Args:
        data: A sequence of strings (``type='list'``) or a pandas DataFrame
            whose first column holds the text (``type='dataframe'``).
        tokenizer: Callable invoked as
            ``tokenizer(text, padding='max_length', truncation=True, max_length=256)``
            and returning ``'input_ids'`` and ``'attention_mask'``.
        device: Device the returned tensors are moved to.
        type: ``'list'`` or ``'dataframe'``.
    """
    def __init__(self, data,tokenizer,device, type):
        self.data = data
        self.device = device
        self.tokenizer = tokenizer
        self.type = type

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``{'input_ids', 'attention_mask'}`` tensors for item ``idx``.

        Raises:
            ValueError: If ``type`` is neither ``'list'`` nor ``'dataframe'``
                (this used to surface as an unbound-variable error).
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()
        if self.type == 'list':
            content = self.data[idx]
        elif self.type == 'dataframe':
            # Purely positional: row idx, first column. `.iloc[idx][0]` would
            # look up the *label* 0 on the row when such a label exists.
            content = self.data.iloc[idx, 0]
        else:
            raise ValueError("type must be 'list' or 'dataframe', got {!r}".format(self.type))
        content_encoded_input = self.tokenizer(content, padding='max_length', truncation=True,
                                               max_length=256)
        input_ids = torch.tensor(content_encoded_input['input_ids']).to(self.device)
        attention_mask = torch.tensor(content_encoded_input['attention_mask']).to(self.device)
        sample = {'input_ids': input_ids, 'attention_mask': attention_mask}
        return sample


class Sentence_encoding():
    """Encodes texts into sentence embeddings by mean-pooling token embeddings.

    Args:
        model: Encoder called as ``model(input_ids=..., attention_mask=...)``;
            the first element of its output holds the token embeddings.
        tokenizer: Tokenizer handed to ``Generate_dataset``.
        device: Device handed to ``Generate_dataset``.
    """

    def __init__(self, model, tokenizer,device):
        self.model = model
        self.tokenizer = tokenizer
        self.device = device

    #Mean Pooling - Take attention mask into account for correct averaging
    def mean_pooling(self,model_output, attention_mask):
        """Average the token embeddings of each sequence over unmasked positions."""
        token_embeddings = model_output[0] #First element of model_output contains all token embeddings
        input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
        sum_embeddings = torch.sum(token_embeddings * input_mask_expanded, 1)
        # Clamp so that a fully masked sequence does not divide by zero.
        sum_mask = torch.clamp(input_mask_expanded.sum(1), min=1e-9)
        return sum_embeddings / sum_mask

    def encode(self, final_content, batch_size, type):
        """Embed every text in ``final_content``.

        Args:
            final_content: List of strings (``type='list'``) or a DataFrame
                with the text in its first column and a ``'News_ID'`` column
                (``type='dataframe'``).
            batch_size: Batch size of the internal DataLoader.
            type: ``'list'`` or ``'dataframe'``.

        Returns:
            For ``'list'``: a tensor with one row per input. For
            ``'dataframe'``: a dict mapping each ``News_ID`` to its embedding
            as a list of floats.

        Raises:
            ValueError: For any other ``type`` (previously returned None).
        """
        if type not in ('list', 'dataframe'):
            raise ValueError("type must be 'list' or 'dataframe', got {!r}".format(type))
        dataset = Generate_dataset(final_content, self.tokenizer, self.device, type)
        dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
        # Fix: inference mode for both branches; the dataframe branch used to
        # run with whatever mode the model happened to be in.
        self.model.eval()
        if type == 'list':
            outputs = []
            with torch.no_grad():
                for data in tqdm(dataloader):
                    model_output = self.model(input_ids=data['input_ids'], attention_mask=data['attention_mask'])
                    outputs.append(self.mean_pooling(model_output, data['attention_mask']))
            return torch.cat(outputs, dim=0)

        outputs = {}
        news_ids = final_content['News_ID']
        row = 0
        with torch.no_grad():
            for data in tqdm(dataloader):
                model_output = self.model(input_ids=data['input_ids'], attention_mask=data['attention_mask'])
                sentence_embeddings = self.mean_pooling(model_output, data['attention_mask'])
                for embedding in sentence_embeddings:
                    # Positional lookup, matching how Generate_dataset reads rows.
                    outputs[news_ids.iloc[row]] = embedding.tolist()
                    row += 1
        return outputs


def request_encoder(request,model,tokenizer, device):
    """Embed a single request string and return the embedding as a list.

    The model is switched to eval mode, the request is encoded as a one-element
    batch through ``Sentence_encoding``, and the first (only) row is returned.
    """
    model.eval()
    sentence_encoder = Sentence_encoding(model, tokenizer, device)
    request_batch = sentence_encoder.encode([request], 1, 'list')
    first_row = request_batch[0]
    return first_row.tolist()

def Faiss_search(new_item_feature,request_embedding, TopN):
    """Return the keys of the ``TopN`` vectors closest (L2) to ``request_embedding``.

    Args:
        new_item_feature: Mapping of key -> embedding vector; all vectors must
            have the same length as ``request_embedding``.
        request_embedding: Query vector.
        TopN: Number of neighbours wanted; capped at the number of stored vectors.

    Returns:
        A list of keys ordered from the farthest to the nearest of the
        retrieved neighbours (Faiss' nearest-first result, reversed). Empty if
        the mapping is empty or ``TopN`` is not positive.
    """
    keys = list(new_item_feature.keys())
    # Asking Faiss for more neighbours than it holds yields -1 labels, which
    # used to end in a KeyError; cap k and short-circuit the empty case.
    k = min(int(TopN), len(keys))
    if k <= 0:
        return []

    embeddings = np.array([new_item_feature[key] for key in keys]).astype("float32")

    # Exact L2 index wrapped so that results carry explicit ids; the id of a
    # vector is simply its position in `keys`.
    flat_index = faiss.IndexFlatL2(embeddings.shape[1])
    index = faiss.IndexIDMap(flat_index)
    IDs = np.arange(len(keys), dtype='int64')
    index.add_with_ids(embeddings, IDs)

    # Retrieve the k nearest neighbours of the query.
    D, I = index.search(np.array([request_embedding]).astype("float32"), k=k)
    searched_ID = [keys[i] for i in I[0] if i >= 0][::-1]
    return searched_ID

In [12]:
class Rate_model(torch.nn.Module):
    """Scoring head that reuses the three linear layers of a trained model.

    ``pretrained_model.lin1``, ``.lin1_2`` and ``.lin2`` are shared (not
    copied) and exposed here as ``lin1``, ``lin2`` and ``lin3``.
    """

    def __init__(self, pretrained_model):
        super().__init__()
        self.lin1 = pretrained_model.lin1
        self.lin2 = pretrained_model.lin1_2
        self.lin3 = pretrained_model.lin2

    def forward(self, item_feature, user_feature):
        """Score user/item pairs.

        The user features come first in the concatenation even though the item
        is the first argument. Returns the sigmoid of the log-softmax output.
        """
        pair = torch.cat((user_feature, item_feature), dim=1)
        hidden = F.dropout(F.relu(self.lin1(pair)), p=0.5, training=self.training)
        hidden = F.dropout(F.relu(self.lin2(hidden)), p=0.4, training=self.training)
        log_probs = F.log_softmax(self.lin3(hidden), dim=-1)
        return torch.sigmoid(log_probs)
# TODO: could be extended to compare multiple items at once.

In [170]:
# Free-text request that is embedded and matched against the item vectors.
request = 'I like Thai food'

# Number of nearest items to retrieve for the request.
TopN_food = 20

In [171]:
# Embed the request, then fetch the TopN_food keys of new_item_feature whose
# vectors are closest to it.
request_embedding = request_encoder(request,model_encoder,tokenizer, device)
searched_ID= Faiss_search(new_item_feature,request_embedding,TopN_food)

100%|████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 35.35it/s]


In [172]:
# for i in searched_ID:
#     print(final_item_feature[i])

In [173]:
# Translate every retrieved key through v_dict.
# NOTE(review): presumably business id -> integer item id; confirm.
Item_ID = [v_dict[i] for i in searched_ID]

In [174]:
# Keep only the items (as string ids) that have a stored item embedding.
final_valid_items = [str(i) for i in Item_ID if str(i) in item_embedding]

In [175]:
# Look up the stored embedding of every valid item, in the same order.
final_valid_items_embeddings = [item_embedding[i] for i in final_valid_items]

In [176]:
def evalutate_candidate_user(model, valid_item_embeddings, user_embedding, TopN_user):
    """Score, for every item, its nearest candidate users with ``model``.

    Args:
        model: Scoring module called as ``model(item_emb, user_emb)`` on
            single-row batches.
        valid_item_embeddings: Iterable of item embeddings; the vector of each
            entry is read from ``entry[0][0]``.
        user_embedding: Mapping of user key -> embedding vector.
        TopN_user: Number of candidate users retrieved per item.

    Returns:
        A list with one dict per item mapping user key -> ``[score]``, where
        ``score`` is the last entry of the model's output row.
    """
    # Run on the device the model lives on instead of relying on a
    # notebook-global `device`.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device('cpu')

    all_recommend = []
    model.eval()
    with torch.no_grad():
        for item in tqdm(valid_item_embeddings):
            user_rank = {}
            item_vector = item[0][0]
            # The query is padded with 19 zeros for the nearest-user search.
            # NOTE(review): presumably so that it has the same length as the
            # user vectors; confirm.
            search_vector = item_vector + [0 for _ in range(19)]
            candidate_user_embedding_ID = Faiss_search(user_embedding, search_vector, TopN_user)
            # The model itself receives the unpadded vector; build it once per item.
            item_emb = torch.tensor(item_vector).to(target_device).unsqueeze(dim=0)
            for user in candidate_user_embedding_ID:
                user_emb = torch.tensor(user_embedding[user]).to(target_device).unsqueeze(dim=0)
                rank_score = model(item_emb, user_emb)
                user_rank[user] = [rank_score.tolist()[0][-1]]
            all_recommend.append(user_rank)
    return all_recommend
# TODO: score candidate users in batches to speed this up.
# The scoring head shares its linear layers with the loaded PGN_model.
rate_model = Rate_model(PGN_model).to(device)

In [177]:
# For each valid item, score its 1000 nearest candidate users.
# Result: one {user key: [score]} dict per item.
all_recommend = evalutate_candidate_user(rate_model, final_valid_items_embeddings, user_embedding,1000)

100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:19<00:00,  1.01it/s]


In [178]:
import operator
# Number of best-scored users kept per item.
N = 100
# Reverse lookup: value of u_dict -> key of u_dict.
u_dict_revserd = {v: k for k, v in u_dict.items()}
all_users_recommend = []
for user_rank in all_recommend:
    # Highest score first; every score is wrapped in a one-element list.
    sorted_user_rank = sorted(user_rank.items(), key=lambda x: x[1], reverse=True)
    Top_N_user = sorted_user_rank[:N]
    TopN_user = [u_dict_revserd[int(user_key)] for user_key, _ in Top_N_user]
    all_users_recommend.append(TopN_user)

# Users recommended for every single item. set.intersection needs at least
# one argument, so an empty result list is handled explicitly.
if all_users_recommend:
    _common_users = set.intersection(*map(set, all_users_recommend))
else:
    _common_users = set()

TopN_user_all = [j for i in all_users_recommend for j in i]
# dict.fromkeys de-duplicates while keeping first-seen order, so the lists are
# the same on every run (plain set ordering varies between kernel restarts).
TopN_user_common = [user for user in dict.fromkeys(TopN_user_all) if user in _common_users]
# Final selection: every recommended user except those common to all items.
Final_TopN_user = [user for user in dict.fromkeys(TopN_user_all) if user not in _common_users]

In [188]:
# Category substrings that count as a hit for the current request.
keywords = ['Thai']

In [189]:
# Sanity check for the current request: for every recommended user, print the
# categories of the businesses they reviewed until one contains a keyword, and
# count the users for which that happens.
count = 0
print(len(Final_TopN_user))
for i in Final_TopN_user:
    print('User name:',final_user_feature_dic[i]['name'])
    users_items = edge_index[i]

    print('---------------------------------')
    print('The resturant that the user visited:')
    for item in users_items:
        cat = final_item_feature_dic[item]['categories']
        print(cat)
        # A missing (None) categories value cannot match and must not raise.
        if cat is not None and any(keyword in cat for keyword in keywords):
            count += 1
            break  # one hit per user is enough

    print('\n')

70
User name: Gene
---------------------------------
The resturant that the user visited:
American (New), American (Traditional), Restaurants, Burgers
Restaurants, Event Planning & Services, Modern European, Pizza, Personal Chefs, Italian, American (New)
Sandwiches, Barbeque, Southern, Restaurants, Salad
Tapas/Small Plates, Restaurants, Breakfast & Brunch, American (New)
Restaurants, Izakaya, Nightlife, Barbeque, Ramen, Cocktail Bars, Food, Japanese, Smokehouse, Bars
Event Planning & Services, Hotels & Travel, Hotels
Dive Bars, Bars, Burgers, Restaurants, Nightlife, Cocktail Bars, American (Traditional)
Pizza, Restaurants, Food Trucks, Food
Local Flavor, Salad, Sandwiches, Restaurants, Barbeque, Food, Food Stands
Nightlife, Bars
Restaurants, Food Trucks, Barbeque, Sandwiches, Food
Restaurants, Pizza, American (Traditional), Nightlife, Bars, Breakfast & Brunch, Beer Bar
American (New), Breakfast & Brunch, Pubs, American (Traditional), Wine Bars, Bars, Nightlife, Beer Gardens, Restaurant

In [190]:
# Thai: share of recommended users with at least one reviewed business whose
# categories contain a keyword.
print(count/len(Final_TopN_user))

0.21428571428571427


In [169]:
# American: output kept from an earlier run (execution count 169). Re-run the
# request and keyword cells with this cuisine to reproduce it.
print(count/len(Final_TopN_user))

0.8354430379746836


In [137]:
# Coffee: output kept from an earlier run (execution count 137). Re-run the
# request and keyword cells with this keyword to reproduce it.
print(count/len(Final_TopN_user))

0.6842105263157895


In [157]:
# Pasta: output kept from an earlier run (execution count 157). Re-run the
# request and keyword cells with this keyword to reproduce it.
print(count/len(Final_TopN_user))

0.4883720930232558


In [125]:
# Pizza: output kept from an earlier run (execution count 125). Re-run the
# request and keyword cells with this keyword to reproduce it.
print(count/len(Final_TopN_user))

0.574468085106383


In [110]:
# Burger: output kept from an earlier run (execution count 110). Re-run the
# request and keyword cells with this keyword to reproduce it.
print(count/len(Final_TopN_user))

0.4533333333333333


In [181]:
# Thai: output of an earlier execution (count 181); it shows the same value as
# the Thai cell above.
print(count/len(Final_TopN_user))

0.21428571428571427
