In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder 
from recbole.config import Config
from recbole.data import create_dataset, data_preparation

from logging import getLogger
from recbole.model.general_recommender import BPR
from recbole.trainer import Trainer
from recbole.utils import init_seed, init_logger

import torch
from recbole.data.interaction import Interaction

import pickle
import os

import make_atomic_files as at

In [2]:
at.og_atomic_files()

In [3]:
def get_history(inter):
    visits = []
    set_uid = set(inter['uid:token'])
    for u in set_uid:
        visits.append(inter[inter['uid:token'] == u]['venue_id:token'].values.tolist())
    
    return np.array(visits)

inter = pd.read_csv('foursquare/foursquare.inter', sep='\t')

visits = get_history(inter)

## Tuning

In [4]:
from recbole.trainer import HyperTuning
from recbole.utils import init_seed
import os
from recbole.utils import get_model, get_trainer

In [13]:
def objective_function(config_dict=None, config_file_list=None):
    k = 10

    config = Config(config_dict=config_dict, config_file_list=['foursquare_general.yaml'])
    init_seed(config['seed'], config['reproducibility'])
    dataset = create_dataset(config)
    train_data, valid_data, test_data = data_preparation(config, dataset)
    model_name = config['model']
    model = get_model(model_name)(config, train_data._dataset).to(config['device'])
    trainer = get_trainer(config['MODEL_TYPE'], config['model'])(config, model)
    best_valid_score, best_valid_result = trainer.fit(train_data, valid_data)
    test_result = trainer.evaluate(test_data)

    return {
        'model': model_name,
        'best_valid_score': best_valid_score,
        'valid_score_bigger': config['valid_metric_bigger'],
        'best_valid_result': best_valid_result,
        'test_result': test_result
    }

hp = HyperTuning(objective_function=objective_function, algo='random', early_stop=10,
                max_evals=100, params_file='bpr.hyper', fixed_config_file_list=['foursquare_general.yaml'])


In [14]:
hp.run()
params = hp.best_params
print(params)

running parameters:                                    
{'dynamic': False, 'embedding_size': 128, 'learning_rate': 0.06546900811244058, 'mlp_hidden_size': '[64,64,64]', 'sample_num': 1, 'train_batch_size': 2048}
current best valid score: 0.0037                       
current best valid result:                             
OrderedDict([('hit@10', 0.0037), ('precision@10', 0.0004)])
current test result:                                   
OrderedDict([('hit@10', 0.0028), ('precision@10', 0.0003)])
running parameters:                                                   
{'dynamic': False, 'embedding_size': 128, 'learning_rate': 0.42166822630766093, 'mlp_hidden_size': '[64,64,64]', 'sample_num': 0, 'train_batch_size': 2048}
running parameters:                                                   
{'dynamic': True, 'embedding_size': 64, 'learning_rate': 0.010544030274656958, 'mlp_hidden_size': '[64,64,64]', 'sample_num': 5, 'train_batch_size': 2048}
running parameters:                            

## Train General Recommendation

In [15]:
# configurations initialization
k = 10

config = Config(model='BPR', dataset='foursquare', config_file_list=['foursquare_general.yaml'], config_dict = params)

# init random seed
init_seed(config['seed'], config['reproducibility'])

# dataset creating and filtering
dataset = create_dataset(config)

# dataset splitting
train_data, valid_data, test_data = data_preparation(config, dataset)

model = BPR(config, train_data.dataset).to(config['device'])

# trainer loading and initialization
trainer = Trainer(config, model)

# model training
best_valid_score, best_valid_result = trainer.fit(train_data, valid_data)

# model evaluation
test_result = trainer.evaluate(test_data)
print(test_result)

OrderedDict([('hit@10', 0.0009), ('precision@10', 0.0001)])


## Make prediction

In [11]:
visits = get_history(inter)

unique_users = list(set(inter['uid:token']))
unique_locations = list(set(inter['venue_id:token']))
print(len(unique_locations), len(unique_users))

current_time = max(inter['timestamp:token'])+1

#make prediction for users
input_inter = Interaction({
    'uid': torch.tensor(unique_users),
    'venue_id': torch.tensor(visits)
})

with torch.no_grad():
    scores = model.full_sort_predict(input_inter).reshape((len(unique_users), -1))

#length |items| + 1 because of the padding
print(scores.shape)

# get the 10 items with highest scores
rec_list = np.argsort(scores, axis = 1)[:, -k:]

# select one item at random for each user
def random_choice(a):
    # select one item, but then translated back
    r_c = np.random.choice(a, 1)
    return int(r_c) - 1

random_item = np.apply_along_axis(random_choice, 1, rec_list)
print(current_time)

27898 1083
torch.Size([1083, 27899])
102


  return int(r_c) - 1


## Add new values in the dataset

In [12]:
new_locations = pd.DataFrame({'uid:token': unique_users, 'venue_id:token':random_item.tolist(), 'timestamp:token':[current_time]*len(random_item)}, columns=['uid:token', 'venue_id:token', 'timestamp:token'])
new_locations.head()

inter = pd.concat([inter, new_locations], axis = 0).reset_index(drop = True)
inter.sort_values(by=['uid:token', 'timestamp:token'], inplace=True)

unique_users = list(set(inter['uid:token']))
unique_locations = list(set(inter['venue_id:token']))
print(len(unique_locations), len(unique_users))

inter.to_csv('foursquare/foursquare.inter', index=False, sep = '\t')

27898 1083
