# Steps
1. Choose data scenario.
2. Generate the dataset accordingly.
3. Analyze the dataset.
4. Choose an algorithm to investigate.
5. Choose the 'fixed' configuration(s).
6. For each 'fixed' configuration, optimize the other parameters based on RMSE.
7. Given the optimal setting, run the popularity bias analysis for every version of the 'fixed' configuration.

## Libraries

In [None]:
# Thread-related environment variables, set through the IPython %env magic.
# The same five values are assigned again via os.environ in the import cell.
# NOTE(review): presumably these have to be in place before numpy / numba /
# MKL are first imported in order to take effect - confirm.
%env MKL_THREADING_LAYER=tbb
%env OPENBLAS_NUM_THREADS=24
%env NUMBA_NUM_THREADS=96
%env MKL_NUM_THREADS=96
%env OMP_NUM_THREADS=1

In [None]:
import os
# Thread-related settings for the numeric back-ends, applied in one update.
os.environ.update(
    {
        "MKL_THREADING_LAYER": "tbb",
        "OPENBLAS_NUM_THREADS": "24",
        "NUMBA_NUM_THREADS": "96",
        "MKL_NUM_THREADS": "96",
        "OMP_NUM_THREADS": "1",
    }
)
# for random generation
import numpy as np 
import random as rd


# basic functions
import pandas as pd
pd.options.display.max_rows = 100
import pickle
import scipy 

# custom-made functions
import modelling_mf
from data_generation import generate_data
from optimize_hp import optimize_lkpy, optimize_cornac



# cornac RS library
from cornac.models import bpr

In [None]:
import lenskit_tf

## Data and parameters

In [None]:
# Import the submodule explicitly: a bare `import scipy` does not guarantee
# that `scipy.io` is loaded on every SciPy version.
import scipy.io

data = "epinion"
# Interaction log stored under the 'rating_with_timestamp' key of the .mat file.
mat = scipy.io.loadmat(f"data/{data}_events.mat")
mat_df = pd.DataFrame(mat['rating_with_timestamp'])
# Only user, item and rating are used below; the other three columns get
# placeholder names.
mat_df.columns = ['user', 'item', '.', 'rating', '..', '...']
# Keep a single rating per (user, item) pair: the last occurrence in file order.
epinion_ratings = mat_df[['user', 'item', 'rating']].drop_duplicates(
    subset=['user', 'item'],
    keep='last',
)

In [None]:
data = "fairbook"
# Load the user-item interactions from data/<data>_events.csv.
fairbook_ratings = pd.read_csv(f"data/{data}_events.csv")

In [None]:
data = "ml1m"
# User-item interactions: a '::'-separated file without a header row.
# The column at position 3 is dropped and the remaining three are named.
ml1m_ratings = pd.read_csv(
    f"data/{data}_events.dat",
    sep='::',
    header=None,
    engine='python',
).drop(columns=3)
ml1m_ratings.columns = ['user', 'item', 'rating']

In [None]:
# Column names of the ratings frame:
#   user_col    - the name of the column that includes the users
#   item_col    - the name of the column that includes the items
#   predict_col - the name of the column that includes the interaction
user_col, item_col, predict_col = "user", "item", "rating"

Choose which dataset to use for the experiments below.

In [None]:
# Label of the selected dataset; it appears in log output and in the names of
# the pickle files written further down.
data_strategy = 'epinion'
# Work on a copy so that later in-place edits leave the loaded frame untouched.
ratings = epinion_ratings.copy(deep=True)

## Optimize, train, evaluate LKPY
- **Algorithm**
- **Fixed parameters**
- **To-optimize parameters**

In [None]:
# Algorithm name -> list of 'fixed' configurations to run for that algorithm.
algo_versions = {
    "BPR": [
        {"bias": True},
    ],
}

In [None]:
# Evaluation protocol.
evaluation_way = "cross_validation"
partition_way = "user"
sampling_strategy = "frac"
nr_recs = 10  # handed to the training routine as `n`
fallback = False

# Output control.
verbose = True
plot = True
save_plot = True  # save the plots

Note: the number of training epochs is set to 50 for the epinion dataset.

In [None]:
# choose algorithm
algorithm_lkpy = lenskit_tf.BPR
algo_name = "BPR"
versions = algo_versions[algo_name]

# Candidate values of the tuned hyper-parameters. The entries of `best_params`
# are used below as positions in these lists.
# NOTE(review): the lists presumably have to mirror the search space defined
# inside optimize_lkpy - confirm.
features_list = [10, 50, 100]  # check
reg_list = [0, 0.001, 0.01, 0.1]
neg_weight_list = [True, False]

# for every 'fixed' version of the algorithm
for args in versions:
    print(data_strategy, args)

    # Cache file holding the tuned parameters of this dataset / version pair.
    p = f'best_parameters/{algo_name}/{data_strategy}_{args}.pkl'
    if os.path.isfile(p):
        print('We got them already')
        with open(p, 'rb') as f:
            best_params = pickle.load(f)
    else:
        print('We have to compute them now')
        # optimize for this fixed version
        # The configured `partition_way` is passed here; the previous code
        # passed the undefined name `row`, which raised a NameError whenever
        # no cached parameters existed.
        # NOTE(review): if row-wise partitioning was intended for the
        # optimisation, pass the string "row" instead - confirm.
        best_params = optimize_lkpy(
            ratings=ratings,
            algorithm_name=algo_name,
            args=args,
            partition_way=partition_way,
            max_evals=20,
        )

        # save the best parameters for this fixed version; create the folder
        # first so the finished optimisation is not lost to a missing directory
        os.makedirs(os.path.dirname(p), exist_ok=True)
        with open(p, 'wb') as f:
            pickle.dump(best_params, f)

    # Translate the stored positions into actual hyper-parameter values.
    optimal_features = features_list[best_params["features"]]
    optimal_reg = reg_list[best_params["reg"]]
    optimal_nw = neg_weight_list[best_params["neg_weight"]]

    # run the training and evaluation for the fixed version + the best other parameters
    pop_biases_lkpy, metrics_dict_lkpy = modelling_mf.train_algorithm(
        algorithm=algorithm_lkpy(
            features=optimal_features,
            reg=optimal_reg,
            neg_weight=optimal_nw,
            epochs=50,
        ),
        algo_name=algo_name,
        ratings=ratings,
        evaluation_way=evaluation_way,
        verbose=verbose,
        n=nr_recs,
        sampling_strategy=sampling_strategy,
        partition_way=partition_way,
        plot=plot,
        data_strategy=data_strategy,
        args=args,
        save_plot=save_plot,
    )

    # Save metrics!
    results_path = f'experimental_results/{algo_name}/{data_strategy}_{args}.pkl'
    os.makedirs(os.path.dirname(results_path), exist_ok=True)
    with open(results_path, 'wb') as f:
        pickle.dump(metrics_dict_lkpy, f)

## Optimize, train, evaluate Cornac
- **Algorithm**
- **Fixed parameters**
- **To-optimize parameters**

In [None]:
# Create a dictionary that maps each item to an integer - necessary for Cornac.
# Integers are handed out in the order in which `unique()` returns the items.
mapping_dict = {
    item: idx for idx, item in enumerate(ratings[item_col].unique())
}
# Map in the ratings file; a value without an entry is left unchanged.
ratings[item_col] = ratings[item_col].map(lambda x: mapping_dict.get(x, x))

In [None]:
# Algorithm name -> list of 'fixed' configurations to run for that algorithm.
algo_versions = {
    "CornacBPR": [
        {"bias": True},
        {"bias": False},
    ],
}

In [None]:
# Evaluation protocol.
evaluation_way = "cross_validation"
partition_way = "user"
sampling_strategy = "frac"
nr_recs = 10  # handed to the training routine as `n`
fallback = False

# Output control.
verbose = True
plot = True
save_plot = True  # save the plots

In [None]:
# choose algorithm
algorithm_cornac = bpr.BPR
algo_name = "CornacBPR"
versions = algo_versions[algo_name]

# for every 'fixed' version of the algorithm
for args in versions:
    print(data_strategy, args)

    # Cache file holding the tuned parameters of this dataset / version pair.
    p = f'best_parameters/{algo_name}/{data_strategy}_{args}.pkl'
    if os.path.isfile(p):
        print('We got them already')
        with open(p, 'rb') as f:
            best_params = pickle.load(f)
    else:
        print('We have to compute them now')
        # optimize for this fixed version
        best_params = optimize_cornac(
            ratings=ratings,
            algorithm_name=algo_name,
            args=args,
            max_evals=20,
        )

        # save the best parameters for this fixed version; create the folder
        # first so the finished optimisation is not lost to a missing directory
        os.makedirs(os.path.dirname(p), exist_ok=True)
        with open(p, 'wb') as f:
            pickle.dump(best_params, f)

    # The stored entries are passed to the model as-is.
    optimal_k = best_params["k"]
    optimal_reg = best_params["lambda_reg"]
    optimal_lr = best_params["learning_rate"]

    # run the training and evaluation for the fixed version + the best other parameters
    pop_biases_cornac, metrics_dict_cornac = modelling_mf.train_algorithm_cornac(
        algorithm=algorithm_cornac(
            k=optimal_k,
            lambda_reg=optimal_reg,
            learning_rate=optimal_lr,
            use_bias=args['bias'],  # the 'fixed' part of this version
        ),
        algo_name=algo_name,
        ratings=ratings,
        evaluation_way=evaluation_way,
        verbose=verbose,
        n=nr_recs,
        sampling_strategy=sampling_strategy,
        partition_way=partition_way,
        plot=plot,
        data_strategy=data_strategy,
        args=args,
        save_plot=save_plot,
    )

    # Save metrics!
    results_path = f'experimental_results/{algo_name}/{data_strategy}_{args}.pkl'
    os.makedirs(os.path.dirname(results_path), exist_ok=True)
    with open(results_path, 'wb') as f:
        pickle.dump(metrics_dict_cornac, f)