In [24]:
%load_ext Cython

The Cython extension is already loaded. To reload it, use:
  %reload_ext Cython


In [27]:
import numpy as np
import pandas as pd
import scipy.sparse as sp
import matplotlib.pyplot as pyplot
import optuna as op
from sklearn.model_selection import ParameterSampler
%matplotlib inline
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample
from Evaluation.Evaluator import EvaluatorHoldout
from Recommenders.SLIM.SLIMElasticNetRecommender import MultiThreadSLIM_SLIMElasticNetRecommender
from Utils.createURM import createURM
from sklearn.model_selection import train_test_split
from Utils.createICM import createICM

### URM

In [7]:
URM = createURM()
#ICM = createICM()

  dataset = pd.read_csv('/Users/matteopancini/PycharmProjects/recsys-challenge-2022-Pancini-Vitali/Input/interactions_and_impressions.csv')


In [8]:
URM_train, URM_test = split_train_in_two_percentage_global_sample(URM, train_percentage = 0.80)
URM_train, URM_validation = split_train_in_two_percentage_global_sample(URM_train, train_percentage = 0.80)



In [34]:
train_x, valid_x, train_y, valid_y = train_test_split(URM, URM, test_size=0.2)

In [26]:
evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[10])
evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])

EvaluatorHoldout: Ignoring 713 ( 1.7%) Users that have less than 1 test interactions
EvaluatorHoldout: Ignoring 319 ( 0.8%) Users that have less than 1 test interactions


In [35]:
train_x

<33303x24507 sparse matrix of type '<class 'numpy.int64'>'
	with 1247245 stored elements in Compressed Sparse Row format>

In [36]:
valid_x

<8326x24507 sparse matrix of type '<class 'numpy.int64'>'
	with 307395 stored elements in Compressed Sparse Row format>

In [39]:
train_y

<33303x24507 sparse matrix of type '<class 'numpy.int64'>'
	with 1247245 stored elements in Compressed Sparse Row format>

In [37]:
 URM_train

<41629x24507 sparse matrix of type '<class 'numpy.float64'>'
	with 994970 stored elements in Compressed Sparse Row format>

In [38]:
URM_validation

<41629x24507 sparse matrix of type '<class 'numpy.float64'>'
	with 248742 stored elements in Compressed Sparse Row format>

### SLIM

In [10]:
recommender = MultiThreadSLIM_SLIMElasticNetRecommender(URM_train, verbose=False)

def objective(trial):
    alpha = trial.suggest_float("alpha", 0.00001, 0.1)
    l1_ratio = trial.suggest_float("l1_ratio", 0.000001, 0.1)
    topK = trial.suggest_float("topK", 300, 400)
    recommender.fit(alpha=alpha, l1_ratio=l1_ratio, topK=int(topK))

    result_dict, _ = evaluator_validation.evaluateRecommender(recommender)

    return result_dict.loc[10]["MAP"]

In [11]:
study = op.create_study(direction='maximize')
study.optimize(objective, n_trials=3)

[32m[I 2022-11-07 09:15:28,235][0m A new study created in memory with name: no-name-6939472b-9618-4b9f-b9bd-e41414cff535[0m
100%|█████████▉| 24504/24507 [03:15<00:00, 125.60it/s]


EvaluatorHoldout: Processed 40916 (100.0%) in 21.42 sec. Users per second: 1910


[32m[I 2022-11-07 09:19:04,849][0m Trial 0 finished with value: 0.02116299829150505 and parameters: {'alpha': 0.024538291519953584, 'l1_ratio': 0.036287650949949526, 'topK': 373.26486191259335}. Best is trial 0 with value: 0.02116299829150505.[0m
100%|█████████▉| 24504/24507 [03:27<00:00, 118.24it/s]


EvaluatorHoldout: Processed 40916 (100.0%) in 22.12 sec. Users per second: 1850


[32m[I 2022-11-07 09:22:54,282][0m Trial 1 finished with value: 0.020910678207151027 and parameters: {'alpha': 0.023341787400459772, 'l1_ratio': 0.02971766556803617, 'topK': 305.696621709463}. Best is trial 0 with value: 0.02116299829150505.[0m
100%|█████████▉| 24504/24507 [04:44<00:00, 86.16it/s] 


EvaluatorHoldout: Processed 40916 (100.0%) in 23.65 sec. Users per second: 1730


[32m[I 2022-11-07 09:28:02,587][0m Trial 2 finished with value: 0.020875175737515114 and parameters: {'alpha': 0.045923373364497255, 'l1_ratio': 0.0011056617555199742, 'topK': 324.9468079191235}. Best is trial 0 with value: 0.02116299829150505.[0m


In [12]:
alpha = study.best_params['alpha']
l1_ratio = study.best_params['l1_ratio']
topK = study.best_params['topK']

In [13]:
alpha

0.024538291519953584

In [14]:
l1_ratio

0.036287650949949526

In [15]:
topK

373.26486191259335

In [16]:
recommender.fit(alpha=alpha, l1_ratio=l1_ratio, topK=int(topK))

result_dict, _ = evaluator_validation.evaluateRecommender(recommender)

100%|█████████▉| 24504/24507 [03:15<00:00, 125.53it/s]


EvaluatorHoldout: Processed 40916 (100.0%) in 21.49 sec. Users per second: 1904


In [48]:
import json

resultParameters = result_dict.to_json(orient="records")
parsed = json.loads(resultParameters)

with open("logs/" + recommender.RECOMMENDER_NAME + "_logs.json", 'w') as json_file:
    json.dump(study.best_params, json_file, indent=4)
    json.dump(parsed, json_file, indent=4)

In [24]:
result_dict.iloc[0]['MAP']

0.02116256670654663

## Course To Fine

In [None]:
grid_size = 100
TUNE_ITER = 10
num_epochs = 2
worse_score = 0

init_param_grid = {'l1_ratio': [i for i in range(0, 1)],
                   'topK': [i for i in range(10, 500)]
                  }

new_param_grid = init_param_grid.copy()
best_params_dict = {'score':worse_score,'params':[]}
tried_params_list = []

for epoch in range(num_epochs):

    # List of sampled hyperparameter combinations will be used for random search
    param_list = list(ParameterSampler(new_param_grid, n_iter=TUNE_ITER,random_state=0))

    # Searching the Best Parameters with Random Search
    rs_results_dict = {}
    for tune_iter in range(TUNE_ITER):
        # Get the set of parameter for this iteration
        strategy_params = param_list[tune_iter]

        recommender = MultiThreadSLIM_SLIMElasticNetRecommender(URM_train)
        recommender.fit(alpha=0.0001, l1_ratio=strategy_params['l1_ratio'], topK=strategy_params['topK'])
        results, _ = evaluator_validation.evaluateRecommender(recommender)
        results = results.loc[10]['MAP']

        rs_results_dict[tuple(strategy_params.values())] = {'score':results}

        if results > best_params_dict['score']:
            best_params_dict['score'] = results
            best_params_dict['params'] = list(strategy_params.values())

    # Save the results in dataframe and sort it based on score
    df_rs_results = pd.DataFrame(rs_results_dict).T.reset_index()
    df_rs_results.columns = list(strategy_params.keys()) + ['score']
    df_rs_results = df_rs_results.sort_values(['score'],ascending=False).head(num_epochs-epoch)

    # If the best score from this epoch is worse than the best score,
    # then append the best hyperaparameters combination to this epoch dataframe
    if df_rs_results['score'].iloc[0] < best_params_dict['score']:
        new_row_dict = {}
        new_row_dict['score'] = best_params_dict['score']
        for idx, key in enumerate(init_param_grid):
            new_row_dict[key] = best_params_dict['params'][idx]

        df_rs_results = df_rs_results.append(new_row_dict,ignore_index=True)
        df_rs_results = df_rs_results.sort_values(['score'],ascending=False).head(num_epochs-epoch)

    display(df_rs_results)
    print(df_rs_results.head(1).T.to_dict())

    # Get the worse and best hyperparameter combinations
    df_rs_results_min = df_rs_results[df_rs_results['score']>worse_score].min(axis=0)
    df_rs_results_max = df_rs_results[df_rs_results['score']>worse_score].max(axis=0)

    # Generate new hyperparameter space based on current worse and best hyperparameter combinations
    for key in init_param_grid:
        if isinstance(init_param_grid[key][0],int):
            new_param_grid[key] = np.unique([i for i in range(int(df_rs_results_min[key]), int(df_rs_results_max[key])+1)])
        elif isinstance(init_param_grid[key][0],float):
            new_param_grid[key] = np.unique(np.linspace(df_rs_results_min[key], df_rs_results_max[key], grid_size))
        else:
            new_param_grid[key] = init_param_grid[key]

    # Decrease the tuning iteration for random search
    TUNE_ITER = int(TUNE_ITER - epoch * TUNE_ITER/num_epochs)

In [None]:
best_params_dict