# Coarse to fine

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import ParameterSampler

import matplotlib.pyplot as pyplot
import optuna as op
%matplotlib inline
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample
from Evaluation.Evaluator import EvaluatorHoldout
from Recommenders.KNN.ItemKNNCFRecommender import ItemKNNCFRecommender
from Utils.createURM import createURM2

In [2]:
# Build the interaction matrix through the project helper imported from Utils.createURM.
# NOTE(review): "URM" presumably means User Rating Matrix (users x items, sparse) —
# confirm against createURM2, whose implementation is not visible here.
URM = createURM2()

  dataset = pd.read_csv('../Input/interactions_and_impressions.csv')


In [3]:
# Two nested hold-out splits, each called with train_percentage=0.80:
#   1. full URM             -> (train + validation part, test part)
#   2. train + validation   -> (train part, validation part)
# The intermediate matrix gets its own name so URM_train is bound exactly once.
URM_train_validation, URM_test = split_train_in_two_percentage_global_sample(
    URM,
    train_percentage=0.80,
)
URM_train, URM_validation = split_train_in_two_percentage_global_sample(
    URM_train_validation,
    train_percentage=0.80,
)



In [4]:
# One hold-out evaluator per held-out matrix, both configured with cutoff_list=[10].
# The validation evaluator is constructed first and the test evaluator second,
# so the messages printed at construction time keep their order.
evaluator_validation, evaluator_test = [
    EvaluatorHoldout(URM_holdout, cutoff_list=[10])
    for URM_holdout in (URM_validation, URM_test)
]

EvaluatorHoldout: Ignoring 752 ( 1.8%) Users that have less than 1 test interactions
EvaluatorHoldout: Ignoring 290 ( 0.7%) Users that have less than 1 test interactions


In [5]:
# Coarse-to-fine random search over the ItemKNNCF hyperparameters.
#
# Every epoch:
#   1. samples candidate combinations from the current grid (random search),
#   2. fits one recommender per candidate and scores it on the validation split,
#   3. keeps the best rows and shrinks each numeric range to the span they cover,
#      so the next epoch samples a finer region around the current best.
grid_size = 100    # number of points used when a float range is re-discretised
TUNE_ITER = 10     # random-search budget of the first epoch (config value, never mutated)
num_epochs = 2     # number of coarse-to-fine rounds
worse_score = 0    # baseline score: a row must beat it to influence the refined grid

init_param_grid = {'topK': list(range(10, 500)),
                   'shrink': list(range(10, 100)),
                  }
# Single source of truth for parameter order: column order of the results
# frame and order of best_params_dict['params'].
param_names = list(init_param_grid)

new_param_grid = init_param_grid.copy()
best_params_dict = {'score': worse_score, 'params': []}
tried_params_list = []  # never populated in this cell

# Local budget counter: TUNE_ITER stays untouched, so re-running the cell
# always restarts from the configured budget.
n_iter = TUNE_ITER

for epoch in range(num_epochs):
    # Number of top rows that define the next grid; shrinks by one per epoch.
    n_keep = num_epochs - epoch

    # With list-only grids ParameterSampler draws without replacement, so it
    # cannot return more candidates than the grid contains. Cap the request
    # explicitly instead of relying on the sampler to shrink it.
    n_combinations = 1
    for key in param_names:
        n_combinations *= len(new_param_grid[key])
    param_list = list(ParameterSampler(new_param_grid,
                                       n_iter=min(n_iter, n_combinations),
                                       random_state=0))

    # Random search: evaluate every sampled combination on the validation split.
    # Iterating over param_list (not range(n_iter)) stays correct when fewer
    # candidates than requested were returned.
    records = []
    for strategy_params in param_list:
        recommender = ItemKNNCFRecommender(URM_train)
        # Pass the whole sampled dict so every key of the grid is actually applied.
        recommender.fit(**strategy_params)
        results, _ = evaluator_validation.evaluateRecommender(recommender)
        score = results.loc[10]['MAP']  # row 10 = the cutoff the evaluator was built with

        records.append({**strategy_params, 'score': score})

        if score > best_params_dict['score']:
            best_params_dict['score'] = score
            # Store values in init_param_grid order, independent of the
            # key order the sampler happens to use.
            best_params_dict['params'] = [strategy_params[key] for key in param_names]

    # One row per candidate, best first; keep only the rows that drive the refinement.
    df_rs_results = pd.DataFrame(records, columns=param_names + ['score'])
    df_rs_results = df_rs_results.sort_values(['score'], ascending=False).head(n_keep)

    # If this epoch did not reach the best score seen so far, re-insert the
    # overall best combination so the refined grid still contains it.
    if df_rs_results['score'].iloc[0] < best_params_dict['score']:
        best_row = dict(zip(param_names, best_params_dict['params']))
        best_row['score'] = best_params_dict['score']

        # DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.
        df_rs_results = pd.concat([df_rs_results, pd.DataFrame([best_row])],
                                  ignore_index=True)
        df_rs_results = df_rs_results.sort_values(['score'], ascending=False).head(n_keep)

    display(df_rs_results)
    print(df_rs_results.head(1).T.to_dict())

    # Only rows that beat the baseline may delimit the next search region.
    df_above_baseline = df_rs_results[df_rs_results['score'] > worse_score]

    # If nothing beat the baseline the min/max below would be NaN (and int(NaN)
    # raises), so in that case the current grid is simply kept for the next epoch.
    if not df_above_baseline.empty:
        for key in param_names:
            first_value = init_param_grid[key][0]
            if isinstance(first_value, int):
                lowest = int(df_above_baseline[key].min())
                highest = int(df_above_baseline[key].max())
                # Plain Python ints keep best_params_dict free of numpy scalar types.
                new_param_grid[key] = list(range(lowest, highest + 1))
            elif isinstance(first_value, float):
                lowest = df_above_baseline[key].min()
                highest = df_above_baseline[key].max()
                # np.unique collapses the grid to one point when lowest == highest.
                new_param_grid[key] = np.unique(np.linspace(lowest, highest, grid_size)).tolist()
            else:
                # Non-numeric parameters cannot be narrowed: keep the full list.
                new_param_grid[key] = init_param_grid[key]

    # Shrink the budget for the next epoch (same formula as before, applied to
    # the local counter and floored at 1 so an epoch never runs with 0 candidates).
    # NOTE(review): because the formula multiplies by `epoch`, nothing is removed
    # after epoch 0; the first reduction only takes effect from the third epoch on.
    n_iter = max(1, int(n_iter - epoch * n_iter / num_epochs))

Unable to load Cython Compute_Similarity, reverting to Python
Similarity column 24507 (100.0%), 366.11 column/sec. Elapsed time 1.12 min
EvaluatorHoldout: Processed 40877 (100.0%) in 1.33 min. Users per second: 513
Unable to load Cython Compute_Similarity, reverting to Python
Similarity column 24507 (100.0%), 316.79 column/sec. Elapsed time 1.29 min
EvaluatorHoldout: Processed 40877 (100.0%) in 1.38 min. Users per second: 492
Unable to load Cython Compute_Similarity, reverting to Python
Similarity column 24507 (100.0%), 285.31 column/sec. Elapsed time 1.43 min
EvaluatorHoldout: Processed 40877 (100.0%) in 1.26 min. Users per second: 540
Unable to load Cython Compute_Similarity, reverting to Python
Similarity column 24507 (100.0%), 847.89 column/sec. Elapsed time 28.90 sec
EvaluatorHoldout: Processed 40877 (100.0%) in 27.88 sec. Users per second: 1466
Unable to load Cython Compute_Similarity, reverting to Python
Similarity column 24507 (100.0%), 993.23 column/sec. Elapsed time 24.67 sec

Unnamed: 0,topK,shrink,score
3,183,53,0.019434
7,187,52,0.019409


{3: {'topK': 183.0, 'shrink': 53.0, 'score': 0.01943409399705082}}
Unable to load Cython Compute_Similarity, reverting to Python
Similarity column 24507 (100.0%), 811.25 column/sec. Elapsed time 30.21 sec
EvaluatorHoldout: Processed 40877 (100.0%) in 26.33 sec. Users per second: 1553
Unable to load Cython Compute_Similarity, reverting to Python
Similarity column 24507 (100.0%), 863.05 column/sec. Elapsed time 28.40 sec
EvaluatorHoldout: Processed 40877 (100.0%) in 27.01 sec. Users per second: 1513
Unable to load Cython Compute_Similarity, reverting to Python
Similarity column 24507 (100.0%), 871.57 column/sec. Elapsed time 28.12 sec
EvaluatorHoldout: Processed 40877 (100.0%) in 26.64 sec. Users per second: 1534
Unable to load Cython Compute_Similarity, reverting to Python
Similarity column 24507 (100.0%), 886.14 column/sec. Elapsed time 27.66 sec
EvaluatorHoldout: Processed 40877 (100.0%) in 26.85 sec. Users per second: 1522
Unable to load Cython Compute_Similarity, reverting to Python

Unnamed: 0,topK,shrink,score
7,185,53,0.01944


{7: {'topK': 185.0, 'shrink': 53.0, 'score': 0.01943996332784615}}


In [6]:
# Show the best validation score found by the search and the parameter values that produced it.
best_params_dict

{'score': 0.01943996332784615, 'params': [185, 53]}