# Pointwise Evaluation

In this notebook, pointwise algorithms can be evaluated.
To start an experiment, define it using the following parameters:

<b>name</b>: Name of the experiment <br>
<b>model</b>: The model to use (Possible choices are nbg, lr, svm, dt, rf, ada, gb) <br>
<b>pca</b>: PCA components for dimensionality reduction (None with 0) <br>
<b>search_space</b>: Values to use in bayesian optimization (Optional) <br>
<b>trials</b>: Number of hyperparameter optimization trials (Optional)

### Imports

In [1]:
import os
import sys
from skopt.space import Integer
from skopt.space import Real
from skopt.space import Categorical
sys.path.append(os.path.dirname((os.path.abspath(""))))

In [2]:
from src.pipeline import Pipeline

[nltk_data] Downloading package punkt to /Users/tim/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /Users/tim/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /Users/tim/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/tim/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


In [3]:
pipeline = Pipeline(
    collection='data/processed/30_5000_1000_collection.pkl',
    queries='data/processed/30_5000_1000_queries.pkl',
    queries_val='data/processed/30_5000_1000_queries_val.pkl',
    queries_test='data/processed/30_5000_1000_queries_test.pkl',
    features='data/processed/30_5000_1000_features.pkl',
    qrels_val='data/processed/30_5000_1000_qrels_val.pkl',
    qrels_test='data/processed/30_5000_1000_qrels_test.pkl',
    features_test='data/processed/30_5000_1000_features_test.pkl',
    features_val='data/processed/30_5000_1000_features_val.pkl',
)

<b>!</b> Run the next line only for feature selection

In [6]:
feature_selection_result = ['bert_cosine', 'bm25', 'char_query', 'tfidf_manhattan', 'w2v_tfidf_manhattan', 'words_difference', 'query_nouns', 'query_adjectives', 'jaccard', 'w2v_cosine', 'query_verbs', 'doc_nouns', 'polarity_doc', 'words_rel_difference']
pipeline.features = pipeline.features[['qID', 'pID', 'y'] + feature_selection_result]
pipeline.features_test = pipeline.features_test[['qID', 'pID'] + feature_selection_result]
pipeline.features_val = pipeline.features_val[['qID', 'pID'] + feature_selection_result]

In [7]:
pipeline.features

Unnamed: 0,qID,pID,y,bert_cosine,bm25,char_query,tfidf_manhattan,w2v_tfidf_manhattan,words_difference,query_nouns,query_adjectives,jaccard,w2v_cosine,query_verbs,doc_nouns,polarity_doc,words_rel_difference
0,603195,7050012,1,0.899372,-24.655536,26,4.264462,22.236694,57.0,3,1,0.107143,0.972107,1,23,0.000000,9.142857
1,474183,325505,1,0.880772,-33.129796,27,2.659753,11.347487,44.0,4,0,0.200000,0.971866,0,18,0.450000,12.000000
2,320545,1751825,1,0.582573,-16.699603,18,3.666979,17.352688,69.0,2,1,0.133333,0.947701,1,20,0.500000,10.857143
3,89798,5069949,1,0.731121,-27.678576,35,5.243465,13.493497,80.0,3,1,0.117647,0.972710,0,25,0.066667,17.000000
4,1054603,2869106,1,0.774794,-28.497519,28,4.997087,14.369308,69.0,2,2,0.142857,0.965680,1,20,0.000000,12.500000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,128401,6127598,0,0.100652,-8.866170,18,5.340976,24.841764,63.0,2,1,0.038462,0.796978,0,16,-0.520833,22.000000
4996,1044540,4616118,0,0.476531,-7.852468,18,7.170364,17.812756,88.0,0,0,0.021739,0.922095,1,25,0.156250,30.333333
4997,486146,1137390,0,0.457971,-15.909103,32,4.112665,34.815641,38.0,2,0,0.090909,0.946438,2,12,-0.100000,8.600000
4998,532697,5161847,0,0.374877,-16.617979,32,4.341613,16.122506,55.0,3,1,0.095238,0.938939,0,18,0.284375,12.000000


### Logistic Regression

In [8]:
pipeline.evaluate(
    name='default_fs',
    model='lr', 
    pca=0
)

MRR: 0.04032739838075125


##### With Hyperparameter Optimization

In [20]:
logistic_regression_search_space: list = []
logistic_regression_search_space.append(Categorical(['l2', 'none'], name='penalty'))
logistic_regression_search_space.append(Real(0.1, 100.0, 'log-uniform', name='C'))
logistic_regression_search_space.append(Real(1e-6, 0.1, name='tol'))

In [21]:
pipeline.evaluate(
    name='hpo',
    model='lr', 
    pca=0, 
    search_space=logistic_regression_search_space,
    trials=80
)



Best MRR: 0.017502758796491415
Best Hyperparameters: ['l2', 0.17050983070448095, 0.010431082410692452]
MRR on test set: 0.04032525818680413


### Naive Bayes

In [9]:
pipeline.evaluate(
    name='default_fs',
    model='nbg', 
    pca=0
)

MRR: 0.03190777315304822


### Multi-layer Perceptron

In [10]:
pipeline.evaluate(
    name='default_fs',
    model='mlp', 
    pca=0
)

MRR: 0.03415704196147883


##### With Hyperparameter Optimization

In [22]:
mlp_search_space: list = []
mlp_search_space.append(Categorical(['identity', 'logistic', 'tanh', 'relu'], name='activation'))
mlp_search_space.append(Real(1e-6, 0.1, name='alpha'))
mlp_search_space.append(Real(1e-6, 0.1, name='learning_rate_init'))

In [23]:
pipeline.evaluate(
    name='hpo',
    model='mlp', 
    pca=0,
    search_space=mlp_search_space,
    trials=80
)



Best MRR: 0.021389911903328847
Best Hyperparameters: ['relu', 0.06182287613368023, 0.088131546212749]
MRR on test set: 0.04008374381599637


### Support Vector Machine

In [11]:
pipeline.evaluate(
    name='default_fs',
    model='svm', 
    pca=0
)

MRR: 0.03290961769836377


##### With Hyperparameter Optimization

In [18]:
svm_search_space: list = []
svm_search_space.append(Categorical(['poly', 'rbf', 'sigmoid'], name='kernel'))
svm_search_space.append(Real(0.1, 100.0, name='C'))

In [19]:
pipeline.evaluate(
    name='hpo',
    model='svm', 
    pca=0,
    search_space=svm_search_space,
    trials=10
)

Best MRR: 0.006591189472904975
Best Hyperparameters: ['poly', 96.43409248718199]
MRR on test set: 0.02065391377002617


### Decision Tree

In [12]:
pipeline.evaluate(
    name='default_fs',
    model='dt', 
    pca=0
)

MRR: 7.715325837812955e-05


##### With Hyperparameter Optimization

In [24]:
decision_tree_search_space: list = []
decision_tree_search_space.append(Categorical(['gini', 'entropy'], name='criterion'))
decision_tree_search_space.append(Integer(2, 15, name='min_samples_split'))
decision_tree_search_space.append(Integer(1, 10, name='min_samples_leaf'))
decision_tree_search_space.append(Integer(5, 100, name='max_leaf_nodes'))
decision_tree_search_space.append(Integer(10, 50, name='max_depth'))
decision_tree_search_space.append(Real(0.0, 0.2, name='min_weight_fraction_leaf'))

In [27]:
pipeline.evaluate(
    name='hpo',
    model='dt', 
    pca=0,
    search_space=decision_tree_search_space,
    trials=80
)

Best MRR: 0.014240579043972797
Best Hyperparameters: ['gini', 8, 1, 54, 50, 0.09589400710794578]
MRR on test set: 0.031025164291920405


### Random Forest

In [13]:
pipeline.evaluate(
    name='default_fs',
    model='rf', 
    pca=0
)

MRR: 0.01848330416627482


##### With Hyperparameter Optimization

In [31]:
random_forest_search_space: list = []
random_forest_search_space.append(Integer(20, 200, name='n_estimators'))
random_forest_search_space.append(Categorical(['gini', 'entropy'], name='criterion'))
random_forest_search_space.append(Integer(2, 15, name='min_samples_split'))
random_forest_search_space.append(Integer(1, 10, name='min_samples_leaf'))
random_forest_search_space.append(Integer(5, 100, name='max_leaf_nodes'))
random_forest_search_space.append(Integer(10, 50, name='max_depth'))
random_forest_search_space.append(Real(0.0, 0.2, name='min_weight_fraction_leaf'))

In [None]:
pipeline.evaluate(
    name='hpo',
    model='rf', 
    pca=0,
    search_space=random_forest_search_space,
    trials=80
)

### Adaptive Boosting

In [14]:
pipeline.evaluate(
    name='default_fs',
    model='ada', 
    pca=0
)

MRR: 0.03280269179184586


##### With Hyperparameter Optimization

In [None]:
ada_boost_search_space: list = []
ada_boost_search_space.append(Integer(20, 200, name='n_estimators'))
ada_boost_search_space.append(Real(1e-2, 2.0, name='learning_rate'))

In [None]:
pipeline.evaluate(
    name='hpo',
    model='ada', 
    pca=0,
    search_space=ada_boost_search_space,
    trials=80
)

### Gradient Boosting

In [15]:
pipeline.evaluate(
    name='default_fs',
    model='gb', 
    pca=0
)

MRR: 0.019908559703379892


##### With Hyperparameter Optimization

### Results

In [29]:
import pandas as pd
from src.utils.utils import load

results = load('data/results/results.pkl')

##### Default Settings

In [15]:
results[results['name'] == 'default']

Unnamed: 0,name,model,hyperparameters,pairwise_model,pairwise_k,features,sampling_training,sampling_test,pca,MRR,MAP,nDCG,accuracy,precision,recall,f1,accuracy@50,precision@50,recall@50,f1@50
432,default,LogisticRegression(random_state=42),"{'C': 1.0, 'class_weight': None, 'dual': False...",,,"[""w2v_cosine"", ""w2v_euclidean"", ""w2v_manhattan...",9977,451680,0,0.03806,0.86509,0.859446,0.997135,0.993724,0.268969,0.423351,0.690667,0.993289,0.490608,0.656805
433,default,GaussianNB(),"{'priors': None, 'var_smoothing': 1e-09}",,,"[""w2v_cosine"", ""w2v_euclidean"", ""w2v_manhattan...",9977,451680,0,0.036072,0.874882,0.875631,0.997173,0.995943,0.278029,0.434706,0.718,0.995851,0.532741,0.694143
434,default,MLPClassifier(random_state=42),"{'activation': 'relu', 'alpha': 0.0001, 'batch...",,,"[""w2v_cosine"", ""w2v_euclidean"", ""w2v_manhattan...",9977,451680,0,0.020241,0.559354,0.73408,0.996413,0.574642,0.318233,0.409621,0.676667,0.610231,0.752252,0.67384
435,default,"SVC(probability=True, random_state=42)","{'C': 1.0, 'break_ties': False, 'cache_size': ...",,,"[""w2v_cosine"", ""w2v_euclidean"", ""w2v_manhattan...",9977,451680,0,0.031905,0.775297,0.826087,0.997208,0.994129,0.287656,0.446201,0.748667,0.993776,0.561547,0.717603
436,default,DecisionTreeClassifier(random_state=42),"{'ccp_alpha': 0.0, 'class_weight': None, 'crit...",,,"[""w2v_cosine"", ""w2v_euclidean"", ""w2v_manhattan...",9977,451680,0,0.004835,0.327402,0.574378,0.996838,0.707617,0.326161,0.446512,0.848,0.698539,0.998102,0.821875
437,default,RandomForestClassifier(random_state=42),"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w...",,,"[""w2v_cosine"", ""w2v_euclidean"", ""w2v_manhattan...",9977,451680,0,0.01807,0.660944,0.794175,0.997419,0.991803,0.342582,0.509259,0.859333,0.992895,0.729765,0.841234
438,default,AdaBoostClassifier(random_state=42),"{'algorithm': 'SAMME.R', 'base_estimator': Non...",,,"[""w2v_cosine"", ""w2v_euclidean"", ""w2v_manhattan...",9977,451680,0,0.032657,0.827516,0.854278,0.997219,0.992278,0.291053,0.450088,0.718667,0.991667,0.532438,0.692868
439,default,GradientBoostingClassifier(random_state=42),"{'ccp_alpha': 0.0, 'criterion': 'friedman_mse'...",,,"[""w2v_cosine"", ""w2v_euclidean"", ""w2v_manhattan...",9977,451680,0,0.023998,0.723875,0.826181,0.99725,0.992481,0.298981,0.45953,0.774667,0.991903,0.59466,0.743551


##### Feature Selection and Default

In [17]:
results[results['name'] == 'default_fs']

Unnamed: 0,name,model,hyperparameters,pairwise_model,pairwise_k,features,sampling_training,sampling_test,pca,MRR,MAP,nDCG,accuracy,precision,recall,f1,accuracy@50,precision@50,recall@50,f1@50
440,default_fs,LogisticRegression(random_state=42),"{'C': 1.0, 'class_weight': None, 'dual': False...",,,"[""bert_cosine"", ""bm25"", ""char_query"", ""tfidf_m...",9977,451680,0,0.040327,0.883309,0.872551,0.997186,0.995992,0.281427,0.438852,0.696667,0.995662,0.503289,0.668609
441,default_fs,GaussianNB(),"{'priors': None, 'var_smoothing': 1e-09}",,,"[""bert_cosine"", ""bm25"", ""char_query"", ""tfidf_m...",9977,451680,0,0.031908,0.861935,0.865709,0.99698,0.995074,0.228766,0.372007,0.667333,0.995025,0.445931,0.615858
442,default_fs,MLPClassifier(random_state=42),"{'activation': 'relu', 'alpha': 0.0001, 'batch...",,,"[""bert_cosine"", ""bm25"", ""char_query"", ""tfidf_m...",9977,451680,0,0.034157,0.760575,0.834966,0.997306,0.958264,0.325028,0.485412,0.776,0.962162,0.628975,0.760684
443,default_fs,"SVC(probability=True, random_state=42)","{'C': 1.0, 'break_ties': False, 'cache_size': ...",,,"[""bert_cosine"", ""bm25"", ""char_query"", ""tfidf_m...",9977,451680,0,0.03291,0.799849,0.839896,0.997257,0.992523,0.30068,0.461538,0.741333,0.991935,0.561644,0.717201
444,default_fs,DecisionTreeClassifier(random_state=42),"{'ccp_alpha': 0.0, 'class_weight': None, 'crit...",,,"[""bert_cosine"", ""bm25"", ""char_query"", ""tfidf_m...",9977,451680,0,7.7e-05,0.411414,0.63484,0.997308,0.935127,0.334655,0.492911,0.973333,0.933444,1.0,0.965577
445,default_fs,RandomForestClassifier(random_state=42),"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w...",,,"[""bert_cosine"", ""bm25"", ""char_query"", ""tfidf_m...",9977,451680,0,0.018483,0.61713,0.774883,0.997396,0.994966,0.335787,0.502117,0.885333,0.994575,0.764951,0.86478
446,default_fs,AdaBoostClassifier(random_state=42),"{'algorithm': 'SAMME.R', 'base_estimator': Non...",,,"[""bert_cosine"", ""bm25"", ""char_query"", ""tfidf_m...",9977,451680,0,0.032803,0.828655,0.857594,0.997303,0.992806,0.312571,0.475452,0.744667,0.994231,0.576366,0.729711
447,default_fs,GradientBoostingClassifier(random_state=42),"{'ccp_alpha': 0.0, 'criterion': 'friedman_mse'...",,,"[""bert_cosine"", ""bm25"", ""char_query"", ""tfidf_m...",9977,451680,0,0.019909,0.677873,0.814548,0.997314,0.99287,0.315402,0.478728,0.831333,0.992366,0.676203,0.804331


##### Feature Selection and Hyperparameter Optimization

In [30]:
results[results['name'] == 'hpo']

Unnamed: 0,name,model,hyperparameters,pairwise_model,pairwise_k,features,sampling_training,sampling_test,pca,MRR,MAP,nDCG,accuracy,precision,recall,f1,accuracy@50,precision@50,recall@50,f1@50
448,hpo,"LogisticRegression(C=0.17050983070448095, rand...","{'C': 0.17050983070448095, 'class_weight': Non...",,,"[""bert_cosine"", ""bm25"", ""char_query"", ""tfidf_m...",9977,451680,0,0.040325,0.883308,0.872752,0.997186,0.995992,0.281427,0.438852,0.696667,0.995662,0.503289,0.668609
449,hpo,"MLPClassifier(alpha=0.06182287613368023, learn...","{'activation': 'relu', 'alpha': 0.061822876133...",,,"[""bert_cosine"", ""bm25"", ""char_query"", ""tfidf_m...",9977,451680,0,0.040084,0.887354,0.87382,0.997151,0.995859,0.272367,0.427746,0.69,0.995614,0.495093,0.661326
451,hpo,"DecisionTreeClassifier(max_depth=50, max_leaf_...","{'ccp_alpha': 0.0, 'class_weight': None, 'crit...",,,"[""bert_cosine"", ""bm25"", ""char_query"", ""tfidf_m...",9977,451680,0,0.031025,0.838926,0.852904,0.997591,0.992733,0.38675,0.556642,0.808,0.993569,0.685144,0.811024


##### Principal Component Analysis

In [19]:
results[results['name'] == 'default_pca']

Unnamed: 0,name,model,hyperparameters,pairwise_model,pairwise_k,features,sampling_training,sampling_test,pca,MRR,MAP,nDCG,accuracy,precision,recall,f1,accuracy@50,precision@50,recall@50,f1@50
