In [3]:
import matplotlib.pyplot as plt
from sklearn.metrics import PrecisionRecallDisplay
from sklearn.metrics import auc
import numpy as np
import json
import requests
import pandas as pd

QUERY_URL_BASIC = 'http://localhost:8982/solr/reviews/select?fl=score%20*&fq=rating_score%3A%20%5B8.0%20TO%2010.0%5D&indent=true&q.op=OR&q=genre%3Ajazz%5E2%0Areview_content%3Acalm%0Areview_content%3Aquiet%0Areview_content%3Aambient%0Areview_content%3Arelaxing%0Areview_content%3A%22classical%20music%22%5E2%0A!review_content%3A%22violent%22&rows=25'
QUERY_URL_ADVANCED = 'http://localhost:8984/solr/reviews/select?fl=score%20*&fq=rating_score%3A%20%5B8.0%20TO%2010.0%5D&indent=true&q.op=OR&q=genre%3Ajazz%5E2%0Areview_content%3Acalm%0Areview_content%3Aquiet%0Areview_content%3Aambient%0Areview_content%3Arelaxing%0Areview_content%3A%22classical%20music%22%5E2%0A!review_content%3A%22violent%22&rows=25'

QRELS_FILE_BASIC = f'qrels1_basic.txt'
QRELS_FILE_ADVANCED = f'qrels1_advanced.txt'

In [4]:
relevant_basic = list(map(lambda el: [int(el.strip())], open(QRELS_FILE_BASIC).readlines()))
results_basic = requests.get(QUERY_URL_BASIC).json()['response']['docs']

relevant_advanced = list(map(lambda el: [int(el.strip())], open(QRELS_FILE_ADVANCED).readlines()))
results_advanced = requests.get(QUERY_URL_ADVANCED).json()['response']['docs']


In [5]:
metrics = {}
metric = lambda f: metrics.setdefault(f.__name__, f)
print(metric)

<function <lambda> at 0x282a95f80>


In [6]:
@metric
def ap(results, relevant):
    """Average Precision"""
    for doc in results:
        print(doc['reviewid'])
    precision_values = [
        len([
            doc 
            for doc in results[:idx]
            if doc['reviewid'] in relevant
        ]) / idx 
        for idx in range(1, len(results))
    ]
    return sum(precision_values)/len(precision_values)


@metric
def p10(results, relevant, n=10):
    """Precision at N"""
    return len([doc for doc in results[:n] if doc['reviewid'] in relevant])/n

def calculate_metric(key, results, relevant):
    return metrics[key](results, relevant)

# Define metrics to be calculated
evaluation_metrics = {
    'ap': 'Average Precision',
    'p10': 'Precision at 10 (P@10)'
}

In [None]:
# Calculate all metrics and export results as LaTeX table
df_basic = pd.DataFrame([['Metric','Value']] +
    [
        [evaluation_metrics[m], calculate_metric(m, results_basic, relevant_basic)]
        for m in evaluation_metrics
    ]
)

with open(f'results_basic.tex','w') as tf:
    tf.write(df_basic.to_latex())

df_advanced = pd.DataFrame([['Metric','Value']] +
    [
        [evaluation_metrics[m], calculate_metric(m, results_advanced, relevant_advanced)]
        for m in evaluation_metrics
    ]
)

with open(f'results_advanced.tex','w') as tf:
    tf.write(df_advanced.to_latex())

In [None]:
# PRECISION-RECALL CURVE
# Calculate precision and recall values as we move down the ranked list
precision_values_basic = [
    len([
        doc 
        for doc in results_basic[:idx]
        if doc['reviewid'] in relevant_basic
    ]) / idx 
    for idx, _ in enumerate(results_basic, start=1)
]

recall_values_basic = [
    len([
        doc for doc in results_basic[:idx]
        if doc['reviewid'] in relevant_basic
    ]) / len(relevant)
    for idx, _ in enumerate(results_basic, start=1)
]

precision_values_advanced = [
    len([
        doc 
        for doc in results_advanced[:idx]
        if doc['reviewid'] in relevant_advanced
    ]) / idx 
    for idx, _ in enumerate(results_advanced, start=1)
]

recall_values_basic_advanced = [
    len([
        doc for doc in results_advanced[:idx]
        if doc['reviewid'] in relevant_advanced
    ]) / len(relevant)
    for idx, _ in enumerate(results_advanced, start=1)
]

In [None]:
precision_recall_match_basic = {k: v for k,v in zip(recall_values_basic, precision_values_basic)}
precision_recall_match_advanced = {k: v for k,v in zip(recall_values_advanced, precision_values_advanced)}

# Extend recall_values to include traditional steps for a better curve (0.1, 0.2 ...)
recall_values_basic.extend([step for step in np.arange(0.1, 1.1, 0.1) if step not in recall_values_basic])
recall_values_basic = sorted(set(recall_values_basic))

recall_values_advanced.extend([step for step in np.arange(0.1, 1.1, 0.1) if step not in recall_values_advanced])
recall_values_advanced = sorted(set(recall_values_advanced))

In [None]:
for idx, step in enumerate(recall_values_basic):
    if step not in precision_recall_match_basic:
        if recall_values_basic[idx-1] in precision_recall_match_basic:
            precision_recall_match_basic[step] = precision_recall_match_basic[recall_values_basic[idx-1]]
        else:
            precision_recall_match_basic[step] = precision_recall_match_basic[recall_values_basic[idx+1]]

for idx, step in enumerate(recall_values_advanced):
    if step not in precision_recall_match_advanced:
        if recall_values_advanced[idx-1] in precision_recall_match_advanced:
            precision_recall_match_advanced[step] = precision_recall_match_advanced[recall_values_advanced[idx-1]]
        else:
            precision_recall_match_advanced[step] = precision_recall_match_advanced[recall_values_advanced[idx+1]]



In [None]:
disp_basic = PrecisionRecallDisplay([precision_recall_match_basic.get(r) for r in recall_values_basic], recall_values_basic)
disp_advanced = PrecisionRecallDisplay([precision_recall_match_advanced.get(r) for r in recall_values_advanced], recall_values_advanced)

In [None]:
auc_precision_recall_basic = auc(disp.recall_basic,disp.precision_basic)
auc_precision_recall_advanced = auc(disp.recall_advanced,disp.precision_advanced)