# precision@k

In [None]:
def precision_at_k(expected, actual, k=40):
    """
    Calculate precision@k.

    Parameters:
    -----------
    expected: list of expected relevant documents, as doc_ids.
    actual: list of the actual documents returned, as doc_ids, sorted by score.
    k: integer, the number of top-ranked returned docs to consider.

    Returns:
    -----------
    precision@k with 3 digits after the decimal point, or 0.0 when k is not
    positive.
    """
    if k <= 0:
        # No cutoff to measure against; also avoids dividing by zero.
        return 0.0

    top_k = actual[:k]
    # A doc_id repeated inside the top k counts once, because a set is used.
    overlap = set(top_k).intersection(expected)

    # The denominator is k itself, even when fewer than k docs were returned.
    return round(len(overlap) / k, 3)

# recall@k

In [None]:
def recall(expected, actual, k=40):
    """
    Calculate recall@k.

    Parameters:
    -----------
    expected: list of expected relevant documents, as doc_ids.
    actual: list of the actual documents returned, as doc_ids, sorted by score.
    k: integer, the number of top-ranked returned docs to consider.

    Returns:
    -----------
    recall@k with 3 digits after the decimal point, or 0.0 when there are no
    expected documents.
    """
    relevant_total = len(expected)
    if relevant_total == 0:
        # Nothing to find; report 0.0 instead of dividing by zero.
        return 0.0

    top_k = actual[:k]
    # A doc_id repeated inside the top k counts once, because a set is used.
    overlap = set(top_k).intersection(expected)

    return round(len(overlap) / relevant_total, 3)

# map@k

In [None]:
def map_at_K(expected, actual, k=40):
    """
    Calculate mean average_precision@k (precision in every recall point).

    For each query, precision is taken at every rank (within the top k) that
    holds a relevant doc_id, and those precisions are averaged. A query with
    no relevant doc_id in its top k contributes 0.0. The per-query averages
    are then averaged over all queries.

    Parameters:
    -----------
    expected: list of lists containing expected results for queries, as doc_id.
    actual: list of lists containing sorted actual results for queries, as doc_id, sorted by score. Must be ordered like the expected list.
    k: integer, the number of top-ranked returned docs to consider per query.

    Returns:
    -----------
    float, average precision@k with 3 digits after the decimal point.
    Returns 0.0 when expected is empty.
    """
    if not expected:
        return 0.0

    avg_precision = []
    for i, q_expected in enumerate(expected):
        relevant = set(q_expected)
        # Distinct relevant doc_ids seen so far; its size is the numerator of
        # precision at the current rank.
        found = set()
        precision_sum, hits = 0.0, 0
        for rank, doc_id in enumerate(actual[i][:k], start=1):
            if doc_id in relevant:
                hits += 1
                found.add(doc_id)
                # Each precision value is rounded to 3 digits before summing.
                precision_sum += round(len(found) / rank, 3)
        # No relevant doc in the top k: score the query 0.0 rather than
        # dividing by zero.
        avg_precision.append(precision_sum / hits if hits else 0.0)

    return round(sum(avg_precision) / len(avg_precision), 3)

# plot for map@k

In [None]:
import matplotlib.pyplot as plt

def plot_map_at_K_for_different_k(expected, actual, k):
    """
    Draw map@k against k and hand back the plotted values.

    Parameters:
    ----------
    expected: list of lists containing expected results for queries, as doc_id.
    actual: list of lists containing sorted actual results for queries, as doc_id, sorted by score.
    k: list of integers, the cutoffs to evaluate.

    Returns:
    ----------
    List with one map@k value per entry of k, in the same order.
    """
    # One map@k score per requested cutoff.
    map_scores = [map_at_K(expected, actual, cutoff) for cutoff in k]

    line_style = dict(color='m', linewidth=3, marker='o', markersize=8)
    plt.plot(k, map_scores, **line_style)
    plt.xlabel('k')
    plt.ylabel('actual')
    plt.show()

    return map_scores

# plot average time for different versions of the engine

In [None]:
def plot_version_time(times):
    """
    Plot one time measurement per engine version and label every point.

    Parameters:
    ----------
    times: list of time measurements; the entry at position i is drawn at
        x = i + 1.
    """
    versions = list(range(1, len(times) + 1))

    line_style = dict(color='tan', linewidth=3, marker='o', markersize=8)
    plt.plot(versions, times, **line_style)
    plt.xticks(versions)
    plt.xlabel('Engine versions')
    plt.ylabel('Times measurements')

    # Write each measured value next to its marker.
    for version, measured in zip(versions, times):
        plt.text(version, measured, str(measured))

    plt.show()

# plot recall@k

In [None]:
def plot_recall_for_different_k(expected, actual, k):
    """
    Plot recall@k.

    Parameters:
    ----------
    expected: list of lists containing expected results for queries, as doc_id.
        A single flat list of doc_ids (one query) is also accepted.
    actual: list of lists containing sorted actual results for queries, as doc_id, sorted by score.
        Must be ordered like the expected list (or be a flat list when expected is flat).
    k: list of integers of different k.

    Returns:
    ----------
    List of recall@k for each k received. For several queries each value is
    the mean of the per-query recall@k, with 3 digits after the decimal point.
    """
    # recall() scores a single query. Handing it a list of lists fails because
    # lists are unhashable, so multi-query input is scored per query and averaged.
    multi_query = (
        isinstance(expected, (list, tuple))
        and len(expected) > 0
        and isinstance(expected[0], list)
    )

    values = []
    for k_val in k:
        if multi_query:
            per_query = [
                recall(q_expected, actual[i], k_val)
                for i, q_expected in enumerate(expected)
            ]
            values.append(round(sum(per_query) / len(per_query), 3))
        else:
            values.append(recall(expected, actual, k_val))

    plt.plot(k, values, color='m', linewidth=3, marker='o', markersize=8)
    plt.xlabel('k')
    plt.ylabel('actual')
    plt.show()
    return values