# Evaluation Methods
In this notebook, I implemented the necessary methods to evaluate the performance of the models.

In [None]:
# Mount Google Drive so the project folder is reachable from this runtime
# (google.colab is only importable inside Colab).
from google.colab import drive
drive.mount('/content/drive')
# Project root on the mounted drive. The evaluation functions below read this
# global to build the paths of the result files under path + "/models".
path='/content/drive/MyDrive/RecSys_206894495'

Mounted at /content/drive


In [None]:
import numpy as np
import pandas as pd
import json

In [None]:
def evaluate_model(model, model_csr, user_index, n=10):
    """Ask ``model`` for the top-``n`` recommendations of one user.

    Args:
        model: Object exposing ``recommend(user_index, user_items, N=...)``.
            Presumably an ``implicit``-style model whose ``recommend`` returns
            an ``(ids, scores)`` pair -- TODO confirm against the caller.
        model_csr: Indexable container of per-user interactions; the entry at
            ``user_index`` is forwarded to ``model.recommend``.
        user_index: Index of the user to recommend for.
        n: Number of recommendations to request.

    Returns:
        The first element of whatever ``model.recommend`` returns.
    """
    interacted_items = model_csr[user_index]
    recommend_output = model.recommend(user_index, interacted_items, N=n)
    return recommend_output[0]

def get_outfit_id_from_index(outfit_indexes, outfit_dict):
    """Translate a sequence of outfit indexes into outfit ids.

    Args:
        outfit_indexes: Iterable of keys/positions to look up.
        outfit_dict: Mapping (or any indexable) from index to outfit id.

    Returns:
        A list with ``outfit_dict[index]`` for every index, in input order.
    """
    outfit_ids = []
    for index in outfit_indexes:
        outfit_ids.append(outfit_dict[index])
    return outfit_ids

In [None]:
# Evaluate the metrics at n for a single user

#Hit Rate at n - Hit rate at n is the proportion of users for whom the correct item appears in the top n predictions. It is calculated as the number of hits divided by the total number of users.
def evaluate_hit_rate_at_n(test_id, predicted_ids, n=10):
  """Return 1 if any ground-truth id is among the top-n predictions, else 0.

  Args:
    test_id: Ground truth; a single id (str / np.str_) or a list, tuple or
      numpy array of ids. May be missing (None / NaN).
    predicted_ids: Ranked predictions (sliceable sequence). May be missing.
    n: Number of leading predictions to consider.

  Returns:
    1 on a hit, 0 otherwise (including missing or empty inputs).

  Raises:
    ValueError: If ``test_id`` is a sized object of an unsupported type.
  """
  # A missing cell is not guaranteed to be the np.nan singleton (it can be
  # None or any other NaN float), so test the value instead of the identity.
  for value in (predicted_ids, test_id):
    if value is None or (isinstance(value, (float, np.floating)) and np.isnan(value)):
      return 0
  if len(predicted_ids) == 0 or len(test_id) == 0:
    return 0

  top_n = predicted_ids[:n]
  if isinstance(test_id, str):  # np.str_ is a str subclass
    return 1 if test_id in top_n else 0
  if isinstance(test_id, (list, tuple, np.ndarray)):
    return 1 if any(outfit_id in top_n for outfit_id in test_id) else 0
  raise ValueError(f"Unknown type {type(test_id)}")
### Precision at n - Precision at n is the number of relevant items in the top n predictions divided by n.

def evaluate_precision_at_n(test_id, predicted_ids, n=10):
  """Precision@n: ground-truth ids found in the top-n predictions, divided by n.

  Args:
    test_id: Ground truth; a single id (str / np.str_) or a list, tuple or
      numpy array of ids. May be missing (None / NaN).
    predicted_ids: Ranked predictions (sliceable sequence). May be missing.
    n: Cut-off; also the denominator.

  Returns:
    ``relevant / n``; 0 for missing or empty inputs or a non-positive ``n``.

  Raises:
    ValueError: If ``test_id`` is a sized object of an unsupported type.
  """
  # A missing cell is not guaranteed to be the np.nan singleton (it can be
  # None or any other NaN float), so test the value instead of the identity.
  for value in (predicted_ids, test_id):
    if value is None or (isinstance(value, (float, np.floating)) and np.isnan(value)):
      return 0
  if len(predicted_ids) == 0 or len(test_id) == 0:
    return 0
  if n <= 0:
    # Nothing is inspected, and dividing by n would fail.
    return 0

  top_n = predicted_ids[:n]
  if isinstance(test_id, str):  # np.str_ is a str subclass
    # A single ground-truth id is scored exactly like a one-element list, so
    # a hit contributes 1/n rather than 1.
    relevant_items = 1 if test_id in top_n else 0
  elif isinstance(test_id, (list, tuple, np.ndarray)):
    relevant_items = sum(1 for item in test_id if item in top_n)
  else:
    raise ValueError(f"Unknown type {type(test_id)}")
  return relevant_items / n

### Recall at n - Recall at n is the number of relevant items in the top n predictions divided by the number of relevant items.

def evaluate_recall_at_n(test_id, predicted_ids, n=10):
  """Recall@n: share of the ground-truth ids found in the top-n predictions.

  Args:
    test_id: Ground truth; a single id (str / np.str_) or a list, tuple or
      numpy array of ids. May be missing (None / NaN).
    predicted_ids: Ranked predictions (sliceable sequence). May be missing.
    n: Number of leading predictions to consider.

  Returns:
    ``relevant / len(test_id)`` for a collection, 1 or 0 for a single id,
    and 0 for missing or empty inputs.

  Raises:
    ValueError: If ``test_id`` is a sized object of an unsupported type.
  """
  # A missing cell is not guaranteed to be the np.nan singleton (it can be
  # None or any other NaN float), so test the value instead of the identity.
  for value in (predicted_ids, test_id):
    if value is None or (isinstance(value, (float, np.floating)) and np.isnan(value)):
      return 0
  if len(predicted_ids) == 0 or len(test_id) == 0:
    return 0

  top_n = predicted_ids[:n]
  if isinstance(test_id, str):  # np.str_ is a str subclass
    return 1 if test_id in top_n else 0
  if isinstance(test_id, (list, tuple, np.ndarray)):
    relevant_items = sum(1 for item in test_id if item in top_n)
    # len(test_id) is non-zero here thanks to the guard above.
    return relevant_items / len(test_id)
  raise ValueError(f"Unknown type {type(test_id)}")

### F1 Score at n - F1 score is the harmonic mean of precision and recall.

def evaluate_f1_score_at_n(test_id, predicted_ids, n=10):
  """F1@n: harmonic mean of precision@n and recall@n.

  Args:
    test_id: Ground truth id or collection of ids. May be missing (None / NaN).
    predicted_ids: Ranked predictions. May be missing.
    n: Cut-off forwarded to the precision and recall functions.

  Returns:
    ``2 * p * r / (p + r)``, or 0 when the inputs are missing or empty or
    when both precision and recall are 0.
  """
  # A missing cell is not guaranteed to be the np.nan singleton (it can be
  # None or any other NaN float), so test the value instead of the identity.
  for value in (predicted_ids, test_id):
    if value is None or (isinstance(value, (float, np.floating)) and np.isnan(value)):
      return 0
  if len(predicted_ids) == 0 or len(test_id) == 0:
    return 0

  precision = evaluate_precision_at_n(test_id, predicted_ids, n)
  recall = evaluate_recall_at_n(test_id, predicted_ids, n)
  if precision + recall == 0:
    return 0
  return 2 * (precision * recall) / (precision + recall)



In [None]:
# Function to evaluate all metrics and save results to an Excel file
def evaluate_df_metrics_at_n(df, method_name, n=10):
    """Score test-set predictions row by row and record the averages in results.xlsx.

    For every row of ``df`` the hit rate, precision, recall and F1 at ``n`` are
    computed twice: ``test_outfit_ids`` against ``id_prediction`` and
    ``test_group`` against ``group_prediction``. The per-row scores are stored
    in ``df`` as new columns named ``{id|group}_{metric}_at_{n}`` (``df`` is
    modified in place). The column means, together with ``method_name``, are
    displayed and written to ``path + "/models/results.xlsx"``: an existing
    row with the same method name is updated, otherwise a row is appended.

    Args:
        df: DataFrame holding the four input columns named above.
        method_name: Label identifying the evaluated method in the results file.
        n: Cut-off used both for the metrics and for the column names.

    Returns:
        ``(df, result_dict)`` where ``result_dict`` maps each metric column to
        its mean and ``'method_name'`` to ``[method_name]``.
    """
    metric_functions = {
        "hit_rate": evaluate_hit_rate_at_n,
        "precision": evaluate_precision_at_n,
        "recall": evaluate_recall_at_n,
        "f1_score": evaluate_f1_score_at_n,
    }
    # (column prefix, ground-truth column, prediction column)
    targets = [
        ("id", "test_outfit_ids", "id_prediction"),
        ("group", "test_group", "group_prediction"),
    ]

    metric_columns = []
    for prefix, truth_col, prediction_col in targets:
        for metric_name, metric_fn in metric_functions.items():
            column = f"{prefix}_{metric_name}_at_{n}"
            # Default arguments bind the current loop values to the lambda.
            df[column] = df.apply(
                lambda row, fn=metric_fn, truth=truth_col, pred=prediction_col:
                    fn(row[truth], row[pred], n=n),
                axis=1,
            )
            metric_columns.append(column)

    result_dict = {column: df[column].mean() for column in metric_columns}
    result_dict['method_name'] = [method_name]
    result_df = pd.DataFrame(result_dict)

    display(result_df.T)

    results_file = path + "/models/results.xlsx"
    try:
        existing_df = pd.read_excel(results_file)
    except FileNotFoundError:
        # First run: the new row becomes the whole results table.
        updated_df = result_df
    else:
        same_method = existing_df['method_name'] == method_name
        if same_method.any():
            # Update column by column (by name) so the result does not depend
            # on the column order of the stored sheet.
            for column in result_df.columns:
                existing_df.loc[same_method, column] = result_df[column].iloc[0]
            updated_df = existing_df
        else:
            updated_df = pd.concat([existing_df, result_df], ignore_index=True)

    updated_df.to_excel(results_file, index=False)

    return df, result_dict

In [None]:
def evaluate_val_metrics_at_n(df, method_name, n=10, model_params=None):
    """Score validation predictions row by row and append the averages to val.json.

    For every row of ``df`` the hit rate, precision, recall and F1 at ``n`` are
    computed twice: ``val_outfit_ids`` against ``id_prediction`` and
    ``val_group`` against ``group_prediction``. The per-row scores are stored
    in ``df`` as new columns named ``{id|group}_{metric}_at_{n}`` (``df`` is
    modified in place). The column means, ``method_name`` and ``model_params``
    are appended as one record to the JSON list in
    ``path + "/models/val.json"``.

    Args:
        df: DataFrame holding the four input columns named above.
        method_name: Label identifying the evaluated method.
        n: Cut-off used both for the metrics and for the column names.
        model_params: JSON-serializable hyper-parameters stored with the
            record; defaults to an empty dict.

    Returns:
        ``(df, result_dict)`` where ``result_dict`` is the appended record.
    """
    if model_params is None:
        model_params = {}

    metric_functions = {
        "hit_rate": evaluate_hit_rate_at_n,
        "precision": evaluate_precision_at_n,
        "recall": evaluate_recall_at_n,
        "f1_score": evaluate_f1_score_at_n,
    }
    # (column prefix, ground-truth column, prediction column)
    targets = [
        ("id", "val_outfit_ids", "id_prediction"),
        ("group", "val_group", "group_prediction"),
    ]

    metric_columns = []
    for prefix, truth_col, prediction_col in targets:
        for metric_name, metric_fn in metric_functions.items():
            column = f"{prefix}_{metric_name}_at_{n}"
            # Default arguments bind the current loop values to the lambda.
            df[column] = df.apply(
                lambda row, fn=metric_fn, truth=truth_col, pred=prediction_col:
                    fn(row[truth], row[pred], n=n),
                axis=1,
            )
            metric_columns.append(column)

    result_dict = {column: df[column].mean() for column in metric_columns}
    result_dict['method_name'] = method_name
    result_dict['model_params'] = model_params  # Include model parameters in the result

    file_path = path + "/models/val.json"

    # A missing file simply means no results have been stored yet.
    try:
        with open(file_path, 'r') as file:
            existing_data = json.load(file)
    except FileNotFoundError:
        existing_data = []

    # Ensure existing_data is a list
    if not isinstance(existing_data, list):
        existing_data = [existing_data]

    existing_data.append(result_dict)

    # Serialize before opening for writing: opening with 'w' truncates the
    # file, so a serialization error must not be able to wipe stored results.
    serialized = json.dumps(existing_data)
    with open(file_path, 'w') as file:
        file.write(serialized)

    return df, result_dict


In [None]:
def evaluate_val_metrics_at_n_outfit(df, method_name, n=10, model_params=None):
    """Score outfit-id validation predictions and append the averages to val_outfit.json.

    For every row of ``df`` the hit rate, precision, recall and F1 at ``n`` of
    ``id_prediction`` against ``val_outfit_ids`` are stored in new columns
    named ``id_{metric}_at_{n}`` (``df`` is modified in place). The column
    means, ``method_name`` and ``model_params`` are appended as one record to
    the JSON list in ``path + "/models/val_outfit.json"``.

    Args:
        df: DataFrame holding ``val_outfit_ids`` and ``id_prediction``.
        method_name: Label identifying the evaluated method.
        n: Cut-off used both for the metrics and for the column names.
        model_params: JSON-serializable hyper-parameters stored with the
            record; defaults to an empty dict.

    Returns:
        ``(df, result_dict)`` where ``result_dict`` is the appended record.
    """
    if model_params is None:
        model_params = {}

    metric_functions = {
        "hit_rate": evaluate_hit_rate_at_n,
        "precision": evaluate_precision_at_n,
        "recall": evaluate_recall_at_n,
        "f1_score": evaluate_f1_score_at_n,
    }

    metric_columns = []
    for metric_name, metric_fn in metric_functions.items():
        column = f"id_{metric_name}_at_{n}"
        # The default argument binds the current metric function to the lambda.
        df[column] = df.apply(
            lambda row, fn=metric_fn: fn(row["val_outfit_ids"], row["id_prediction"], n=n),
            axis=1,
        )
        metric_columns.append(column)

    result_dict = {column: df[column].mean() for column in metric_columns}
    result_dict['method_name'] = method_name
    result_dict['model_params'] = model_params  # Include model parameters in the result

    file_path = path + "/models/val_outfit.json"

    # A missing file simply means no results have been stored yet.
    try:
        with open(file_path, 'r') as file:
            existing_data = json.load(file)
    except FileNotFoundError:
        existing_data = []

    # Ensure existing_data is a list
    if not isinstance(existing_data, list):
        existing_data = [existing_data]

    existing_data.append(result_dict)

    # Serialize before opening for writing: opening with 'w' truncates the
    # file, so a serialization error must not be able to wipe stored results.
    serialized = json.dumps(existing_data)
    with open(file_path, 'w') as file:
        file.write(serialized)

    return df, result_dict


In [None]:
def evaluate_val_metrics_at_n_group(df, method_name, n=10, model_params=None):
    """Score group validation predictions and append the averages to val_group.json.

    For every row of ``df`` the hit rate, precision, recall and F1 at ``n`` of
    ``group_prediction`` against ``val_group`` are stored in new columns named
    ``group_{metric}_at_{n}`` (``df`` is modified in place). The column means,
    ``method_name`` and ``model_params`` are appended as one record to the
    JSON list in ``path + "/models/val_group.json"``.

    Args:
        df: DataFrame holding ``val_group`` and ``group_prediction``.
        method_name: Label identifying the evaluated method.
        n: Cut-off used both for the metrics and for the column names.
        model_params: JSON-serializable hyper-parameters stored with the
            record; defaults to an empty dict.

    Returns:
        ``(df, result_dict)`` where ``result_dict`` is the appended record.
    """
    if model_params is None:
        model_params = {}

    metric_functions = {
        "hit_rate": evaluate_hit_rate_at_n,
        "precision": evaluate_precision_at_n,
        "recall": evaluate_recall_at_n,
        "f1_score": evaluate_f1_score_at_n,
    }

    metric_columns = []
    for metric_name, metric_fn in metric_functions.items():
        column = f"group_{metric_name}_at_{n}"
        # The default argument binds the current metric function to the lambda.
        df[column] = df.apply(
            lambda row, fn=metric_fn: fn(row["val_group"], row["group_prediction"], n=n),
            axis=1,
        )
        metric_columns.append(column)

    result_dict = {column: df[column].mean() for column in metric_columns}
    result_dict['method_name'] = method_name
    result_dict['model_params'] = model_params  # Include model parameters in the result

    file_path = path + "/models/val_group.json"

    # A missing file simply means no results have been stored yet.
    try:
        with open(file_path, 'r') as file:
            existing_data = json.load(file)
    except FileNotFoundError:
        existing_data = []

    # Ensure existing_data is a list
    if not isinstance(existing_data, list):
        existing_data = [existing_data]

    existing_data.append(result_dict)

    # Serialize before opening for writing: opening with 'w' truncates the
    # file, so a serialization error must not be able to wipe stored results.
    serialized = json.dumps(existing_data)
    with open(file_path, 'w') as file:
        file.write(serialized)

    return df, result_dict


In [None]:
def retrieve_best_model_params_from_file_outfit(method_name, n):
    """Return the stored parameters with the best outfit-id hit rate for a method.

    Reads ``path + "/models/val_outfit.json"`` and, among the records whose
    ``method_name`` matches, picks the one with the highest
    ``id_hit_rate_at_{n}``. The first record wins on ties.

    Args:
        method_name: Method label to filter the stored records by.
        n: Cut-off selecting which hit-rate key is compared.

    Returns:
        The ``model_params`` value of the best matching record.

    Raises:
        ValueError: If no record for ``method_name`` carries the requested
            hit-rate key with a value above -1.
    """
    file_path_outfit = path + "/models/val_outfit.json"
    with open(file_path_outfit, 'r') as file:
        results = json.load(file)

    # A file holding a single JSON object is treated as one record instead of
    # being iterated key by key.
    if not isinstance(results, list):
        results = [results]

    metric_key = f"id_hit_rate_at_{n}"
    best_hit_rate = -1
    best_params = None
    found = False  # separate flag: a stored model_params may itself be empty
    for result in results:
        if not isinstance(result, dict):
            print(f"Unexpected type: {type(result)}")
            continue
        # Skip empty placeholders, other methods, and records evaluated at a
        # different cut-off (they lack this key).
        if result.get('method_name') != method_name or metric_key not in result:
            continue
        if result[metric_key] > best_hit_rate:
            best_hit_rate = result[metric_key]
            best_params = result.get('model_params')
            found = True

    if not found:
        raise ValueError(
            f"No validation result with '{metric_key}' found for method "
            f"'{method_name}' in {file_path_outfit}"
        )
    return best_params

In [None]:
def retrieve_best_model_params_from_file_group(method_name, n):
    """Return the stored parameters with the best group hit rate for a method.

    Reads ``path + "/models/val_group.json"`` and, among the records whose
    ``method_name`` matches, picks the one with the highest
    ``group_hit_rate_at_{n}``. The first record wins on ties.

    Args:
        method_name: Method label to filter the stored records by.
        n: Cut-off selecting which hit-rate key is compared.

    Returns:
        The ``model_params`` value of the best matching record.

    Raises:
        ValueError: If no record for ``method_name`` carries the requested
            hit-rate key with a value above -1.
    """
    file_path_group = path + "/models/val_group.json"
    with open(file_path_group, 'r') as file:
        results = json.load(file)

    # A file holding a single JSON object is treated as one record instead of
    # being iterated key by key.
    if not isinstance(results, list):
        results = [results]

    metric_key = f"group_hit_rate_at_{n}"
    best_hit_rate = -1
    best_params = None
    found = False  # separate flag: a stored model_params may itself be empty
    for result in results:
        if not isinstance(result, dict):
            print(f"Unexpected type: {type(result)}")
            continue
        # Skip empty placeholders, other methods, and records evaluated at a
        # different cut-off (they lack this key).
        if result.get('method_name') != method_name or metric_key not in result:
            continue
        if result[metric_key] > best_hit_rate:
            best_hit_rate = result[metric_key]
            best_params = result.get('model_params')
            found = True

    if not found:
        raise ValueError(
            f"No validation result with '{metric_key}' found for method "
            f"'{method_name}' in {file_path_group}"
        )
    return best_params

In [None]:
# One-off reset snippet, kept disabled inside a string literal so that running
# this cell does not touch any file. If executed as code it would overwrite
# val.json, val_outfit.json and val_group.json under path + "/models" with an
# empty JSON object ({}), discarding every stored validation result.
"""empty_dict = {}
with open(path + "/models/val.json", 'w') as json_file:
    json.dump(empty_dict, json_file)
with open(path + "/models/val_outfit.json", 'w') as json_file:
    json.dump(empty_dict, json_file)
with open(path + "/models/val_group.json", 'w') as json_file:
    json.dump(empty_dict, json_file)

print('empty json')"""

empty json
