# Evaluation: Pearson similarity and ITR similarity

In [1]:
import pandas as pd
import numpy as np
from tabulate import tabulate
import csv

## Load Data

In [2]:
# Read the raw ratings table; the pivot below uses its userId, movieId and rating columns.
ratings_path = '../dataset/ratings.csv'
ratings_df = pd.read_csv(ratings_path)

In [3]:
# User-item matrix: one row per userId, one column per movieId, cells hold the rating.
# (user, movie) pairs with no rating come out as NaN.
user_item_matrix = ratings_df.pivot_table(
    values='rating',
    index='userId',
    columns='movieId',
)
user_item_matrix

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,,4.0,,,4.0,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,4.0,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,2.5,,,,,,2.5,,,,...,,,,,,,,,,
607,4.0,,,,,,,,,,...,,,,,,,,,,
608,2.5,2.0,2.0,,,,,,,4.0,...,,,,,,,,,,
609,3.0,,,,,,,,,4.0,...,,,,,,,,,,


## Ratings of user 1

In [4]:
# Ratings given by user 1: a Series indexed by movieId with the unrated (NaN) movies dropped.
rated_items = user_item_matrix.loc[1].dropna()
# The same rating values as a plain list, in the Series' order.
ratings = rated_items.tolist()


## Prediction accuracy of `predict_rating` (without abs), considering all users in the prediction formula

In [5]:
from utils.predict_ratings import predict_rating, predict_rating_with_abs
from utils.similarity_metrics import compute_user_similarity_with_ITR_all_users,compute_user_similarity_with_pearson_correlation_all_users

In [6]:
# Compute both user-user similarity structures once from the user-item matrix;
# the evaluation cells below pass them to the prediction helpers.
# NOTE(review): the return type/shape is defined in utils.similarity_metrics and is
# not visible here — confirm before indexing into these objects directly.
similarities_pearson_correlation = compute_user_similarity_with_pearson_correlation_all_users(user_item_matrix)
similarities_ITR_correlation = compute_user_similarity_with_ITR_all_users(user_item_matrix)

In [8]:
# Compare predict_rating with Pearson vs. ITR similarities on every item user 1 has rated.
# Each row of `results` is [item_id, true_value, pearson_prediction, itr_prediction].
results = []
score_ps = 0   # items where the Pearson-based prediction is strictly closer to the true rating
score_itr = 0  # items where the ITR-based prediction is strictly closer to the true rating

# Iterate the (movieId, rating) pairs of the Series directly, so the true value
# can never drift out of step with the item id (no parallel list / manual counter).
for item_id, true_value in rated_items.items():
    # Last argument 0: per this section's heading this selects "all users";
    # NOTE(review): confirm the flag's meaning against utils.predict_ratings.
    # Prediction with Pearson similarity
    predicted_rating_pearson = predict_rating(1, item_id, user_item_matrix, similarities_pearson_correlation, 0)
    # Prediction with ITR similarity
    predicted_rating_itr = predict_rating(1, item_id, user_item_matrix, similarities_ITR_correlation, 0)

    results.append([item_id, true_value, predicted_rating_pearson, predicted_rating_itr])

    diff_pearson = abs(predicted_rating_pearson - true_value)
    diff_itr = abs(predicted_rating_itr - true_value)

    # Strict comparisons: an exact tie scores for neither method.
    if diff_pearson < diff_itr:
        score_ps += 1
    elif diff_itr < diff_pearson:
        score_itr += 1

print("Score Pearson similarity: ", score_ps)
print("Score itr similarity: ", score_itr)
print(tabulate(results, headers=["Item ID", "True Value", "Pearson Similarity Prediction", "ITR Similarity Prediction"], tablefmt="grid"))


Score person similarity:  144
Score itr similarity:  87
+-----------+--------------+---------------------------------+-----------------------------+
|   Item ID |   True Value |   Pearson Similarity Prediction |   ITR Similarity Prediction |
|         1 |            4 |                         4.43837 |                     4.65569 |
+-----------+--------------+---------------------------------+-----------------------------+
|         3 |            4 |                         3.35874 |                     4.08764 |
+-----------+--------------+---------------------------------+-----------------------------+
|         6 |            4 |                         4.47425 |                     4.5783  |
+-----------+--------------+---------------------------------+-----------------------------+
|        47 |            5 |                         5.202   |                     4.72959 |
+-----------+--------------+---------------------------------+-----------------------------+
|        50 | 

In [10]:
file_name = "predictions.csv"

# Persist the per-item comparison held in `results`.
# Each row is [item_id, true_value, pearson_prediction, itr_prediction], so the
# header must name all four columns; the previous three-column header omitted
# "True Value" and mislabeled every column after "Item ID".
# NOTE(review): `results` is reassigned by the neighborhood evaluation cell as well;
# run this cell right after the "all users" evaluation so the right data is exported.
# newline="" lets the csv module control line endings itself.
with open(file_name, "w", newline="") as csvfile:
    writer = csv.writer(csvfile)
    # header
    writer.writerow(["Item ID", "True Value", "Pearson Similarity Prediction", "ITR Similarity Prediction"])
    # data rows
    writer.writerows(results)

print(f"Results saved '{file_name}'.")

Results saved 'predictions.csv'.


## Prediction accuracy of `predict_rating` (without abs), considering only the most similar users in the prediction formula (neighborhood)

In [12]:
# Compare predict_rating with Pearson vs. ITR similarities on every item user 1 has rated.
# Each row of `results` is [item_id, true_value, pearson_prediction, itr_prediction].
results = []
score_ps = 0   # items where the Pearson-based prediction is strictly closer to the true rating
score_itr = 0  # items where the ITR-based prediction is strictly closer to the true rating

# Iterate the (movieId, rating) pairs of the Series directly, so the true value
# can never drift out of step with the item id (no parallel list / manual counter).
for item_id, true_value in rated_items.items():
    # Last argument 1: per this section's heading this selects the "most similar users"
    # neighborhood; NOTE(review): confirm the flag's meaning against utils.predict_ratings.
    # Prediction with Pearson similarity
    predicted_rating_pearson = predict_rating(1, item_id, user_item_matrix, similarities_pearson_correlation, 1)
    # Prediction with ITR similarity
    predicted_rating_itr = predict_rating(1, item_id, user_item_matrix, similarities_ITR_correlation, 1)

    results.append([item_id, true_value, predicted_rating_pearson, predicted_rating_itr])

    diff_pearson = abs(predicted_rating_pearson - true_value)
    diff_itr = abs(predicted_rating_itr - true_value)

    # Strict comparisons: an exact tie scores for neither method.
    if diff_pearson < diff_itr:
        score_ps += 1
    elif diff_itr < diff_pearson:
        score_itr += 1

print("Score Pearson similarity: ", score_ps)
print("Score itr similarity: ", score_itr)

print(tabulate(results, headers=["Item ID", "True Value", "Pearson Similarity Prediction", "ITR Similarity Prediction"], tablefmt="grid"))

Score person similarity:  91
Score itr similarity:  140
+-----------+--------------+---------------------------------+-----------------------------+
|   Item ID |   True Value |   Pearson Similarity Prediction |   ITR Similarity Prediction |
|         1 |            4 |                         4.53151 |                     4.56465 |
+-----------+--------------+---------------------------------+-----------------------------+
|         3 |            4 |                         4.11498 |                     4.20467 |
+-----------+--------------+---------------------------------+-----------------------------+
|         6 |            4 |                         4.56297 |                     4.46715 |
+-----------+--------------+---------------------------------+-----------------------------+
|        47 |            5 |                         4.94213 |                     4.57481 |
+-----------+--------------+---------------------------------+-----------------------------+
|        50 | 

In [14]:
file_name = "predictions_most_similar.csv"

# Persist the per-item comparison held in `results`.
# Each row is [item_id, true_value, pearson_prediction, itr_prediction], so the
# header must name all four columns; the previous three-column header omitted
# "True Value" and mislabeled every column after "Item ID".
# NOTE(review): `results` is also assigned by the "all users" evaluation cell;
# run this cell right after the neighborhood evaluation so the right data is exported.
# newline="" lets the csv module control line endings itself.
with open(file_name, "w", newline="") as csvfile:
    writer = csv.writer(csvfile)
    # header
    writer.writerow(["Item ID", "True Value", "Pearson Similarity Prediction", "ITR Similarity Prediction"])
    # data rows
    writer.writerows(results)

print(f"Results saved '{file_name}'.")

Results saved 'predictions_most_similar.csv'.


## Prediction accuracy of `predict_rating_with_abs`, considering all users in the prediction formula

In [15]:
# Compare predict_rating_with_abs with Pearson vs. ITR similarities on every item user 1 has rated.
# Each row of `results_2` is [item_id, true_value, pearson_prediction, itr_prediction].
results_2 = []
score_ps = 0   # items where the Pearson-based prediction is strictly closer to the true rating
score_itr = 0  # items where the ITR-based prediction is strictly closer to the true rating

# Iterate the (movieId, rating) pairs of the Series directly, so the true value
# can never drift out of step with the item id (no parallel list / manual counter).
for item_id, true_value in rated_items.items():
    # Last argument 0: per this section's heading this selects "all users";
    # NOTE(review): confirm the flag's meaning against utils.predict_ratings.
    # Prediction with Pearson similarity
    predicted_rating_pearson = predict_rating_with_abs(1, item_id, user_item_matrix, similarities_pearson_correlation, 0)
    # Prediction with ITR similarity
    predicted_rating_itr = predict_rating_with_abs(1, item_id, user_item_matrix, similarities_ITR_correlation, 0)

    results_2.append([item_id, true_value, predicted_rating_pearson, predicted_rating_itr])

    diff_pearson = abs(predicted_rating_pearson - true_value)
    diff_itr = abs(predicted_rating_itr - true_value)

    # Strict comparisons: an exact tie scores for neither method.
    if diff_pearson < diff_itr:
        score_ps += 1
    elif diff_itr < diff_pearson:
        score_itr += 1

print("Score Pearson similarity: ", score_ps)
print("Score itr similarity: ", score_itr)
print(tabulate(results_2, headers=["Item ID", "True Value", "Pearson Similarity Prediction", "ITR Similarity Prediction"], tablefmt="grid"))


Score person similarity:  93
Score itr similarity:  138
+-----------+--------------+---------------------------------+-----------------------------+
|   Item ID |   True Value |   Pearson Similarity Prediction |   ITR Similarity Prediction |
|         1 |            4 |                         4.40474 |                     4.56465 |
+-----------+--------------+---------------------------------+-----------------------------+
|         3 |            4 |                         4.07405 |                     4.20467 |
+-----------+--------------+---------------------------------+-----------------------------+
|         6 |            4 |                         4.39643 |                     4.46715 |
+-----------+--------------+---------------------------------+-----------------------------+
|        47 |            5 |                         4.84033 |                     4.57481 |
+-----------+--------------+---------------------------------+-----------------------------+
|        50 | 

In [16]:
file_name = "predictions2.csv"

# Persist the per-item comparison held in `results_2`.
# Each row is [item_id, true_value, pearson_prediction, itr_prediction], so the
# header must name all four columns; the previous three-column header omitted
# "True Value" and mislabeled every column after "Item ID".
# NOTE(review): `results_2` is also assigned by the neighborhood evaluation cell;
# run this cell right after the "all users" evaluation so the right data is exported.
# newline="" lets the csv module control line endings itself.
with open(file_name, "w", newline="") as csvfile:
    writer = csv.writer(csvfile)
    # header
    writer.writerow(["Item ID", "True Value", "Pearson Similarity Prediction", "ITR Similarity Prediction"])
    # data rows
    writer.writerows(results_2)

print(f"Results saved '{file_name}'.")


Results saved 'predictions2.csv'.


## Prediction accuracy of `predict_rating_with_abs`, considering only the most similar users in the prediction formula (neighborhood)

In [9]:
# Compare predict_rating_with_abs with Pearson vs. ITR similarities on every item user 1 has rated.
# Each row of `results_2` is [item_id, true_value, pearson_prediction, itr_prediction].
results_2 = []
score_ps = 0   # items where the Pearson-based prediction is strictly closer to the true rating
score_itr = 0  # items where the ITR-based prediction is strictly closer to the true rating

# Iterate the (movieId, rating) pairs of the Series directly, so the true value
# can never drift out of step with the item id (no parallel list / manual counter).
for item_id, true_value in rated_items.items():
    # Last argument 1: per this section's heading this selects the "most similar users"
    # neighborhood; NOTE(review): confirm the flag's meaning against utils.predict_ratings.
    # Prediction with Pearson similarity
    predicted_rating_pearson = predict_rating_with_abs(1, item_id, user_item_matrix, similarities_pearson_correlation, 1)
    # Prediction with ITR similarity
    predicted_rating_itr = predict_rating_with_abs(1, item_id, user_item_matrix, similarities_ITR_correlation, 1)

    results_2.append([item_id, true_value, predicted_rating_pearson, predicted_rating_itr])

    diff_pearson = abs(predicted_rating_pearson - true_value)
    diff_itr = abs(predicted_rating_itr - true_value)

    # Strict comparisons: an exact tie scores for neither method.
    if diff_pearson < diff_itr:
        score_ps += 1
    elif diff_itr < diff_pearson:
        score_itr += 1

print("Score Pearson similarity: ", score_ps)
print("Score itr similarity: ", score_itr)
print(tabulate(results_2, headers=["Item ID", "True Value", "Pearson Similarity Prediction", "ITR Similarity Prediction"], tablefmt="grid"))


Score person similarity:  151
Score itr similarity:  80
+-----------+--------------+---------------------------------+-----------------------------+
|   Item ID |   True Value |   Pearson Similarity Prediction |   ITR Similarity Prediction |
|         1 |            4 |                         4.51632 |                     4.65569 |
+-----------+--------------+---------------------------------+-----------------------------+
|         3 |            4 |                         4.15499 |                     4.08764 |
+-----------+--------------+---------------------------------+-----------------------------+
|         6 |            4 |                         4.51547 |                     4.5783  |
+-----------+--------------+---------------------------------+-----------------------------+
|        47 |            5 |                         4.87364 |                     4.72959 |
+-----------+--------------+---------------------------------+-----------------------------+
|        50 | 

In [18]:
file_name = "predictions2_most_similar.csv"

# Persist the per-item comparison held in `results_2`.
# Each row is [item_id, true_value, pearson_prediction, itr_prediction], so the
# header must name all four columns; the previous three-column header omitted
# "True Value" and mislabeled every column after "Item ID".
# NOTE(review): `results_2` is also assigned by the "all users" evaluation cell, and the
# saved execution counts are out of order — re-run the neighborhood evaluation
# immediately before this cell so the file does not capture the all-users results.
# newline="" lets the csv module control line endings itself.
with open(file_name, "w", newline="") as csvfile:
    writer = csv.writer(csvfile)
    # header
    writer.writerow(["Item ID", "True Value", "Pearson Similarity Prediction", "ITR Similarity Prediction"])
    # data rows
    writer.writerows(results_2)

print(f"Results saved '{file_name}'.")

Results saved 'predictions2_most_similar.csv'.
