In [1]:
import numpy as np
import pandas as pd

from dataset_processing import split_dataset, get_starting_dataset, enrich_dataset, get_movie_map
from similarity import pearson_similarity
from predict import prediction_function
from group import alpha_hybrid_aggregation
from evaluate import user_satisfaction

from datetime import datetime

In [2]:
NUM_ITERATIONS = 5
MAX_NEIGHBORS = 75      # ~ 3*np.sqrt(num_users)
MAX_RECOMMENDATIONS = 10

In [3]:
movie_map = get_movie_map()

In [4]:
# GROUPS
# "Clique" = [493, 26, 57] -> each Similarity between two Users is > 0.5
# "Couple & 3rd Wheel" = [69, 234, 224] -> sim(69, 234) > 0.5, sim(69, 224) < -0.5, sim(234, 224) < -0.5
# "Strangers" = [437, 329, 73] -> each Similarity between two Users is < -0.5

In [43]:
group = [437, 329, 73]

In [56]:
it_counter = 1
alpha = 0
df_user_sat = pd.DataFrame(index=group, columns=["It1", "It2", "It3", "It4", "It5"])
df_user_satO = pd.DataFrame(index=group, columns=["It1", "It2", "It3", "It4", "It5"])
group_disagreements = list()

In [57]:
while it_counter < 6:
    print(f"ITERATION: {it_counter}" + " -> ALPHA = " + str(alpha) + "\n")
    start = datetime.now()

    if it_counter == 1:
        first_half, chunks = split_dataset(NUM_ITERATIONS, 111)
        matrix = get_starting_dataset(first_half)
    else:
        matrix = enrich_dataset(matrix, chunks[it_counter-2])

    # Compute Similarities
    print("First Step: Compute Similarities of each User of the Group")
    list_of_similarities = list()
    for user in group:
        dictionary = dict()

        other_users = [u for u in matrix.index if u != user]
        for u in other_users:
            dictionary[u] = pearson_similarity(matrix, user, u)
        list_of_similarities.append(dictionary)
    
    # Neighborhood (75) Building
    print("Second Step: Neighborhood Building")
    for i in range(0, len(list_of_similarities)):
        similarities = list_of_similarities.pop(i)
        similarities = {k: v for k, v in sorted(similarities.items(), key=lambda item: item[1], reverse=True)}
        similarities = dict(list(similarities.items())[:MAX_NEIGHBORS])
        list_of_similarities.insert(i, similarities)
    
    # Predictions for Each User
    print("Third Step: Predictions for Each User")
    list_of_scores = list()

    for i in range(0, len(group)):
        scores = prediction_function(matrix, group[i], list_of_similarities[i], matrix.shape[1])
        list_of_scores.append(scores)
    
    # Group Recommendation
    print("Fourth Step: Group Recommendation")
    score_matrix = alpha_hybrid_aggregation(alpha, list_of_scores)

    # Evaluation: Group Satisfaction & Group Disagreements
    print("Fifth Step: Evaluation")
    for i, user in enumerate(group):
        sat = user_satisfaction(score_matrix, list_of_scores[i], MAX_RECOMMENDATIONS)
        df_user_sat.at[user, f"It{it_counter}"] = sat
        df_user_satO.at[user, f"It{it_counter}"] = np.mean(df_user_sat.loc[user])
        # print(f"User {user} -> Satisfaction: {sat:.5f}")
    
    group_satO = np.sum(df_user_satO[f'It{it_counter}'])/3
    print("Group Satisfaction Overall: " + str(group_satO))

    group_dis = np.max(df_user_satO[f'It{it_counter}']) - np.min(df_user_satO[f'It{it_counter}'])
    group_disagreements.append(group_dis)
    print("Group Disagreement Overall: " + str(group_dis))

    # Getting ready for the Next Iteration
    alpha = np.max(df_user_sat[f"It{it_counter}"]) - np.min(df_user_sat[f"It{it_counter}"])

    end = datetime.now()
    print(f"END OF ITERATION {it_counter} - Elapsed Time: {end-start}" + "\n")

    it_counter += 1

ITERATION: 1 -> ALPHA = 0

First Step: Compute Similarities of each User of the Group
Second Step: Neighborhood Building
Third Step: Predictions for Each User
Fourth Step: Group Recommendation
Fifth Step: Evaluation
Group Satisfaction Overall: 0.8246767496514659
Group Disagreement Overall: 0.24764010263980718
END OF ITERATION 1 - Elapsed Time: 0:00:33.763409

ITERATION: 2 -> ALPHA = 0.24764010263980718

First Step: Compute Similarities of each User of the Group
Second Step: Neighborhood Building
Third Step: Predictions for Each User
Fourth Step: Group Recommendation
Fifth Step: Evaluation
Group Satisfaction Overall: 0.8293730972734531
Group Disagreement Overall: 0.21274058971099352
END OF ITERATION 2 - Elapsed Time: 0:00:28.664128

ITERATION: 3 -> ALPHA = 0.17784107678217975

First Step: Compute Similarities of each User of the Group
Second Step: Neighborhood Building
Third Step: Predictions for Each User
Fourth Step: Group Recommendation
Fifth Step: Evaluation
Group Satisfaction Overa

# First Execution (seed == 24) Outcomes

In [46]:
# User Satisfaction
df_user_sat

Unnamed: 0,It1,It2,It3,It4,It5
437,0.875906,0.921854,0.887161,0.891412,0.926059
329,0.664571,0.73019,0.736653,0.753612,0.731574
73,0.877347,0.844455,0.77923,0.807649,0.717571


In [47]:
# Overall User Satisfaction
df_user_satO

Unnamed: 0,It1,It2,It3,It4,It5
437,0.875906,0.89888,0.894974,0.894083,0.900478
329,0.664571,0.69738,0.710471,0.721257,0.72332
73,0.877347,0.860901,0.833678,0.82717,0.805251


In [48]:
# Overall Group Satisfaction -> Group Satisfaction = np.mean(User Satisfactions)
df_user_satO.mean()

It1    0.805942
It2    0.819054
It3    0.813041
It4     0.81417
It5    0.809683
dtype: object

In [49]:
# Group Disagreements
group_disagreements

[0.21277650448567753,
 0.20149981850746668,
 0.1845024101390086,
 0.17282685717257695,
 0.1771584235682072]

# Second Execution (seed == 8) Outcomes

In [52]:
df_user_sat

Unnamed: 0,It1,It2,It3,It4,It5
437,0.861564,0.868649,0.857059,0.816282,0.926059
329,0.813901,0.690504,0.745848,0.784172,0.731574
73,0.76823,0.793768,0.755636,0.775706,0.717571


In [53]:
# Overall User Satisfaction
df_user_satO

Unnamed: 0,It1,It2,It3,It4,It5
437,0.861564,0.865106,0.862424,0.850889,0.865923
329,0.813901,0.752202,0.750084,0.758606,0.7532
73,0.76823,0.780999,0.772545,0.773335,0.762182


In [54]:
# Overall Group Satisfaction -> Group Satisfaction = np.mean(User Satisfactions)
df_user_satO.mean()

It1    0.814565
It2    0.799436
It3    0.795018
It4    0.794277
It5    0.793768
dtype: object

In [55]:
# Group Disagreements
group_disagreements

[0.09333375264390509,
 0.11290397107211769,
 0.11233960848813085,
 0.09228226672191997,
 0.1127227512076816]

# Third Execution (seed == 111) Outcomes

In [58]:
df_user_sat

Unnamed: 0,It1,It2,It3,It4,It5
437,0.920258,0.91953,0.864291,0.857955,0.926059
329,0.672618,0.741689,0.785884,0.736314,0.731574
73,0.881154,0.84099,0.845799,0.736347,0.717571


In [59]:
# Overall User Satisfaction
df_user_satO

Unnamed: 0,It1,It2,It3,It4,It5
437,0.920258,0.919894,0.90136,0.890508,0.897618
329,0.672618,0.707153,0.733397,0.734126,0.733616
73,0.881154,0.861072,0.855981,0.826072,0.804372


In [60]:
# Overall Group Satisfaction -> Group Satisfaction = np.mean(User Satisfactions)
df_user_satO.mean()

It1    0.824677
It2    0.829373
It3    0.830246
It4    0.816902
It5    0.811869
dtype: object

In [61]:
# Group Disagreements
group_disagreements

[0.24764010263980718,
 0.21274058971099352,
 0.16796273348626722,
 0.1563823506010671,
 0.1640028183109994]