In [1]:
import numpy as np
import pandas as pd

from dataset_processing import split_dataset, get_starting_dataset, enrich_dataset, get_movie_map
from similarity import pearson_similarity
from predict import prediction_function
from group import alpha_hybrid_aggregation
from evaluate import user_satisfaction

from datetime import datetime

In [2]:
# Experiment configuration.
# NUM_ITERATIONS is passed to split_dataset() in the main loop as its first argument.
NUM_ITERATIONS = 5
# Upper bound on how many of the most similar users are kept per group member.
MAX_NEIGHBORS = 75      # ~ 3*np.sqrt(num_users)
# NOTE(review): not referenced by any of the cells visible in this notebook — confirm whether it is still needed.
MAX_RECOMMENDATIONS = 10

In [3]:
# Result of dataset_processing.get_movie_map().
# NOTE(review): movie_map is not used by the other cells visible here — confirm it is still needed.
movie_map = get_movie_map()

In [4]:
# GROUPS: candidate triples of user ids; assign one of them to `group` in the next cell.
# "Clique" = [493, 26, 57] -> every pairwise similarity between the users is > 0.5
# "Couple & 3rd Wheel" = [69, 234, 224] -> sim(69, 234) > 0.5, sim(69, 224) < -0.5, sim(234, 224) < -0.5
# "Strangers" = [437, 329, 73] -> every pairwise similarity between the users is < -0.5

In [43]:
# User ids of the group under evaluation; [437, 329, 73] is the "Strangers" triple from the GROUPS list.
group = [437, 329, 73]

In [56]:
# Reset the per-run state. Re-run this cell before every execution of the main
# loop, otherwise results from a previous run stay in the frames/list below.
it_counter = 1   # 1-based index of the next iteration to execute
alpha = 0        # aggregation weight passed to alpha_hybrid_aggregation; starts at 0

# One column per iteration, derived from NUM_ITERATIONS instead of a hard-coded
# "It1".."It5" list so the frames always match the configured iteration count.
iteration_labels = [f"It{i}" for i in range(1, NUM_ITERATIONS + 1)]

# Rows are the group members. Float dtype keeps the frames numeric (NaN until a
# value is written) rather than falling back to object dtype.
df_user_sat = pd.DataFrame(index=group, columns=iteration_labels, dtype=float)    # satisfaction in each iteration
df_user_satO = pd.DataFrame(index=group, columns=iteration_labels, dtype=float)   # running mean of the row above
group_disagreements = []   # one max-min spread of df_user_satO per iteration

In [57]:
# Second argument of split_dataset(); the section headings below call it the
# seed (recorded runs used 24, 8 and 111). Change it here instead of editing
# the call inside the loop.
SEED = 111

# Runs the sequential group-recommendation experiment: one pass per iteration,
# each pass re-computing similarities, neighbourhoods, per-user predictions,
# the aggregated group scores and the satisfaction/disagreement metrics.
# Relies on it_counter, alpha, df_user_sat, df_user_satO and
# group_disagreements having been reset by the previous cell.
while it_counter <= NUM_ITERATIONS:
    it_label = f"It{it_counter}"   # column written during this iteration
    print(f"ITERATION: {it_counter}" + " -> ALPHA = " + str(alpha) + "\n")
    start = datetime.now()

    if it_counter == 1:
        first_half, chunks = split_dataset(NUM_ITERATIONS, SEED)
        matrix = get_starting_dataset(first_half)
    else:
        # Iteration k (k >= 2) consumes chunk k-2, i.e. chunks are used in order.
        matrix = enrich_dataset(matrix, chunks[it_counter - 2])

    # Compute Similarities
    print("First Step: Compute Similarities of each User of the Group")
    # One {other_user: similarity} dict per group member, in group order.
    list_of_similarities = [
        {
            other: pearson_similarity(matrix, member, other)
            for other in matrix.index
            if other != member
        }
        for member in group
    ]

    # Neighborhood Building
    print("Second Step: Neighborhood Building")
    # Keep only the MAX_NEIGHBORS highest similarities for every member.
    list_of_similarities = [
        dict(
            sorted(sims.items(), key=lambda item: item[1], reverse=True)[:MAX_NEIGHBORS]
        )
        for sims in list_of_similarities
    ]

    # Predictions for Each User
    print("Third Step: Predictions for Each User")
    # NOTE(review): the last argument is matrix.shape[1] while MAX_RECOMMENDATIONS
    # from the config cell is never used — confirm this is intended.
    list_of_scores = [
        prediction_function(matrix, member, neighbors, matrix.shape[1])
        for member, neighbors in zip(group, list_of_similarities)
    ]

    # Group Recommendation
    print("Fourth Step: Group Recommendation")
    score_matrix = alpha_hybrid_aggregation(alpha, list_of_scores)

    # Evaluation: Group Satisfaction & Group Disagreements
    print("Fifth Step: Evaluation")
    for member, member_scores in zip(group, list_of_scores):
        sat = user_satisfaction(score_matrix, member_scores)
        df_user_sat.at[member, it_label] = sat
        # Running mean over the iterations executed so far: columns of later
        # iterations are still NaN and are skipped by the pandas mean.
        df_user_satO.at[member, it_label] = np.mean(df_user_sat.loc[member])

    overall_now = df_user_satO[it_label]

    # Average over the actual group size rather than a hard-coded 3.
    group_satO = np.sum(overall_now) / len(group)
    print("Group Satisfaction Overall: " + str(group_satO))

    group_dis = np.max(overall_now) - np.min(overall_now)
    group_disagreements.append(group_dis)
    print("Group Disagreement Overall: " + str(group_dis))

    # Getting ready for the Next Iteration: alpha becomes the spread of the
    # satisfactions measured in THIS iteration (not of the running means).
    current_sat = df_user_sat[it_label]
    alpha = np.max(current_sat) - np.min(current_sat)

    end = datetime.now()
    print(f"END OF ITERATION {it_counter} - Elapsed Time: {end-start}" + "\n")

    it_counter += 1

ITERATION: 1 -> ALPHA = 0

First Step: Compute Similarities of each User of the Group
Second Step: Neighborhood Building
Third Step: Predictions for Each User
Fourth Step: Group Recommendation
Fifth Step: Evaluation
Group Satisfaction Overall: 0.7899387966072635
Group Disagreement Overall: 0.12493190288979572
END OF ITERATION 1 - Elapsed Time: 0:00:34.003683

ITERATION: 2 -> ALPHA = 0.12493190288979572

First Step: Compute Similarities of each User of the Group
Second Step: Neighborhood Building
Third Step: Predictions for Each User
Fourth Step: Group Recommendation
Fifth Step: Evaluation
Group Satisfaction Overall: 0.7908784616550517
Group Disagreement Overall: 0.10393383014396584
END OF ITERATION 2 - Elapsed Time: 0:00:29.202167

ITERATION: 3 -> ALPHA = 0.08293575739813608

First Step: Compute Similarities of each User of the Group
Second Step: Neighborhood Building
Third Step: Predictions for Each User
Fourth Step: Group Recommendation
Fifth Step: Evaluation
Group Satisfaction Overa

# First Execution (seed == 24) Outcomes

In [46]:
# User Satisfaction: one row per group member, one column per iteration
df_user_sat

Unnamed: 0,It1,It2,It3,It4,It5
437,0.928552,0.860843,0.899323,0.855591,0.926059
329,0.814799,0.825848,0.667802,0.747851,0.731574
73,0.897134,0.833874,0.911623,0.848346,0.717571


In [47]:
# Overall User Satisfaction: running mean of each user's satisfaction up to that iteration
df_user_satO

Unnamed: 0,It1,It2,It3,It4,It5
437,0.928552,0.894697,0.896239,0.886077,0.894073
329,0.814799,0.820324,0.769483,0.764075,0.757575
73,0.897134,0.865504,0.880877,0.872744,0.84171


In [48]:
# Overall Group Satisfaction -> per-iteration mean of the Overall User Satisfactions across the group
df_user_satO.mean()

It1    0.880162
It2    0.860175
It3    0.848866
It4    0.840965
It5    0.831119
dtype: object

In [49]:
# Group Disagreements: per iteration, max - min of the Overall User Satisfactions
group_disagreements

[0.11375288068555445,
 0.07437379966480062,
 0.12675623183656803,
 0.12200209339290824,
 0.13649861254447226]

# Second Execution (seed == 8) Outcomes

In [52]:
# User Satisfaction: one row per group member, one column per iteration
df_user_sat

Unnamed: 0,It1,It2,It3,It4,It5
437,0.854957,0.747312,0.826147,0.820023,0.926059
329,0.750762,0.78681,0.713108,0.678346,0.731574
73,0.834453,0.854222,0.770919,0.775883,0.717571


In [53]:
# Overall User Satisfaction: running mean of each user's satisfaction up to that iteration
df_user_satO

Unnamed: 0,It1,It2,It3,It4,It5
437,0.854957,0.801134,0.809472,0.81211,0.834899
329,0.750762,0.768786,0.750227,0.732256,0.73212
73,0.834453,0.844337,0.819865,0.808869,0.79061


In [54]:
# Overall Group Satisfaction -> per-iteration mean of the Overall User Satisfactions across the group
df_user_satO.mean()

It1    0.813391
It2    0.804753
It3    0.793188
It4    0.784412
It5    0.785876
dtype: object

In [55]:
# Group Disagreements: per iteration, max - min of the Overall User Satisfactions
group_disagreements

[0.10419490229152739,
 0.07555141559551959,
 0.06963791725207191,
 0.07985327832116362,
 0.1027795604870766]

# Third Execution (seed == 111) Outcomes

In [58]:
# User Satisfaction: one row per group member, one column per iteration
df_user_sat

Unnamed: 0,It1,It2,It3,It4,It5
437,0.836998,0.826653,0.87792,0.870483,0.926059
329,0.712067,0.743718,0.74169,0.707682,0.731574
73,0.820751,0.805084,0.770741,0.738964,0.717571


In [59]:
# Overall User Satisfaction: running mean of each user's satisfaction up to that iteration
df_user_satO

Unnamed: 0,It1,It2,It3,It4,It5
437,0.836998,0.831826,0.847191,0.853014,0.867623
329,0.712067,0.727892,0.732492,0.726289,0.727346
73,0.820751,0.812917,0.798859,0.783885,0.770622


In [60]:
# Overall Group Satisfaction -> per-iteration mean of the Overall User Satisfactions across the group
df_user_satO.mean()

It1    0.789939
It2    0.790878
It3    0.792847
It4    0.787729
It5     0.78853
dtype: object

In [61]:
# Group Disagreements: per iteration, max - min of the Overall User Satisfactions
group_disagreements

[0.12493190288979572,
 0.10393383014396584,
 0.11469923474483879,
 0.12672474390949973,
 0.14027673295774545]