In [None]:
import pandas as pd
import numpy as np
from src.models.old_content_models.content_handler_v1 import ContentHandler
from src.models.content_handler import ContentHandler as ContentHandler2
from src.models.content_evaluation import split_user_interactions, evaluate_hit_rate, evaluate_intra_list_diversity
from src.pipeline.data_processor import DataProcessor, CleanedData

In [None]:
# Load the cleaned bookmark (user interaction) and shiur tables.
dp = DataProcessor()
bookmarks_df = dp.load_table(CleanedData.BOOKMARKS)
shiur_df = dp.load_table(CleanedData.SHIURIM)

# Build the interaction frame in one chain so every step yields a fresh DataFrame.
# The original assigned a column onto a boolean-filtered frame, which triggers
# pandas' SettingWithCopyWarning; `.assign` avoids that.
user_listens_df = (
    bookmarks_df
    # Inner join: bookmarks whose shiur is missing from shiur_df are dropped.
    .merge(shiur_df[['shiur', 'full_details']], on='shiur', how='inner')
    # Keep rows that were played or that are bookmarked as 'queue'.
    .loc[lambda df: (df['played'] == 1) | (df['bookmark'] == 'queue')]
    # Single interaction date: date_played where present, otherwise queue_date.
    .assign(date=lambda df: df['date_played'].combine_first(df['queue_date']))
)


In [None]:
# Per-user interaction totals: one entry per distinct value in the `user` column.
interaction_counts = user_listens_df.loc[:, 'user'].value_counts()
interaction_counts

In [None]:
# Quartiles (25th / 50th / 75th percentile) of the per-user interaction counts.
percentiles = np.percentile(interaction_counts.to_numpy(), q=[25, 50, 75])
percentiles

In [None]:
# Split data: the call returns two values, unpacked here as the train and test sets.
# NOTE(review): interaction_threshold=4 is presumably the minimum number of
# interactions a user needs to take part in the split — confirm against
# split_user_interactions.
train_df, test_df = split_user_interactions(user_listens_df, interaction_threshold=4)

# Instantiate both handler versions with the training data only:
# `ContentHandler` is the v1 class from old_content_models, `ContentHandler2` is
# the current src.models.content_handler class. test_df is not passed to either.
content_handler = ContentHandler(train_df)
content_handler2 = ContentHandler2(train_df)


In [None]:
# Evaluate each handler against the same test split with the same k, so the two
# sets of results are directly comparable. Each call yields three values, unpacked
# as (hit rate, per-user hits, recommendations).
# NOTE(review): k=10 is presumably the recommendation list length — confirm in
# evaluate_hit_rate.
hit_rate1, user_hits1, recommendations1 = evaluate_hit_rate(test_df, content_handler, k=10)
hit_rate2, user_hits2, recommendations2 = evaluate_hit_rate(test_df, content_handler2, k=10)
# Intra-list diversity of the recommendations produced above, measured with each
# handler's own `shiur_embeddings`.
diversity1 = evaluate_intra_list_diversity(recommendations1, content_handler.shiur_embeddings)
diversity2 = evaluate_intra_list_diversity(recommendations2, content_handler2.shiur_embeddings)

In [None]:
# Report both metrics for both handlers, one per line.
# NOTE(review): the "Attention" / "AutoEncoder/Clustering" labels are the author's
# names for handler 1 / handler 2 — confirm they still match the imported classes.
print(
    f"Hit Rate (Attention): {hit_rate1}",
    f"Hit Rate (AutoEncoder/Clustering): {hit_rate2}",
    f"Diversity (Attention): {diversity1}",
    f"Diversity (AutoEncoder/Clustering): {diversity2}",
    sep="\n",
)

In [None]:
# Size of each handler's hit collection, followed by the entries common to both.
print(len(user_hits1), len(user_hits2), sep="\n")
print(set(user_hits1) & set(user_hits2))

In [None]:
def get_user_shiurs(user_id, df):
    """Return the `full_details` values of every row in `df` that belongs to `user_id`.

    Args:
        user_id: Value compared for equality against the `user` column.
        df: DataFrame containing `user` and `full_details` columns.

    Returns:
        A list of the matching `full_details` values in row order; empty when
        no row matches.
    """
    belongs_to_user = df['user'] == user_id
    return list(df.loc[belongs_to_user, 'full_details'])

In [None]:
# Spot-check one user: gather their train and test items, then ask the second
# handler for recommendations.
# NOTE(review): the literal 7 is presumably the number of recommendations
# requested — confirm against recommend_for_user_content.
user_id = 196334
train = get_user_shiurs(user_id, train_df)
test = get_user_shiurs(user_id, test_df)
recommend = content_handler2.recommend_for_user_content(user_id, 7)

# Header line followed by each recommended value on its own line.
print("Recommendations", *recommend.values(), sep="\n")


In [None]:
# Header line followed by each of the user's training items on its own line.
print("Train", *train, sep="\n")


In [None]:
# Header line followed by each of the user's held-out test items on its own line.
print("Test", *test, sep="\n")


In [None]:
# Header line followed by every recommended value that also appears in the
# user's test items, in recommendation order.
print(
    "Overlap",
    *(candidate for candidate in recommend.values() if candidate in test),
    sep="\n",
)


