In [12]:
# Cell 1
import numpy as np
import matplotlib.pyplot as plt
from tv_recommendation import TVRecommendationEngine
from evaluation_metrics import RecommendationEvaluator

In [13]:
# Cell 2
# Create the recommendation engine that the following cells operate on.
engine = TVRecommendationEngine()
# Load the two input files (user-shows data first, shows data second). After
# this call the loaded matrix is available as `engine.R`; the recorded output
# reports its shape as (9985, 563), and later cells index it as
# [user row, show column].
engine.load_data('data/user-shows.txt', 'data/shows.txt')
print(f"shape of R: {engine.R.shape}")

shape of R: (9985, 563)


In [14]:
# Cell 3
# Run the engine's preprocessing step. The prints below read `engine.P` and
# `engine.Q`, so they rely on this call having populated both attributes.
engine.preprocess_data()
# P is reported as the user degree matrix and Q as the item degree matrix;
# per the recorded output both are square (users x users, shows x shows).
print(f"user degree matrix P has shape {engine.P.shape}")
print(f"item degree matrix Q has shape: {engine.Q.shape}")

user degree matrix P has shape (9985, 9985)
item degree matrix Q has shape: (563, 563)


In [15]:
# Cell 4
# Row index in R of the target user, referred to as "Alex" in the printed text.
ALEX_USER_ID = 499
# Column indices of the first 100 shows; these are the entries treated as hidden.
missing_indices = list(range(100))

# Snapshot Alex's row BEFORE masking. The order matters: the next statement
# overwrites the same entries of engine.R in place.
R_alex_original = engine.R[ALEX_USER_ID, :].copy()
# Zero the hidden entries directly inside the engine's matrix.
engine.R[ALEX_USER_ID, missing_indices] = 0
# NOTE(review): this cell is not idempotent. Re-running it without reloading the
# data makes R_alex_original capture the already-masked row.

print(f"simulated missing data for user {ALEX_USER_ID} Alex")


simulated missing data for user 499 Alex


In [16]:
# Cell 5
# User-user collaborative filtering for Alex, restricted to the hidden shows,
# followed by a request for 5 recommendations built from those scores.
# NOTE(review): each recommendation is unpacked as (index, score, name) by the
# display cell; the exact return types of both engine methods are defined in
# tv_recommendation and should be confirmed there.
uu_scores = engine.user_user_collaborative_filtering(ALEX_USER_ID, missing_indices)
uu_recommendations = engine.get_top_recommendations(uu_scores, missing_indices, 5)

In [17]:
# Cell 6
# Item-item collaborative filtering for Alex, restricted to the hidden shows,
# followed by a request for 5 recommendations built from those scores.
# NOTE(review): each recommendation is unpacked as (index, score, name) by the
# display cell; the exact return types of both engine methods are defined in
# tv_recommendation and should be confirmed there.
ii_scores = engine.item_item_collaborative_filtering(ALEX_USER_ID, missing_indices)
ii_recommendations = engine.get_top_recommendations(ii_scores, missing_indices, 5)

In [18]:
# Cell 7
# Print both recommendation lists, one after the other, numbering entries from 1.
# Each recommendation is unpacked as (index, score, name); only the score and
# the name are shown.
for heading, recommendations in (
    ("User-User CF recommendations:", uu_recommendations),
    ("\nItem-Item CF recommendations:", ii_recommendations),
):
    print(heading)
    for rank, (show_idx, show_score, show_name) in enumerate(recommendations, start=1):
        print(f"{rank}. {show_name} (score: {show_score:.2f})")

User-User CF recommendations:
1. "FOX 28 News at 10pm" (score: 908.48)
2. "Family Guy" (score: 861.18)
3. "2009 NCAA Basketball Tournament" (score: 827.60)
4. "NBC 4 at Eleven" (score: 784.78)
5. "Two and a Half Men" (score: 757.60)

Item-Item CF recommendations:
1. "FOX 28 News at 10pm" (score: 31.36)
2. "Family Guy" (score: 30.00)
3. "NBC 4 at Eleven" (score: 29.40)
4. "2009 NCAA Basketball Tournament" (score: 29.23)
5. "Access Hollywood" (score: 28.97)


In [19]:
# Cell 8
evaluator = RecommendationEvaluator()

# The "max score" of each method is the score stored in the first entry of its
# recommendation list; an empty list falls back to 0.
if uu_recommendations:
    uu_max_score = uu_recommendations[0][1]
else:
    uu_max_score = 0

if ii_recommendations:
    ii_max_score = ii_recommendations[0][1]
else:
    ii_max_score = 0

# Both evaluator calls print their own report; the second one also returns the
# collection whose length is shown as the overlap in the summary below.
evaluator.validate_requirements(uu_max_score, ii_max_score)
overlap = evaluator.analyze_recommendations(uu_recommendations, ii_recommendations)

# Emit the summary as one print call, one line per field.
print(
    f"\nExperiment Summary:",
    f"Target User: {ALEX_USER_ID} (Alex)",
    f"Missing Items: First 100 shows",
    f"User-User CF Max Score: {uu_max_score:.2f}",
    f"Item-Item CF Max Score: {ii_max_score:.2f}",
    f"Recommendations overlap: {len(overlap)}/5",
    sep="\n",
)

User-User CF max score: 908.48 (requirement: > 900) ✓
Item-Item CF max score: 31.36 (requirement: > 31) ✓
All requirements passed!
User-User recommendations: 5
Item-Item recommendations: 5
Overlap: 4
Unique to User-User: 1
Unique to Item-Item: 1
Common recommendations:
  - "FOX 28 News at 10pm"
  - "Family Guy"
  - "2009 NCAA Basketball Tournament"
  - "NBC 4 at Eleven"

Experiment Summary:
Target User: 499 (Alex)
Missing Items: First 100 shows
User-User CF Max Score: 908.48
Item-Item CF Max Score: 31.36
Recommendations overlap: 4/5


# Dataset sparsity

In [None]:
# Dataset sparsity analysis on a freshly loaded copy of the data.
#
# A dedicated engine instance is used so that the global `engine` (already
# preprocessed, and with Alex's row masked by an earlier cell) is NOT replaced
# as a side effect of this read-only analysis. Rebinding `engine` here would
# leave it without P/Q and break re-running the collaborative-filtering cells.
sparsity_engine = TVRecommendationEngine()
sparsity_engine.load_data('data/user-shows.txt', 'data/shows.txt')
print("Data loaded successfully.")
print(f"Ratings matrix R has shape: {sparsity_engine.R.shape}")

total_elements = sparsity_engine.R.size

# Count the non-zero entries rather than summing the values: the two agree for
# a strictly 0/1 matrix, but counting stays correct if any entry is not 0 or 1.
num_ones = np.count_nonzero(sparsity_engine.R)
num_zeros = total_elements - num_ones

# Percentage of entries that are zero; guard against an empty matrix.
sparsity = (num_zeros / total_elements) * 100 if total_elements else 0.0

print("\n--- Dataset Sparsity Analysis ---")
print(f"Total ratings (1s): {num_ones}")
print(f"Missing ratings (0s): {num_zeros}")
print(f"Total possible ratings: {total_elements}")
print(f"Sparsity of the matrix: {sparsity:.2f}%")

Data loaded successfully.
Ratings matrix R has shape: (9985, 563)

--- Dataset Sparsity Analysis ---
Total ratings (1s): 758878
Missing ratings (0s): 4862677
Total possible ratings: 5621555
Sparsity of the matrix: 86.50%


# Compare Cosine Similarity Methods

In [None]:
import time
from sklearn.metrics.pairwise import cosine_similarity

print("--- Verifying Cosine Similarity Calculations (with Refined Timing) ---")


def _timed_call(func, *args):
    """Call ``func(*args)`` and return ``(result, elapsed_seconds)``.

    The elapsed time is measured with ``time.perf_counter()``, a monotonic
    high-resolution clock intended for measuring short durations.
    ``time.time()`` is wall-clock time and can jump if the system clock is
    adjusted, which makes it unsuitable for benchmarking.
    """
    started = time.perf_counter()
    result = func(*args)
    return result, time.perf_counter() - started


# Fresh engine so the comparison runs on unmodified data, independent of any
# in-place masking applied to the main engine's matrix.
temp_engine = TVRecommendationEngine()
temp_engine.load_data('data/user-shows.txt', 'data/shows.txt')
temp_engine.preprocess_data()
R_matrix = temp_engine.R
P_matrix = temp_engine.P
Q_matrix = temp_engine.Q

# --- User similarity: project implementation vs. scikit-learn on the rows of R.
Su_formula, time_formula_user = _timed_call(
    temp_engine.sim_computer.compute_user_similarity_matrix, R_matrix, P_matrix
)
Su_sklearn, time_sklearn_user = _timed_call(cosine_similarity, R_matrix)

# Element-wise absolute difference between the two results.
user_diff = np.abs(Su_formula - Su_sklearn)

print("\nVerification for User Similarity Matrix (Su):")
print(f" Time taken (Our Formula):   {time_formula_user:.4f} seconds")
print(f" Time taken (sklearn):     {time_sklearn_user:.4f} seconds")
print(f" Maximum absolute difference:      {np.max(user_diff):.2e}")

# --- Item similarity: project implementation vs. scikit-learn on the rows of
# R transposed (i.e. the columns of R).
Si_formula, time_formula_item = _timed_call(
    temp_engine.sim_computer.compute_item_similarity_matrix, R_matrix, Q_matrix
)
Si_sklearn, time_sklearn_item = _timed_call(cosine_similarity, R_matrix.T)

item_diff = np.abs(Si_formula - Si_sklearn)

print("\nVerification for Item Similarity Matrix (Si):")
print(f"  Time taken (Our Formula):   {time_formula_item:.4f} seconds")
print(f"  Time taken (sklearn):     {time_sklearn_item:.4f} seconds")
print(f"  Maximum absolute difference:      {np.max(item_diff):.2e}")

--- Verifying Cosine Similarity Calculations (with Refined Timing) ---

Verification for User Similarity Matrix (Su):
  Time taken (Our Formula):   41.8253 seconds
  Time taken (sklearn):       3.6006 seconds
  Maximum absolute difference:      2.11e-15

Verification for Item Similarity Matrix (Si):
  Time taken (Our Formula):   0.4403 seconds
  Time taken (sklearn):       0.1985 seconds
  Maximum absolute difference:      2.66e-15


# Precision & recall

In [None]:
# Precision and recall of the recommendation lists, measured against the
# entries that were hidden for Alex.

# Number of recommendations under evaluation. Keep this in sync with the count
# passed to get_top_recommendations when the lists were built.
TOP_K = 5


def _precision_recall_at_k(recommendations, relevant, k):
    """Return ``(recommended, hits, precision, recall)`` for one list.

    Args:
        recommendations: iterable of ``(index, score, name)`` tuples.
        relevant: set of item indices counted as correct answers.
        k: denominator used for precision (the size of the requested list).

    Returns:
        The set of recommended indices, the number of recommended indices that
        are in ``relevant``, ``hits / k`` and ``hits / len(relevant)``. Each
        ratio falls back to 0 when its denominator is zero.
    """
    recommended = {idx for idx, _score, _name in recommendations}
    hits = len(relevant & recommended)
    precision = hits / k if k > 0 else 0
    recall = hits / len(relevant) if relevant else 0
    return recommended, hits, precision, recall


# Relevant items are the hidden indices whose saved (pre-masking) value is 1.
# Using missing_indices keeps this set aligned with what was actually masked.
relevant_items = {i for i in missing_indices if R_alex_original[i] == 1}
num_relevant_items = len(relevant_items)

print(f"Alex had {num_relevant_items} relevant shows in the hidden test set.")
if num_relevant_items == 0:
    # With nothing to find, every list scores 0 regardless of its quality, so
    # the numbers below say nothing about the recommenders themselves.
    print("WARNING: no relevant shows in the hidden set; "
          "precision/recall are reported as 0 but are not meaningful for this user.")

recommended_uu, hits_uu, precision_uu, recall_uu = _precision_recall_at_k(
    uu_recommendations, relevant_items, TOP_K
)

print("\n--- User-User CF Evaluation ---")
print(f"  Hits (correct recommendations): {hits_uu} out of {TOP_K}")
print(f"  Precision@{TOP_K}: {precision_uu:.2f}")
print(f"  Recall@{TOP_K}:    {recall_uu:.2f}")

recommended_ii, hits_ii, precision_ii, recall_ii = _precision_recall_at_k(
    ii_recommendations, relevant_items, TOP_K
)

print("\n--- Item-Item CF Evaluation ---")
print(f"  Hits (correct recommendations): {hits_ii} out of {TOP_K}")
print(f"  Precision@{TOP_K}: {precision_ii:.2f}")
print(f"  Recall@{TOP_K}:    {recall_ii:.2f}")

Alex had 0 relevant shows in the hidden test set.

--- User-User CF Evaluation ---
  Hits (correct recommendations): 0 out of 5
  Precision@5: 0.00
  Recall@5:    0.00

--- Item-Item CF Evaluation ---
  Hits (correct recommendations): 0 out of 5
  Precision@5: 0.00
  Recall@5:    0.00


In [None]:
# Sanity check: read the ratings file directly with NumPy (bypassing the
# engine) and count how many of the first 100 entries in Alex's row are set.
user_shows_path = 'data/user-shows.txt'

ALEX_USER_ID = 499

R_original = np.loadtxt(user_shows_path, dtype=int)
print(f"Successfully loaded the original ratings matrix of shape: {R_original.shape}\n")

if not ALEX_USER_ID < R_original.shape[0]:
    # The requested row does not exist in the loaded matrix.
    print(f"Error: Alex's user ID ({ALEX_USER_ID}) is out of bounds for the matrix.")
else:
    alex_original_row = R_original[ALEX_USER_ID, :]
    print(f"--- Full original viewing history for alex (user index {ALEX_USER_ID}) ---")
    print(repr(alex_original_row))

    # Sum of the first 100 entries of the row, as stored in the file.
    shows_watched_in_first_100 = alex_original_row[:100].sum()

    print("\n--- Verification Summary ---")
    print(f"Number of shows Alex watched in the first 100 items: {shows_watched_in_first_100}")

    if shows_watched_in_first_100 != 0:
        print("\nCONCLUSION: There is a discrepancy. The data shows Alex did watch shows in this range.")
    else:
        print("\nCONCLUSION: The output is correct. Alex had 0 watched shows in the test set.")

Successfully loaded the original ratings matrix of shape: (9985, 563)

--- Full Original Viewing History for Alex (user index 499) ---
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0,
       0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1,
       1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1,
       1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0

# Using a new user

In [None]:
# Full hide / recommend / evaluate pipeline for a second user.
#
# The user index is defined once and interpolated into every label, so the
# printed text cannot drift from the user that is actually analysed.
NEW_USER_ID = 3
# Number of recommendations requested and evaluated.
TOP_K = 5

print(f"--- Running Full Analysis for a New User (User {NEW_USER_ID}) ---")

# Dedicated engine so masking this user's row does not touch other engines.
engine_user3 = TVRecommendationEngine()
engine_user3.load_data('data/user-shows.txt', 'data/shows.txt')
engine_user3.preprocess_data()

print(f"Targeting new user with index: {NEW_USER_ID}")

# Hide the first 100 shows for this user.
missing_indices = list(range(100))

# Snapshot the row BEFORE masking; the next statement zeroes it in place.
R_user3_original = engine_user3.R[NEW_USER_ID, :].copy()
engine_user3.R[NEW_USER_ID, missing_indices] = 0

print(f"\nRunning User-User and Item-Item CF for User {NEW_USER_ID}...")
uu_scores_user3 = engine_user3.user_user_collaborative_filtering(NEW_USER_ID, missing_indices)
uu_recs_user3 = engine_user3.get_top_recommendations(uu_scores_user3, missing_indices, TOP_K)

ii_scores_user3 = engine_user3.item_item_collaborative_filtering(NEW_USER_ID, missing_indices)
ii_recs_user3 = engine_user3.get_top_recommendations(ii_scores_user3, missing_indices, TOP_K)

print(f"\n--- Top {TOP_K} Recommendations for User {NEW_USER_ID} ---")
print("\nUser-User CF recommendations:")
for i, (idx, score, name) in enumerate(uu_recs_user3):
    print(f"{i+1}. {name} (Score: {score:.2f})")

print("\nItem-Item CF recommendations:")
for i, (idx, score, name) in enumerate(ii_recs_user3):
    print(f"{i+1}. {name} (Score: {score:.2f})")

print(f"\n--- Quantitative Evaluation for User {NEW_USER_ID}: Precision & Recall @ {TOP_K} ---")

# Relevant items are the hidden indices whose saved (pre-masking) value is 1.
# Using missing_indices keeps this set aligned with what was actually masked.
relevant_items_user3 = {i for i in missing_indices if R_user3_original[i] == 1}
num_relevant_user3 = len(relevant_items_user3)
print(f"User {NEW_USER_ID} had {num_relevant_user3} relevant shows in the hidden test set.")

recommended_uu_user3 = {idx for idx, score, name in uu_recs_user3}
recommended_ii_user3 = {idx for idx, score, name in ii_recs_user3}

# Precision divides by the requested list size; recall by the number of
# relevant items, falling back to 0 when there are none.
hits_uu_user3 = len(relevant_items_user3.intersection(recommended_uu_user3))
precision_uu_user3 = hits_uu_user3 / TOP_K
recall_uu_user3 = hits_uu_user3 / num_relevant_user3 if num_relevant_user3 > 0 else 0

print(f"\n--- User-User CF Evaluation (User {NEW_USER_ID}) ---")
print(f"  Hits (correct recommendations): {hits_uu_user3} out of {TOP_K}")
print(f" Precision@{TOP_K}: {precision_uu_user3:.2f}")
print(f"  Recall@{TOP_K}:  {recall_uu_user3:.2f}")

hits_ii_user3 = len(relevant_items_user3.intersection(recommended_ii_user3))
precision_ii_user3 = hits_ii_user3 / TOP_K
recall_ii_user3 = hits_ii_user3 / num_relevant_user3 if num_relevant_user3 > 0 else 0

print(f"\n--- Item-Item CF Evaluation (User {NEW_USER_ID}) ---")
print(f" Hits (correct recommendations): {hits_ii_user3} out of {TOP_K}")
print(f" Precision@{TOP_K}: {precision_ii_user3:.2f}")
print(f" Recall@{TOP_K}:  {recall_ii_user3:.2f}")

--- Running Full Analysis for a New User (User 3) ---
Targeting new user with index: 4

Running User-User and Item-Item CF for User 3...

--- Top 5 Recommendations for User 3 ---

User-User CF recommendations:
1. "Family Guy" (Score: 293.23)
2. "2009 NCAA Basketball Tournament" (Score: 282.96)
3. "FOX 28 News at 10pm" (Score: 282.85)
4. "Access Hollywood" (Score: 264.77)
5. "NBC 4 at Eleven" (Score: 252.21)

Item-Item CF recommendations:
1. "Family Guy" (Score: 4.22)
2. "2009 NCAA Basketball Tournament" (Score: 4.15)
3. "SportsCenter" (Score: 4.13)
4. "FOX 28 News at 10pm" (Score: 4.03)
5. "Access Hollywood" (Score: 3.97)

--- Quantitative Evaluation for User 3: Precision & Recall @ 5 ---
User 3 had 9 relevant shows in the hidden test set.

--- User-User CF Evaluation (User 3) ---
  Hits (correct recommendations): 0 out of 5
 Precision@5: 0.00
  Recall@5:  0.00

--- Item-Item CF Evaluation (User 3) ---
 Hits (correct recommendations): 1 out of 5
 Precision@5: 0.20
 Recall@5:  0.11
