In [1]:
# Cell 1
import numpy as np
import matplotlib.pyplot as plt
from tv_recommendation import TVRecommendationEngine
from evaluation_metrics import RecommendationEvaluator

In [2]:
# Cell 2
# Create the recommendation engine that the following cells operate on.
engine = TVRecommendationEngine()
# Read the two data files into the engine; engine.R is read right after this
# call (the saved output reports its shape as (9985, 563)).
# NOTE(review): presumably the first file is the user-by-show matrix and the
# second the show titles — confirm against TVRecommendationEngine.load_data.
engine.load_data('data/user-shows.txt', 'data/shows.txt')
print(f"shape of R: {engine.R.shape}")

shape of R: (9985, 563)


In [3]:
# Cell 3
# Run the engine's preprocessing step. engine.P and engine.Q are read right
# after it; the messages below label them the user and item degree matrices.
engine.preprocess_data()
print(f"user degree matrix P has shape {engine.P.shape}")
print(f"item degree matrix Q has shape: {engine.Q.shape}")

user degree matrix P has shape (9985, 9985)
item degree matrix Q has shape: (563, 563)


In [4]:
# Cell 4
# Row index in engine.R of the target user, referred to as "Alex".
ALEX_USER_ID = 499
# Column indices 0..99: the shows treated as unobserved for this user.
missing_indices = list(range(100))

# Snapshot the user's row before it is overwritten below.
# NOTE(review): this cell is not idempotent — running it a second time copies
# the already-zeroed row, so the snapshot is only the true original on the
# first run after the data is loaded.
R_alex_original = engine.R[ALEX_USER_ID, :].copy()
# In-place edit of the engine's matrix: zero the selected entries of this row.
# NOTE(review): preprocess_data() already ran in Cell 3; if P/Q are derived
# from R they are not refreshed after this change — confirm that is intended.
engine.R[ALEX_USER_ID, missing_indices] = 0

print(f"simulated missing data for user {ALEX_USER_ID} Alex")


simulated missing data for user 499 Alex


In [5]:
# Cell 5
# User-user collaborative filtering: score the held-out show indices for the
# target user, then ask the engine for the top 5 of those scores.
# The result is iterated later as (index, score, name) tuples.
uu_scores = engine.user_user_collaborative_filtering(ALEX_USER_ID, missing_indices)
uu_recommendations = engine.get_top_recommendations(uu_scores, missing_indices, 5)

In [6]:
# Cell 6
# Item-item collaborative filtering: same flow as the user-user cell, using
# the engine's item-item scorer on the same held-out show indices.
# The result is iterated later as (index, score, name) tuples.
ii_scores = engine.item_item_collaborative_filtering(ALEX_USER_ID, missing_indices)
ii_recommendations = engine.get_top_recommendations(ii_scores, missing_indices, 5)

In [7]:
# Cell 7
def show_recommendations(heading, recommendations):
    """Print a heading followed by a 1-based ranked list of recommendations.

    Args:
        heading: Text printed verbatim before the list.
        recommendations: Iterable of (index, score, name) tuples, printed in
            the order given.
    """
    print(heading)
    # The show index is not displayed, so it is discarded while unpacking.
    for rank, (_, score, name) in enumerate(recommendations, start=1):
        print(f"{rank}. {name} (score: {score:.2f})")


show_recommendations("User-User CF recommendations:", uu_recommendations)
# The leading newline leaves a blank line between the two lists.
show_recommendations("\nItem-Item CF recommendations:", ii_recommendations)

User-User CF recommendations:
1. "FOX 28 News at 10pm" (score: 908.48)
2. "Family Guy" (score: 861.18)
3. "2009 NCAA Basketball Tournament" (score: 827.60)
4. "NBC 4 at Eleven" (score: 784.78)
5. "Two and a Half Men" (score: 757.60)

Item-Item CF recommendations:
1. "FOX 28 News at 10pm" (score: 31.36)
2. "Family Guy" (score: 30.00)
3. "NBC 4 at Eleven" (score: 29.40)
4. "2009 NCAA Basketball Tournament" (score: 29.23)
5. "Access Hollywood" (score: 28.97)


In [8]:
# Cell 8
# Check both methods against the score requirements and compare their lists.
evaluator = RecommendationEvaluator()

# Each recommendation is an (index, score, name) tuple. The saved output shows
# the lists ordered best-first, so the first entry is taken as the maximum
# score; an empty list falls back to 0.
uu_max_score = uu_recommendations[0][1] if uu_recommendations else 0
ii_max_score = ii_recommendations[0][1] if ii_recommendations else 0
# Both evaluator calls print their own report; `overlap` is only measured with
# len() below.
evaluator.validate_requirements(uu_max_score, ii_max_score)
overlap = evaluator.analyze_recommendations(uu_recommendations, ii_recommendations)


# Plain string literals are used where there is nothing to interpolate.
print("\nExperiment Summary:")
print(f"Target User: {ALEX_USER_ID} (Alex)")
print("Missing Items: First 100 shows")
print(f"User-User CF Max Score: {uu_max_score:.2f}")
print(f"Item-Item CF Max Score: {ii_max_score:.2f}")
print(f"Recommendations overlap: {len(overlap)}/5")

User-User CF max score: 908.48 (requirement: > 900) ✓
Item-Item CF max score: 31.36 (requirement: > 31) ✓
All requirements passed!
User-User recommendations: 5
Item-Item recommendations: 5
Overlap: 4
Unique to User-User: 1
Unique to Item-Item: 1
Common recommendations:
  - "FOX 28 News at 10pm"
  - "Family Guy"
  - "2009 NCAA Basketball Tournament"
  - "NBC 4 at Eleven"

Experiment Summary:
Target User: 499 (Alex)
Missing Items: First 100 shows
User-User CF Max Score: 908.48
Item-Item CF Max Score: 31.36
Recommendations overlap: 4/5


# Dataset sparsity

In [9]:
# Dataset sparsity: reload the data and measure how empty the matrix R is.
# NOTE(review): this rebinds `engine` to a fresh instance that has not been
# preprocessed and does not carry the Cell 4 masking; re-run Cells 2-4 before
# returning to the recommendation cells.
engine = TVRecommendationEngine()
engine.load_data('data/user-shows.txt', 'data/shows.txt')
print("Data loaded successfully.")
print(f"Ratings matrix R has shape: {engine.R.shape}")

# --- Code Snippet to Check Sparsity ---
# Count the watched entries as the non-zero cells. Counting (rather than
# summing) stays correct even if R ever holds values other than 0/1.
num_ones = np.count_nonzero(engine.R)

# Get the total number of elements in the matrix.
total_elements = engine.R.size

# Calculate the number of '0's (not watched).
num_zeros = total_elements - num_ones

# Sparsity = percentage of cells that are zero.
sparsity = (num_zeros / total_elements) * 100

print("\n--- Dataset Sparsity Analysis ---")
print(f"Total ratings (1s): {num_ones}")
print(f"Missing ratings (0s): {num_zeros}")
print(f"Total possible ratings: {total_elements}")
print(f"Sparsity of the matrix: {sparsity:.2f}%")

Data loaded successfully.
Ratings matrix R has shape: (9985, 563)

--- Dataset Sparsity Analysis ---
Total ratings (1s): 758878
Missing ratings (0s): 4862677
Total possible ratings: 5621555
Sparsity of the matrix: 86.50%


# Compare Cosine Similarity Methods

In [10]:
# Cell for Verification with Refined Timing

import time
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np # Make sure numpy is imported


def _timed_call(func, *args):
    """Call ``func(*args)`` and return ``(result, elapsed_seconds)``.

    Timing uses ``time.perf_counter()``, a monotonic high-resolution clock
    intended for measuring short durations; ``time.time()`` is wall-clock time
    and can jump if the system clock is adjusted while the call runs.
    """
    started = time.perf_counter()
    result = func(*args)
    return result, time.perf_counter() - started


print("--- Verifying Cosine Similarity Calculations (with Refined Timing) ---")

# --- Setup: Do all data loading and preprocessing once ---
# A separate engine instance keeps this check independent of `engine`.
temp_engine = TVRecommendationEngine()
temp_engine.load_data('data/user-shows.txt', 'data/shows.txt')
temp_engine.preprocess_data()
R_matrix = temp_engine.R
P_matrix = temp_engine.P
Q_matrix = temp_engine.Q

# --- 1. Verify User Similarity (Su) ---

# Our matrix formula versus scikit-learn on the rows of R.
Su_formula, time_formula_user = _timed_call(
    temp_engine.sim_computer.compute_user_similarity_matrix, R_matrix, P_matrix
)
Su_sklearn, time_sklearn_user = _timed_call(cosine_similarity, R_matrix)

# Element-wise gap between the two results; its maximum is reported below.
user_diff = np.abs(Su_formula - Su_sklearn)

print("\nVerification for User Similarity Matrix (Su):")
print(f"  Time taken (Our Formula):   {time_formula_user:.4f} seconds")
print(f"  Time taken (sklearn):       {time_sklearn_user:.4f} seconds")
print(f"  Maximum absolute difference:      {np.max(user_diff):.2e}")

# --- 2. Verify Item Similarity (Si) ---

# Same comparison on the transpose, so the columns of R are compared.
Si_formula, time_formula_item = _timed_call(
    temp_engine.sim_computer.compute_item_similarity_matrix, R_matrix, Q_matrix
)
Si_sklearn, time_sklearn_item = _timed_call(cosine_similarity, R_matrix.T)

# Element-wise gap between the two results; its maximum is reported below.
item_diff = np.abs(Si_formula - Si_sklearn)

print("\nVerification for Item Similarity Matrix (Si):")
print(f"  Time taken (Our Formula):   {time_formula_item:.4f} seconds")
print(f"  Time taken (sklearn):       {time_sklearn_item:.4f} seconds")
print(f"  Maximum absolute difference:      {np.max(item_diff):.2e}")

--- Verifying Cosine Similarity Calculations (with Refined Timing) ---

Verification for User Similarity Matrix (Su):
  Time taken (Our Formula):   37.7973 seconds
  Time taken (sklearn):       1.8785 seconds
  Maximum absolute difference:      2.11e-15

Verification for Item Similarity Matrix (Si):
  Time taken (Our Formula):   0.2271 seconds
  Time taken (sklearn):       0.1126 seconds
  Maximum absolute difference:      2.66e-15
