In [5]:
# --- Step 1: Import all necessary libraries ---
from sklearn.manifold import TSNE
import json
import numpy as np
from tv_recommendation import TVRecommendationEngine

print("--- Preparing Data for 3D Visualization ---")

# --- Step 2: Load data and compute both similarity matrices ---
# Build the recommendation engine from the raw user/show files and run its
# preprocessing step; engine.R, engine.P and engine.Q are read afterwards.
engine = TVRecommendationEngine()
engine.load_data('data/user-shows.txt', 'data/shows.txt')
engine.preprocess_data()

# Both matrices are produced by the same similarity helper, so bind it once.
similarity_computer = engine.sim_computer

print("Calculating Item Similarity Matrix (Si)...")
Si_matrix = similarity_computer.compute_item_similarity_matrix(engine.R, engine.Q)

print("Calculating User Similarity Matrix (Su)...")
Su_matrix = similarity_computer.compute_user_similarity_matrix(engine.R, engine.P)

# --- Step 3: Dimensionality Reduction for Shows (Items) ---
# Report the number of items actually being embedded rather than a hard-coded
# count, so the log stays correct if the dataset changes.
num_shows = Si_matrix.shape[0]
print(f"\nRunning t-SNE for {num_shows} shows...")
# NOTE(review): `metric` is left at its default, so each row of (1 - Si) is
# treated as a feature vector, not as a row of precomputed distances --
# confirm this is intended.
# NOTE(review): newer scikit-learn releases rename `n_iter` to `max_iter`;
# confirm against the installed version before upgrading.
tsne_shows = TSNE(n_components=3, perplexity=30, n_iter=1000, random_state=42, init='pca', learning_rate='auto')
show_coords_3d = tsne_shows.fit_transform(1 - Si_matrix)

# --- Step 4: Dimensionality Reduction for a SAMPLE of Users (including Alex) ---
NUM_USERS_TO_VIZ = 50
ALEX_USER_ID = 499  # The actual index of Alex
print(f"\nRunning t-SNE for a sample of {NUM_USERS_TO_VIZ} users, ensuring Alex (user {ALEX_USER_ID}) is included...")


def build_user_sample(alex_id, sample_size):
    """Return user indices for the visualization, with ``alex_id`` first.

    The remaining entries are the lowest user indices other than ``alex_id``,
    in ascending order, so the result has ``sample_size`` entries overall
    (a single entry, ``alex_id``, when ``sample_size`` is 1 or less).

    Args:
        alex_id: Index of the user that must appear first in the sample.
        sample_size: Total number of users wanted in the sample.

    Returns:
        A list of user indices starting with ``alex_id``.
    """
    # Scan one index further than needed: if alex_id falls inside the low
    # range it is filtered out, and the extra candidate keeps the sample at
    # full size instead of silently coming up one user short.
    candidates = [idx for idx in range(sample_size) if idx != alex_id]
    return [alex_id] + candidates[:max(sample_size - 1, 0)]


# Create a sample of user indices, with Alex being the first.
# This ensures he is in the visualization and appears at the top of the dropdown.
user_indices_sample = build_user_sample(ALEX_USER_ID, NUM_USERS_TO_VIZ)
other_user_indices = user_indices_sample[1:]

# Carve the sampled users' rows and columns out of the full user similarity
# matrix; row/column order follows user_indices_sample.
sample_grid = np.ix_(user_indices_sample, user_indices_sample)
Su_sample = Su_matrix[sample_grid]

# Embed the sampled users in 3D; the input is one minus the sampled similarities.
tsne_users = TSNE(
    n_components=3,
    perplexity=25,
    n_iter=1000,
    random_state=42,
    init='pca',
    learning_rate='auto',
)
user_dissimilarity = 1 - Su_sample
user_coords_3d_raw = tsne_users.fit_transform(user_dissimilarity)

# --- Step 5: Apply a Spatial Offset to separate user and show clouds ---
SPATIAL_OFFSET = 80
# Work on a copy so the raw embedding is left untouched, then shift only the
# first (x) column in place.
user_coords_3d = np.copy(user_coords_3d_raw)
np.add(user_coords_3d[:, 0], SPATIAL_OFFSET, out=user_coords_3d[:, 0])
print(f"Applied a spatial offset of {SPATIAL_OFFSET} to user coordinates.")

# --- Step 6: Prepare and Export All Data to a Single JSON File ---
# One record per show: its name plus its 3D coordinates, converted to plain
# Python floats. Row `row` of show_coords_3d is paired with the show at the
# same position in engine.show_names.
shows_data = [
    {
        "name": title,
        "x": float(show_coords_3d[row, 0]),
        "y": float(show_coords_3d[row, 1]),
        "z": float(show_coords_3d[row, 2]),
    }
    for row, title in enumerate(engine.show_names)
]

# One record per sampled user. `row` is the position inside the sample (and
# therefore the row of user_coords_3d); `user_index` is the user's index in
# the full dataset.
users_data = []
for row, user_index in enumerate(user_indices_sample):
    user_key = f"user_{user_index}"
    # Alex gets a distinctive label; everyone else is labelled by index.
    if user_index == ALEX_USER_ID:
        display_name = f"Alex ({user_key})"
    else:
        display_name = f"User {user_index}"
    users_data.append({
        "id": user_key,
        "name": display_name,
        "x": float(user_coords_3d[row, 0]),
        "y": float(user_coords_3d[row, 1]),
        "z": float(user_coords_3d[row, 2]),
    })

# Map each sampled user's id to the list of column indices whose entry in
# that user's row of the user-shows file equals 1.
original_R = np.loadtxt('data/user-shows.txt', dtype=int)
ratings_data = {
    f"user_{sampled_index}": np.nonzero(original_R[sampled_index] == 1)[0].tolist()
    for sampled_index in user_indices_sample
}

# Bundle shows, users and ratings under one top-level object.
final_data = dict(
    shows=shows_data,
    users=users_data,
    ratings=ratings_data,
)

# Write the bundle out as indented JSON.
with open('visualization_data.json', 'w') as out_file:
    json.dump(final_data, out_file, indent=4)

print("\nSuccessfully saved all visualization data to 'visualization_data.json'.")

--- Preparing Data for 3D Visualization ---
Calculating Item Similarity Matrix (Si)...
Calculating User Similarity Matrix (Su)...

Running t-SNE for 563 shows...





Running t-SNE for a sample of 50 users, ensuring Alex (user 499) is included...




Applied a spatial offset of 80 to user coordinates.

Successfully saved all visualization data to 'visualization_data.json'.
