In [69]:
# notebooks/recsys_collaborative_filtering.ipynb

# ---------------------------------------------------------------------------
# 0. Setup and Imports
# ---------------------------------------------------------------------------
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.sparse import csr_matrix # For creating sparse utility matrices
from sklearn.metrics.pairwise import cosine_similarity # For item-item or user-user similarity
from sklearn.neighbors import NearestNeighbors # For KNN-based CF

# For potential matrix factorization (example using Surprise or implicit)
# from surprise import Dataset, Reader, SVD, NMF # Example: pip install scikit-surprise
# import implicit # Example: pip install implicit

# Adjust Python path to import from src
import sys
import os
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
src_path = os.path.join(project_root, 'src')
if src_path not in sys.path:
    sys.path.insert(0, src_path)

# Import your DataLoader and mock_data if needed for initial data
from data_management.data_loader import DataLoader
from data_management.mock_data import create_mock_data

# Configure logging
import logging
logging.basicConfig(level=logging.INFO, format='[%(asctime)s][%(levelname)s] %(message)s')
log = logging.getLogger()

# Pandas display options
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 1000)

print(f"Project root (expected): {project_root}")
print(f"Src path (expected): {src_path}")

Project root (expected): /home/mohamed/trainings/RL_project
Src path (expected): /home/mohamed/trainings/RL_project/src


In [70]:
# ---------------------------------------------------------------------------
# 1. Load Data
# ---------------------------------------------------------------------------
USE_REAL_DATA_FOR_RECSYS = False # Set to True to use your real data, False for mock
TOP_N_CUSTOMERS_RECSYS = 100   # How many customers to use if real data is chosen

if USE_REAL_DATA_FOR_RECSYS:
    log.info("Loading REAL data for RecSys exploration...")
    try:
        data_loader = DataLoader() # Assuming default paths are correct or configured
        data_loader._load_raw_data()
        
        raw_customers_df = data_loader._raw_customers
        raw_assets_df = data_loader._raw_assets # We might not use all asset features for basic CF
        raw_transactions_df = data_loader._raw_transactions

        if TOP_N_CUSTOMERS_RECSYS is not None and raw_transactions_df is not None and not raw_transactions_df.empty:
            customer_activity = raw_transactions_df['customerID'].value_counts()
            if TOP_N_CUSTOMERS_RECSYS >= len(customer_activity):
                top_customer_ids = customer_activity.index.tolist()
            else:
                top_customer_ids = customer_activity.head(TOP_N_CUSTOMERS_RECSYS).index.tolist()
            
            customers_df = raw_customers_df[raw_customers_df['customerID'].isin(top_customer_ids)].copy()
            transactions_df = raw_transactions_df[raw_transactions_df['customerID'].isin(top_customer_ids)].copy()
            assets_df = raw_assets_df # Keep all assets
        else:
            customers_df = raw_customers_df
            transactions_df = raw_transactions_df
            assets_df = raw_assets_df
        log.info(f"Using {len(customers_df)} customers and their {len(transactions_df)} transactions.")

    except Exception as e:
        log.error(f"Error loading real data: {e}. Falling back to synthetic.", exc_info=True)
        USE_SYNTHETIC_DATA_FOR_RECSYS = True # Fallback to synthetic
else:
    USE_SYNTHETIC_DATA_FOR_RECSYS = True # Ensure this flag is set if above fails or is false

if USE_SYNTHETIC_DATA_FOR_RECSYS:
    log.info("Generating SYNTHETIC data for RecSys exploration...")
    SYNTHETIC_N_USERS = 50
    SYNTHETIC_N_ASSETS = 100
    SYNTHETIC_TOTAL_DAYS = 365 * 3
    customers_df, assets_df, _, transactions_df = create_mock_data( # prices_df not strictly needed for basic CF
        seed=42, n_users=SYNTHETIC_N_USERS, n_assets=SYNTHETIC_N_ASSETS, total_days=SYNTHETIC_TOTAL_DAYS
    )
    log.info(f"Generated synthetic data: {len(customers_df)} users, {len(assets_df)} assets, {len(transactions_df)} transactions.")

# Basic cleaning
transactions_df = transactions_df[transactions_df['totalValue'] > 0].copy()
transactions_df['transactionType'] = transactions_df['transactionType'].str.lower()

# We are interested in "Buy" transactions as positive interactions
buy_transactions_df = transactions_df[transactions_df['transactionType'] == 'buy'].copy()

log.info(f"Total 'buy' transactions: {len(buy_transactions_df)}")
log.info(f"Unique customers in buy transactions: {buy_transactions_df['customerID'].nunique()}")
log.info(f"Unique assets in buy transactions: {buy_transactions_df['ISIN'].nunique()}")

if buy_transactions_df.empty:
    raise ValueError("No 'buy' transactions found. Cannot proceed with collaborative filtering.")

[2025-06-02 11:54:25,873][INFO] Generating SYNTHETIC data for RecSys exploration...
[2025-06-02 11:54:25,874][INFO] Generating mock data (seed=42, type='training', n_assets=100, n_users=50).
[2025-06-02 11:54:26,116][INFO] Mock data generation complete.
[2025-06-02 11:54:26,124][INFO] Generated synthetic data: 50 users, 100 assets, 300 transactions.
[2025-06-02 11:54:26,127][INFO] Total 'buy' transactions: 162
[2025-06-02 11:54:26,128][INFO] Unique customers in buy transactions: 48
[2025-06-02 11:54:26,129][INFO] Unique assets in buy transactions: 83


In [71]:
# ---------------------------------------------------------------------------
# 2. User-Item Interaction Matrix / Graph Conceptualization
# ---------------------------------------------------------------------------
# For collaborative filtering, we often start with a user-item interaction matrix.
# Rows: Users, Columns: Assets
# Values: Implicit feedback (e.g., number of buys, total value bought) or binary (1 if bought, 0 otherwise)

# Let's use "number of buy transactions" as the interaction strength for now.
user_item_interaction_counts = buy_transactions_df.groupby(['customerID', 'ISIN']).size().reset_index(name='buy_count')

log.info("User-Item interaction counts (head):")
print(user_item_interaction_counts.head())

# Create a pivot table for the utility matrix
try:
    utility_matrix_df = user_item_interaction_counts.pivot(
        index='customerID',
        columns='ISIN',
        values='buy_count'
    ).fillna(0) # Fill NaN (no interaction) with 0
except Exception as e:
    log.error(f"Error creating pivot table, possibly due to duplicate (customerID, ISIN) entries if not aggregated first: {e}")
    # If groupby wasn't done first, duplicates would cause an error here.
    # But user_item_interaction_counts should have unique (customerID, ISIN) due to groupby.
    raise

log.info(f"Utility matrix shape: {utility_matrix_df.shape} (Users x Assets)")
print(utility_matrix_df.head())

# Convert to a sparse matrix for efficiency, especially if large
utility_matrix_sparse = csr_matrix(utility_matrix_df.values)
# Keep mappings for user/item indices
user_id_to_idx = {user_id: i for i, user_id in enumerate(utility_matrix_df.index)}
item_isin_to_idx = {isin: i for i, isin in enumerate(utility_matrix_df.columns)}
idx_to_user_id = {i: user_id for user_id, i in user_id_to_idx.items()}
idx_to_item_isin = {i: isin for isin, i in item_isin_to_idx.items()}

[2025-06-02 11:54:26,143][INFO] User-Item interaction counts (head):
[2025-06-02 11:54:26,149][INFO] Utility matrix shape: (48, 83) (Users x Assets)


  customerID         ISIN  buy_count
0        TC1  ISIN_MOCK_b          1
1        TC1  ISIN_MOCK_q          1
2        TC1  ISIN_MOCK_y          1
3        TC1  ISIN_MOCK_          1
4        TC1  ISIN_MOCK_          1
ISIN        ISIN_MOCK_A  ISIN_MOCK_B  ISIN_MOCK_C  ISIN_MOCK_D  ISIN_MOCK_E  ISIN_MOCK_F  ISIN_MOCK_H  ISIN_MOCK_I  ISIN_MOCK_K  ISIN_MOCK_L  ISIN_MOCK_M  ISIN_MOCK_O  ISIN_MOCK_P  ISIN_MOCK_R  ISIN_MOCK_S  ISIN_MOCK_T  ISIN_MOCK_U  ISIN_MOCK_X  ISIN_MOCK_Y  ISIN_MOCK_Z  ISIN_MOCK_[  ISIN_MOCK_\  ISIN_MOCK_]  ISIN_MOCK__  ISIN_MOCK_`  ISIN_MOCK_a  ISIN_MOCK_b  ISIN_MOCK_c  ISIN_MOCK_e  ISIN_MOCK_g  ISIN_MOCK_h  ISIN_MOCK_i  ISIN_MOCK_j  ISIN_MOCK_k  ISIN_MOCK_l  ISIN_MOCK_m  ISIN_MOCK_o  ISIN_MOCK_p  ISIN_MOCK_q  ISIN_MOCK_r  ISIN_MOCK_s  ISIN_MOCK_t  ISIN_MOCK_u  ISIN_MOCK_v  ISIN_MOCK_x  ISIN_MOCK_y  ISIN_MOCK_{  ISIN_MOCK_|  ISIN_MOCK_}  ISIN_MOCK_~  ISIN_MOCK_  ISIN_MOCK_  ISIN_MOCK_  ISIN_MOCK_  ISIN_MOCK_  ISIN_MOCK_  ISIN_MOCK_  ISIN_MOCK_  ISIN_MOCK_ 

In [72]:
# ---------------------------------------------------------------------------
# 3. Item-Based Collaborative Filtering (Example using KNN similarity)
# ---------------------------------------------------------------------------
log.info("Implementing Item-Based Collaborative Filtering...")

# Calculate item-item similarity (e.g., cosine similarity on the transpose of utility matrix)
# utility_matrix_df.T gives Items x Users
item_similarity_matrix = cosine_similarity(utility_matrix_df.T) # Item-Item similarity
# item_similarity_matrix is a NumPy array where item_similarity_matrix[i, j] is similarity between item i and item j

# Convert to DataFrame for easier lookup
item_similarity_df = pd.DataFrame(item_similarity_matrix,
                                  index=utility_matrix_df.columns,
                                  columns=utility_matrix_df.columns)

log.info("Item-Item Cosine Similarity Matrix (portion):")
print(item_similarity_df.iloc[:5, :5])

def get_item_based_recommendations(customer_id_target: str,
                                   utility_matrix: pd.DataFrame,
                                   item_similarity: pd.DataFrame,
                                   k_similar_items: int = 5,
                                   n_recommendations: int = 5):
    """
    Generates item-based CF recommendations for a user.
    1. Find items the user has interacted with.
    2. For each interacted item, find its k most similar items.
    3. Aggregate similarity scores for candidate items (not yet interacted by user).
    4. Recommend top N.
    """
    if customer_id_target not in utility_matrix.index:
        log.warning(f"Customer {customer_id_target} not found in utility matrix.")
        return []

    user_interactions = utility_matrix.loc[customer_id_target]
    interacted_items = user_interactions[user_interactions > 0].index.tolist() # ISINs
    
    if not interacted_items:
        log.info(f"Customer {customer_id_target} has no interactions. Cannot provide item-based recommendations.")
        return []

    candidate_scores = {} # Dict to store potential recommendations and their scores

    for item_isin_hist in interacted_items:
        if item_isin_hist not in item_similarity.index: # Should not happen if matrix is consistent
            continue
        # Get similarity scores of this historical item with all other items
        similar_to_item_hist = item_similarity[item_isin_hist].sort_values(ascending=False)
        
        for similar_item_isin, similarity_score in similar_to_item_hist.iloc[1:k_similar_items+1].items(): # Skip itself (sim=1)
            if similar_item_isin not in interacted_items: # Don't recommend items already interacted with
                candidate_scores[similar_item_isin] = candidate_scores.get(similar_item_isin, 0) + similarity_score
    
    # Sort candidates by score
    sorted_candidates = sorted(candidate_scores.items(), key=lambda x: x[1], reverse=True)
    
    recommendations = [item[0] for item in sorted_candidates[:n_recommendations]]
    return recommendations

# Example: Get recommendations for a user
if not customers_df.empty:
    sample_customer_id = customers_df['customerID'].iloc[0]
    log.info(f"\nGetting item-based recommendations for customer: {sample_customer_id}")
    recommendations = get_item_based_recommendations(sample_customer_id, utility_matrix_df, item_similarity_df)
    if recommendations:
        log.info(f"Recommendations: {recommendations}")
        # You could look up asset names from assets_df here
        recs_with_names = assets_df[assets_df['ISIN'].isin(recommendations)][['ISIN', 'VolatilityQuartile']] # Add AssetName if you have it
        print(recs_with_names)
    else:
        log.info("No recommendations generated (e.g., user has no history or no similar items found).")
else:
    log.warning("No customers loaded, cannot get recommendations.")

[2025-06-02 11:54:26,217][INFO] Implementing Item-Based Collaborative Filtering...
[2025-06-02 11:54:26,221][INFO] Item-Item Cosine Similarity Matrix (portion):
[2025-06-02 11:54:26,228][INFO] 
Getting item-based recommendations for customer: TC1
[2025-06-02 11:54:26,234][INFO] Recommendations: ['ISIN_MOCK__', 'ISIN_MOCK_\x80', 'ISIN_MOCK_`', 'ISIN_MOCK_k', 'ISIN_MOCK_\x89']


ISIN         ISIN_MOCK_A  ISIN_MOCK_B  ISIN_MOCK_C  ISIN_MOCK_D  ISIN_MOCK_E
ISIN                                                                        
ISIN_MOCK_A          1.0          0.0          0.0          0.0          0.0
ISIN_MOCK_B          0.0          1.0          0.0          0.0          0.0
ISIN_MOCK_C          0.0          0.0          1.0          0.0          0.0
ISIN_MOCK_D          0.0          0.0          0.0          1.0          0.0
ISIN_MOCK_E          0.0          0.0          0.0          0.0          1.0
           ISIN  VolatilityQuartile
30  ISIN_MOCK__                   2
31  ISIN_MOCK_`                   1
42  ISIN_MOCK_k                   3
63  ISIN_MOCK_                   4
72  ISIN_MOCK_                   2


In [73]:
# CELL 4 MODIFIED FOR DEBUGGING

# ---------------------------------------------------------------------------
# 3. Item-Based Collaborative Filtering (Example using KNN similarity)
# ---------------------------------------------------------------------------
log.info("Implementing Item-Based Collaborative Filtering...")

# Calculate item-item similarity
if utility_matrix_df.shape[1] == 0: # No items/columns
    log.error("Utility matrix has no items (columns). Cannot compute item similarity.")
    item_similarity_df = pd.DataFrame() # Empty DataFrame
else:
    item_similarity_matrix = cosine_similarity(utility_matrix_df.T)
    item_similarity_df = pd.DataFrame(item_similarity_matrix,
                                      index=utility_matrix_df.columns,
                                      columns=utility_matrix_df.columns)
    log.info("Item-Item Cosine Similarity Matrix (portion):")
    print(item_similarity_df.iloc[:min(5, item_similarity_df.shape[0]), :min(5, item_similarity_df.shape[1])])


def get_item_based_recommendations(customer_id_target: str,
                                   utility_matrix: pd.DataFrame,
                                   item_similarity: pd.DataFrame,
                                   k_similar_items: int = 5,
                                   n_recommendations: int = 5):
    log.debug(f"Attempting item-based recommendations for: {customer_id_target}")
    if customer_id_target not in utility_matrix.index:
        log.warning(f"Customer {customer_id_target} not found in utility matrix.")
        return []

    user_interactions = utility_matrix.loc[customer_id_target]
    interacted_items = user_interactions[user_interactions > 0].index.tolist()
    log.debug(f"Customer {customer_id_target} has interacted with items: {interacted_items}")
    
    if not interacted_items:
        log.info(f"Customer {customer_id_target} has no interactions. No item-based recommendations.")
        return []
    
    if item_similarity.empty:
        log.warning("Item similarity matrix is empty. Cannot generate recommendations.")
        return []

    candidate_scores = {}

    for item_isin_hist in interacted_items:
        log.debug(f"  Processing historical item: {item_isin_hist}")
        if item_isin_hist not in item_similarity.index:
            log.warning(f"    Historical item {item_isin_hist} not found in item_similarity matrix index. Skipping.")
            continue
        
        similar_to_item_hist = item_similarity[item_isin_hist].sort_values(ascending=False)
        log.debug(f"    Top similar items to {item_isin_hist} (before filtering):\n{similar_to_item_hist.head(k_similar_items + 2)}")
        
        # Iterate over top k_similar_items, skipping the item itself
        for similar_item_isin, similarity_score in similar_to_item_hist.iloc[1:k_similar_items+1].items():
            log.debug(f"      Considering similar item: {similar_item_isin} with score {similarity_score:.4f}")
            if similar_item_isin not in interacted_items:
                candidate_scores[similar_item_isin] = candidate_scores.get(similar_item_isin, 0) + similarity_score
                log.debug(f"        Added/Updated score for candidate {similar_item_isin}: {candidate_scores[similar_item_isin]:.4f}")
            else:
                log.debug(f"        Skipping {similar_item_isin} as it was already interacted with by the user.")
    
    if not candidate_scores:
        log.info(f"No new candidate items found for {customer_id_target} after considering similar items.")
        return []

    sorted_candidates = sorted(candidate_scores.items(), key=lambda x: x[1], reverse=True)
    log.debug(f"  Sorted candidate scores: {sorted_candidates[:n_recommendations+5]}")
    
    recommendations = [item[0] for item in sorted_candidates[:n_recommendations]]
    return recommendations

# Example: Get recommendations for a user
if not customers_df.empty and not utility_matrix_df.empty: # Ensure utility_matrix_df is not empty
    sample_customer_id = customers_df['customerID'].iloc[0]
    if sample_customer_id in utility_matrix_df.index: # Check if sample customer is in utility matrix
        log.info(f"\nGetting item-based recommendations for customer: {sample_customer_id}")
        recommendations = get_item_based_recommendations(sample_customer_id, utility_matrix_df, item_similarity_df)
        if recommendations:
            log.info(f"Recommendations: {recommendations}")
            recs_with_names = assets_df[assets_df['ISIN'].isin(recommendations)][['ISIN', 'VolatilityQuartile']]
            print("Recommended Assets Details:")
            print(recs_with_names)
        else:
            log.info("No recommendations generated (e.g., user has no history or no new similar items found).")
    else:
        log.warning(f"Sample customer {sample_customer_id} not found in the utility matrix. Utility matrix rows: {utility_matrix_df.index.tolist()[:5]}")
else:
    log.warning("Customers DataFrame or Utility Matrix is empty, cannot get recommendations.")

[2025-06-02 11:54:26,255][INFO] Implementing Item-Based Collaborative Filtering...
[2025-06-02 11:54:26,258][INFO] Item-Item Cosine Similarity Matrix (portion):
[2025-06-02 11:54:26,262][INFO] 
Getting item-based recommendations for customer: TC1
[2025-06-02 11:54:26,272][INFO] Recommendations: ['ISIN_MOCK__', 'ISIN_MOCK_\x80', 'ISIN_MOCK_`', 'ISIN_MOCK_k', 'ISIN_MOCK_\x89']


ISIN         ISIN_MOCK_A  ISIN_MOCK_B  ISIN_MOCK_C  ISIN_MOCK_D  ISIN_MOCK_E
ISIN                                                                        
ISIN_MOCK_A          1.0          0.0          0.0          0.0          0.0
ISIN_MOCK_B          0.0          1.0          0.0          0.0          0.0
ISIN_MOCK_C          0.0          0.0          1.0          0.0          0.0
ISIN_MOCK_D          0.0          0.0          0.0          1.0          0.0
ISIN_MOCK_E          0.0          0.0          0.0          0.0          1.0
Recommended Assets Details:
           ISIN  VolatilityQuartile
30  ISIN_MOCK__                   2
31  ISIN_MOCK_`                   1
42  ISIN_MOCK_k                   3
63  ISIN_MOCK_                   4
72  ISIN_MOCK_                   2


In [74]:
# ---------------------------------------------------------------------------
# X. Advanced 3D Graph Visualization with VisPy & NetworkX
# ---------------------------------------------------------------------------
import networkx as nx
import numpy as np
import random
from vispy import scene
from vispy.scene import visuals
from vispy.color import Color, ColorArray

log.info("\n--- Attempting 3D Graph Visualization with VisPy ---")

# --- Data Subsetting for Visualization (same as previous examples) ---
N_USERS_FOR_VIS = 15
N_ASSETS_PER_USER_HISTORY_FOR_VIS = 5
N_SIMILAR_ASSETS_FOR_VIS = 3
N_RECOMMENDATIONS_TO_HIGHLIGHT = 2 # For a sample user, highlight N recommendations

if utility_matrix_df.empty or item_similarity_df.empty:
    log.warning("VisPy: Utility matrix or item similarity matrix is empty. Cannot create graph.")
else:
    sample_user_id_for_explanation_vp = None
    explained_recommendations_vp = []
    if len(utility_matrix_df.index) > 0:
        user_interaction_counts_vp = utility_matrix_df.sum(axis=1).sort_values(ascending=False)
        if not user_interaction_counts_vp.empty:
            potential_sample_users_vp = user_interaction_counts_vp.head(min(N_USERS_FOR_VIS * 2, len(user_interaction_counts_vp))).index
            sample_user_id_for_explanation_vp = np.random.choice(potential_sample_users_vp)
            log.info(f"VisPy: Sample user for explanation: {sample_user_id_for_explanation_vp}")
            explained_recommendations_vp = get_item_based_recommendations(
                sample_user_id_for_explanation_vp, utility_matrix_df, item_similarity_df,
                k_similar_items=5, n_recommendations=N_RECOMMENDATIONS_TO_HIGHLIGHT
            )
            log.info(f"VisPy: Recommendations for {sample_user_id_for_explanation_vp} to explain: {explained_recommendations_vp}")
    else:
        log.warning("VisPy: No users in utility matrix.")

    # --- Build the NetworkX Graph (same logic, different variable names) ---
    G_vis_vp = nx.Graph()
    nodes_to_add_vp_set = set() # Using a set to store (node_id, type) tuples to avoid duplicates
    edges_to_add_vp_list = [] # List of edge dicts

    # (Logic for populating nodes_to_add_vp_set and edges_to_add_vp_list is identical to the PyVista example,
    #  just ensure you use the _vp suffixed variables if running both examples in the same notebook)

    # 1. Add the sample user and their historical items
    if sample_user_id_for_explanation_vp and sample_user_id_for_explanation_vp in utility_matrix_df.index:
        nodes_to_add_vp_set.add((sample_user_id_for_explanation_vp, 'sample_user'))
        user_hist_interactions_vp = utility_matrix_df.loc[sample_user_id_for_explanation_vp]
        user_hist_items_vp = user_hist_interactions_vp[user_hist_interactions_vp > 0].sort_values(ascending=False).head(N_ASSETS_PER_USER_HISTORY_FOR_VIS).index.tolist()
        for hist_item_isin in user_hist_items_vp:
            nodes_to_add_vp_set.add((hist_item_isin, 'hist_asset'))
            edges_to_add_vp_list.append({'source': sample_user_id_for_explanation_vp, 'target': hist_item_isin, 'type': 'bought', 'weight': user_hist_interactions_vp[hist_item_isin]})
            if hist_item_isin in item_similarity_df.index:
                similar_items_vp = item_similarity_df[hist_item_isin].sort_values(ascending=False).iloc[1:N_SIMILAR_ASSETS_FOR_VIS+1]
                for similar_isin, sim_score in similar_items_vp.items():
                    is_explained_rec = similar_isin in explained_recommendations_vp
                    asset_type_label = 'explained_rec_source' if is_explained_rec else 'similar_asset'
                    if similar_isin not in [item[0] for item in nodes_to_add_vp_set if item[1] == 'hist_asset']:
                         nodes_to_add_vp_set.add((similar_isin, asset_type_label))
                    edges_to_add_vp_list.append({'source': hist_item_isin, 'target': similar_isin, 'type': 'similar_to', 'weight': sim_score})
                    if is_explained_rec:
                        edges_to_add_vp_list.append({'source': sample_user_id_for_explanation_vp, 'target': similar_isin, 'type': 'recommended_because_similar', 'weight': sim_score})

    # Add other context users and their assets (simplified for brevity)
    other_user_ids_vp = [uid for uid in utility_matrix_df.index if uid != sample_user_id_for_explanation_vp]
    context_user_ids_vp = np.random.choice(other_user_ids_vp, min(N_USERS_FOR_VIS -1, len(other_user_ids_vp)), replace=False) if other_user_ids_vp else []
    for user_id in context_user_ids_vp:
        nodes_to_add_vp_set.add((user_id, 'context_user'))
        # ... (add their items and edges as in previous example) ...


    # --- Populate NetworkX Graph and get positions ---
    node_id_list_vp = [] # Keep an ordered list of node IDs for VisPy
    node_attributes_vp = {} # Store attributes for VisPy

    vispy_color_map = { # VisPy expects RGBA arrays (0-1 float or 0-255 int)
        'sample_user': Color('green').rgba, 'hist_asset': Color('orange').rgba,
        'similar_asset': Color('coral').rgba, 'explained_rec_source': Color('yellow').rgba,
        'context_user': Color('lightblue').rgba, 'context_asset': Color('silver').rgba
    }
    vispy_size_map = { # Relative sizes
        'sample_user': 15.0, 'hist_asset': 10.0, 'similar_asset': 8.0, 'explained_rec_source': 12.0,
        'context_user': 7.0, 'context_asset': 6.0
    }

    for node_id, node_type in nodes_to_add_vp_set:
        G_vis_vp.add_node(node_id)
        node_id_list_vp.append(node_id)
        node_attributes_vp[node_id] = {
            'type': node_type,
            'color': vispy_color_map.get(node_type, Color('gray').rgba),
            'size': vispy_size_map.get(node_type, 5.0),
            'label': f"{node_type.replace('_',' ').title()}: {node_id}" # For potential hover/picking
        }
    
    edge_connect_list_vp = [] # List of (idx1, idx2) for edges
    edge_colors_vp = []
    vispy_edge_colors = {'bought': Color('darkgreen', alpha=0.5).rgba, 'similar_to': Color('darkred', alpha=0.7).rgba,
                         'recommended_because_similar': Color('gold', alpha=0.9).rgba, 'bought_by_context': Color('dimgray', alpha=0.3).rgba}

    # Map node IDs to their index in node_id_list_vp for edge connectivity
    node_to_idx_map_vp = {node_id: i for i, node_id in enumerate(node_id_list_vp)}

    for edge_info in edges_to_add_vp_list:
        source_id, target_id = edge_info['source'], edge_info['target']
        if source_id in node_to_idx_map_vp and target_id in node_to_idx_map_vp: # Ensure both nodes are in our visualized set
            G_vis_vp.add_edge(source_id, target_id, type=edge_info['type'], weight=edge_info['weight'])
            edge_connect_list_vp.append((node_to_idx_map_vp[source_id], node_to_idx_map_vp[target_id]))
            edge_colors_vp.append(vispy_edge_colors.get(edge_info['type'], Color('lightgray', alpha=0.5).rgba))


    if G_vis_vp.number_of_nodes() == 0:
        log.warning("VisPy: Graph for visualization is empty. Skipping VisPy vis.")
    else:
        log.info(f"VisPy: NetworkX graph created: {G_vis_vp.number_of_nodes()} nodes, {len(edge_connect_list_vp)} edges for vis.")
        try:
            pos_3d_vp_dict = nx.spring_layout(G_vis_vp, dim=3, seed=42, k=0.9/np.sqrt(G_vis_vp.number_of_nodes()), iterations=60)
            # Convert dict of positions to NumPy array ordered by node_id_list_vp
            node_pos_array_vp = np.array([pos_3d_vp_dict[node_id] for node_id in node_id_list_vp])
            log.info("VisPy: 3D layout calculated.")
        except Exception as e_layout_vp:
            log.error(f"VisPy: Error calculating 3D layout: {e_layout_vp}. Skipping 3D plot.")
            node_pos_array_vp = None

        if node_pos_array_vp is not None:
            # --- Create VisPy Scene ---
            canvas = scene.SceneCanvas(keys='interactive', show=True, bgcolor='white', size=(800,600))
            view = canvas.central_widget.add_view()
            
            # Camera
            view.camera = 'turntable' # Or 'arcball', 'fly', 'panzoom'
            view.camera.fov = 45
            view.camera.distance = np.max(np.ptp(node_pos_array_vp, axis=0)) * 2.5 # Auto-distance based on point cloud spread

            # Prepare node data for VisPy
            node_colors_array_vp = ColorArray([node_attributes_vp[node_id]['color'] for node_id in node_id_list_vp])
            node_sizes_array_vp = np.array([node_attributes_vp[node_id]['size'] for node_id in node_id_list_vp])

            # 1. Add Nodes (Markers visual)
            nodes_vis = visuals.Markers()
            nodes_vis.set_data(node_pos_array_vp, 
                               face_color=node_colors_array_vp, 
                               edge_color=None, # Can add edge_color for borders
                               size=node_sizes_array_vp,
                               scaling=True) # If True, size is in screen space, if False, in data space
            view.add(nodes_vis)

            # 2. Add Edges (Line visual)
            if edge_connect_list_vp: # Only if there are edges
                edge_pos_flat_vp = node_pos_array_vp[np.array(edge_connect_list_vp).flatten()]
                # Reshape to (N_edges, 2, 3) for segmented lines, or use connect='segments'
                
                lines_vis = visuals.Line(pos=node_pos_array_vp, # All node positions
                                         connect=np.array(edge_connect_list_vp), # Connectivity array
                                         color=ColorArray(edge_colors_vp) if edge_colors_vp else 'gray', 
                                         width=2, # Can be an array too for variable widths
                                         method='gl') # 'agg' is anti-aliased but can be slower
                view.add(lines_vis)

            # Add a 3D axis
            visuals.XYZAxis(parent=view.scene)
            
            # To make it run in Jupyter, you might need to start VisPy's Qt event loop
            # For standalone scripts, canvas.app.run() is used.
            # In Jupyter, just showing the canvas might be enough or you might need:
            # from vispy.app import use_app
            # app = use_app() # Get or create an app instance
            # app.run() # This might block if not handled correctly in a notebook
            log.info("VisPy canvas created. If it doesn't show or is not interactive, backend/event loop might need adjustment for Jupyter.")
            
            # For interactivity (hover/click) with VisPy, it's more involved.
            # You'd typically subclass scene.visuals.Markers and override on_mouse_press/on_mouse_move,
            # then perform picking calculations (e.g., ray casting or checking distance to points).
            # This is significantly more complex than Plotly's or PyVista's built-in picking.
            # For "showing why a recommendation was given", you'd need this custom interaction.

            # Example of a simple mouse move event to print camera state (for debugging interaction)
            # @canvas.events.mouse_move.connect
            # def on_mouse_move(event):
            #     # print(f"Camera distance: {view.camera.distance}, center: {view.camera.center}")
            #     pass

[2025-06-02 11:54:26,295][INFO] 
--- Attempting 3D Graph Visualization with VisPy ---
[2025-06-02 11:54:26,299][INFO] VisPy: Sample user for explanation: TC37
[2025-06-02 11:54:26,305][INFO] VisPy: Recommendations for TC37 to explain: ['ISIN_MOCK_y', 'ISIN_MOCK_R']
[2025-06-02 11:54:26,309][INFO] VisPy: NetworkX graph created: 25 nodes, 18 edges for vis.
[2025-06-02 11:54:26,316][INFO] VisPy: 3D layout calculated.


RuntimeError: Could not import backend "jupyter_rfb":
The jupyter_rfb backend relies on a the jupyter_rfb library: ``pip install jupyter_rfb``