In [3]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics.pairwise import cosine_similarity
from scipy import stats
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
import pandas as pd


In [4]:
# Load embeddings for propensity category analysis
full_embeddings = np.load("solutions/best/embeddings.npy")
incorrect_embeddings = np.load("incorrect_embeddings/incorrect_embeddings_propensity_category.npy")

print(f"Full embeddings shape: {full_embeddings.shape}")
print(f"Incorrect embeddings shape: {incorrect_embeddings.shape}")
print(f"Data type: PROPENSITY CATEGORY predictions")


Full embeddings shape: (1000000, 1280)
Incorrect embeddings shape: (83453, 1280)
Data type: PROPENSITY CATEGORY predictions


In [5]:
# Define embedding extraction functions
indexes = {
    "add_to_cart": 0,
    "remove_from_cart": 256,
    "product_buys": 512,
    "search_query": 768,
    "page_visits": 1024,
}

def get_embedding(action):
    return full_embeddings[:, indexes[action]:indexes[action] + 256]

def get_incorrect_embedding(action):
    return incorrect_embeddings[:, indexes[action]:indexes[action] + 256]

def value_counts(x):
    unique, counts = np.unique(x, return_counts=True)
    sorted_indices = np.argsort(-counts)
    unique = unique[sorted_indices]
    counts = counts[sorted_indices]
    print(np.asarray((unique, counts)).T)

def count_zero_vectors(arr):
    zero_rows = np.all(arr == 0, axis=1)
    return np.sum(zero_rows)


In [6]:
# Comprehensive analysis across all action types for PROPENSITY CATEGORY
actions = ["add_to_cart", "remove_from_cart", "product_buys", "search_query", "page_visits"]

def analyze_action_propensity_category(action_name):
    """Comprehensive analysis for a single action type - PROPENSITY CATEGORY focus"""
    print(f"\n{'='*60}")
    print(f"ANALYZING ACTION: {action_name.upper()} (PROPENSITY CATEGORY)")
    print(f"{'='*60}")
    
    # Get embeddings for this action
    correct_emb = get_embedding(action_name)
    incorrect_emb = get_incorrect_embedding(action_name)
    
    print(f"=== BASIC STATISTICS ===")
    print(f"Correct embeddings shape: {correct_emb.shape}")
    print(f"Incorrect embeddings shape: {incorrect_emb.shape}")
    print(f"Zero vectors in correct: {count_zero_vectors(correct_emb)}")
    print(f"Zero vectors in incorrect: {count_zero_vectors(incorrect_emb)}")
    
    # Remove zero vectors for fair comparison
    non_zero_correct = correct_emb[~np.all(correct_emb == 0, axis=1)]
    zero_prop_correct = count_zero_vectors(correct_emb) / len(correct_emb)
    zero_prop_incorrect = count_zero_vectors(incorrect_emb) / len(incorrect_emb)
    
    print(f"Non-zero correct embeddings: {non_zero_correct.shape}")
    print(f"Zero vector proportion - Correct: {zero_prop_correct:.4f}, Incorrect: {zero_prop_incorrect:.4f}")
    
    if len(non_zero_correct) == 0:
        print("WARNING: No non-zero correct embeddings found!")
        return None
    
    print(f"\n=== EMBEDDING MAGNITUDES ===")
    correct_norms = np.linalg.norm(non_zero_correct, axis=1)
    incorrect_norms = np.linalg.norm(incorrect_emb, axis=1)
    
    print(f"Correct embeddings - Mean norm: {np.mean(correct_norms):.4f}, Std: {np.std(correct_norms):.4f}")
    print(f"Incorrect embeddings - Mean norm: {np.mean(incorrect_norms):.4f}, Std: {np.std(incorrect_norms):.4f}")
    
    print(f"\n=== STATISTICAL COMPARISON ===")
    print(f"Correct embeddings - Mean: {np.mean(non_zero_correct):.4f}, Std: {np.std(non_zero_correct):.4f}")
    print(f"Incorrect embeddings - Mean: {np.mean(incorrect_emb):.4f}, Std: {np.std(incorrect_emb):.4f}")
    
    # Sample for detailed analysis
    n_samples = min(5000, len(non_zero_correct), len(incorrect_emb))
    np.random.seed(42)
    
    correct_sample_idx = np.random.choice(len(non_zero_correct), n_samples, replace=False)
    incorrect_sample_idx = np.random.choice(len(incorrect_emb), n_samples, replace=False)
    
    correct_sample = non_zero_correct[correct_sample_idx]
    incorrect_sample = incorrect_emb[incorrect_sample_idx]
    
    # Similarity analysis
    print(f"\n=== SIMILARITY ANALYSIS ===")
    n_sim_samples = min(500, len(correct_sample), len(incorrect_sample))
    
    correct_similarities = cosine_similarity(correct_sample[:n_sim_samples])
    incorrect_similarities = cosine_similarity(incorrect_sample[:n_sim_samples])
    cross_similarities = cosine_similarity(correct_sample[:n_sim_samples], incorrect_sample[:n_sim_samples])
    
    def get_upper_triangle(matrix):
        return matrix[np.triu_indices_from(matrix, k=1)]
    
    correct_sim_values = get_upper_triangle(correct_similarities)
    incorrect_sim_values = get_upper_triangle(incorrect_similarities)
    cross_sim_values = cross_similarities.flatten()
    
    print(f"Correct intra-group similarity - Mean: {np.mean(correct_sim_values):.4f}, Std: {np.std(correct_sim_values):.4f}")
    print(f"Incorrect intra-group similarity - Mean: {np.mean(incorrect_sim_values):.4f}, Std: {np.std(incorrect_sim_values):.4f}")
    print(f"Cross-group similarity - Mean: {np.mean(cross_sim_values):.4f}, Std: {np.std(cross_sim_values):.4f}")
    
    # Statistical tests
    print(f"\n=== STATISTICAL TESTS ===")
    ks_stat, ks_pvalue = stats.ks_2samp(correct_sample.flatten(), incorrect_sample.flatten())
    print(f"KS test - Statistic: {ks_stat:.4f}, p-value: {ks_pvalue:.2e}")
    
    correct_sample_norms = np.linalg.norm(correct_sample, axis=1)
    incorrect_sample_norms = np.linalg.norm(incorrect_sample, axis=1)
    mw_stat, mw_pvalue = stats.mannwhitneyu(correct_sample_norms, incorrect_sample_norms)
    print(f"Mann-Whitney U test on norms - Statistic: {mw_stat:.0f}, p-value: {mw_pvalue:.2e}")
    
    return {
        'action': action_name,
        'zero_prop_correct': zero_prop_correct,
        'zero_prop_incorrect': zero_prop_incorrect,
        'correct_norm_mean': np.mean(correct_norms),
        'incorrect_norm_mean': np.mean(incorrect_norms),
        'correct_sim_mean': np.mean(correct_sim_values),
        'incorrect_sim_mean': np.mean(incorrect_sim_values),
        'cross_sim_mean': np.mean(cross_sim_values),
        'ks_stat': ks_stat,
        'ks_pvalue': ks_pvalue
    }

# Run analysis for all actions
print("Starting comprehensive PROPENSITY CATEGORY analysis across all actions...")
results = []
for action in actions:
    result = analyze_action_propensity_category(action)
    if result is not None:
        results.append(result)


Starting comprehensive PROPENSITY CATEGORY analysis across all actions...

ANALYZING ACTION: ADD_TO_CART (PROPENSITY CATEGORY)
=== BASIC STATISTICS ===
Correct embeddings shape: (1000000, 256)
Incorrect embeddings shape: (83453, 256)
Zero vectors in correct: 387456
Zero vectors in incorrect: 18760
Non-zero correct embeddings: (612544, 256)
Zero vector proportion - Correct: 0.3875, Incorrect: 0.2248

=== EMBEDDING MAGNITUDES ===


  arrmean = umr_sum(arr, axis, dtype, keepdims=True, where=where)


Correct embeddings - Mean norm: 14.1094, Std: inf
Incorrect embeddings - Mean norm: 10.0392, Std: 5.6800

=== STATISTICAL COMPARISON ===


  ret = umr_sum(x, axis, dtype, out, keepdims=keepdims, where=where)


Correct embeddings - Mean: -0.0011, Std: inf
Incorrect embeddings - Mean: -0.0008, Std: 0.7209

=== SIMILARITY ANALYSIS ===
Correct intra-group similarity - Mean: 0.5732, Std: 0.1400
Incorrect intra-group similarity - Mean: 0.3370, Std: 0.2847
Cross-group similarity - Mean: 0.4366, Std: 0.2576

=== STATISTICAL TESTS ===
KS test - Statistic: 0.1151, p-value: 0.00e+00
Mann-Whitney U test on norms - Statistic: 18676668, p-value: 0.00e+00

ANALYZING ACTION: REMOVE_FROM_CART (PROPENSITY CATEGORY)
=== BASIC STATISTICS ===
Correct embeddings shape: (1000000, 256)
Incorrect embeddings shape: (83453, 256)
Zero vectors in correct: 675606
Zero vectors in incorrect: 41485
Non-zero correct embeddings: (324394, 256)
Zero vector proportion - Correct: 0.6756, Incorrect: 0.4971

=== EMBEDDING MAGNITUDES ===


  arrmean = umr_sum(arr, axis, dtype, keepdims=True, where=where)


Correct embeddings - Mean norm: 14.0547, Std: inf
Incorrect embeddings - Mean norm: 6.5020, Std: 6.6422

=== STATISTICAL COMPARISON ===


  ret = umr_sum(x, axis, dtype, out, keepdims=keepdims, where=where)


Correct embeddings - Mean: -0.0016, Std: inf
Incorrect embeddings - Mean: -0.0008, Std: 0.5809

=== SIMILARITY ANALYSIS ===
Correct intra-group similarity - Mean: 0.4504, Std: 0.1172
Incorrect intra-group similarity - Mean: 0.0960, Std: 0.1914
Cross-group similarity - Mean: 0.2062, Std: 0.2353

=== STATISTICAL TESTS ===
KS test - Statistic: 0.2485, p-value: 0.00e+00
Mann-Whitney U test on norms - Statistic: 20746153, p-value: 0.00e+00

ANALYZING ACTION: PRODUCT_BUYS (PROPENSITY CATEGORY)
=== BASIC STATISTICS ===
Correct embeddings shape: (1000000, 256)
Incorrect embeddings shape: (83453, 256)
Zero vectors in correct: 492448
Zero vectors in incorrect: 68
Non-zero correct embeddings: (507552, 256)
Zero vector proportion - Correct: 0.4924, Incorrect: 0.0008

=== EMBEDDING MAGNITUDES ===


  arrmean = umr_sum(arr, axis, dtype, keepdims=True, where=where)


Correct embeddings - Mean norm: 14.8125, Std: inf
Incorrect embeddings - Mean norm: 13.9472, Std: 1.7745

=== STATISTICAL COMPARISON ===
Correct embeddings - Mean: -0.0027, Std: inf
Incorrect embeddings - Mean: -0.0028, Std: 0.8787

=== SIMILARITY ANALYSIS ===
Correct intra-group similarity - Mean: 0.4733, Std: 0.1347
Incorrect intra-group similarity - Mean: 0.4751, Std: 0.1441
Cross-group similarity - Mean: 0.4723, Std: 0.1393

=== STATISTICAL TESTS ===
KS test - Statistic: 0.0148, p-value: 6.27e-122
Mann-Whitney U test on norms - Statistic: 16418376, p-value: 2.82e-162

ANALYZING ACTION: SEARCH_QUERY (PROPENSITY CATEGORY)
=== BASIC STATISTICS ===
Correct embeddings shape: (1000000, 256)
Incorrect embeddings shape: (83453, 256)
Zero vectors in correct: 676832
Zero vectors in incorrect: 38316
Non-zero correct embeddings: (323168, 256)
Zero vector proportion - Correct: 0.6768, Incorrect: 0.4591

=== EMBEDDING MAGNITUDES ===


  arrmean = umr_sum(arr, axis, dtype, keepdims=True, where=where)


Correct embeddings - Mean norm: 12.2031, Std: inf
Incorrect embeddings - Mean norm: 6.5569, Std: 6.2339

=== STATISTICAL COMPARISON ===
Correct embeddings - Mean: 0.0029, Std: inf
Incorrect embeddings - Mean: 0.0013, Std: 0.5655

=== SIMILARITY ANALYSIS ===
Correct intra-group similarity - Mean: 0.4496, Std: 0.1696
Incorrect intra-group similarity - Mean: 0.1394, Std: 0.2700
Cross-group similarity - Mean: 0.2222, Std: 0.2640

=== STATISTICAL TESTS ===
KS test - Statistic: 0.2352, p-value: 0.00e+00
Mann-Whitney U test on norms - Statistic: 18524975, p-value: 0.00e+00

ANALYZING ACTION: PAGE_VISITS (PROPENSITY CATEGORY)
=== BASIC STATISTICS ===
Correct embeddings shape: (1000000, 256)
Incorrect embeddings shape: (83453, 256)
Zero vectors in correct: 173519
Zero vectors in incorrect: 15695
Non-zero correct embeddings: (826481, 256)
Zero vector proportion - Correct: 0.1735, Incorrect: 0.1881

=== EMBEDDING MAGNITUDES ===


  arrmean = umr_sum(arr, axis, dtype, keepdims=True, where=where)


Correct embeddings - Mean norm: 10.9766, Std: inf
Incorrect embeddings - Mean norm: 8.6168, Std: 4.4545

=== STATISTICAL COMPARISON ===
Correct embeddings - Mean: 0.0018, Std: inf
Incorrect embeddings - Mean: 0.0016, Std: 0.6063

=== SIMILARITY ANALYSIS ===
Correct intra-group similarity - Mean: 0.5337, Std: 0.1307
Incorrect intra-group similarity - Mean: 0.4017, Std: 0.3005
Cross-group similarity - Mean: 0.4352, Std: 0.2381

=== STATISTICAL TESTS ===
KS test - Statistic: 0.0948, p-value: 0.00e+00
Mann-Whitney U test on norms - Statistic: 16221382, p-value: 1.10e-146


In [7]:
# Create summary comparison across all actions for PROPENSITY CATEGORY
print(f"\n{'='*80}")
print("SUMMARY COMPARISON ACROSS ALL ACTIONS - PROPENSITY CATEGORY")
print(f"{'='*80}")

if results:
    df = pd.DataFrame(results)
    print(df.to_string(index=False, float_format='%.4f'))
    
    # Key insights
    print(f"\n=== KEY INSIGHTS ACROSS ACTIONS (PROPENSITY CATEGORY) ===")
    
    print("\n1. ACTIVITY PATTERNS (Zero Vector Proportions):")
    for _, row in df.iterrows():
        print(f"   {row['action']:15} - Correct: {row['zero_prop_correct']:.1%}, Incorrect: {row['zero_prop_incorrect']:.1%}")
    
    print("\n2. EMBEDDING MAGNITUDE DIFFERENCES:")
    for _, row in df.iterrows():
        norm_diff = row['incorrect_norm_mean'] - row['correct_norm_mean']
        direction = "HIGHER" if norm_diff > 0 else "LOWER"
        print(f"   {row['action']:15} - Incorrect norms {direction} by {abs(norm_diff):.3f}")
    
    print("\n3. SIMILARITY PATTERNS:")
    for _, row in df.iterrows():
        cross_sim = row['cross_sim_mean']
        status = "HIGH" if cross_sim > 0.5 else "MODERATE" if cross_sim > 0.2 else "LOW"
        print(f"   {row['action']:15} - Cross-group similarity: {cross_sim:.3f} ({status})")
    
    print("\n4. STATISTICAL SIGNIFICANCE:")
    for _, row in df.iterrows():
        significance = "SIGNIFICANT" if row['ks_pvalue'] < 0.001 else "NOT SIGNIFICANT"
        print(f"   {row['action']:15} - KS test: {significance} (p={row['ks_pvalue']:.2e})")
        
    # Find most problematic actions
    print("\n5. MOST PROBLEMATIC ACTIONS (ranked by issues):")
    df['problem_score'] = (
        (df['zero_prop_incorrect'] < df['zero_prop_correct']) * 1 +  # Incorrect has fewer zeros
        (df['cross_sim_mean'] > 0.3) * 1 +  # High cross-similarity
        (df['ks_pvalue'] < 0.001) * 1  # Statistically different
    )
    problematic = df.sort_values('problem_score', ascending=False)
    for _, row in problematic.iterrows():
        print(f"   {row['action']:15} - Problem score: {row['problem_score']}/3")

else:
    print("No results to analyze.")



SUMMARY COMPARISON ACROSS ALL ACTIONS - PROPENSITY CATEGORY
          action  zero_prop_correct  zero_prop_incorrect  correct_norm_mean  incorrect_norm_mean  correct_sim_mean  incorrect_sim_mean  cross_sim_mean  ks_stat  ks_pvalue
     add_to_cart             0.3875               0.2248            14.1094              10.0392            0.5732              0.3370          0.4366   0.1151     0.0000
remove_from_cart             0.6756               0.4971            14.0547               6.5020            0.4504              0.0960          0.2062   0.2485     0.0000
    product_buys             0.4924               0.0008            14.8125              13.9472            0.4733              0.4751          0.4723   0.0148     0.0000
    search_query             0.6768               0.4591            12.2031               6.5569            0.4496              0.1394          0.2222   0.2352     0.0000
     page_visits             0.1735               0.1881            10.9766         

In [8]:
# Final analysis and recommendations for PROPENSITY CATEGORY
print(f"\n{'='*80}")
print("FINAL ANALYSIS AND RECOMMENDATIONS - PROPENSITY CATEGORY")
print(f"{'='*80}")

if results:
    # Identify patterns across all actions
    print("\n=== COMMON PATTERNS ACROSS ALL ACTIONS (PROPENSITY CATEGORY) ===")
    
    activity_bias_actions = [r['action'] for r in results if r['zero_prop_incorrect'] < r['zero_prop_correct']]
    high_cross_sim_actions = [r['action'] for r in results if r['cross_sim_mean'] > 0.3]
    norm_difference_actions = [r['action'] for r in results if abs(r['incorrect_norm_mean'] - r['correct_norm_mean']) > 0.1]
    
    print(f"1. ACTIVITY BIAS (incorrect has fewer inactive users): {activity_bias_actions}")
    print(f"2. HIGH CROSS-SIMILARITY (>0.3): {high_cross_sim_actions}")
    print(f"3. SIGNIFICANT NORM DIFFERENCES (>0.1): {norm_difference_actions}")
    
    print(f"\n=== ROOT CAUSE ANALYSIS (PROPENSITY CATEGORY) ===")
    print("The model's propensity category predictions show:")
    
    print("\n1. CATEGORY PREDICTION CHALLENGES:")
    print("   - Model struggles to predict which product categories users will engage with")
    print("   - Category preferences may be more nuanced than captured in embeddings")
    print("   - User behavior patterns may not translate well to category-level predictions")
    
    print("\n2. EMBEDDING SPACE ISSUES:")
    norm_higher_count = sum(1 for r in results if r['incorrect_norm_mean'] > r['correct_norm_mean'])
    if norm_higher_count > len(results) / 2:
        print("   - Incorrect category predictions have higher embedding magnitudes")
        print("   - Model may be overconfident about wrong category preferences")
        print("   - Category representations may be too coarse-grained")
    
    print("\n3. CATEGORY-SPECIFIC PATTERNS:")
    poor_separation_count = sum(1 for r in results if r['cross_sim_mean'] > 0.3)
    if poor_separation_count > 0:
        print(f"   - {poor_separation_count}/{len(results)} actions show poor category prediction separation")
        print("   - User embeddings may not capture category preferences well")
        print("   - Cross-category similarities may be too high")
    
    print(f"\n=== RECOMMENDATIONS FOR PROPENSITY CATEGORY MODELING ===")
    print("1. CATEGORY-SPECIFIC FEATURES:")
    print("   - Add explicit category interaction features")
    print("   - Consider category hierarchy information")
    print("   - Include category co-occurrence patterns")
    
    print("2. MODEL ARCHITECTURE:")
    print("   - Consider category-specific embedding spaces")
    print("   - Implement hierarchical category modeling")
    print("   - Add category attention mechanisms")
    
    print("3. DATA REPRESENTATION:")
    print("   - Improve category embedding quality")
    print("   - Consider temporal category preferences")
    print("   - Add category-level behavioral features")
    
    print("4. EVALUATION APPROACH:")
    print("   - Category-specific evaluation metrics")
    print("   - Consider category diversity in recommendations")
    print("   - Evaluate category coverage and novelty")

else:
    print("No results to analyze.")



FINAL ANALYSIS AND RECOMMENDATIONS - PROPENSITY CATEGORY

=== COMMON PATTERNS ACROSS ALL ACTIONS (PROPENSITY CATEGORY) ===
1. ACTIVITY BIAS (incorrect has fewer inactive users): ['add_to_cart', 'remove_from_cart', 'product_buys', 'search_query']
2. HIGH CROSS-SIMILARITY (>0.3): ['add_to_cart', 'product_buys', 'page_visits']
3. SIGNIFICANT NORM DIFFERENCES (>0.1): ['add_to_cart', 'remove_from_cart', 'product_buys', 'search_query', 'page_visits']

=== ROOT CAUSE ANALYSIS (PROPENSITY CATEGORY) ===
The model's propensity category predictions show:

1. CATEGORY PREDICTION CHALLENGES:
   - Model struggles to predict which product categories users will engage with
   - Category preferences may be more nuanced than captured in embeddings
   - User behavior patterns may not translate well to category-level predictions

2. EMBEDDING SPACE ISSUES:

3. CATEGORY-SPECIFIC PATTERNS:
   - 3/5 actions show poor category prediction separation
   - User embeddings may not capture category preferences we