In [3]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics.pairwise import cosine_similarity
from scipy import stats
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
import pandas as pd


In [4]:
# Load embeddings for propensity SKU analysis
full_embeddings = np.load("solutions/best/embeddings.npy")
incorrect_embeddings = np.load("incorrect_embeddings/incorrect_embeddings_propensity_sku.npy")

print(f"Full embeddings shape: {full_embeddings.shape}")
print(f"Incorrect embeddings shape: {incorrect_embeddings.shape}")
print(f"Data type: PROPENSITY SKU predictions")


Full embeddings shape: (1000000, 1280)
Incorrect embeddings shape: (5939, 1280)
Data type: PROPENSITY SKU predictions


In [5]:
# Define embedding extraction functions
indexes = {
    "add_to_cart": 0,
    "remove_from_cart": 256,
    "product_buys": 512,
    "search_query": 768,
    "page_visits": 1024,
}

def get_embedding(action):
    return full_embeddings[:, indexes[action]:indexes[action] + 256]

def get_incorrect_embedding(action):
    return incorrect_embeddings[:, indexes[action]:indexes[action] + 256]

def value_counts(x):
    unique, counts = np.unique(x, return_counts=True)
    sorted_indices = np.argsort(-counts)
    unique = unique[sorted_indices]
    counts = counts[sorted_indices]
    print(np.asarray((unique, counts)).T)

def count_zero_vectors(arr):
    zero_rows = np.all(arr == 0, axis=1)
    return np.sum(zero_rows)


In [6]:
# Comprehensive analysis across all action types for PROPENSITY SKU
actions = ["add_to_cart", "remove_from_cart", "product_buys", "search_query", "page_visits"]

def analyze_action_propensity_sku(action_name):
    """Comprehensive analysis for a single action type - PROPENSITY SKU focus"""
    print(f"\n{'='*60}")
    print(f"ANALYZING ACTION: {action_name.upper()} (PROPENSITY SKU)")
    print(f"{'='*60}")
    
    # Get embeddings for this action
    correct_emb = get_embedding(action_name)
    incorrect_emb = get_incorrect_embedding(action_name)
    
    print(f"=== BASIC STATISTICS ===")
    print(f"Correct embeddings shape: {correct_emb.shape}")
    print(f"Incorrect embeddings shape: {incorrect_emb.shape}")
    print(f"Zero vectors in correct: {count_zero_vectors(correct_emb)}")
    print(f"Zero vectors in incorrect: {count_zero_vectors(incorrect_emb)}")
    
    # Remove zero vectors for fair comparison
    non_zero_correct = correct_emb[~np.all(correct_emb == 0, axis=1)]
    zero_prop_correct = count_zero_vectors(correct_emb) / len(correct_emb)
    zero_prop_incorrect = count_zero_vectors(incorrect_emb) / len(incorrect_emb)
    
    print(f"Non-zero correct embeddings: {non_zero_correct.shape}")
    print(f"Zero vector proportion - Correct: {zero_prop_correct:.4f}, Incorrect: {zero_prop_incorrect:.4f}")
    
    if len(non_zero_correct) == 0:
        print("WARNING: No non-zero correct embeddings found!")
        return None
    
    print(f"\n=== EMBEDDING MAGNITUDES ===")
    correct_norms = np.linalg.norm(non_zero_correct, axis=1)
    incorrect_norms = np.linalg.norm(incorrect_emb, axis=1)
    
    print(f"Correct embeddings - Mean norm: {np.mean(correct_norms):.4f}, Std: {np.std(correct_norms):.4f}")
    print(f"Incorrect embeddings - Mean norm: {np.mean(incorrect_norms):.4f}, Std: {np.std(incorrect_norms):.4f}")
    
    print(f"\n=== STATISTICAL COMPARISON ===")
    print(f"Correct embeddings - Mean: {np.mean(non_zero_correct):.4f}, Std: {np.std(non_zero_correct):.4f}")
    print(f"Incorrect embeddings - Mean: {np.mean(incorrect_emb):.4f}, Std: {np.std(incorrect_emb):.4f}")
    
    # Sample for detailed analysis
    n_samples = min(5000, len(non_zero_correct), len(incorrect_emb))
    np.random.seed(42)
    
    correct_sample_idx = np.random.choice(len(non_zero_correct), n_samples, replace=False)
    incorrect_sample_idx = np.random.choice(len(incorrect_emb), n_samples, replace=False)
    
    correct_sample = non_zero_correct[correct_sample_idx]
    incorrect_sample = incorrect_emb[incorrect_sample_idx]
    
    # Similarity analysis
    print(f"\n=== SIMILARITY ANALYSIS ===")
    n_sim_samples = min(500, len(correct_sample), len(incorrect_sample))
    
    correct_similarities = cosine_similarity(correct_sample[:n_sim_samples])
    incorrect_similarities = cosine_similarity(incorrect_sample[:n_sim_samples])
    cross_similarities = cosine_similarity(correct_sample[:n_sim_samples], incorrect_sample[:n_sim_samples])
    
    def get_upper_triangle(matrix):
        return matrix[np.triu_indices_from(matrix, k=1)]
    
    correct_sim_values = get_upper_triangle(correct_similarities)
    incorrect_sim_values = get_upper_triangle(incorrect_similarities)
    cross_sim_values = cross_similarities.flatten()
    
    print(f"Correct intra-group similarity - Mean: {np.mean(correct_sim_values):.4f}, Std: {np.std(correct_sim_values):.4f}")
    print(f"Incorrect intra-group similarity - Mean: {np.mean(incorrect_sim_values):.4f}, Std: {np.std(incorrect_sim_values):.4f}")
    print(f"Cross-group similarity - Mean: {np.mean(cross_sim_values):.4f}, Std: {np.std(cross_sim_values):.4f}")
    
    # Statistical tests
    print(f"\n=== STATISTICAL TESTS ===")
    ks_stat, ks_pvalue = stats.ks_2samp(correct_sample.flatten(), incorrect_sample.flatten())
    print(f"KS test - Statistic: {ks_stat:.4f}, p-value: {ks_pvalue:.2e}")
    
    correct_sample_norms = np.linalg.norm(correct_sample, axis=1)
    incorrect_sample_norms = np.linalg.norm(incorrect_sample, axis=1)
    mw_stat, mw_pvalue = stats.mannwhitneyu(correct_sample_norms, incorrect_sample_norms)
    print(f"Mann-Whitney U test on norms - Statistic: {mw_stat:.0f}, p-value: {mw_pvalue:.2e}")
    
    return {
        'action': action_name,
        'zero_prop_correct': zero_prop_correct,
        'zero_prop_incorrect': zero_prop_incorrect,
        'correct_norm_mean': np.mean(correct_norms),
        'incorrect_norm_mean': np.mean(incorrect_norms),
        'correct_sim_mean': np.mean(correct_sim_values),
        'incorrect_sim_mean': np.mean(incorrect_sim_values),
        'cross_sim_mean': np.mean(cross_sim_values),
        'ks_stat': ks_stat,
        'ks_pvalue': ks_pvalue
    }

# Run analysis for all actions
print("Starting comprehensive PROPENSITY SKU analysis across all actions...")
results = []
for action in actions:
    result = analyze_action_propensity_sku(action)
    if result is not None:
        results.append(result)


Starting comprehensive PROPENSITY SKU analysis across all actions...

ANALYZING ACTION: ADD_TO_CART (PROPENSITY SKU)
=== BASIC STATISTICS ===
Correct embeddings shape: (1000000, 256)
Incorrect embeddings shape: (5939, 256)
Zero vectors in correct: 387456
Zero vectors in incorrect: 1022
Non-zero correct embeddings: (612544, 256)
Zero vector proportion - Correct: 0.3875, Incorrect: 0.1721

=== EMBEDDING MAGNITUDES ===


  arrmean = umr_sum(arr, axis, dtype, keepdims=True, where=where)


Correct embeddings - Mean norm: 14.1094, Std: inf
Incorrect embeddings - Mean norm: 9.9501, Std: 4.9345

=== STATISTICAL COMPARISON ===


  ret = umr_sum(x, axis, dtype, out, keepdims=keepdims, where=where)


Correct embeddings - Mean: -0.0011, Std: inf
Incorrect embeddings - Mean: -0.0010, Std: 0.6942

=== SIMILARITY ANALYSIS ===
Correct intra-group similarity - Mean: 0.5732, Std: 0.1400
Incorrect intra-group similarity - Mean: 0.3345, Std: 0.2350
Cross-group similarity - Mean: 0.3968, Std: 0.2114

=== STATISTICAL TESTS ===
KS test - Statistic: 0.0865, p-value: 0.00e+00
Mann-Whitney U test on norms - Statistic: 20523443, p-value: 0.00e+00

ANALYZING ACTION: REMOVE_FROM_CART (PROPENSITY SKU)
=== BASIC STATISTICS ===
Correct embeddings shape: (1000000, 256)
Incorrect embeddings shape: (5939, 256)
Zero vectors in correct: 675606
Zero vectors in incorrect: 2431
Non-zero correct embeddings: (324394, 256)
Zero vector proportion - Correct: 0.6756, Incorrect: 0.4093

=== EMBEDDING MAGNITUDES ===


  arrmean = umr_sum(arr, axis, dtype, keepdims=True, where=where)


Correct embeddings - Mean norm: 14.0547, Std: inf
Incorrect embeddings - Mean norm: 7.2641, Std: 6.3012

=== STATISTICAL COMPARISON ===


  ret = umr_sum(x, axis, dtype, out, keepdims=keepdims, where=where)


Correct embeddings - Mean: -0.0016, Std: inf
Incorrect embeddings - Mean: -0.0013, Std: 0.6010

=== SIMILARITY ANALYSIS ===
Correct intra-group similarity - Mean: 0.4504, Std: 0.1172
Incorrect intra-group similarity - Mean: 0.1427, Std: 0.2034
Cross-group similarity - Mean: 0.2406, Std: 0.2173

=== STATISTICAL TESTS ===
KS test - Statistic: 0.2060, p-value: 0.00e+00
Mann-Whitney U test on norms - Statistic: 21111784, p-value: 0.00e+00

ANALYZING ACTION: PRODUCT_BUYS (PROPENSITY SKU)
=== BASIC STATISTICS ===
Correct embeddings shape: (1000000, 256)
Incorrect embeddings shape: (5939, 256)
Zero vectors in correct: 492448
Zero vectors in incorrect: 69
Non-zero correct embeddings: (507552, 256)
Zero vector proportion - Correct: 0.4924, Incorrect: 0.0116

=== EMBEDDING MAGNITUDES ===


  arrmean = umr_sum(arr, axis, dtype, keepdims=True, where=where)


Correct embeddings - Mean norm: 14.8125, Std: inf
Incorrect embeddings - Mean norm: 12.7140, Std: 2.5182

=== STATISTICAL COMPARISON ===
Correct embeddings - Mean: -0.0027, Std: inf
Incorrect embeddings - Mean: -0.0032, Std: 0.8101

=== SIMILARITY ANALYSIS ===
Correct intra-group similarity - Mean: 0.4733, Std: 0.1347
Incorrect intra-group similarity - Mean: 0.4089, Std: 0.1314
Cross-group similarity - Mean: 0.3946, Std: 0.1235

=== STATISTICAL TESTS ===
KS test - Statistic: 0.0384, p-value: 0.00e+00
Mann-Whitney U test on norms - Statistic: 20533537, p-value: 0.00e+00

ANALYZING ACTION: SEARCH_QUERY (PROPENSITY SKU)
=== BASIC STATISTICS ===
Correct embeddings shape: (1000000, 256)
Incorrect embeddings shape: (5939, 256)
Zero vectors in correct: 676832
Zero vectors in incorrect: 2311
Non-zero correct embeddings: (323168, 256)
Zero vector proportion - Correct: 0.6768, Incorrect: 0.3891

=== EMBEDDING MAGNITUDES ===


  arrmean = umr_sum(arr, axis, dtype, keepdims=True, where=where)


Correct embeddings - Mean norm: 12.2031, Std: inf
Incorrect embeddings - Mean norm: 7.4928, Std: 6.1923

=== STATISTICAL COMPARISON ===
Correct embeddings - Mean: 0.0029, Std: inf
Incorrect embeddings - Mean: 0.0014, Std: 0.6075

=== SIMILARITY ANALYSIS ===
Correct intra-group similarity - Mean: 0.4496, Std: 0.1696
Incorrect intra-group similarity - Mean: 0.1873, Std: 0.2950
Cross-group similarity - Mean: 0.2552, Std: 0.2656

=== STATISTICAL TESTS ===
KS test - Statistic: 0.1953, p-value: 0.00e+00
Mann-Whitney U test on norms - Statistic: 17258687, p-value: 3.98e-240

ANALYZING ACTION: PAGE_VISITS (PROPENSITY SKU)
=== BASIC STATISTICS ===
Correct embeddings shape: (1000000, 256)
Incorrect embeddings shape: (5939, 256)
Zero vectors in correct: 173519
Zero vectors in incorrect: 815
Non-zero correct embeddings: (826481, 256)
Zero vector proportion - Correct: 0.1735, Incorrect: 0.1372

=== EMBEDDING MAGNITUDES ===


  arrmean = umr_sum(arr, axis, dtype, keepdims=True, where=where)


Correct embeddings - Mean norm: 10.9766, Std: inf
Incorrect embeddings - Mean norm: 9.2495, Std: 4.0922

=== STATISTICAL COMPARISON ===
Correct embeddings - Mean: 0.0018, Std: inf
Incorrect embeddings - Mean: 0.0018, Std: 0.6321

=== SIMILARITY ANALYSIS ===
Correct intra-group similarity - Mean: 0.5337, Std: 0.1307
Incorrect intra-group similarity - Mean: 0.4485, Std: 0.2937
Cross-group similarity - Mean: 0.4387, Std: 0.2181

=== STATISTICAL TESTS ===
KS test - Statistic: 0.0703, p-value: 0.00e+00
Mann-Whitney U test on norms - Statistic: 15256561, p-value: 2.52e-81


In [7]:
# Create summary comparison across all actions for PROPENSITY SKU
print(f"\n{'='*80}")
print("SUMMARY COMPARISON ACROSS ALL ACTIONS - PROPENSITY SKU")
print(f"{'='*80}")

if results:
    df = pd.DataFrame(results)
    print(df.to_string(index=False, float_format='%.4f'))
    
    # Key insights
    print(f"\n=== KEY INSIGHTS ACROSS ACTIONS (PROPENSITY SKU) ===")
    
    print("\n1. ACTIVITY PATTERNS (Zero Vector Proportions):")
    for _, row in df.iterrows():
        print(f"   {row['action']:15} - Correct: {row['zero_prop_correct']:.1%}, Incorrect: {row['zero_prop_incorrect']:.1%}")
    
    print("\n2. EMBEDDING MAGNITUDE DIFFERENCES:")
    for _, row in df.iterrows():
        norm_diff = row['incorrect_norm_mean'] - row['correct_norm_mean']
        direction = "HIGHER" if norm_diff > 0 else "LOWER"
        print(f"   {row['action']:15} - Incorrect norms {direction} by {abs(norm_diff):.3f}")
    
    print("\n3. SIMILARITY PATTERNS:")
    for _, row in df.iterrows():
        cross_sim = row['cross_sim_mean']
        status = "HIGH" if cross_sim > 0.5 else "MODERATE" if cross_sim > 0.2 else "LOW"
        print(f"   {row['action']:15} - Cross-group similarity: {cross_sim:.3f} ({status})")
    
    print("\n4. STATISTICAL SIGNIFICANCE:")
    for _, row in df.iterrows():
        significance = "SIGNIFICANT" if row['ks_pvalue'] < 0.001 else "NOT SIGNIFICANT"
        print(f"   {row['action']:15} - KS test: {significance} (p={row['ks_pvalue']:.2e})")
        
    # Find most problematic actions
    print("\n5. MOST PROBLEMATIC ACTIONS (ranked by issues):")
    df['problem_score'] = (
        (df['zero_prop_incorrect'] < df['zero_prop_correct']) * 1 +  # Incorrect has fewer zeros
        (df['cross_sim_mean'] > 0.3) * 1 +  # High cross-similarity
        (df['ks_pvalue'] < 0.001) * 1  # Statistically different
    )
    problematic = df.sort_values('problem_score', ascending=False)
    for _, row in problematic.iterrows():
        print(f"   {row['action']:15} - Problem score: {row['problem_score']}/3")

else:
    print("No results to analyze.")



SUMMARY COMPARISON ACROSS ALL ACTIONS - PROPENSITY SKU
          action  zero_prop_correct  zero_prop_incorrect  correct_norm_mean  incorrect_norm_mean  correct_sim_mean  incorrect_sim_mean  cross_sim_mean  ks_stat  ks_pvalue
     add_to_cart             0.3875               0.1721            14.1094               9.9501            0.5732              0.3345          0.3968   0.0865     0.0000
remove_from_cart             0.6756               0.4093            14.0547               7.2641            0.4504              0.1427          0.2406   0.2060     0.0000
    product_buys             0.4924               0.0116            14.8125              12.7140            0.4733              0.4089          0.3946   0.0384     0.0000
    search_query             0.6768               0.3891            12.2031               7.4928            0.4496              0.1873          0.2552   0.1953     0.0000
     page_visits             0.1735               0.1372            10.9766              

In [8]:
# Final analysis and recommendations for PROPENSITY SKU
print(f"\n{'='*80}")
print("FINAL ANALYSIS AND RECOMMENDATIONS - PROPENSITY SKU")
print(f"{'='*80}")

if results:
    # Identify patterns across all actions
    print("\n=== COMMON PATTERNS ACROSS ALL ACTIONS (PROPENSITY SKU) ===")
    
    activity_bias_actions = [r['action'] for r in results if r['zero_prop_incorrect'] < r['zero_prop_correct']]
    high_cross_sim_actions = [r['action'] for r in results if r['cross_sim_mean'] > 0.3]
    norm_difference_actions = [r['action'] for r in results if abs(r['incorrect_norm_mean'] - r['correct_norm_mean']) > 0.1]
    
    print(f"1. ACTIVITY BIAS (incorrect has fewer inactive users): {activity_bias_actions}")
    print(f"2. HIGH CROSS-SIMILARITY (>0.3): {high_cross_sim_actions}")
    print(f"3. SIGNIFICANT NORM DIFFERENCES (>0.1): {norm_difference_actions}")
    
    print(f"\n=== ROOT CAUSE ANALYSIS (PROPENSITY SKU) ===")
    print("The model's propensity SKU predictions show:")
    
    print("\n1. SKU-LEVEL PREDICTION CHALLENGES:")
    print("   - Model struggles with fine-grained SKU-level predictions")
    print("   - SKU preferences are highly individual and context-dependent")
    print("   - User behavior patterns may not capture specific product affinities")
    print("   - High SKU diversity makes prediction extremely challenging")
    
    print("\n2. EMBEDDING GRANULARITY ISSUES:")
    norm_higher_count = sum(1 for r in results if r['incorrect_norm_mean'] > r['correct_norm_mean'])
    if norm_higher_count > len(results) / 2:
        print("   - Incorrect SKU predictions have higher embedding magnitudes")
        print("   - Model may be overconfident about wrong specific products")
        print("   - SKU-level features may be too sparse or noisy")
    
    print("\n3. PRODUCT-SPECIFIC PATTERNS:")
    poor_separation_count = sum(1 for r in results if r['cross_sim_mean'] > 0.3)
    if poor_separation_count > 0:
        print(f"   - {poor_separation_count}/{len(results)} actions show poor SKU prediction separation")
        print("   - User embeddings may not capture specific product preferences")
        print("   - SKU-level distinctions may be too subtle for current embeddings")
    
    print(f"\n=== RECOMMENDATIONS FOR PROPENSITY SKU MODELING ===")
    print("1. PRODUCT-SPECIFIC FEATURES:")
    print("   - Add detailed product attributes and descriptions")
    print("   - Include product similarity and substitution patterns")
    print("   - Consider product lifecycle and popularity features")
    print("   - Add brand, price, and attribute preferences")
    
    print("2. MODEL ARCHITECTURE:")
    print("   - Implement hierarchical modeling (category -> brand -> SKU)")
    print("   - Use product2vec or similar product embeddings")
    print("   - Consider collaborative filtering for SKU-level predictions")
    print("   - Add product attention mechanisms")
    
    print("3. DATA REPRESENTATION:")
    print("   - Improve SKU embedding quality with richer features")
    print("   - Consider temporal SKU preferences and seasonality")
    print("   - Add cross-SKU interaction patterns")
    print("   - Include purchase context (occasion, seasonality)")
    
    print("4. EVALUATION APPROACH:")
    print("   - SKU-specific evaluation metrics")
    print("   - Consider recommendation diversity vs precision trade-offs")
    print("   - Evaluate long-tail SKU coverage")
    print("   - Use hierarchical evaluation (category accuracy -> SKU accuracy)")
    
    print("5. ALTERNATIVE APPROACHES:")
    print("   - Consider ensemble with category-level predictions")
    print("   - Implement cold-start strategies for new SKUs")
    print("   - Use content-based filtering for sparse SKUs")
    print("   - Consider popularity-based fallbacks")

else:
    print("No results to analyze.")



FINAL ANALYSIS AND RECOMMENDATIONS - PROPENSITY SKU

=== COMMON PATTERNS ACROSS ALL ACTIONS (PROPENSITY SKU) ===
1. ACTIVITY BIAS (incorrect has fewer inactive users): ['add_to_cart', 'remove_from_cart', 'product_buys', 'search_query', 'page_visits']
2. HIGH CROSS-SIMILARITY (>0.3): ['add_to_cart', 'product_buys', 'page_visits']
3. SIGNIFICANT NORM DIFFERENCES (>0.1): ['add_to_cart', 'remove_from_cart', 'product_buys', 'search_query', 'page_visits']

=== ROOT CAUSE ANALYSIS (PROPENSITY SKU) ===
The model's propensity SKU predictions show:

1. SKU-LEVEL PREDICTION CHALLENGES:
   - Model struggles with fine-grained SKU-level predictions
   - SKU preferences are highly individual and context-dependent
   - User behavior patterns may not capture specific product affinities
   - High SKU diversity makes prediction extremely challenging

2. EMBEDDING GRANULARITY ISSUES:

3. PRODUCT-SPECIFIC PATTERNS:
   - 3/5 actions show poor SKU prediction separation
   - User embeddings may not capture s