In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics.pairwise import cosine_similarity
from scipy import stats
import pandas as pd


In [None]:
# Load the full embedding matrix and, per prediction task, the embeddings
# stored in that task's "incorrect_embeddings_*" file
full_embeddings = np.load("../solutions/best/embeddings.npy")
churn_embeddings = np.load("incorrect_embeddings_churn.npy")
category_embeddings = np.load("incorrect_embeddings_propensity_category.npy")
sku_embeddings = np.load("incorrect_embeddings_propensity_sku.npy")

# Task name -> embeddings loaded from that task's "incorrect" file
prediction_types = dict(
    churn=churn_embeddings,
    category=category_embeddings,
    sku=sku_embeddings,
)

# Report the shape of everything that was loaded
print("=== COMPARATIVE ANALYSIS OF INCORRECT PREDICTIONS ===")
print(f"Full embeddings shape: {full_embeddings.shape}")
for label, key in (("Churn", "churn"), ("Category", "category"), ("SKU", "sku")):
    print(f"{label} incorrect embeddings shape: {prediction_types[key].shape}")


In [None]:
# Define helper functions
# First column of each action's 256-wide slice, in the order the slices are laid out
indexes = {
    name: position * 256
    for position, name in enumerate(
        ("add_to_cart", "remove_from_cart", "product_buys", "search_query", "page_visits")
    )
}

def get_embedding(embeddings, action):
    """Return the 256 columns of `embeddings` that belong to `action`.

    `embeddings` is indexed as a 2-D array (rows, columns); `action` must be a
    key of `indexes`, otherwise the lookup raises KeyError.
    """
    start = indexes[action]
    stop = start + 256
    return embeddings[:, start:stop]

def count_zero_vectors(arr):
    """Count the rows of a 2-D array whose entries are all equal to zero."""
    # A row is a zero vector exactly when none of its entries differs from 0
    has_nonzero_entry = np.any(arr != 0, axis=1)
    return np.sum(~has_nonzero_entry)

def analyze_prediction_type(prediction_type, incorrect_embeddings, correct_embeddings,
                            sample_size=1000, similarity_size=100, seed=42):
    """Compare incorrect-prediction embeddings with a reference set, action by action.

    For each action slice this computes the share of all-zero rows in both
    groups, the mean L2 norm of the non-zero rows of both groups, and the mean
    cosine similarity between random samples of non-zero rows from the two groups.

    Args:
        prediction_type: Label printed in the header and stored in every result row.
        incorrect_embeddings: 2-D array, sliced per action with get_embedding.
        correct_embeddings: 2-D array used as the reference group (the driver
            below passes the full embedding matrix).
        sample_size: Maximum number of non-zero rows sampled from each group.
        similarity_size: Number of sampled rows per group fed to cosine_similarity.
        seed: Seed of the per-action sampling generator.

    Returns:
        A list of dicts, one per action that has at least one non-zero row in
        both groups, with keys 'prediction_type', 'action', 'zero_prop_correct',
        'zero_prop_incorrect', 'correct_norm_mean', 'incorrect_norm_mean',
        'cross_sim_mean' and 'norm_difference' (incorrect minus correct).
    """
    print(f"\n{'='*60}")
    print(f"ANALYZING {prediction_type.upper()} PREDICTIONS")
    print(f"{'='*60}")

    results = []
    actions = ["add_to_cart", "remove_from_cart", "product_buys", "search_query", "page_visits"]

    for action in actions:
        correct_emb = get_embedding(correct_embeddings, action)
        incorrect_emb = get_embedding(incorrect_embeddings, action)

        # Nothing to compare (and the proportions below would divide by zero)
        if len(correct_emb) == 0 or len(incorrect_emb) == 0:
            continue

        # Basic statistics
        zero_correct = count_zero_vectors(correct_emb) / len(correct_emb)
        zero_incorrect = count_zero_vectors(incorrect_emb) / len(incorrect_emb)

        # Drop zero vectors from BOTH groups so the norm and similarity figures
        # compare like with like; zero rows are already reported above.
        non_zero_correct = correct_emb[~np.all(correct_emb == 0, axis=1)]
        non_zero_incorrect = incorrect_emb[~np.all(incorrect_emb == 0, axis=1)]
        if len(non_zero_correct) == 0 or len(non_zero_incorrect) == 0:
            continue

        correct_norms = np.linalg.norm(non_zero_correct, axis=1)
        incorrect_norms = np.linalg.norm(non_zero_incorrect, axis=1)

        # Sample for similarity analysis
        n_samples = min(sample_size, len(non_zero_correct), len(non_zero_incorrect))
        if n_samples <= 0:
            continue

        # Local seeded generator: reproducible per action without resetting
        # NumPy's global random state as a side effect.
        rng = np.random.RandomState(seed)
        correct_sample_idx = rng.choice(len(non_zero_correct), n_samples, replace=False)
        incorrect_sample_idx = rng.choice(len(non_zero_incorrect), n_samples, replace=False)

        correct_sample = non_zero_correct[correct_sample_idx]
        incorrect_sample = non_zero_incorrect[incorrect_sample_idx]

        # Cross-similarity on a bounded subset to keep the pairwise matrix small
        cross_sim = cosine_similarity(
            correct_sample[:similarity_size],
            incorrect_sample[:similarity_size],
        )

        correct_norm_mean = np.mean(correct_norms)
        incorrect_norm_mean = np.mean(incorrect_norms)
        results.append({
            'prediction_type': prediction_type,
            'action': action,
            'zero_prop_correct': zero_correct,
            'zero_prop_incorrect': zero_incorrect,
            'correct_norm_mean': correct_norm_mean,
            'incorrect_norm_mean': incorrect_norm_mean,
            'cross_sim_mean': np.mean(cross_sim),
            'norm_difference': incorrect_norm_mean - correct_norm_mean
        })

    return results

# Analyze all prediction types
all_results = []
for pred_type, incorrect_emb in prediction_types.items():
    results = analyze_prediction_type(pred_type, incorrect_emb, full_embeddings)
    all_results.extend(results)
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Tabulate the per-(prediction type, action) metrics gathered in all_results\n",
    "df = pd.DataFrame(all_results)\n",
    "\n",
    "rule = '=' * 80\n",
    "print(f\"\\n{rule}\")\n",
    "print(\"COMPREHENSIVE COMPARISON ACROSS PREDICTION TYPES\")\n",
    "print(rule)\n",
    "\n",
    "\n",
    "def _pivot_metric(metric):\n",
    "    \"\"\"Return one metric as a table: actions as rows, prediction types as columns.\"\"\"\n",
    "    return df.pivot(index='action', columns='prediction_type', values=metric)\n",
    "\n",
    "\n",
    "pivot_zero_correct = _pivot_metric('zero_prop_correct')\n",
    "pivot_zero_incorrect = _pivot_metric('zero_prop_incorrect')\n",
    "pivot_norm_diff = _pivot_metric('norm_difference')\n",
    "pivot_cross_sim = _pivot_metric('cross_sim_mean')\n",
    "\n",
    "# Heading printed above each table, in display order\n",
    "sections = [\n",
    "    (\"\\n=== ZERO VECTOR PROPORTIONS (CORRECT PREDICTIONS) ===\", pivot_zero_correct),\n",
    "    (\"\\n=== ZERO VECTOR PROPORTIONS (INCORRECT PREDICTIONS) ===\", pivot_zero_incorrect),\n",
    "    (\"\\n=== NORM DIFFERENCES (Incorrect - Correct) ===\", pivot_norm_diff),\n",
    "    (\"\\n=== CROSS-GROUP SIMILARITIES ===\", pivot_cross_sim),\n",
    "]\n",
    "for heading, table in sections:\n",
    "    print(heading)\n",
    "    print(table.round(4))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Visualize comparisons across prediction types on a 2x2 grid\n",
    "fig, axes = plt.subplots(2, 2, figsize=(15, 12))\n",
    "\n",
    "# Difference in zero-vector share between the two groups (correct minus incorrect)\n",
    "activity_bias = pivot_zero_correct - pivot_zero_incorrect\n",
    "\n",
    "# One entry per panel: (table, grid position, title, y-label, draw y=0 reference line)\n",
    "panels = [\n",
    "    (pivot_zero_incorrect, (0, 0), 'Zero Vector Proportions (Incorrect Predictions)', 'Proportion', False),\n",
    "    (pivot_norm_diff, (0, 1), 'Norm Differences (Incorrect - Correct)', 'Norm Difference', True),\n",
    "    (pivot_cross_sim, (1, 0), 'Cross-Group Similarities', 'Cosine Similarity', False),\n",
    "    (activity_bias, (1, 1), 'Activity Bias (Correct_zeros - Incorrect_zeros)', 'Proportion Difference', True),\n",
    "]\n",
    "\n",
    "for table, position, title, ylabel, zero_line in panels:\n",
    "    ax = axes[position]\n",
    "    table.plot(kind='bar', ax=ax, alpha=0.8)\n",
    "    ax.set_title(title)\n",
    "    ax.set_ylabel(ylabel)\n",
    "    ax.legend(title='Prediction Type')\n",
    "    ax.tick_params(axis='x', rotation=45)\n",
    "    if zero_line:\n",
    "        # Signed quantities get a dashed baseline at zero\n",
    "        ax.axhline(y=0, color='red', linestyle='--', alpha=0.5)\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cross-prediction type similarity analysis\n",
    "print(f\"\\n{'='*80}\")\n",
    "print(\"CROSS-PREDICTION TYPE SIMILARITY ANALYSIS\")\n",
    "print(f\"{'='*80}\")\n",
    "\n",
    "# Compare similarities between different prediction types\n",
    "for action in [\"product_buys\", \"add_to_cart\", \"page_visits\"]:\n",
    "    print(f\"\\n=== {action.upper()} ACTION ===\")\n",
    "    \n",
    "    # Get embeddings for this action across all prediction types\n",
    "    churn_emb = get_embedding(churn_embeddings, action)\n",
    "    category_emb = get_embedding(category_embeddings, action)\n",
    "    sku_emb = get_embedding(sku_embeddings, action)\n",
    "    \n",
    "    # Use the same sample size for every group, capped by the smallest one\n",
    "    n_samples = min(500, len(churn_emb), len(category_emb), len(sku_emb))\n",
    "    if n_samples == 0:\n",
    "        print(\"No embeddings available for at least one prediction type; skipping\")\n",
    "        continue\n",
    "    np.random.seed(42)\n",
    "    \n",
    "    # Draw row indices separately for each array, from that array's own length.\n",
    "    # The three arrays can have different numbers of rows, so indices drawn from\n",
    "    # len(churn_emb) could point past the end of a shorter array.\n",
    "    churn_sample = churn_emb[np.random.choice(len(churn_emb), n_samples, replace=False)]\n",
    "    category_sample = category_emb[np.random.choice(len(category_emb), n_samples, replace=False)]\n",
    "    sku_sample = sku_emb[np.random.choice(len(sku_emb), n_samples, replace=False)]\n",
    "    \n",
    "    # Calculate cross-similarities (all pairs between the two samples)\n",
    "    churn_category_sim = cosine_similarity(churn_sample, category_sample)\n",
    "    churn_sku_sim = cosine_similarity(churn_sample, sku_sample)\n",
    "    category_sku_sim = cosine_similarity(category_sample, sku_sample)\n",
    "    \n",
    "    print(f\"Churn vs Category similarity: {np.mean(churn_category_sim):.4f} ± {np.std(churn_category_sim):.4f}\")\n",
    "    print(f\"Churn vs SKU similarity: {np.mean(churn_sku_sim):.4f} ± {np.std(churn_sku_sim):.4f}\")\n",
    "    print(f\"Category vs SKU similarity: {np.mean(category_sku_sim):.4f} ± {np.std(category_sku_sim):.4f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Final comparative insights and recommendations\n",
    "rule = '=' * 80\n",
    "print(f\"\\n{rule}\")\n",
    "print(\"FINAL COMPARATIVE INSIGHTS AND RECOMMENDATIONS\")\n",
    "print(rule)\n",
    "\n",
    "print(\"\\n=== KEY DIFFERENCES BETWEEN PREDICTION TYPES ===\")\n",
    "\n",
    "# Average each metric over the actions of every prediction type\n",
    "by_type = df.groupby('prediction_type')\n",
    "mean_metrics = by_type[['zero_prop_incorrect', 'norm_difference', 'cross_sim_mean']].mean().round(4)\n",
    "\n",
    "print(\"\\nAVERAGE METRICS BY PREDICTION TYPE:\")\n",
    "print(mean_metrics)\n",
    "\n",
    "print(\"\\n=== RANKING BY DIFFICULTY ===\")\n",
    "# Heuristic score: mean cross-similarity counts double, and a smaller mean\n",
    "# absolute norm gap raises the score\n",
    "difficulty_scores = by_type.agg({\n",
    "    'cross_sim_mean': 'mean',\n",
    "    'norm_difference': lambda x: np.mean(np.abs(x)),\n",
    "})\n",
    "\n",
    "similarity_term = 2 * difficulty_scores['cross_sim_mean']\n",
    "norm_term = 1 - difficulty_scores['norm_difference']\n",
    "difficulty_scores['difficulty_score'] = similarity_term + norm_term\n",
    "\n",
    "difficulty_ranking = difficulty_scores.sort_values('difficulty_score', ascending=False)\n",
    "print(\"\\nPREDICTION DIFFICULTY RANKING (1=hardest):\")\n",
    "for rank, (pred_type, score) in enumerate(difficulty_ranking['difficulty_score'].items(), start=1):\n",
    "    print(f\"{rank}. {pred_type.upper()} - Score: {score:.3f}\")\n",
    "\n",
    "print(\"\\n=== SPECIFIC INSIGHTS BY PREDICTION TYPE ===\")\n",
    "\n",
    "# Rows of the metrics table belonging to each prediction type\n",
    "churn_data = df[df['prediction_type'] == 'churn']\n",
    "category_data = df[df['prediction_type'] == 'category']\n",
    "sku_data = df[df['prediction_type'] == 'sku']\n",
    "\n",
    "# NOTE: only the average cross-similarity is computed from df; the other\n",
    "# bullet points are fixed text.\n",
    "insights = [\n",
    "    (\"\\n1. CHURN PREDICTIONS:\", churn_data, [\n",
    "        \"   - Tends to predict for active users (fewer zero vectors)\",\n",
    "        \"   - Model challenge: Distinguishing active users who will churn vs stay\",\n",
    "    ]),\n",
    "    (\"\\n2. CATEGORY PREDICTIONS:\", category_data, [\n",
    "        \"   - Challenge: User behavior doesn't strongly indicate category preferences\",\n",
    "        \"   - Model needs better category-specific features\",\n",
    "    ]),\n",
    "    (\"\\n3. SKU PREDICTIONS:\", sku_data, [\n",
    "        \"   - Challenge: Extremely fine-grained predictions with high item diversity\",\n",
    "        \"   - Model needs hierarchical approach and product-specific features\",\n",
    "    ]),\n",
    "]\n",
    "for heading, subset, notes in insights:\n",
    "    print(heading)\n",
    "    print(f\"   - Average cross-similarity: {subset['cross_sim_mean'].mean():.3f}\")\n",
    "    for note in notes:\n",
    "        print(note)\n",
    "\n",
    "# Fixed recommendation text, printed line by line\n",
    "recommendations = [\n",
    "    \"\\n=== UNIFIED RECOMMENDATIONS ===\",\n",
    "    \"1. TASK-SPECIFIC ARCHITECTURES:\",\n",
    "    \"   - Use different model architectures for different prediction granularities\",\n",
    "    \"   - Churn: Focus on temporal patterns and engagement metrics\",\n",
    "    \"   - Category: Add category hierarchy and cross-category patterns\",\n",
    "    \"   - SKU: Implement hierarchical modeling with product embeddings\",\n",
    "    \"\\n2. EMBEDDING IMPROVEMENTS:\",\n",
    "    \"   - Current embeddings work better for coarse-grained tasks (churn)\",\n",
    "    \"   - Need specialized embeddings for fine-grained tasks (SKU, category)\",\n",
    "    \"   - Consider task-specific embedding spaces\",\n",
    "    \"\\n3. EVALUATION STRATEGIES:\",\n",
    "    \"   - Different metrics for different granularities\",\n",
    "    \"   - Hierarchical evaluation for multi-level predictions\",\n",
    "    \"   - Consider business impact over pure accuracy\",\n",
    "]\n",
    "for line in recommendations:\n",
    "    print(line)"
   ]
  }
 ],
 \"metadata\": {
  \"kernelspec\": {
   \"display_name\": \".venv\",
   \"language\": \"python\",
   \"name\": \"python3\"
  },
  \"language_info\": {
   \"codemirror_mode\": {
    \"name\": \"ipython\",
    \"version\": 3
   },
   \"file_extension\": \".py\",
   \"mimetype\": \"text/x-python\",
   \"name\": \"python\",
   \"nbconvert_exporter\": \"python\",
   \"pygments_lexer\": \"ipython3\",
   \"version\": \"3.12.3\"
  }
 },
 \"nbformat\": 4,
 \"nbformat_minor\": 2
}"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
