# Trader Selection Framework - Example Analysis

This notebook demonstrates the complete workflow for analyzing crypto wallet addresses and identifying high-potential traders.

## 1. Setup and Imports

In [None]:
import sys
from pathlib import Path

sys.path.insert(0, '../src')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from trader_analysis import (
    FeatureEngineer,
    TraderSegmentation,
    HighPotentialPredictor,
    PersonaAssigner,
    ModelEvaluator
)
from trader_analysis.visualization import Visualizer

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Use seaborn's 'whitegrid' style for every plot drawn after this point.
sns.set_style('whitegrid')

## 2. Load Data

In [None]:
# Location of the raw transaction export, relative to this notebook.
data_path = '../data/traders_202510140811.csv'
df = pd.read_csv(data_path)

# Quick sanity summary: row count, distinct wallet addresses, covered period.
# NOTE(review): read_csv is called without parse_dates, so if 'timestamp' is
# stored as text the min/max below are string comparisons — only correct for
# lexicographically sortable formats such as ISO 8601.
print(f"Loaded {len(df)} transactions")
print(f"Unique wallets: {df['address'].nunique()}")
print(f"Date range: {df['timestamp'].min()} to {df['timestamp'].max()}")

# Preview the first rows.
df.head()

## 3. Feature Engineering

In [None]:
# Turn the raw transactions in `df` into a wallet-level feature table.
# recency_decay=0.1 is forwarded to FeatureEngineer
# (presumably controls how strongly older activity is down-weighted — confirm).
engineer = FeatureEngineer(recency_decay=0.1)
features = engineer.engineer_features(df)

# Report the number of feature rows produced and the number of feature names
# the engineer exposes.
print(f"\nEngineered {len(features)} wallet feature vectors")
print(f"Number of features: {len(engineer.get_feature_names())}")

# Preview the engineered feature table.
features.head()

## 4. Clustering Analysis

In [None]:
# Prepare features for clustering.
# Exclude the wallet identifier AND every column that this notebook appends to
# `features` later on ('cluster' below, 'persona' in the persona section,
# 'high_potential_score' / 'high_potential' in the scoring section). Without
# this, re-running the cell after those cells have executed would silently feed
# derived (and non-numeric) columns into the clustering. On a fresh kernel the
# selection is the same as excluding 'address' alone.
# NOTE(review): extend this set if assign_personas adds further columns.
non_feature_cols = {
    'address',
    'cluster',
    'persona',
    'high_potential_score',
    'high_potential',
}
feature_cols = [col for col in features.columns if col not in non_feature_cols]
X = features[feature_cols].values

# Perform clustering (random_state fixed for reproducibility;
# optimize_k=True presumably lets fit_kmeans choose the cluster count — confirm).
clusterer = TraderSegmentation(random_state=42)
clusterer.fit_kmeans(X, optimize_k=True)

# Evaluate the fitted clustering on the same matrix it was fitted on.
metrics = clusterer.evaluate_clustering(X)

# Add cluster labels (stored as a new 'cluster' column on `features`).
features['cluster'] = clusterer.labels_

In [None]:
# Visualize clusters
# `visualizer` is created here and reused by every later plotting cell, so this
# cell must run before them.
visualizer = Visualizer()
# Reduce the clustering matrix X for plotting (the name suggests a 2-D
# projection — confirm), then scatter it with the cluster labels.
features_2d = clusterer.reduce_dimensions_for_visualization(X)
visualizer.plot_cluster_scatter(features_2d, clusterer.labels_)

## 5. Persona Assignment

In [None]:
# Assign personas. `features` is rebound to the frame returned by
# assign_personas; later cells read a 'persona' column from it.
persona_assigner = PersonaAssigner()
features = persona_assigner.assign_personas(features)

# Show persona statistics (the bare last expression is displayed as the cell output).
persona_stats = persona_assigner.get_persona_statistics(features)
persona_stats

In [None]:
# Visualize persona distribution
# Requires `visualizer` from the clustering section and the persona-annotated `features`.
visualizer.plot_persona_distribution(features)

In [None]:
# Performance by persona
# Plots the 'total_pnl' metric from `features`, broken down by persona.
visualizer.plot_performance_by_persona(features, metric='total_pnl')

## 6. Predictive Modeling

In [None]:
# Build the predictor with a fixed random_state for reproducibility.
# use_smote=True is forwarded to HighPotentialPredictor
# (presumably enables SMOTE oversampling of the minority class — confirm).
predictor = HighPotentialPredictor(random_state=42, use_smote=True)

# Create target labels
# top_percentile=0.2 presumably marks the top 20% of wallets as positive —
# TODO confirm which metric the ranking uses.
target = predictor.create_target_labels(features, top_percentile=0.2)

# Prepare data
# NOTE(review): `features` now also carries the 'cluster' column added in the
# clustering section (plus whatever assign_personas added). Verify that
# prepare_data fits predictor.scaler on the same columns as `feature_cols`,
# because the scoring section applies the scaler to features[feature_cols].
X_train, X_test, y_train, y_test = predictor.prepare_data(features, target)

# Train ensemble
predictor.train_ensemble(X_train, y_train)

# Evaluate
# NOTE: this rebinds `metrics`, replacing the clustering metrics stored under
# the same name in the clustering section.
metrics = predictor.evaluate(X_test, y_test)

In [None]:
# Feature importance
# Fetch the importance table from the trained predictor and plot it with top_n=15.
importance_df = predictor.get_feature_importance()
visualizer.plot_feature_importance(importance_df, top_n=15)

## 7. Identify High-Potential Traders

In [None]:
# Score every wallet with the trained ensemble. The matrix is built from the
# same `feature_cols` used for clustering and scaled with the predictor's scaler.
# NOTE(review): this also scores rows the model was trained on, so the scores
# for those wallets are likely optimistic.
X_all_scaled = predictor.scaler.transform(features[feature_cols].values)
# Column 1 of the ensemble's probability output is stored as the score.
features['high_potential_score'] = predictor.predict_proba_ensemble(X_all_scaled)[:, 1]
features['high_potential'] = predictor.predict(X_all_scaled)

# Keep only wallets predicted as high-potential, highest score first.
flagged_mask = features['high_potential'] == 1
high_potential = features[flagged_mask].sort_values('high_potential_score', ascending=False)

print(f"Identified {len(high_potential)} high-potential traders")

# Show the 20 best-scored wallets with their headline columns.
summary_cols = ['address', 'persona', 'high_potential_score', 'weighted_pnl', 'roi', 'win_rate']
high_potential[summary_cols].head(20)

## 8. Statistical Validation

In [None]:
# Evaluator configured with confidence_level=0.95; reused by the next cell.
evaluator = ModelEvaluator(confidence_level=0.95)

# Compare personas
# Compares personas on the 'total_pnl' metric; only the first 10 rows of the
# result are displayed.
persona_comparison = evaluator.compare_personas(features, metric='total_pnl')
persona_comparison.head(10)

In [None]:
# Confidence intervals
# Computed for 'total_pnl' with the evaluator created above (confidence_level=0.95);
# the result is displayed as the cell output.
confidence_intervals = evaluator.calculate_confidence_intervals(features, metric='total_pnl')
confidence_intervals

## 9. Summary Dashboard

In [None]:
# Render the summary dashboard from the fully annotated `features` frame
# (cluster, persona and high-potential columns have been added by this point).
visualizer.create_summary_dashboard(features)

## 10. Export Results

In [None]:
# Save results
# DataFrame.to_csv does not create missing parent directories, so make sure the
# output folder exists first; otherwise the export fails on a fresh checkout.
output_dir = Path('../outputs')
output_dir.mkdir(parents=True, exist_ok=True)

# Full annotated feature table, then the high-potential subset
# (both written without the index column).
features.to_csv(output_dir / 'complete_analysis_results.csv', index=False)
high_potential.to_csv(output_dir / 'high_potential_traders.csv', index=False)

print("Results saved to ../outputs/")