In [None]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import seaborn as sns

# 1. Read the edge list; its txId1/txId2 columns become edge endpoints below
print("Loading Network Edges...")
edges = pd.read_csv('../data/raw/elliptic_txs_edgelist.csv')

# 2. Assemble a directed graph with txId1 as source and txId2 as target
print("Building Graph (this takes a moment)...")
G = nx.from_pandas_edgelist(
    edges,
    source='txId1',
    target='txId2',
    create_using=nx.DiGraph(),
)

# 3. Per-node degree counts (the custom features), keyed by node id
in_degree = dict(G.in_degree())
out_degree = dict(G.out_degree())

# One row per graph node; each degree is looked up by node id so the three
# columns are aligned explicitly rather than by position.
node_ids = list(G.nodes)
degree_df = pd.DataFrame({
    'txId': node_ids,
    'in_degree': [in_degree[node] for node in node_ids],
    'out_degree': [out_degree[node] for node in node_ids],
})

# 4. Load Classes to compare
# Read `class` as text so the string-keyed mapping below always matches.
# If pandas inferred an integer dtype for the column, every label would map
# to NaN and the dropna() below would silently leave `merged` empty.
classes = pd.read_csv(
    '../data/raw/elliptic_txs_classes.csv',
    dtype={'class': str},
)
# Any value other than '1' or '2' becomes NaN here.
classes['class'] = classes['class'].map({'1': 'Illicit', '2': 'Licit'})
# Merge: join degrees with labels on txId (pandas' default inner join), then
# drop rows containing a missing value, i.e. transactions without a mapped label.
merged = pd.merge(degree_df, classes, on='txId').dropna()

# 5. The Proof: Visualize the Difference
# We limit to 50 for visibility because some whales have huge degrees
MAX_DEGREE_SHOWN = 50
# Pin the x-axis category order. Each panel filters `merged` differently, so
# relying on order of appearance could place the classes in different
# positions in the two panels.
CLASS_ORDER = ['Illicit', 'Licit']

fig, axes = plt.subplots(1, 2, figsize=(12, 5))
panels = [
    ('in_degree', 'In-Degree: Fraud vs Licit'),
    ('out_degree', 'Out-Degree: Fraud vs Licit'),
]
for ax, (column, title) in zip(axes, panels):
    # Only rows below the cut-off for this panel's own degree column are drawn.
    visible = merged[merged[column] < MAX_DEGREE_SHOWN]
    sns.boxplot(x='class', y=column, data=visible, order=CLASS_ORDER, ax=ax)
    ax.set_title(title)

# Keep the two panels' axis labels from overlapping.
fig.tight_layout()
plt.show()

print("Observation: Fraudsters often have different connection patterns (outliers) compared to normal users.")