# 🏀 The Joshua Dworkin Deep Dive

### A FiveThirtyEight-style analysis of one player's basketball journey

---

Joshua Dworkin has been lacing up for Eltham since Under-12s. He's not the tallest kid on the court, and he's never been the loudest — but the numbers tell a story of quiet, steady evolution. This notebook traces his journey from his first season through to Under-16s, asking the questions that matter: *Is he getting better? How does he compare? And where is he headed?*

Let's find out.

In [None]:
import sqlite3, re, pandas as pd, numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import warnings; warnings.filterwarnings('ignore')

# --- Notebook configuration --------------------------------------------------
# PlayHQ player id used to pick Joshua's rows out of the league-wide table.
JOSH_ID = 'f1fa18fc-a93f-45b9-ac91-f70652744dd7'

# Chronological rank of each season name; mapped onto `season_ord` below so
# rows can be sorted in time order. Seasons missing from this table map to NaN.
SEASON_ORDER = {
    'Summer 2022/23': 0, 'Winter 2023': 1, 'Summer 2023/24': 2, 'Winter 2024': 3,
    'Summer 2024/25': 4, 'Winter 2025': 5, 'Summer 2025/26': 6,
}

# Shared Plotly styling.
TEMPLATE = 'plotly_white'
JOSH_COLOR = '#E45756'
PEER_COLOR = '#4C78A8'
ACCENT = '#F58518'

# --- Load ---------------------------------------------------------------------
# One row per player_stats record with at least one game played. The connection
# is closed in `finally` so a failing query does not leak the database handle.
conn = sqlite3.connect('../data/playhq.db')
try:
    all_stats = pd.read_sql("""
    SELECT p.id as pid, p.first_name || ' ' || p.last_name as name,
        s.name as season, g.name as grade, g.id as grade_pk,
        ps.games_played as gp, ps.total_points as pts,
        ps.one_point as ft, ps.two_point as fg2, ps.three_point as fg3,
        ps.total_fouls as fouls, ps.team_name as team, ps.ranking
    FROM player_stats ps
    JOIN players p ON p.id = ps.player_id
    JOIN grades g ON g.id = ps.grade_id
    JOIN seasons s ON s.id = g.season_id
    WHERE ps.games_played > 0
""", conn)
finally:
    conn.close()

# --- Derived metrics ----------------------------------------------------------
# Per-game rates. `gp` is > 0 for every row because of the WHERE clause above.
all_stats['ppg'] = all_stats['pts'] / all_stats['gp']
all_stats['fpg'] = all_stats['fouls'] / all_stats['gp']

# Share of a player's points coming from each shot type (not shooting accuracy).
# Zero-point rows get NaN instead of a division by zero.
points_or_nan = all_stats['pts'].replace(0, np.nan)
all_stats['ft_pct'] = all_stats['ft'] / points_or_nan
all_stats['fg2_pct'] = (all_stats['fg2'] * 2) / points_or_nan
all_stats['fg3_pct'] = (all_stats['fg3'] * 3) / points_or_nan
all_stats['season_ord'] = all_stats['season'].map(SEASON_ORDER)

# Grading rounds (short stints) vs regular season. `na=False` keeps the column
# strictly boolean so `~all_stats['is_grading']` works even if a grade name is null.
all_stats['is_grading'] = all_stats['grade'].str.contains('Grading', case=False, na=False)

# Age group token such as "U14"; NaN when the grade name carries no such token.
all_stats['age_group'] = all_stats['grade'].str.extract(r'(U\d+)', expand=False)

# --- Joshua's rows ------------------------------------------------------------
josh = all_stats[all_stats['pid'] == JOSH_ID].copy()
josh_regular = josh[~josh['is_grading']].copy()

print(f"Joshua has {len(josh)} total entries across {josh['season'].nunique()} seasons")
print(f"Regular season entries: {len(josh_regular)}")
# dropna(): sorted() cannot compare a NaN age group against strings.
print(f"Age groups: {sorted(josh['age_group'].dropna().unique())}")

---
## 1. The Career Timeline

Every player's career is a patchwork of seasons, age groups, and competition levels. Joshua's path through the EDJBA system tells us where he's been — and hints at where he's going.

In [None]:
# Career timeline: one bar per competition entry (regular season and grading),
# ordered chronologically, with bar height = points per game and the games
# played printed above each bar.
timeline = josh.sort_values('season_ord').copy()
timeline['label'] = timeline['season'] + '<br>' + timeline['grade'] + '<br>' + timeline['team']
timeline['phase'] = timeline['is_grading'].map({True: 'Grading', False: 'Regular Season'})

fig = go.Figure()

# One trace per phase so each gets its own colour and legend entry.
for phase, color in [('Regular Season', JOSH_COLOR), ('Grading', '#999999')]:
    d = timeline[timeline['phase'] == phase]
    fig.add_trace(go.Bar(
        x=d['label'], y=d['ppg'], name=phase,
        marker_color=color, text=d['gp'].apply(lambda x: f"{x}g"),
        textposition='outside',
        hovertemplate='%{x}<br>PPG: %{y:.1f}<br>Games: %{text}<extra></extra>'
    ))

# Plotly orders a categorical axis by first appearance across traces, which
# would place every regular-season bar before every grading bar. Pin the axis
# to the chronological label order instead.
chronological_labels = timeline['label'].dropna().unique().tolist()

fig.update_layout(
    title='Joshua Dworkin — Points Per Game Across Every Competition',
    yaxis_title='Points Per Game', template=TEMPLATE,
    height=500, showlegend=True, barmode='group',
    xaxis=dict(
        tickangle=-45,
        categoryorder='array',
        categoryarray=chronological_labels,
    ),
    font=dict(size=11),
)
fig.show()

The grading rounds (grey) are short tryout periods — small sample sizes. The red bars are where the real story lives: regular season competition where Joshua had time to settle in and produce.

In [None]:
# Career summary table: one row per competition entry in chronological order,
# with per-game rates rounded to one decimal and a readable phase column in
# place of the boolean `is_grading` flag.
summary = josh.sort_values('season_ord')[[
    'season', 'grade', 'team', 'gp', 'pts', 'ppg', 'ft', 'fg2', 'fg3', 'fouls', 'fpg', 'is_grading'
]].copy()
summary['ppg'] = summary['ppg'].round(1)
summary['fpg'] = summary['fpg'].round(1)
summary['phase'] = summary['is_grading'].map({True: '🏷️ Grading', False: '🏀 Season'})
summary = summary.drop(columns='is_grading')
# Bare expression so the notebook renders the table as the cell output.
summary

---
## 2. Scoring Evolution — From Paint to Perimeter

How a player scores tells you more than *how much* they score. A kid who starts by driving to the basket and gradually adds a three-point shot is a kid whose game is expanding. Let's trace how Joshua's scoring recipe has changed.

In [None]:
# Scoring composition over time (regular season only): stacked bars of the
# points Joshua scored from free throws, two-pointers and three-pointers.
scoring = josh_regular.sort_values('season_ord').copy()

# Convert made-shot counts into points so the stack adds up to total points.
scoring['ft_pts'] = scoring['ft']
scoring['fg2_pts'] = scoring['fg2'] * 2
scoring['fg3_pts'] = scoring['fg3'] * 3

# Compact axis label, e.g. "S2023/24<br>U14". regex=False: these are literal
# prefixes, not patterns.
season_short = (
    scoring['season']
    .str.replace('Summer ', 'S', regex=False)
    .str.replace('Winter ', 'W', regex=False)
)
scoring['label'] = season_short + '<br>' + scoring['age_group']

fig = go.Figure()
for col, name, color in [
    ('ft_pts', 'Free Throws (1pt)', '#76B7B2'),
    ('fg2_pts', '2-Pointers', PEER_COLOR),
    ('fg3_pts', '3-Pointers', JOSH_COLOR),
]:
    fig.add_trace(go.Bar(x=scoring['label'], y=scoring[col], name=name, marker_color=color))

fig.update_layout(
    barmode='stack', template=TEMPLATE,
    title='How Joshua Scores — Point Composition by Season',
    yaxis_title='Total Points', height=450,
    font=dict(size=11),
)
fig.show()

In [None]:
# Scoring style as percentages: each shot type's share of the points Joshua
# scored in that entry. Extends the `scoring` frame built in the previous cell.

# Denominator is the sum of the three components; zero-point entries become
# NaN so they yield NaN shares rather than a division by zero.
scoring['total_check'] = scoring['ft_pts'] + scoring['fg2_pts'] + scoring['fg3_pts']
component_total = scoring['total_check'].replace(0, np.nan)
for col in ['ft_pts', 'fg2_pts', 'fg3_pts']:
    scoring[col + '_pct'] = (scoring[col] / component_total * 100).round(1)

fig = go.Figure()
for col, name, color in [
    ('ft_pts_pct', 'Free Throws', '#76B7B2'),
    ('fg2_pts_pct', '2-Pointers', PEER_COLOR),
    ('fg3_pts_pct', '3-Pointers', JOSH_COLOR),
]:
    fig.add_trace(go.Bar(x=scoring['label'], y=scoring[col], name=name, marker_color=color,
                        # Blank text for NaN shares instead of printing "nan%".
                        text=scoring[col].apply(lambda x: f"{x:.0f}%" if pd.notna(x) else ''),
                        textposition='inside'))

fig.update_layout(
    barmode='stack', template=TEMPLATE,
    title='Scoring Mix (%) — Joshua\'s Evolving Style',
    yaxis_title='% of Total Points', height=450,
    yaxis_range=[0, 105],
    font=dict(size=11),
)
fig.show()

**Key insight:** Watch the three-point share (the red segment) — the fraction of his points that come from threes. If it's growing, Joshua's range is expanding — a critical development marker for players transitioning from junior to senior basketball.

---
## 3. Among His Peers — Percentile Rankings

Raw numbers are meaningless without context. Scoring 7 points per game in U12 BA might be dominant; scoring 7 in U16 BA might be average. We need to compare Joshua to *everyone else in the same competition*.

In [None]:
# Percentile of Joshua's scoring within each regular-season grade he played in.
# A percentile here is the share of players in the same grade whose value is
# strictly below Joshua's (his own row is part of the peer group).
josh_grades = josh_regular['grade_pk'].unique()
peers_in_grades = all_stats[(all_stats['grade_pk'].isin(josh_grades)) & (~all_stats['is_grading'])].copy()

percentile_columns = [
    'season', 'grade', 'age_group', 'season_ord',
    'ppg', 'ppg_pctl', 'pts_pctl', 'n_peers', 'gp',
]

percentiles = []
for _, row in josh_regular.iterrows():
    grade_peers = peers_in_grades[peers_in_grades['grade_pk'] == row['grade_pk']]
    n_peers = len(grade_peers)
    if n_peers > 0:
        ppg_pctl = (grade_peers['ppg'] < row['ppg']).sum() / n_peers * 100
        pts_pctl = (grade_peers['pts'] < row['pts']).sum() / n_peers * 100
    else:
        ppg_pctl = pts_pctl = np.nan
    percentiles.append({
        'season': row['season'], 'grade': row['grade'], 'age_group': row['age_group'],
        'season_ord': row['season_ord'],
        'ppg': row['ppg'], 'ppg_pctl': ppg_pctl, 'pts_pctl': pts_pctl,
        'n_peers': n_peers, 'gp': row['gp'],
    })

# Explicit columns keep the frame well-formed (and sortable) even when Joshua
# has no regular-season rows and `percentiles` is empty.
pctl_df = pd.DataFrame(percentiles, columns=percentile_columns).sort_values('season_ord')

# Axis label: abbreviated season plus the "U14 Boys"-style prefix of the grade.
# Fall back to the full grade name when the pattern is absent, so the point is
# not dropped from the chart for having a NaN label.
season_short = (
    pctl_df['season']
    .str.replace('Summer ', 'S', regex=False)
    .str.replace('Winter ', 'W', regex=False)
)
grade_short = pctl_df['grade'].str.extract(r'(U\d+ \w+)')[0].fillna(pctl_df['grade'])
pctl_df['label'] = season_short + '<br>' + grade_short

fig = go.Figure()
fig.add_trace(go.Scatter(
    x=pctl_df['label'], y=pctl_df['ppg_pctl'],
    mode='lines+markers+text', marker=dict(size=12, color=JOSH_COLOR),
    line=dict(width=3, color=JOSH_COLOR),
    text=pctl_df['ppg_pctl'].apply(lambda x: f"{x:.0f}th"),
    textposition='top center',
    hovertemplate='%{x}<br>Percentile: %{y:.0f}th<br>PPG: %{customdata[0]:.1f}<br>Peers: %{customdata[1]}<extra></extra>',
    customdata=pctl_df[['ppg', 'n_peers']].values,
))

# Reference lines for the quartiles.
fig.add_hline(y=50, line_dash='dash', line_color='grey', annotation_text='Median')
fig.add_hline(y=75, line_dash='dot', line_color='green', annotation_text='75th')
fig.add_hline(y=25, line_dash='dot', line_color='orange', annotation_text='25th')

fig.update_layout(
    title='Joshua\'s PPG Percentile — Where He Stands Among His Peers',
    yaxis_title='Percentile', yaxis_range=[0, 105],
    template=TEMPLATE, height=500,
    font=dict(size=11),
)
fig.show()

# Bare expression so the notebook renders the table as the cell output.
pctl_df[['season', 'grade', 'gp', 'ppg', 'ppg_pctl', 'n_peers']].round(1)

**Reading the chart:** Above the dashed line means Joshua is outscoring more than half his peers. The higher the dot, the more dominant his scoring in that competition. A rising trajectory means he's improving *relative to his peers*, not just in absolute terms.

---
## 4. Performance Projection

Using Joshua's trajectory so far, can we project where he's headed? We'll fit a simple trend to his regular-season PPG and project forward. (Caveat: projections of junior athletes are speculative — but the trend direction is meaningful.)

In [None]:
# Linear trend of regular-season PPG against season index, extended two
# seasons past Joshua's latest entry.


def _season_name(order):
    """Return the season name for a SEASON_ORDER index.

    Known indices are looked up in SEASON_ORDER; later ones are extrapolated
    from its alternating pattern (even index = "Summer YYYY/YY", odd index =
    "Winter YYYY", starting from index 0 = "Summer 2022/23").
    """
    order = int(order)
    known = {index: season for season, index in SEASON_ORDER.items()}
    if order in known:
        return known[order]
    start_year = 2022 + order // 2
    if order % 2 == 0:
        return f'Summer {start_year}/{(start_year + 1) % 100:02d}'
    return f'Winter {start_year + 1}'


# Rows whose season is not in SEASON_ORDER have no x-position and are dropped.
proj = josh_regular.dropna(subset=['season_ord']).sort_values('season_ord').copy()

if len(proj) >= 2:
    # Weighted by games played.
    # NOTE(review): np.polyfit applies `w` to the unsquared residual, so this
    # weights squared errors by gp**2; use np.sqrt(gp) if inverse-variance
    # weighting of a per-game average is what is intended - confirm.
    z = np.polyfit(proj['season_ord'], proj['ppg'], 1, w=proj['gp'])
    trend_fn = np.poly1d(z)

    # Project 2 more seasons. Labels are derived from the latest season actually
    # played rather than hard-coded, and use <br> because Plotly does not render
    # "\n" as a line break in tick labels.
    max_ord = proj['season_ord'].max()
    future_ords = [max_ord + 1, max_ord + 2]
    future_labels = [f'{_season_name(o)}<br>(proj)' for o in future_ords]

    all_ords = list(proj['season_ord']) + future_ords
    all_labels = list(
        proj['season']
        .str.replace('Summer ', 'S', regex=False)
        .str.replace('Winter ', 'W', regex=False)
    ) + future_labels
    trend_vals = [trend_fn(x) for x in all_ords]

    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=all_labels[:len(proj)], y=proj['ppg'].values,
        mode='markers+lines', name='Actual PPG',
        marker=dict(size=10, color=JOSH_COLOR), line=dict(color=JOSH_COLOR, width=2),
    ))
    fig.add_trace(go.Scatter(
        x=all_labels, y=trend_vals,
        mode='lines', name='Trend + Projection',
        line=dict(color=ACCENT, width=2, dash='dash'),
    ))
    fig.add_trace(go.Scatter(
        x=future_labels, y=[trend_fn(x) for x in future_ords],
        mode='markers', name='Projected',
        marker=dict(size=12, color=ACCENT, symbol='diamond'),
    ))

    fig.update_layout(
        title=f'PPG Projection — Trend: {z[0]:+.2f} PPG per season',
        yaxis_title='Points Per Game', template=TEMPLATE, height=450,
        font=dict(size=11),
    )
    fig.show()

    print(f"\nTrend slope: {z[0]:+.2f} PPG per season")
    print(f"Projected PPG next season: {trend_fn(max_ord + 1):.1f}")
    print(f"Projected PPG season after: {trend_fn(max_ord + 2):.1f}")
else:
    print('Not enough data for projection')

**Important context:** As players move up age groups, the competition gets tougher. A *flat* PPG trend while moving from U14 to U16 is actually a sign of real growth — you're holding your own against older, bigger, more experienced players.

---
## 5. Strengths & Weaknesses

Let's build a player profile by comparing Joshua's key metrics against the median of his most recent competition.

In [None]:
# Radar chart: Joshua's percentile on each metric within his most recent
# regular-season grade, drawn against the 50th-percentile reference ring.
# Uses `peers_in_grades` from the percentile cell.
latest = josh_regular.sort_values('season_ord').iloc[-1]
latest_peers = peers_in_grades[peers_in_grades['grade_pk'] == latest['grade_pk']].copy()

# Display name -> (column, higher_is_better).
metrics = {
    'PPG': ('ppg', True),
    'Total Points': ('pts', True),
    'Games Played': ('gp', True),
    'Free Throws': ('ft', True),
    '2-Pointers': ('fg2', True),
    '3-Pointers': ('fg3', True),
    'Fouls/Game': ('fpg', False),  # Lower is better
}

categories = list(metrics.keys())
josh_vals = []
median_vals = []

n_latest_peers = len(latest_peers)
for cat, (col, higher_better) in metrics.items():
    jv = latest[col] if col in latest.index else 0
    if n_latest_peers > 0:
        # Percentile = share of peers Joshua strictly beats. For a
        # lower-is-better metric "beats" means the peer's value is strictly
        # higher, so ties (and Joshua's own row) never count in his favour -
        # the same rule the higher-is-better metrics already follow.
        if higher_better:
            beaten = (latest_peers[col] < jv).sum()
        else:
            beaten = (latest_peers[col] > jv).sum()
        pctl = beaten / n_latest_peers * 100
    else:
        pctl = 50
    josh_vals.append(pctl)
    median_vals.append(50)  # Median is always 50th percentile

fig = go.Figure()
# Repeat the first point at the end so each polygon closes.
fig.add_trace(go.Scatterpolar(
    r=josh_vals + [josh_vals[0]], theta=categories + [categories[0]],
    fill='toself', name='Joshua', fillcolor='rgba(228, 87, 86, 0.3)',
    line=dict(color=JOSH_COLOR, width=2),
))
fig.add_trace(go.Scatterpolar(
    r=median_vals + [median_vals[0]], theta=categories + [categories[0]],
    name='Grade Median', line=dict(color='grey', dash='dash'),
))

fig.update_layout(
    polar=dict(radialaxis=dict(visible=True, range=[0, 100])),
    title=f'Player Profile — {latest["grade"]} ({latest["season"]})',
    template=TEMPLATE, height=500,
    font=dict(size=11),
)
fig.show()

# Narrative assessment: bucket each percentile into quartile bands.
print("\n📊 STRENGTHS & WEAKNESSES ASSESSMENT")
print("=" * 45)
for cat, pctl in zip(categories, josh_vals):
    if pctl >= 75:
        emoji = '💪'
        label = 'STRENGTH'
    elif pctl >= 50:
        emoji = '✅'
        label = 'Above Average'
    elif pctl >= 25:
        emoji = '⚠️'
        label = 'Below Average'
    else:
        emoji = '🔧'
        label = 'AREA TO DEVELOP'
    print(f"{emoji} {cat}: {pctl:.0f}th percentile — {label}")

---
## 6. Finding Joshua's "Comps" — Similar Player Trajectories

Who are the players that *looked like Joshua* at the same age? If we find players who had similar stat lines in earlier seasons, we can see where *they* ended up — a crude but fascinating way to project development.

In [None]:
# Find Joshua's "comps": players whose average regular-season PPG was close to
# his in at least two of the age groups he has played in. Similarity is the
# mean absolute PPG difference over the shared age groups (lower = more similar).

# Joshua's regular-season profile by age group.
josh_profile = josh_regular.groupby('age_group').agg(
    ppg_mean=('ppg', 'mean'),
    total_gp=('gp', 'sum'),
    total_pts=('pts', 'sum'),
).reset_index()

# Every player's profile by age group (regular season only).
reg = all_stats[~all_stats['is_grading']].copy()
player_profiles = reg.groupby(['pid', 'name', 'age_group']).agg(
    ppg_mean=('ppg', 'mean'),
    total_gp=('gp', 'sum'),
    total_pts=('pts', 'sum'),
    seasons=('season', 'nunique'),
).reset_index()

# Players who appear in at least two of Joshua's age groups.
josh_ages = set(josh_profile['age_group'])
multi_age = player_profiles[player_profiles['age_group'].isin(josh_ages)].copy()
player_age_counts = multi_age.groupby('pid')['age_group'].nunique()
multi_players = player_age_counts[player_age_counts >= 2].index  # at least 2 overlapping age groups

# Joshua's mean PPG per age group, looked up once instead of filtered per player.
josh_ppg_by_age = dict(zip(josh_profile['age_group'], josh_profile['ppg_mean']))
candidate_pids = set(multi_players)

# Score similarity. A single groupby pass replaces a full-frame filter per
# candidate; groups arrive in sorted pid order, matching `multi_players`.
similarities = []
for pid, p_data in player_profiles.groupby('pid'):
    if pid == JOSH_ID or pid not in candidate_pids:
        continue
    overlap_ages = set(p_data['age_group']) & josh_ages
    if len(overlap_ages) < 2:
        continue

    # First profile row per age group (a pid can have several rows for one age
    # group when its name differs between records).
    p_ppg_by_age = p_data.drop_duplicates('age_group').set_index('age_group')['ppg_mean']

    # sorted(): set iteration order for strings varies between interpreter
    # runs, which could change the float mean in its last bits and reshuffle ties.
    diffs = [abs(josh_ppg_by_age[age] - p_ppg_by_age[age]) for age in sorted(overlap_ages)]

    if diffs:
        similarities.append({
            'pid': pid, 'name': p_data['name'].iloc[0],
            'avg_ppg_diff': np.mean(diffs),
            'overlap_ages': len(overlap_ages),
            # Games across all of the player's age groups, not just shared ones.
            'total_gp': p_data['total_gp'].sum(),
        })

sim_df = pd.DataFrame(similarities)
if len(sim_df) > 0:
    # Rank purely by closeness (smallest mean PPG gap first); the number of
    # overlapping age groups is reported but does not affect the ranking.
    # Stable sort keeps tied players in pid order from run to run.
    sim_df = sim_df.sort_values('avg_ppg_diff', kind='stable')
    top_comps = sim_df.head(10)

    print("🔍 TOP 10 PLAYER COMPARISONS (by PPG similarity across shared age groups)")
    print("=" * 70)
    for _, row in top_comps.iterrows():
        print(f"  {row['name']:25s} | PPG diff: {row['avg_ppg_diff']:.2f} | Ages: {row['overlap_ages']} | GP: {row['total_gp']}")
else:
    print('Not enough comparable players found')

In [None]:
# Trajectory chart: Joshua's average PPG per age group against his three
# closest comps. Uses `sim_df` and `reg` from the previous cell.
if len(sim_df) >= 3:
    top3 = sim_df.head(3)['pid'].tolist()
    comp_data = reg[reg['pid'].isin(top3 + [JOSH_ID])].copy()
    comp_agg = comp_data.groupby(['pid', 'name', 'age_group']).agg(
        ppg=('ppg', 'mean'), gp=('gp', 'sum')
    ).reset_index()

    # Order age groups by their numeric part ("U9" < "U10" < "U14") instead of
    # a fixed lookup table, so groups outside U10-U19 still sort correctly.
    comp_agg['age_ord'] = comp_agg['age_group'].str.extract(r'(\d+)', expand=False).astype(int)
    comp_agg = comp_agg.sort_values('age_ord')
    age_axis = comp_agg['age_group'].unique().tolist()

    fig = go.Figure()
    colors = [JOSH_COLOR, PEER_COLOR, ACCENT, '#76B7B2']
    widths = [4, 2, 2, 2]

    # Joshua first so he gets the thick highlighted line.
    for i, pid in enumerate([JOSH_ID] + top3):
        d = comp_agg[comp_agg['pid'] == pid]
        if d.empty:
            # Nothing to draw, and no name to read for the legend.
            continue
        name = 'Joshua Dworkin' if pid == JOSH_ID else d['name'].iloc[0]
        fig.add_trace(go.Scatter(
            x=d['age_group'], y=d['ppg'],
            mode='lines+markers', name=name,
            line=dict(width=widths[i], color=colors[i]),
            marker=dict(size=8 if i > 0 else 12),
        ))

    fig.update_layout(
        title='Trajectory Comparison — Joshua vs. Most Similar Players',
        xaxis_title='Age Group', yaxis_title='PPG (avg across seasons)',
        # Plotly orders categories by first appearance across traces, so a comp
        # with a younger age group than Joshua would otherwise land on the
        # right-hand end of the axis. Pin the axis to true age order.
        xaxis=dict(categoryorder='array', categoryarray=age_axis),
        template=TEMPLATE, height=450,
        font=dict(size=11),
    )
    fig.show()

---
## 7. The Foul Story

Fouls are the hidden tax on aggression. Too many, and you're a liability. Too few, and you might not be playing hard enough. Let's see where Joshua sits.

In [None]:
# Foul rate (bars, left axis) against scoring rate (line, right axis), one
# entry per regular-season competition in chronological order.
fouls_data = josh_regular.sort_values('season_ord').copy()
short_season = fouls_data['season'].str.replace('Summer ', 'S').str.replace('Winter ', 'W')
fouls_data['label'] = short_season + '<br>' + fouls_data['age_group']

# Build both traces up front, then attach each to its own y-axis.
foul_bars = go.Bar(
    name='Fouls/Game',
    x=fouls_data['label'],
    y=fouls_data['fpg'],
    marker_color=ACCENT,
    opacity=0.7,
)
ppg_line = go.Scatter(
    name='PPG',
    x=fouls_data['label'],
    y=fouls_data['ppg'],
    mode='lines+markers',
    line={'color': JOSH_COLOR, 'width': 3},
    marker={'size': 8},
)

fig = make_subplots(specs=[[{'secondary_y': True}]])
for trace, on_secondary_axis in ((foul_bars, False), (ppg_line, True)):
    fig.add_trace(trace, secondary_y=on_secondary_axis)

fig.update_layout(
    title='The Scoring-Fouling Balance',
    template=TEMPLATE,
    height=450,
    font={'size': 11},
)
fig.update_yaxes(title_text='Fouls Per Game', secondary_y=False)
fig.update_yaxes(title_text='Points Per Game', secondary_y=True)
fig.show()

---
## 8. The Bottom Line

### What the numbers say about Joshua Dworkin

In [None]:
# Narrative career summary printed as text. Reads `pctl_df` (percentile cell)
# and `scoring` (scoring-mix cell) in addition to Joshua's own rows.
total_games = josh['gp'].sum()
total_points = josh['pts'].sum()
seasons_played = josh['season'].nunique()
# dropna(): sorted() cannot compare a NaN age group against strings.
age_groups = sorted(josh['age_group'].dropna().unique())

# Career PPG trend: first vs most recent regular-season entry.
reg_sorted = josh_regular.sort_values('season_ord')
has_regular_season = len(reg_sorted) > 0
first_ppg = reg_sorted['ppg'].iloc[0] if has_regular_season else 0
last_ppg = reg_sorted['ppg'].iloc[-1] if has_regular_season else 0

# Latest percentile (50 as a neutral placeholder when none was computed).
latest_pctl = pctl_df['ppg_pctl'].iloc[-1] if len(pctl_df) > 0 else 50

# 3-point evolution: share of points from threes, first vs latest entry.
early_3pct = scoring['fg3_pts_pct'].iloc[0] if len(scoring) > 0 else 0
late_3pct = scoring['fg3_pts_pct'].iloc[-1] if len(scoring) > 0 else 0

print("📋 JOSHUA DWORKIN — CAREER SUMMARY")
print("=" * 50)
print()
print(f"  🏀 {total_games} games across {seasons_played} seasons")
print(f"  📊 {total_points} career points")
print(f"  📈 Age groups: {' → '.join(age_groups)}")
print()
print(f"  First regular-season PPG: {first_ppg:.1f}")
print(f"  Most recent PPG: {last_ppg:.1f}")
print(f"  Change: {last_ppg - first_ppg:+.1f}")
print()
print(f"  Current peer percentile: {latest_pctl:.0f}th")
print()

# Trajectory verdict: any increase is "rising"; within 20% below the first
# season is "steady"; anything lower is "adjusting".
if last_ppg > first_ppg:
    print("  📈 TRAJECTORY: RISING")
    print("  Joshua is scoring more efficiently as he ages up.")
elif last_ppg >= first_ppg * 0.8:
    print("  ➡️ TRAJECTORY: STEADY")
    print("  Holding his own against tougher competition — a quiet win.")
else:
    print("  📉 TRAJECTORY: ADJUSTING")
    print("  Tougher competition is a factor, but the tools are there.")

print()
print("  🎯 KEY DEVELOPMENT AREAS:")
# A NaN share is truthy but fails the > 0 comparison, so it is skipped too.
if late_3pct and late_3pct > 0:
    print(f"     • 3-point shooting now accounts for {late_3pct:.0f}% of scoring")
# Guarded like the PPG lookups above so an empty regular-season table does not
# raise IndexError; `reg_sorted` carries the same rows, in the same order, as
# the foul chart's frame.
if has_regular_season:
    print(f"     • Foul management: {reg_sorted['fpg'].iloc[-1]:.1f} fouls/game (latest)")
    print(f"     • Consistency: played {reg_sorted['gp'].iloc[-1]} games in latest season")

---

*This analysis was built with data from PlayHQ via the EDJBA competition system. All percentiles and comparisons are against players in the same grade and season. Projections are linear extrapolations and should be taken as directional indicators, not predictions.*

*Built with ❤️ for the Dworkin family.*