# Country efficiency — earnings vs players (Battle Royale)

This notebook recreates the **log–log scatterplot**:

- Each point is a country in the genre **"Battle Royale"**.
- **x‑axis** – number of players from that country in the genre (log scale)
- **y‑axis** – total prize earnings of that country in the genre (log scale)
- A dashed line shows a simple log–log regression (trend line).
- The point for **China** is highlighted.

The notebook expects the CSV files `esports_games.csv` and `country_esports.csv` to be in the same folder as this notebook.

In [1]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px

# Cap DataFrame previews at 10 rows so cell outputs stay short
pd.set_option('display.max_rows', 10)


In [2]:
# Read both input tables from the notebook's folder (games first, then countries)
games_df, countries_df = map(
    pd.read_csv,
    ('esports_games.csv', 'country_esports.csv'),
)

# Preview the first rows of each table; the pair is displayed as a tuple
(games_df.head(), countries_df.head())

(                    Game  ReleaseDate     Genre  TotalEarnings  \
 0         Age of Empires         1997  Strategy      736284.75   
 1      Age of Empires II         1999  Strategy     3898508.73   
 2     Age of Empires III         2005  Strategy      122256.72   
 3      Age of Empires IV         2021  Strategy     1190813.44   
 4  Age of Empires Online         2011  Strategy       11462.98   
 
    OfflineEarnings  PercentOffline  TotalPlayers  TotalTournaments  
 0        522378.17        0.709478           624               341  
 1       1361409.22        0.349213          2256              1939  
 2         44472.60        0.363764           172               179  
 3        439117.93        0.368755           643               423  
 4           775.00        0.067609            52                68  ,
                     country  total_earnings  player_count               game  \
 0                     China    3.309117e+08          9333             Dota 2   
 1  United St

In [3]:
# Attach a genre to every country row by looking its game up in esports_games.csv
game_to_genre = dict(zip(games_df['Game'], games_df['Genre']))
countries_df['Genre'] = countries_df['game'].map(game_to_genre)

# Keep just the rows that belong to the genre of interest
genre = 'Battle Royale'
battle = countries_df.loc[countries_df['Genre'].eq(genre)].copy()
print(f'{len(battle)} country–game rows for', genre)

# Collapse to one row per country, summing earnings and player counts in this genre
agg = battle.groupby('country', as_index=False).agg(
    earnings=pd.NamedAgg(column='game_earnings', aggfunc='sum'),
    players=pd.NamedAgg(column='player_count', aggfunc='sum'),
)

# Discard infinite or missing totals, then anything that is not strictly positive
# (the later cells take logarithms of both columns)
agg = (
    agg.replace([np.inf, -np.inf], np.nan)
    .dropna(subset=['earnings', 'players'])
    .loc[lambda frame: frame['earnings'].gt(0) & frame['players'].gt(0)]
    .copy()
)

print('Countries in scatter:', len(agg))
agg.sort_values(by='earnings', ascending=False).head(5)

57 country–game rows for Battle Royale
Countries in scatter: 57


Unnamed: 0,country,earnings,players
55,United States of America,52369833.18,29198
11,Canada,9174904.96,4361
54,United Kingdom,8783157.13,5672
30,Japan,5800971.79,3982
26,Indonesia,5106526.21,2650


In [4]:
# --- Straight-line fit of log(earnings) against log(players) ---
x = np.log(agg['players'].values)
y = np.log(agg['earnings'].values)

if len(agg) < 3:
    # Fewer than three countries: skip the fit; the plotting cell checks for None
    players_line = earnings_line = None
else:
    m, b = np.polyfit(x, y, deg=1)  # m = slope, b = intercept, both in log space
    print('Regression line (log–log): y =', round(m, 3), '* x +', round(b, 3))

    # Evaluate the fitted line at 200 evenly spaced log-player values spanning
    # the observed range, then map both coordinates back to original units
    x_line = np.linspace(x.min(), x.max(), num=200)
    y_line = b + m * x_line
    players_line, earnings_line = np.exp(x_line), np.exp(y_line)

Regression line (log–log): y = 1.093 * x + 6.915


In [5]:
# Helper for shorter money labels in hover
def format_money(val):
    """Format a dollar amount as a compact label such as ``$52.4M``.

    Amounts are abbreviated with ``K`` (thousands), ``M`` (millions) or
    ``B`` (billions) and one decimal place; amounts below one thousand are
    shown as whole dollars.

    The unit is chosen from the *rounded* figure, so a value that would
    round up to 1000 of a unit is promoted to the next unit
    (``999_999`` -> ``$1.0M`` rather than ``$1000.0K``).  Negative amounts
    are abbreviated the same way with the sign placed before the dollar
    sign (``-5_000_000`` -> ``-$5.0M``).

    Parameters
    ----------
    val : int or float
        Amount in dollars.

    Returns
    -------
    str
        The formatted label.
    """
    # NaN compares unequal to itself; format it directly ("$nan") because it
    # cannot be meaningfully scaled or given a sign.
    if val != val:
        return f"${val:.0f}"

    sign = '-' if val < 0 else ''
    magnitude = abs(val)

    # Whole dollars, unless rounding to an integer would already display 1000.
    if round(magnitude, 0) < 1_000:
        return f"{sign}${magnitude:.0f}"

    # Try K, then M; move on whenever the one-decimal figure would reach 1000.
    for divisor, suffix in ((1_000, 'K'), (1_000_000, 'M')):
        scaled = magnitude / divisor
        if round(scaled, 1) < 1_000:
            return f"{sign}${scaled:.1f}{suffix}"

    # Billions is the largest unit, so anything left over is expressed in B.
    return f"{sign}${magnitude / 1_000_000_000:.1f}B"

# Work on a copy so the hover-text columns are not added to `agg` itself
plot_df = agg.copy()
plot_df['earnings_label'] = plot_df['earnings'].apply(format_money)
plot_df['players_label'] = plot_df['players'].map(lambda v: f"{v:,.0f}")

# Flag China so it can be drawn as its own, larger marker trace
plot_df['is_china'] = plot_df['country'].eq('China')

# Hover box shared by every country marker; the customdata columns are
# [country, players_label, earnings_label] (see _country_trace).
HOVER_TEMPLATE = (
    '<b>%{customdata[0]}</b><br>'
    'Players: %{customdata[1]}<br>'
    'Total earnings: %{customdata[2]}<extra></extra>'
)


def _country_trace(frame, name, marker):
    """Build a markers-only scatter trace for the countries in ``frame``.

    Parameters
    ----------
    frame : pandas.DataFrame
        Rows to plot; must contain the columns ``players``, ``earnings``,
        ``country``, ``players_label`` and ``earnings_label``.
    name : str
        Legend entry for the trace.
    marker : dict
        Marker styling passed straight through to ``go.Scatter``.

    Returns
    -------
    plotly.graph_objects.Scatter
        Trace with x = players, y = earnings and the shared hover template.
    """
    return go.Scatter(
        x=frame['players'],
        y=frame['earnings'],
        mode='markers',
        name=name,
        marker=marker,
        hovertemplate=HOVER_TEMPLATE,
        # One row per point, columns in the order HOVER_TEMPLATE indexes them
        customdata=np.stack(
            [
                frame['country'],
                frame['players_label'],
                frame['earnings_label'],
            ],
            axis=-1,
        ),
    )


fig = go.Figure()

# All non‑China countries
non_china = plot_df[~plot_df['is_china']]
fig.add_trace(
    _country_trace(non_china, 'Countries', dict(size=7, opacity=0.85))
)

# China highlighted (only added when China is present in the data)
china = plot_df[plot_df['is_china']]
if not china.empty:
    fig.add_trace(
        _country_trace(
            china,
            'China',
            dict(size=11, symbol='circle', line=dict(width=2)),
        )
    )

# Regression line (players_line is None when the fit was skipped)
if players_line is not None:
    fig.add_trace(
        go.Scatter(
            x=players_line,
            y=earnings_line,
            mode='lines',
            name='Trend line',
            line=dict(dash='dash'),
            hoverinfo='skip',  # the trend line carries no per-country information
        )
    )

# Layout: log scales + styling
fig.update_layout(
    title=f'Country efficiency — earnings vs players ({genre})',
    xaxis=dict(
        type='log',
        title='Number of players (log scale)',
        tickformat=',',
    ),
    yaxis=dict(
        type='log',
        title='Total earnings (log scale)',
        tickprefix='$',
    ),
    template='plotly_dark',
    legend=dict(x=0.02, y=0.98),
    height=500,
)

fig.show()