# Beyond The Crosshair

In [None]:
import pandas as pd
import numpy as np
import altair as alt

# Remove Altair's row cap (max_rows=None) so whole DataFrames can be embedded in the chart specs.
alt.data_transformers.enable("default", max_rows=None)
# Emit charts as MIME bundles; a frontend that cannot render them shows the
# "<VegaLite 5 object>" fallback text instead of the chart.
alt.renderers.enable("mimetype")

RendererRegistry.enable('mimetype')

## Clean Data Loading

In [3]:
# Counter-Strike: one CSV per version plus a combined file. "month" is parsed to datetime on load.
cs16_data = pd.read_csv("../assets/clean/SteamDB_Counter-Strike_1.6_Clean.csv", parse_dates=["month"])
css_data = pd.read_csv("../assets/clean/SteamDB_Counter-Strike_Source_Clean.csv", parse_dates=["month"])
cscz_data = pd.read_csv("../assets/clean/SteamDB_Counter-Strike_Condition_Zero_Clean.csv", parse_dates=["month"])
csgo_data = pd.read_csv("../assets/clean/SteamDB_Counter-Strike_Global_Offensive_Clean.csv", parse_dates=["month"])
cs2_data = pd.read_csv("../assets/clean/SteamDB_Counter-Strike_2_Clean.csv", parse_dates=["month"])
cs_merged_data = pd.read_csv("../assets/clean/SteamDB_Counter-Strike_Clean.csv", parse_dates=["month"])

# Rival titles compared against Counter-Strike in the heatmap section.
v_data = pd.read_csv("../assets/clean/GG_Valorant_Clean.csv", parse_dates=["month"])
rss_data = pd.read_csv("../assets/clean/GG_Rainbow_Six_Siege_Clean.csv", parse_dates=["month"])
cod_data = pd.read_csv("../assets/clean/SteamDB_Call_of_Duty_Clean.csv", parse_dates=["month"])
bf_data = pd.read_csv("../assets/clean/SteamDB_Battlefield_Clean.csv", parse_dates=["month"])

# IGDB table is read without date parsing; the Twitch and YouTube files are keyed by "month" like the rest.
igdb_data = pd.read_csv("../assets/clean/IGDB_Clean.csv")
twitch_cs_data = pd.read_csv("../assets/clean/Twitch_Counter-Strike_Clean.csv", parse_dates=["month"])
youtube_cs_data = pd.read_csv("../assets/clean/Youtube_Counter-Strike_Clean.csv", parse_dates=["month"])

In [None]:
# Months present in both the Counter-Strike and YouTube data, highest view count first.
merged_cs_youtube = (
    cs_merged_data[['month','peak']]
    .merge(youtube_cs_data[['month','viewCount']], on="month", how="inner")
    .sort_values(by="viewCount", ascending=False)
)

# Months present in both the Counter-Strike and Twitch data, highest peak viewers first.
merged_cs_twitch = (
    cs_merged_data[['month','peak']]
    .merge(twitch_cs_data[['month','peak viewers']], on="month", how="inner")
    .sort_values(by="peak viewers", ascending=False)
)

## Visualizations

### Counter-Strike Playerbase by Version Timeseries

In [5]:
# Per-version monthly frames, keyed by the label shown in the legend.
series = {
    "Counter-Strike (1.6)": cs16_data,
    "Counter-Strike: Source": css_data,
    "Counter-Strike: Condition Zero": cscz_data,
    "Counter-Strike: Global Offensive": csgo_data,
    "Counter-Strike 2": cs2_data,
}

# Line colour per version (hex value, then its approximate hue).
colors = {
    "Counter-Strike (1.6)":            "#FDD397",  # pale orange
    "Counter-Strike: Condition Zero":  "#21283B",  # dark navy
    "Counter-Strike: Source":          "#5952C6",  # indigo
    "Counter-Strike: Global Offensive":"#EF3800",  # vermillion
    "Counter-Strike 2":                "#E58716",  # orange
}

# Legend order; also used as the colour-scale domain so labels and colours stay paired.
order = [
    "Counter-Strike (1.6)",
    "Counter-Strike: Condition Zero",
    "Counter-Strike: Source",
    "Counter-Strike: Global Offensive",
    "Counter-Strike 2",
]

# Combine all series into one tidy DataFrame.
# `assign` returns a labelled copy, so the frames loaded earlier are not
# modified (re-running this cell no longer leaves a "game" column on them).
frames = []
for name, df in series.items():
    frames.append(df.assign(game=name)[["month", "peak", "game"]])

combined = pd.concat(frames, ignore_index=True)

# Build Altair chart
chart = (
    alt.Chart(combined)
    .mark_line()
    .encode(
        x=alt.X("month:T", title=None),
        y=alt.Y("peak:Q", title=None),
        color=alt.Color("game:N", title="Game/Version", scale=alt.Scale(domain=order, range=[colors[k] for k in order])),
        tooltip=["game", "month", "peak"],
    )
    .properties(width=600, height=300)
    .configure(background="white")
    .configure_axis(
        labelColor="#3b415c"  # change tick label color
    )
    .configure_legend(
        labelColor="#3b415c",  # legend text color
        titleColor="#3b415c"   # legend title color
    )
)

chart

<VegaLite 5 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/display_frontends.html#troubleshooting


### Event Annotated Time Series

In [None]:
# Cut-off months between the three eras. Both comparisons on each cut-off are
# inclusive, so a cut-off month is kept in the two neighbouring slices and the
# adjacent line segments share an endpoint.
first_cutoff = pd.to_datetime("2011-11-01")
second_cutoff = pd.to_datetime("2023-10-01")
month_col = cs_merged_data['month']

cs_early_period = cs_merged_data[month_col <= first_cutoff].copy()
cs_go_period = cs_merged_data[(month_col >= first_cutoff) & (month_col <= second_cutoff)].copy()
cs_2_period = cs_merged_data[month_col >= second_cutoff].copy()

# Label periods (the same labels drive the colour-scale domain below)
period_labels = [
    "Pre-CS:GO (≤ Oct 2011)",
    "CS:GO Era (Nov 2011–Sep 26, 2023)",
    "CS2 Era (≥ Sep 27, 2023)",
]
early = cs_early_period.assign(period=period_labels[0])
go    = cs_go_period.assign(period=period_labels[1])
cs2   = cs_2_period.assign(period=period_labels[2])

combined_periods = pd.concat([early, go, cs2], ignore_index=True)

# Encodings, built separately so the chart expression stays short
x_enc = alt.X('month:T', title=None, axis=alt.Axis(grid=False, domain=True))
y_enc = alt.Y(
    'peak:Q',
    axis=alt.Axis(title="Peak CCU", titleColor="#3b415c", grid=False, domain=True, titleFontSize=22),
)
color_enc = alt.Color(
    'period:N',
    title=None,
    legend=None,
    scale=alt.Scale(domain=period_labels, range=["#6A7FDB", "#EF3800", "#E58716"]),
)

# Plot three connected lines (one per period)
chart = (
    alt.Chart(combined_periods, background="white")
    .mark_line(strokeWidth=5)
    .encode(
        x=x_enc,
        y=y_enc,
        color=color_enc,
        tooltip=["period:N", "month:T", "peak:Q"],
    )
    .properties(width=1920, height=400)
    .configure_view(strokeWidth=0)
    .configure_axis(
        labelColor="#3b415c",
        labelFontSize=18,
        domainColor="#3b415c",
        tickColor="#3b415c",
    )
)

chart




<VegaLite 5 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/display_frontends.html#troubleshooting


### Rival Title Player Trends Heatmap

In [None]:
# Month-by-month heatmap (each cell = one game-month), normalized per game
def _prep(df, name):
    """Reduce one game's data to a single peak value per calendar month.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs a datetime `month` column and a `peak` column.
    name : str
        Label written into the `game` column of the result.

    Returns
    -------
    pandas.DataFrame
        Columns `game`, `ym` (first day of the month) and `peak` (the maximum
        peak among rows that fall in that month). The input is not modified.
    """
    # Snap every date to the first day of its month so duplicates collapse together.
    month_start = df['month'].dt.to_period('M').dt.to_timestamp()
    labelled = df[['peak']].assign(game=name, ym=month_start)
    return labelled.groupby(['game', 'ym'], as_index=False)['peak'].max()

# One tidy (game, month, peak) frame per title.
frames = [
    _prep(cs_merged_data, "Counter-Strike"),
    _prep(v_data, "Valorant"),
    _prep(rss_data, "Rainbow Six Siege"),
    _prep(cod_data, "Call of Duty"),
    _prep(bf_data, "Battlefield"),
]
df = pd.concat(frames, ignore_index=True)

# Window: the five years ending at the latest month present in any title.
end_ym = df['ym'].max()
start_ym = (end_ym - pd.DateOffset(years=5)).to_period('M').to_timestamp()
# .copy() makes the window an independent frame, so adding `norm_peak` below
# is a plain column assignment rather than a write onto a filtered slice.
df = df[(df['ym'] >= start_ym) & (df['ym'] <= end_ym)].copy()

# normalize per game within the shown window (each game's own maximum maps to 1.0)
df['norm_peak'] = df.groupby('game')['peak'].transform(lambda x: x / x.max())

# Full game x month grid; months with no data for a game stay NaN after the left merge.
all_months = pd.date_range(start_ym, end_ym, freq='MS')
games = ["Counter-Strike","Valorant","Rainbow Six Siege","Call of Duty","Battlefield"]
full = (
    pd.MultiIndex.from_product([games, all_months], names=['game','ym'])
    .to_frame(index=False)
    .merge(df, on=['game','ym'], how='left')
)

# Fixed cell width: the chart grows with the number of months shown.
n_months = full['ym'].nunique()
cell_px = 12
chart_width = int(n_months * cell_px)

heat_monthly = (
    alt.Chart(full)
    .mark_rect()
    .encode(
        x=alt.X(
            'yearmonth(ym):O',
            title='Month',
            axis=alt.Axis(
                labelAngle=-45,
                format='%Y-%m',
                labelFontSize=12,
                titleFontSize=14,
                labelColor='#3b415c',
                titleColor='#3b415c'
            )
        ),
        y=alt.Y(
            'game:N',
            title='Game',
            sort=games,
            axis=alt.Axis(
                labelFontSize=12,
                titleFontSize=14,
                labelColor='#3b415c',
                titleColor='#3b415c'
            )
        ),
        color=alt.Color(
            'norm_peak:Q',
            # Labels say "per game" because norm_peak is divided by each game's own max.
            title='Peak (normalized per game)',
            scale=alt.Scale(domain=[0, 1], scheme='oranges'),
            legend=alt.Legend(titleFontSize=14, labelFontSize=12,
                              titleColor='#3b415c', labelColor='#3b415c')
        ),
        tooltip=[
            alt.Tooltip('game:N', title='Game'),
            alt.Tooltip('yearmonth(ym):T', title='Month', format='%Y-%m'),
            alt.Tooltip('peak:Q', title='Peak (raw)', format=','),
            alt.Tooltip('norm_peak:Q', title='% of Game Max', format='.0%')
        ]
    )
    .properties(
        width=chart_width,
        height=220,
        background='white'
    )
)

heat_monthly


<VegaLite 5 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/display_frontends.html#troubleshooting


### Counter-Strike Viewership on YouTube and Twitch

#### Lollipop Plots

In [None]:
# ----- Twitch -----
# Work on a copy; rows whose viewer count cannot be parsed as a number become NaN and are dropped.
tw = twitch_cs_data.copy()
tw['twitch_viewers'] = pd.to_numeric(tw['peak viewers'], errors='coerce')
tw = tw.dropna(subset=['month', 'twitch_viewers'])

# Shared x encoding for the two Twitch layers (stems and dots).
tw_base = alt.Chart(tw).encode(x=alt.X('month:T', title=None, axis=alt.Axis(grid=False)))

# Stems: a rule from the calculated constant `zero` up to the viewer count.
# This layer also carries the right-hand Twitch axis.
tw_sticks = tw_base.mark_rule(strokeWidth=1, color='#888').encode(
    y=alt.Y('zero:Q',
            axis=alt.Axis(orient='right',
                          title='Twitch Peak Viewers',
                          titleColor='#9146FF',
                          titleFontSize=18,
                          format='.2s', tickCount=7, labelFontSize=12),
            ),
    y2=alt.Y2('twitch_viewers:Q')
).transform_calculate(zero='0')

# Dots: no axis is drawn for this layer.
# NOTE(review): y is resolved as 'independent' below, so the dots get their own
# y scale rather than sharing the stems' scale; they presumably line up because
# both default to a zero-based domain up to the same maximum — confirm visually.
tw_points = tw_base.mark_point(filled=True, size=60, color='#9146FF').encode(
    y=alt.Y('twitch_viewers:Q', axis=None),
    tooltip=[
        alt.Tooltip('month:T', title='Date'),
        alt.Tooltip('twitch_viewers:Q', title='Twitch Peak Concurrent Viewers', format=',')
    ]
)

# ----- Counter-Strike -----
# Only the Counter-Strike series is cut at START; the Twitch frame is not filtered here.
START = pd.Timestamp('2016-11-01')
cs = cs_merged_data[['month', 'peak']].rename(columns={'peak': 'cs_players'}).copy()
cs = cs[cs['month'] >= START]

# Centred rolling mean over SMOOTH rows (set SMOOTH to 1 to plot raw values).
# Assumes rows are one per month in chronological order — TODO confirm.
SMOOTH = 3
if SMOOTH > 1:
    cs['cs_players_s'] = cs['cs_players'].rolling(SMOOTH, min_periods=1, center=True).mean()
    y_cs = 'cs_players_s'
else:
    y_cs = 'cs_players'

# CS line (left axis)
cs_line = alt.Chart(cs).mark_line(strokeWidth=3, color="#E58716").encode(
    x=alt.X('month:T', axis=alt.Axis(grid=False, labelFontSize=12)),
    y=alt.Y(f'{y_cs}:Q',
            axis=alt.Axis(orient='left',
                          title='Peak CCU (millions)',
                          titleColor='#E58716',
                          format='.2s', tickCount=6, grid=False, titleFontSize=18, labelFontSize=12))
)

# ----- Layer -----
# Layers are listed line first, then stems, then dots; each layer gets its own y scale.
chart_twitch = \
    alt.layer(cs_line, tw_sticks, tw_points) \
        .resolve_scale(y='independent') \
        .properties(width=900, height=300) \
        .configure(
            background='white',
            padding=5
        ) \
        .configure_view(
            strokeWidth=0,
            fill='white'
        ) \
        .configure_axis(
            grid=False,
            labelColor='#3b415c',
            domainColor='#3b415c',
            tickColor='#3b415c'
        ) \
        .configure_title(color='black')

chart_twitch


<VegaLite 5 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/display_frontends.html#troubleshooting


In [None]:
# ----- YouTube -----
# Keep only months from START onward (the Counter-Strike series below uses the same cut).
yt = youtube_cs_data.copy()
START = pd.Timestamp('2016-11-01')
yt = yt[yt['month'] >= START]

# Shared x encoding for the two YouTube layers (stems and dots).
yt_base = alt.Chart(yt).encode(x=alt.X('month:T', title=None))

# Stems: a rule from the calculated constant `zero` up to the view count, on a
# square-root scale. This layer also carries the right-hand YouTube axis.
yt_sticks = yt_base.mark_rule(strokeWidth=1, color='#888').encode(
    y=alt.Y('zero:Q',
            axis=alt.Axis(orient='right', title="Youtube View Count (millions)", titleColor='#cc0000', titleFontSize=18,
                          format='.2s', tickCount=7, labelFontSize=12),
            scale=alt.Scale(type='sqrt')),
    y2=alt.Y2('viewCount:Q')
).transform_calculate(zero='0')

# Dots: no axis is drawn; the sqrt scale type is repeated so it matches the stems.
# NOTE(review): y is resolved as 'independent' below, so this is a separate
# scale from the stems' one; confirm the dots sit on the stem tips.
# The tooltip also reads likeCount and commentCount, which are assumed to be
# columns of the YouTube file — TODO confirm.
yt_points = yt_base.mark_point(filled=True, size=60, color='#cc0000').encode(
    y=alt.Y('viewCount:Q', axis=None, scale=alt.Scale(type='sqrt')),
    tooltip=[
        alt.Tooltip('month:T', title='Date'),
        alt.Tooltip('viewCount:Q', title='Views', format=','),
        alt.Tooltip('likeCount:Q', title='Likes', format=','),
        alt.Tooltip('commentCount:Q', title='Comments', format=',')
    ]
)

# ----- Counter-Strike -----
cs = cs_merged_data[['month', 'peak']].rename(columns={'peak': 'cs_players'}).copy()
cs = cs[cs['month'] >= START]

# Centred rolling mean over SMOOTH rows (set SMOOTH to 1 to plot raw values).
# Assumes rows are one per month in chronological order — TODO confirm.
SMOOTH = 3
if SMOOTH > 1:
    cs['cs_players_s'] = cs['cs_players'].rolling(SMOOTH, min_periods=1, center=True).mean()
    y_cs = 'cs_players_s'
else:
    y_cs = 'cs_players'

# CS line; the players axis is drawn on the LEFT (orient='left').
cs_line = alt.Chart(cs).mark_line(strokeWidth=3, color='#E58716').encode(
    x=alt.X('month:T', axis=alt.Axis(grid=False, labelFontSize=12)),
    y=alt.Y(f'{y_cs}:Q',
            axis=alt.Axis(orient='left', title="Peak CCU (millions)",
                          titleColor='#E58716', titleFontSize=18, format='.2s', tickCount=6, labelFontSize=12, grid=False))
)

# Layers are listed line first, then stems, then dots; each layer gets its own y scale.
chart_youtube = (
    alt.layer(cs_line, yt_sticks, yt_points)
      .resolve_scale(y='independent')
      .properties(width=900, height=300)
      .configure(
          background='white',
          padding=5
      )
      .configure_view(
          strokeWidth=0,
          fill='white'
      )
      .configure_axis(
          grid=False,
          labelColor='#3b415c',
          domainColor='#3b415c',
          tickColor='#3b415c'
      )
      .configure_title(color='black')
)

chart_youtube



<VegaLite 5 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/display_frontends.html#troubleshooting


In [None]:
AX_COLOR = '#3b415c'

# Monthly Counter-Strike peaks under the column name `cs`, with `month` coerced to datetime.
cs_df = cs_merged_data[['month','peak']].rename(columns={'peak':'cs'})
cs_df = cs_df.assign(month=pd.to_datetime(cs_df['month']))

# NOTE: `yt` and `tw` are rebound here; from this point on they hold the
# platform metric inner-joined with `cs` on month, not the frames the
# lollipop cells built under the same names.
yt = youtube_cs_data[['month','viewCount']].rename(columns={'viewCount':'youtube'})
yt = yt.assign(month=pd.to_datetime(yt['month'])).merge(cs_df, on='month', how='inner')

tw = twitch_cs_data[['month','peak viewers']].rename(columns={'peak viewers':'twitch'})
tw = tw.assign(month=pd.to_datetime(tw['month'])).merge(cs_df, on='month', how='inner')

# ---------- 2) Plotter ----------
def make_scatter_players_legend(df, xcol, xtitle, labelx_color):
    """Scatter CS peak players against an audience metric, with a fitted trend line.

    Each point is one month. Points are coloured by how far the observed
    player count lies above or below the least-squares line, expressed as a
    fraction of the fitted value and clipped to ±75%.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain `month`, `cs` and the column named by `xcol`.
    xcol : str
        Name of the audience-metric column plotted on the x axis.
    xtitle : str
        Human-readable name of that metric (axis title, tooltip, chart title).
    labelx_color : str
        Colour of the x-axis title.

    Returns
    -------
    Layered Altair chart (points plus regression line) with view, axis, title
    and background configuration applied.

    Raises
    ------
    ValueError
        If fewer than two rows have numeric values in both `xcol` and `cs`.
    """
    d = df.copy()
    d[xcol] = pd.to_numeric(d[xcol], errors='coerce')
    d['cs']  = pd.to_numeric(d['cs'],  errors='coerce')

    # Coercion turns unparseable values into NaN. Drop those rows before
    # fitting: np.polyfit cannot produce a usable line from NaN input, and they
    # would otherwise be counted in `n` although the correlations ignore them.
    d = d.dropna(subset=[xcol, 'cs'])
    n = int(len(d))
    if n < 2:
        raise ValueError(f"Not enough rows to fit a trend line after dropna for '{xcol}' (n={n}).")

    # OLS fit: y = m*x + b
    x = d[xcol].astype(float).to_numpy()
    y = d['cs'].astype(float).to_numpy()
    m, b = np.polyfit(x, y, 1)

    # predictions, residuals, % diff (clipped to ±75%)
    d['pred_cs']  = m * d[xcol] + b
    eps = 1e-9  # floor on the denominator so a prediction of ~0 cannot divide by zero
    d['pct_diff'] = ((d['cs'] - d['pred_cs']) /
                     np.maximum(d['pred_cs'].abs(), eps)).clip(-0.75, 0.75)

    # stats for title (R² is the squared Pearson r)
    pear  = float(d[xcol].corr(d['cs'], method='pearson'))
    spear = float(d[xcol].corr(d['cs'], method='spearman'))
    r2    = pear**2
    panel_title = f'{xtitle} vs CS — r={pear:.2f}, ρ={spear:.2f}, R²={r2:.2f}, n={n}'

    axis_x = alt.Axis(
        grid=False, ticks=True, domain=True,
        labelColor=AX_COLOR, titleColor=labelx_color,
        tickColor=AX_COLOR, domainColor=AX_COLOR,
        tickCount=7, labelFontSize=18, titleFontSize=20, titlePadding=10
    )
    axis_y = alt.Axis(
        grid=False, ticks=True, domain=True,
        labelColor=AX_COLOR, titleColor="#E58716",
        tickColor=AX_COLOR, domainColor=AX_COLOR,
        tickCount=7, labelFontSize=18, titleFontSize=20, titlePadding=10
    )

    # diverging color scale for pct_diff: reds below the line, blues above.
    # The outer stops (±0.9) lie beyond the ±0.75 clip applied to the data.
    domain_stops = [-0.9, -0.7, -0.35, 0.0, 0.35, 0.7, 0.9]
    range_colors = ["#7f0000","#b30000","#fcae91","#f0f0f0","#9ecae1","#3182bd","#08519c"]

    tooltips = [
        alt.Tooltip('month:T', title='Month', format='%Y %b'),
        alt.Tooltip(f'{xcol}:Q', title=xtitle, format=',.0f'),
        alt.Tooltip('cs:Q',      title='CS players', format=',.0f'),
        alt.Tooltip('pred_cs:Q', title='Predicted players', format=',.0f'),
        alt.Tooltip('pct_diff:Q',title='% above/below', format='.1%'),
    ]

    pts = (
        alt.Chart(d)
        .mark_circle(size=90, opacity=0.95, stroke='white', strokeWidth=0.6)
        .encode(
            x=alt.X(f'{xcol}:Q', title=xtitle, axis=axis_x),
            y=alt.Y('cs:Q', title='CS peak players', axis=axis_y),
            color=alt.Color(
                'pct_diff:Q',
                title='Above/below expected (%)',
                scale=alt.Scale(domain=domain_stops, range=range_colors),
                legend=alt.Legend(
                    orient='right', direction='vertical', gradientLength=220,
                    labelColor=AX_COLOR, titleColor=AX_COLOR,
                    titleFontSize=14, labelFontSize=12, format='.0%'
                )
            ),
            tooltip=tooltips
        )
        .properties(width=640, height=460, title=panel_title)
    )

    # regression line drawn between the smallest and largest observed x
    x_min = float(d[xcol].min())
    x_max = float(d[xcol].max())
    line_df = pd.DataFrame({'x': [x_min, x_max], 'y': [m*x_min + b, m*x_max + b]})
    line = alt.Chart(line_df).mark_line(color="#000000", size=2).encode(x='x:Q', y='y:Q')

    return (pts + line)\
        .configure_view(stroke=None)\
        .configure_axis(grid=False)\
        .configure_title(color=AX_COLOR, fontSize=18, anchor='start')\
        .configure(background='white')

# ---------- 3) Build charts ----------
# One scatter per platform; the last argument is the colour of the x-axis title.
youtube_chart = make_scatter_players_legend(yt, 'youtube', 'YouTube view count', '#cc0000')
twitch_chart  = make_scatter_players_legend(tw, 'twitch',  'Twitch peak viewers', '#9146FF')


#### YouTube-CS Scatterplot

In [37]:
youtube_chart

<VegaLite 5 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/display_frontends.html#troubleshooting


#### Twitch-CS Scatterplot

In [38]:
twitch_chart

<VegaLite 5 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/display_frontends.html#troubleshooting


#### Statistics Tables

In [None]:
# Stats for scatterplots: Pearson r, Spearman ρ, R²
AX_COLOR = '#3b415c'

def corr_stats(df, xcol):
    """Summarise how strongly `xcol` tracks the `cs` column.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain `cs` and the column named by `xcol`.
    xcol : str
        Column to correlate with `cs`.

    Returns
    -------
    pandas.DataFrame
        Two columns, `metric` and `value`, with one row each for
        'Pearson r', 'Spearman ρ', 'R²' (the squared Pearson r) and 'n'
        (rows that are numeric in both columns).

    Raises
    ------
    ValueError
        If fewer than two usable rows remain.
    """
    d = df[[xcol, 'cs']].copy()
    d[xcol] = pd.to_numeric(d[xcol], errors='coerce')
    d['cs']  = pd.to_numeric(d['cs'],  errors='coerce')
    d = d.dropna()
    n = len(d)
    if n < 2:
        raise ValueError(f"Not enough rows for stats after dropna for '{xcol}' (n={n}).")

    pear = float(d[xcol].corr(d['cs'], method='pearson'))
    # Spearman's ρ is Pearson's r computed on the (average) ranks of the values.
    spear = float(d[xcol].rank().corr(d['cs'].rank(), method='pearson'))
    r2 = pear ** 2

    # Row order matches the sort order used by stats_chart.
    out = pd.DataFrame({
        'metric': ['Pearson r', 'Spearman ρ', 'R²', 'n'],
        'value':  [pear, spear, r2, n]
    })
    return out

def stats_chart(stats_df, title):
    """Render a metric/value table as a small text-only Altair chart.

    Parameters
    ----------
    stats_df : pandas.DataFrame
        Frame with `metric` and `value` columns.
    title : str
        Heading shown above the table.

    Returns
    -------
    Altair chart with one text mark per metric row. The input frame is not modified.
    """
    def _as_text(row):
        # The row count is shown as an integer; every other metric gets three decimals.
        if row['metric'] == 'n':
            return f"{int(row['value'])}"
        return f"{row['value']:.3f}"

    table = stats_df.copy()
    table['display'] = table.apply(_as_text, axis=1)

    # Metrics are listed in a fixed order down the y axis.
    metric_axis = alt.Y(
        'metric:N',
        title=None,
        sort=['Pearson r','Spearman ρ','R²','n'],
        axis=alt.Axis(labelColor=AX_COLOR),
    )
    heading = alt.TitleParams(text=title, color=AX_COLOR, fontSize=14)

    text_marks = alt.Chart(table).mark_text(
        align='left', baseline='middle', fontSize=14, color=AX_COLOR
    )
    return (
        text_marks
        .encode(y=metric_axis, text='display:N')
        .properties(width=180, height=110, title=heading)
    )

# ---- compute (the tables are displayed in the two cells below) ----
# `yt` and `tw` are the month-joined frames that also feed the scatterplots.
yt_stats = corr_stats(yt, 'youtube')
tw_stats = corr_stats(tw, 'twitch')


#### YouTube-CS Statistics

In [None]:
display(yt_stats)

YouTube vs CS


Unnamed: 0,metric,value
0,Pearson r,0.655496
1,Spearman ρ,0.529118
2,R²,0.429675
3,n,224.0


#### Twitch-CS Statistics

In [None]:
display(tw_stats)


Twitch vs CS


Unnamed: 0,metric,value
0,Pearson r,0.235795
1,Spearman ρ,0.394473
2,R²,0.055599
3,n,106.0
