# Beyond The Crosshair

In [None]:
import pandas as pd
import numpy as np
import altair as alt
import matplotlib.pyplot as plt
import json
from functools import reduce

alt.data_transformers.enable("default", max_rows=None)
alt.renderers.enable("mimetype")
alt.theme.enable("fivethirtyeight")

"""
#342425 (Deep Charcoal Brown)

#e38416 (Vivid Orange)

#d4d3d3 (Light Gray) 

#050303 (Near Black)

#e8bc84 (Warm Beige)

#7b4f26 (Earthy Brown)

#00000d (Blue Black)

#f73c07 (Bright Red)
"""

'\n#342425 (Deep Charcoal Brown)\n\n#e38416 (Vivid Orange)\n\n#d4d3d3 (Light Gray)\n\n#050303 (Near Black)\n\n#e8bc84 (Warm Beige)\n\n#7b4f26 (Earthy Brown)\n\n#00000d (Black)\n'

## Clean Data Loading

In [2]:
cs16_data = pd.read_csv("../assets/clean/SteamDB_Counter-Strike_1.6_Clean.csv", parse_dates=["month"])
css_data = pd.read_csv("../assets/clean/SteamDB_Counter-Strike_Source_Clean.csv", parse_dates=["month"])
cscz_data = pd.read_csv("../assets/clean/SteamDB_Counter-Strike_Condition_Zero_Clean.csv", parse_dates=["month"])
csgo_data = pd.read_csv("../assets/clean/SteamDB_Counter-Strike_Global_Offensive_Clean.csv", parse_dates=["month"])
cs2_data = pd.read_csv("../assets/clean/SteamDB_Counter-Strike_2_Clean.csv", parse_dates=["month"])
cs_merged_data = pd.read_csv("../assets/clean/SteamDB_Counter-Strike_Clean.csv", parse_dates=["month"])

v_data = pd.read_csv("../assets/clean/GG_Valorant_Clean.csv", parse_dates=["month"])
fn_data = pd.read_csv("../assets/clean/GG_Fortnite_Clean.csv", parse_dates=["month"])
rss_data = pd.read_csv("../assets/clean/GG_Rainbow_Six_Siege_Clean.csv", parse_dates=["month"])
cod_data = pd.read_csv("../assets/clean/SteamDB_Call_of_Duty_Clean.csv", parse_dates=["month"])
bf_data = pd.read_csv("../assets/clean/SteamDB_Battlefield_Clean.csv", parse_dates=["month"])

igdb_data = pd.read_csv("../assets/clean/IGDB_Clean.csv")
twitch_cs_data = pd.read_csv("../assets/clean/Twitch_Counter-Strike_Clean.csv", parse_dates=["month"])
youtube_cs_data = pd.read_csv("../assets/clean/Youtube_Counter-Strike_Clean.csv", parse_dates=["month"])

In [3]:
merged_youtube_twitch = pd.concat([youtube_cs_data.set_index("month"), twitch_cs_data.set_index("month")], axis=1).reset_index()
merged_youtube_twitch = merged_youtube_twitch[merged_youtube_twitch['month'] >= pd.to_datetime("2016-11-01")]

merged_cs_youtube = pd.merge(cs_merged_data[['month','peak']], youtube_cs_data[['month','viewCount']], on="month", how="inner")
merged_cs_youtube = merged_cs_youtube.sort_values(by="viewCount", ascending=False)

merged_cs_twitch = pd.merge(cs_merged_data[['month','peak']], twitch_cs_data[['month','peak viewers']], on="month", how="inner")
merged_cs_twitch = merged_cs_twitch.sort_values(by="peak viewers", ascending=False)

## Visualizations

### Counter-Strike Playerbase Timeseries

In [4]:
series = {
    # "All Counter-Strike (aggregate)": cs_merged_grouped,
    "Counter-Strike (1.6)": cs16_data,
    "Counter-Strike: Source": css_data,
    "Counter-Strike: Condition Zero": cscz_data,
    "Counter-Strike: Global Offensive": csgo_data,
    "Counter-Strike 2": cs2_data,
}

colors = {
    # "All Counter-Strike (aggregate)":  "#0057B8",  # deep royal blue (highlighted)
    "Counter-Strike (1.6)":            "#009E73",  # green
    "Counter-Strike: Condition Zero":  "#7F7F7F",  # medium gray (subtle)
    "Counter-Strike: Source":          "#56B4E9",  # sky blue
    "Counter-Strike: Global Offensive":"#D55E00",  # vermillion (strong contrast vs blue)
    "Counter-Strike 2":                "#CC79A7",  # magenta
}

order = [
#     "All Counter-Strike (aggregate)",
    "Counter-Strike (1.6)",
    "Counter-Strike: Condition Zero",
    "Counter-Strike: Source",
    "Counter-Strike: Global Offensive",
    "Counter-Strike 2",
]

# Combine all series into one tidy DataFrame
frames = []
for name, df in series.items():
    df["game"] = name
    frames.append(df[["month", "peak", "game"]])

combined = pd.concat(frames, ignore_index=True)

# Build Altair chart
chart = (
    alt.Chart(combined)
    .mark_line()
    .encode(
        x=alt.X("month:T", title="Year"),
        y=alt.Y("peak:Q", title="Peak players"),
        color=alt.Color("game:N", title="Game/Version", scale=alt.Scale(domain=order, range=[colors[k] for k in order])),
        tooltip=["game", "month", "peak"],
    )
    .properties(width=950, height=450, title="Counter-Strike Playerbase Over Time")
)

chart

<VegaLite 5 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/display_frontends.html#troubleshooting


### Counter-Strike Viewership on Youtube and Twitch

In [5]:
# First line: views (left y-axis)
twitch_line = alt.Chart(merged_youtube_twitch).mark_line(color="purple").encode(
    x=alt.X("month:T", title="Month"),
    y=alt.Y("peak viewers:Q", title="Twitch", axis=alt.Axis(titleColor="purple")),
    tooltip=["month", "peak viewers"]
)

# Second line: likes (right y-axis)
youtube_line = alt.Chart(merged_youtube_twitch).mark_line(color="red").encode(
    x="month:T",
    y=alt.Y("viewCount:Q", title="Youtube", axis=alt.Axis(titleColor="red")),
    tooltip=["month", "viewCount"],
)

# Layer the two charts
chart = alt.layer(youtube_line, twitch_line).resolve_scale(
    y="independent"  # independent y-axes so they can differ
).properties(width=950, height=450, title="Counter-Strike Viewership Over Time")

chart

<VegaLite 5 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/display_frontends.html#troubleshooting


In [20]:
# --- rename only (assumes your inputs are already cleaned)
yt = merged_cs_youtube[['month','peak','viewCount']].rename(columns={'peak':'cs','viewCount':'youtube'}).copy()
tw = merged_cs_twitch[['month','peak','peak viewers']].rename(columns={'peak':'cs','peak viewers':'twitch'}).copy()

# ensure datetime (for tooltip only)
yt['month'] = pd.to_datetime(yt['month'], errors='coerce')
tw['month'] = pd.to_datetime(tw['month'], errors='coerce')

def make_scatter_players_legend(df, xcol, xtitle):
    df = df.copy()
    df[xcol] = pd.to_numeric(df[xcol], errors='coerce')
    df['cs']  = pd.to_numeric(df['cs'],  errors='coerce')
    df = df.dropna(subset=[xcol, 'cs'])

    x = df[xcol].to_numpy(float); y = df['cs'].to_numpy(float)
    m, b = np.polyfit(x, y, 1)
    df['pred_cs']  = m*df[xcol] + b
    df['resid_cs'] = df['cs'] - df['pred_cs']

    # symmetric color domain around 0
    M = float(max(abs(df['resid_cs'].min()), abs(df['resid_cs'].max())))

    pts = alt.Chart(df).mark_circle(size=60, opacity=0.85).encode(
        x=alt.X(f'{xcol}:Q', title=xtitle),
        y=alt.Y('cs:Q', title='CS peak players'),
        color=alt.Color('resid_cs:Q',
                        title='Players vs expected (players)',
                        scale=alt.Scale(domain=[-M, -2*M/3, -M/3, 0, M/3, 2*M/3, M],
                                        range=["#D93D3D", "#F88383", "#FCC2C2", "#B4B4B4", "#8B96F8", "#5F4EFA", "#0011FC"]),
                        legend=alt.Legend(format=',d')),
        tooltip=[
            alt.Tooltip('month:T', title='Month', format='%Y %b'),
            alt.Tooltip(f'{xcol}:Q', title=xtitle, format=','),
            alt.Tooltip('cs:Q',      title='CS players', format=','),
            alt.Tooltip('pred_cs:Q', title='Predicted players', format=',.0f'),
            alt.Tooltip('resid_cs:Q',title='Residual (players)', format=',.0f'),
        ]
    ).properties(width=420, height=360)

    line = alt.Chart(pd.DataFrame({
        'x':[df[xcol].min(), df[xcol].max()],
        'y':[m*df[xcol].min()+b, m*df[xcol].max()+b]
    })).mark_line(color="#000000").encode(x='x:Q', y='y:Q')

    return pts + line


left  = make_scatter_players_legend(yt, 'youtube', 'YouTube view count')
right = make_scatter_players_legend(tw, 'twitch',  'Twitch peak viewers')


combo = (left | right).configure_title(anchor='middle').properties(
    title=alt.TitleParams(
        text='CS playerbase vs platform viewership (color = players vs expected)',
        anchor='middle',   # center it
        offset=12,         # spacing from the plots
        fontSize=16
    )
)

combo

<VegaLite 5 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/display_frontends.html#troubleshooting


### FPS Competitors

In [9]:
# load the heatmap matrix from assets
pair_heat = pd.read_csv("../assets/raw/trends_heatmap_pairwise.csv", index_col=0)

In [10]:
# --- tidy + DATE CLEANING ---
long = (
    pair_heat.reset_index()
             .rename(columns={"index": "peak_row"})
             .melt(id_vars="peak_row", var_name="game", value_name="interest")
)

# Extract "peak game" and "(YYYY-MM)" then parse to datetime
long["peak_game"] = long["peak_row"].str.extract(r"peak@(.+?) \(")
long["peak_month_str"] = long["peak_row"].str.extract(r"\((\d{4}-\d{2})\)")
long["peak_month"] = pd.to_datetime(long["peak_month_str"], format="%Y-%m", errors="coerce")

# Nicer y labels: "Game (Mon YYYY)"
long["peak_label"] = np.where(
    long["peak_month"].notna(),
    long["peak_game"] + " (" + long["peak_month"].dt.strftime("%b %Y") + ")",
    long["peak_row"],
)

# --- ordering ---
# Rows: chronological by peak month (change to .sort_values("interest", ascending=False) for strongest-first)
row_order = (
    long.loc[long["game"] == long["peak_game"], ["peak_label", "peak_month"]]
        .drop_duplicates(subset=["peak_label"])  # guard against dup joins
        .sort_values("peak_month")["peak_label"]
        .tolist()
)

# Columns: by max interest across rows (helps readability)
col_order = (
    long.groupby("game")["interest"]
        .max()
        .sort_values(ascending=False)
        .index
        .tolist()
)


In [11]:
# --- color scaling ---
vmin = float(np.nanmin(long["interest"].values)) if not long["interest"].dropna().empty else 0.0
vmax = float(np.nanmax(long["interest"].values)) if not long["interest"].dropna().empty else 100.0
# In pairwise scaling, diagonal is 100 by construction; off-diagonals can exceed 100 if j > i in i's peak month.
# Use a dynamic upper bound so we don't clip >100 values.
color_scale = alt.Scale(domain=[max(0, vmin), vmax], scheme="blues")

# --- heatmap ---
heatmap = (
    alt.Chart(long)
    .mark_rect()
    .encode(
        y=alt.Y("peak_label:N", title="Peak month per game", sort=row_order),
        x=alt.X("game:N", title="Game", sort=col_order),
        color=alt.Color("interest:Q", title="Pairwise-scaled interest", scale=color_scale),
        tooltip=[
            alt.Tooltip("peak_game:N", title="Row peak of"),
            alt.Tooltip("peak_month:T", title="Peak month"),
            alt.Tooltip("game:N", title="Column game"),
            alt.Tooltip("interest:Q", title="Interest", format=".0f"),
        ],
    )
    .properties(
        title="Google Trends — Pairwise-at-peak Heatmap (2-term requests; row-diagonal = 100)",
        width={"step": 30},
        height={"step": 30},
    )
)

# Optional numeric labels (auto-contrasting color)
labels = (
    alt.Chart(long)
    .mark_text(size=9)
    .encode(
        y=alt.Y("peak_label:N", sort=row_order),
        x=alt.X("game:N", sort=col_order),
        text=alt.Text("interest:Q", format=".0f"),
        color=alt.condition(
            alt.datum.interest >= (0.6 * vmax), alt.value("white"), alt.value("black")
        ),
    )
)

(heatmap + labels).configure_axis(labelLimit=280).configure_view(stroke=None)

<VegaLite 5 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/display_frontends.html#troubleshooting


### Heatmap Visualizations

In [12]:
cs_merged_data_post2020 = cs_merged_data[cs_merged_data['month'] >= '2020-06-01']
cs_merged_data_post2020

Unnamed: 0,month,peak,gain,% gain
178,2020-06-01,986980.0,-233105.0,-104.3
179,2020-07-01,887871.0,-99109.0,-32.1
180,2020-08-01,955869.0,67998.0,-28.0
181,2020-09-01,1003755.0,47886.0,-8.9
182,2020-10-01,977120.0,-26635.0,24.6
...,...,...,...,...
236,2025-04-01,1898525.0,34097.0,-19.3
237,2025-05-01,1752347.0,-146178.0,-47.0
238,2025-06-01,1766572.0,14225.0,29.8
239,2025-07-01,1506027.0,-260545.0,194.0


In [13]:
def prep(df, name):
    out = df[['month','peak', 'gain', '% gain']].copy()
    return out.rename(columns={'peak': name, 'gain': name+' gain', '% gain': name+' % gain'})

In [14]:
cs   = prep(cs_merged_data_post2020, 'cs')
cod  = prep(cod_data, 'cod')
val  = prep(v_data, 'valorant')
sie  = prep(rss_data, 'siege')
fn   = prep(fn_data, 'fortnite')
bf   = prep(bf_data, 'battlefield')

dfs = [cs, cod, val, sie, fn, bf]
merged = reduce(lambda l, r: pd.merge(l, r, on='month', how='outer', suffixes=("", "_dup")), dfs).sort_values('month')

# (optional) limit to 2020+ across all games to match CS slice
merged = merged[merged['month'] >= pd.Timestamp('2020-06-01')]
merged

Unnamed: 0,month,cs,cs gain,cs % gain,cod,cod gain,cod % gain,valorant,valorant gain,valorant % gain,siege,siege gain,siege % gain,fortnite,fortnite gain,fortnite % gain,battlefield,battlefield gain,battlefield % gain
0,2020-06-01,986980.0,-233105.0,-104.3,8382.0,-3681.0,-22.7,183072.0,0.0,0.0,126511.0,-20062.0,-13.7,,,,5172.0,0.0,0.0
1,2020-07-01,887871.0,-99109.0,-32.1,7746.0,-636.0,-7.6,964278.0,781206.0,426.7,122461.0,-4050.0,-3.2,,,,4696.0,-476.0,-9.2
2,2020-08-01,955869.0,67998.0,-28.0,8607.0,861.0,11.1,2022100.0,1057822.0,109.7,131847.0,9386.0,7.7,,,,16020.0,11324.0,241.1
3,2020-09-01,1003755.0,47886.0,-8.9,8060.0,-547.0,-6.4,2076758.0,54658.0,2.7,94285.0,-37562.0,-28.5,,,,7273.0,-8747.0,-54.6
4,2020-10-01,977120.0,-26635.0,24.6,13277.0,5217.0,64.7,2056779.0,-19979.0,-1.0,115584.0,21299.0,22.6,,,,15854.0,8581.0,118.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
58,2025-04-01,1898525.0,34097.0,-19.3,152660.0,61207.0,66.9,5391338.0,-572938.0,-9.6,107312.0,-38178.0,-26.2,2132493.0,-624636.0,-22.7,52747.0,-28341.0,-35.0
59,2025-05-01,1752347.0,-146178.0,-47.0,128330.0,-24330.0,-15.9,5207109.0,-184229.0,-3.4,89179.0,-18133.0,-16.9,2356174.0,223681.0,10.5,59146.0,6399.0,12.1
60,2025-06-01,1766572.0,14225.0,29.8,106348.0,-21982.0,-17.1,5290508.0,83399.0,1.6,141870.0,52691.0,59.1,5948454.0,3592280.0,152.5,45532.0,-13614.0,-23.0
61,2025-07-01,1506027.0,-260545.0,194.0,105203.0,-1145.0,-1.1,5202237.0,-88271.0,-1.7,111052.0,-30818.0,-21.7,2396423.0,-3552031.0,-59.7,44634.0,-898.0,-2.0


In [15]:
long = merged.melt(id_vars='month', var_name='game', value_name='players')

line = alt.Chart(long).mark_line().encode(
    x=alt.X('month:T', title='Month'),
    y=alt.Y('players:Q', title='Peak players'),
    color=alt.Color('game:N', title='Game'),
    tooltip=['month:T','game:N','players:Q']
).properties(width=900, height=350, title='Playerbase trends since 2020')

line


<VegaLite 5 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/display_frontends.html#troubleshooting


In [16]:
norm = long.groupby('game', group_keys=False).apply(
    lambda d: d.assign(players_z=(d['players']-d['players'].mean())/d['players'].std())
)

line_norm = alt.Chart(norm).mark_line().encode(
    x='month:T', y=alt.Y('players_z:Q', title='Z-score'),
    color='game:N', tooltip=['month:T','game:N','players_z:Q']
).properties(width=900, height=350, title='Normalized (z-score) playerbase trends')

line_norm


  norm = long.groupby('game', group_keys=False).apply(


<VegaLite 5 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/display_frontends.html#troubleshooting


In [17]:
# compute Pearson correlations on the wide numeric block
corr = merged.drop(columns=['month']).corr()

# tidy for Altair
corr_tidy = corr.stack().rename('corr').reset_index().rename(
    columns={'level_0':'game_x','level_1':'game_y'}
)

heat = alt.Chart(corr_tidy).mark_rect().encode(
    x=alt.X('game_x:N', sort=corr.columns.tolist(), title=''),
    y=alt.Y('game_y:N', sort=corr.columns.tolist(), title=''),
    color=alt.Color('corr:Q', scale=alt.Scale(scheme='redblue', domain=[-1,1]), title='Pearson r'),
    tooltip=['game_x:N','game_y:N', alt.Tooltip('corr:Q', format='.2f')]
).properties(width=400, height=400, title='Correlation of monthly peak players')

text = alt.Chart(corr_tidy).mark_text(baseline='middle').encode(
    x='game_x:N', y='game_y:N',
    text=alt.Text('corr:Q', format='.2f'),
    color=alt.condition('datum.corr > 0.5 || datum.corr < -0.5',
                        alt.value('black'),
                        alt.value('gray'))
)

heat + text


<VegaLite 5 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/display_frontends.html#troubleshooting


In [18]:
cs_only = corr_tidy.query("game_x == 'cs' & game_y != 'cs'").copy()
cs_only['game_y'] = cs_only['game_y'].str.title()  # cosmetic

cs_heat = alt.Chart(cs_only).mark_rect().encode(
    x=alt.X('game_y:N', title='Game'),
    y=alt.Y('game_x:N', title='', axis=alt.Axis(labels=False, ticks=False)),
    color=alt.Color('corr:Q', scale=alt.Scale(scheme='redblue', domain=[-1,1])),
    tooltip=['game_y:N', alt.Tooltip('corr:Q', format='.2f')]
).properties(width=400, height=60, title='Correlation with CS')

cs_text = alt.Chart(cs_only).mark_text(baseline='middle').encode(
    x='game_y:N', y='game_x:N', text=alt.Text('corr:Q', format='.2f')
)

cs_heat + cs_text

<VegaLite 5 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/display_frontends.html#troubleshooting


In [19]:
def sel_and_fix(df, name):
    out = df[['month', 'peak']].copy()

    # robust datetime coercion (handles 'YYYY-MM', 'YYYY/MM/DD', ints, etc.)
    out['month'] = pd.to_datetime(out['month'], errors='coerce', infer_datetime_format=True)

    # drop any bad rows that couldn't be parsed
    out = out.dropna(subset=['month']).sort_values('month')

    # remove timezone if present
    try:
        out['month'] = out['month'].dt.tz_localize(None)
    except (TypeError, AttributeError):
        pass

    return out.rename(columns={'peak': name})

# Fix all six
cs  = sel_and_fix(pb_cs_data,  'cs')
cod = sel_and_fix(pb_cod_data, 'cod')
val = sel_and_fix(pb_val_data, 'valorant')
sie = sel_and_fix(pb_siege_data,'siege')
fn  = sel_and_fix(pb_fn_data,  'fortnite')
bf  = sel_and_fix(pb_bf_data,  'battlefield')

# quick sanity check (all should show datetime64[ns])
print(cs.dtypes, cod.dtypes, val.dtypes, sie.dtypes, fn.dtypes, bf.dtypes)

# now this merge will succeed
merged = reduce(lambda l, r: pd.merge(l, r, on='month', how='outer'),
                [cs, cod, val, sie, fn, bf]).sort_values('month')

NameError: name 'pb_cs_data' is not defined

In [None]:
heat = alt.Chart(long_z).mark_rect().encode(
    # yearmonth() makes x discrete → one tile per month
    x=alt.X('yearmonth(month):T', title='Month',
            axis=alt.Axis(format='%Y %b')),
    y=alt.Y('game:N', title='Game', sort=['battlefield','cod','fortnite','siege','valorant','cs']),
    color=alt.Color('z_score:Q',
                    scale=alt.Scale(scheme='redblue', domain=[-2, 2]),
                    title='Z-score'),
    tooltip=[
        alt.Tooltip('yearmonth(month):T', title='Month', format='%Y %b'),
        alt.Tooltip('game:N', title='Game'),
        alt.Tooltip('z_score:Q', title='Z', format='.2f')
    ]
).properties(width=900, height=240, title='Z-scored Playerbases Over Time')


NameError: name 'long_z' is not defined

In [None]:
heat = heat.transform_timeunit(ym='yearmonth(month)').transform_impute(
    impute='z_score', key='ym', groupby=['game'], value=None
).encode(x='ym:T')

heat

<VegaLite 5 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/display_frontends.html#troubleshooting
