# SVOD Subscribers – US Market Overview

This report summarizes the key findings from an exploratory analysis of SVOD subscriber data in the United States. The analysis was performed entirely in Python, based on a cleaned dataset derived from the original Excel source.

We focus on market size, company growth, and volatility across time.


---

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path

# Paths (relative to the project root / notebook location)
BASE_DIR = Path().resolve()
DATA_PROCESSED_DIR = BASE_DIR / "data" / "processed"
REPORTS_DIR = BASE_DIR / "reports"
REPORTS_DIR.mkdir(exist_ok=True)

# Load processed datasets
ts = pd.read_csv(DATA_PROCESSED_DIR / "timeseries_metrics.csv", parse_dates=["fact_date"])
summary = pd.read_csv(DATA_PROCESSED_DIR / "company_summary.csv")

# Make sure we work with a clean 'Company' label
summary = summary.rename(columns={"actor_label": "Company"})


In [37]:
import matplotlib.cm as cm
import numpy as np

# Generate distinct colors per company
def get_colors(n):
    cmap = cm.get_cmap('tab10')
    return [cmap(i % 10) for i in range(n)]

# ---- CUSTOM BAR WITH LABELS AND LEGEND ----
def draw_bar_chart(ax, df, value_col, title, unit="", fmt="{:.1f}"):

    companies = df["Company"].tolist()
    values = df[value_col].tolist()
    colors = get_colors(len(df))

    ax.bar(companies, values, color=colors)

    # Labels on top of bars
    for i, (c, v) in enumerate(zip(companies, values)):
        label = fmt.format(v)
        if unit:
            label += unit
        ax.text(i, v, label, ha="center", va="bottom", fontsize=8, weight="bold")

    # Remove X labels (we replace with legend)
    ax.set_xticks([])

    # Inline legend box
    ax.legend(companies, fontsize=7, loc="upper right", framealpha=0.6)

    ax.set_title(title, fontsize=11)
    ax.grid(axis="y", linestyle="--", alpha=0.3)


In [14]:
# Ensure numeric types
numeric_cols = [
    "subs_initial", "subs_final", "total_growth_abs", "total_growth_pct",
    "cagr", "volatility_pct", "positive_quarters", "negative_quarters",
    "max_qoq_pct", "min_qoq_pct"
]

for col in numeric_cols:
    if col in summary.columns:
        summary[col] = pd.to_numeric(summary[col], errors="coerce")

# Winners per metric (only those that add analytical value)

# 1) Final subscribers (size)
winner_subs = summary.loc[summary["subs_final"].idxmax()]

# 2) Total absolute growth
winner_growth_abs = summary.loc[summary["total_growth_abs"].idxmax()]

# 3) Total percentage growth
winner_growth_pct = summary.loc[summary["total_growth_pct"].idxmax()]

# 4) CAGR
winner_cagr = summary.loc[summary["cagr"].idxmax()]

# 5) Volatility (highest)
winner_vol = summary.loc[summary["volatility_pct"].idxmax()]

# 6) Most positive quarters
winner_pos_q = summary.loc[summary["positive_quarters"].idxmax()]

# 7) Most negative quarters
winner_neg_q = summary.loc[summary["negative_quarters"].idxmax()]

# 8) Best single-quarter growth (%)
winner_max_qoq = summary.loc[summary["max_qoq_pct"].idxmax()]

# 9) Worst single-quarter drop (%)
winner_min_qoq = summary.loc[summary["min_qoq_pct"].idxmin()]

In [None]:
# --- METRICS TEXT WITHOUT * AND WITHOUT CAGR ---
metrics_bullets = [
    (
        "• Final subscribers (latest subscriber base) = "{winner_subs.Company} ({winner_subs.subs_final:,.0f})"
    ),
    (
        "• Total absolute growth (net increase) = ",
        f"{winner_growth_abs.Company} (+{winner_growth_abs.total_growth_abs:,.0f})"
    ),
    (
        "• Total percentage growth (first→last) = ",
        f"{winner_growth_pct.Company} ({winner_growth_pct.total_growth_pct:.1f}%)"
    ),
    (
        "• Volatility (std dev of QoQ % change) = ",
        f"{winner_vol.Company} ({winner_vol.volatility_pct:.1f} pp)"
    ),
    (
        "• Positive quarters (growth periods) = ",
        f"{winner_pos_q.Company} ({winner_pos_q.positive_quarters:.0f})"
    ),
    (
        "• Negative quarters (decline periods) = ",
        f"{winner_neg_q.Company} ({winner_neg_q.negative_quarters:.0f})"
    ),
    (
        "• Best single-quarter growth (highest QoQ %) = ",
        f"{winner_max_qoq.Company} ({winner_max_qoq.max_qoq_pct:.1f}%)"
    ),
    (
        "• Worst single-quarter drop (lowest QoQ %) = ",
        f"{winner_min_qoq.Company} ({winner_min_qoq.min_qoq_pct:.1f}%)"
    ),
]


fig = plt.figure(figsize=(8.27, 11.69))

# ---------- TEXT BLOCK ----------
text_ax = fig.add_axes([0.06, 0.58, 0.88, 0.36])
text_ax.axis("off")

fig.suptitle(
    "SVOD Subscribers – US Market Performance (One-Page Summary)",
    fontsize=17, weight="bold", y=0.985
)

text_ax.text(0.0, 0.97, "Overview:", fontsize=13, weight="bold", va="top")

text_ax.text(
    0.0, 0.90,
    "This report analyzes subscriber performance across US SVOD companies using quarterly data. "
    "Nine metrics were computed to evaluate scale, growth, and stability.",
    fontsize=11, va="top"
)

text_ax.text(
    0.0, 0.82,
    "Metrics analyzed:",
    fontsize=13, weight="bold", va="top"
)

# --- METRICS WITH BOLD WINNERS, FULL-WIDTH, CLEAN LAYOUT ---
y = 0.75
for i in range(0, len(metrics_bullets), 2):
    label = metrics_bullets[i]
    bold_value = metrics_bullets[i+1]

    text_ax.text(0.0, y, label, fontsize=11, va="top")
    text_ax.text(0.04, y - 0.035, bold_value, fontsize=11, weight="bold", va="top")

    y -= 0.095  # more vertical spacing

# ---------- GRAPH AREA (BIGGER, LOWER, NOT OVERLAPPING) ----------
# ROW 1
ax1 = fig.add_axes([0.06, 0.32, 0.42, 0.18])
ax2 = fig.add_axes([0.54, 0.32, 0.42, 0.18])

# ROW 2
ax3 = fig.add_axes([0.06, 0.10, 0.42, 0.18])
ax4 = fig.add_axes([0.54, 0.10, 0.42, 0.18])

# 1) Final subscribers (millions)
draw_bar_chart(
    ax1,
    plot_subs,
    value_col="subs_final_millions",
    title="Final subscribers (millions)",
    fmt="{:.1f}",
    unit="M"
)

# 2) Total % growth
draw_bar_chart(
    ax2,
    plot_growth_pct,
    value_col="total_growth_pct",
    title="Total % growth",
    fmt="{:.0f}",
    unit="%"
)

# 3) Total absolute growth (millions)
draw_bar_chart(
    ax3,
    plot_growth_abs,
    value_col="total_growth_abs_millions",
    title="Total absolute growth (millions)",
    fmt="{:.1f}",
    unit="M"
)

# 4) Volatility
draw_bar_chart(
    ax4,
    plot_vol,
    value_col="volatility_pct",
    title="Volatility (std dev QoQ %)",
    fmt="{:.1f}",
    unit="pp"
)


# ---------- EXPORT ----------
pdf_path = REPORTS_DIR / "SVOD_ONE_PAGE_FINAL_FINAL.pdf"
fig.savefig(pdf_path, bbox_inches="tight")
plt.close(fig)

pdf_path


  cmap = cm.get_cmap('tab10')


WindowsPath('D:/GitHub/DataProjects/Dataxis/reports/SVOD_ONE_PAGE_FINAL_FINAL.pdf')