In [1]:
# ==== Imports & .env ====
import os
from pathlib import Path
import json
import requests
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from dotenv import load_dotenv

plt.rcParams["figure.figsize"] = (12, 5)

# Pull the settings from the .env file located in the current working directory.
load_dotenv(dotenv_path=Path(".") / ".env")
API_URL, DATE_BEGIN, DATE_END = (
    os.getenv(key) for key in ("API_URL", "DATE_BEGIN", "DATE_END")
)
# All three settings are mandatory: stop right away if any is unset or empty.
assert all((API_URL, DATE_BEGIN, DATE_END)), "Проверь .env: нужны API_URL/DATE_BEGIN/DATE_END"

# Query-string parameters (date range) built from the .env values.
PARAMS = dict(begin=DATE_BEGIN, end=DATE_END)

In [2]:
# ==== Data loaders ====
def load_visits(api_url: str, params: dict) -> pd.DataFrame:
    """Download visits from ``{api_url}/visits`` and return them cleaned, without bots.

    Args:
        api_url: Base URL of the API; ``/visits`` is appended to it.
        params: Query-string parameters forwarded to the request.

    Returns:
        A new DataFrame in which ``visit_id`` is renamed to ``uuid`` and
        ``datetime`` to ``date``; ``date`` is parsed to datetime (unparseable
        values become NaT); ``uuid``, ``platform`` and ``user_agent`` are
        stripped, lower-cased strings; rows whose ``user_agent`` contains
        ``"bot"`` are removed. An empty payload yields an empty frame that
        still has these four columns.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    r = requests.get(f"{api_url}/visits", params=params, timeout=60)
    r.raise_for_status()
    df = pd.DataFrame(r.json()).rename(columns={"visit_id": "uuid", "datetime": "date"})
    if df.empty:
        # An empty payload produces a frame with no columns at all, so the
        # column accesses below would raise KeyError. Return a typed, empty
        # frame instead so downstream `.dt` / groupby code keeps working.
        return pd.DataFrame({
            "uuid": pd.Series(dtype="object"),
            "date": pd.Series(dtype="datetime64[ns]"),
            "platform": pd.Series(dtype="object"),
            "user_agent": pd.Series(dtype="object"),
        })
    df["date"] = pd.to_datetime(df["date"], errors="coerce")
    for c in ["uuid", "platform", "user_agent"]:
        # astype(str) also stringifies missing values (NaN becomes "nan").
        df[c] = df[c].astype(str).str.strip().str.lower()
    # remove bots
    return df[~df["user_agent"].str.contains("bot", case=False, na=False)].copy()

def load_regs(api_url: str, params: dict) -> pd.DataFrame:
    """Download registrations from ``{api_url}/registrations`` and normalise them.

    Args:
        api_url: Base URL of the API; ``/registrations`` is appended to it.
        params: Query-string parameters forwarded to the request.

    Returns:
        A DataFrame in which ``datetime`` is renamed to ``date`` and parsed to
        datetime (unparseable values become NaT); ``user_id``, ``email``,
        ``platform`` and ``registration_type`` are stripped, lower-cased
        strings. An empty payload yields an empty frame that still has these
        five columns.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    r = requests.get(f"{api_url}/registrations", params=params, timeout=60)
    r.raise_for_status()
    df = pd.DataFrame(r.json()).rename(columns={"datetime": "date"})
    if df.empty:
        # An empty payload produces a frame with no columns at all, so the
        # column accesses below would raise KeyError. Return a typed, empty
        # frame instead so downstream `.dt` / groupby code keeps working.
        return pd.DataFrame({
            "date": pd.Series(dtype="datetime64[ns]"),
            "user_id": pd.Series(dtype="object"),
            "email": pd.Series(dtype="object"),
            "platform": pd.Series(dtype="object"),
            "registration_type": pd.Series(dtype="object"),
        })
    df["date"] = pd.to_datetime(df["date"], errors="coerce")
    for c in ["user_id", "email", "platform", "registration_type"]:
        # astype(str) also stringifies missing values (NaN becomes "nan").
        df[c] = df[c].astype(str).str.strip().str.lower()
    return df

def aggregate_visits_last_per_uuid(visits: pd.DataFrame) -> pd.DataFrame:
    """Count visits per day and platform, keeping only each uuid's latest visit.

    Args:
        visits: Frame with ``uuid``, ``platform`` and a datetime ``date`` column.

    Returns:
        A frame with columns ``date_group`` (calendar date of the latest
        visit), ``platform`` (platform of that latest visit) and ``visits``
        (number of uuids). The input frame is not modified.
    """
    # NaT sorts last by default, so with keep="last" a single unparseable
    # timestamp would win over every real visit of that uuid; the uuid would
    # then disappear entirely because groupby drops NaT keys. Rows without a
    # usable date are therefore removed before picking the latest visit.
    dated = visits.dropna(subset=["date"])
    # A stable sort keeps the input order among identical timestamps, which
    # makes the choice of the "last" visit deterministic.
    v = dated.sort_values("date", kind="mergesort").drop_duplicates("uuid", keep="last").copy()
    v["date_group"] = v["date"].dt.date
    return v.groupby(["date_group", "platform"], as_index=False).agg(visits=("uuid", "count"))

def aggregate_regs(regs: pd.DataFrame) -> pd.DataFrame:
    """Count registrations per calendar day and platform.

    Returns a frame with columns ``date_group``, ``platform`` and
    ``registrations`` (count of ``user_id``). The input is left untouched.
    """
    group_keys = ["date_group", "platform"]
    with_day = regs.assign(date_group=regs["date"].dt.date)
    per_day = with_day.groupby(group_keys, as_index=False)
    return per_day.agg(registrations=("user_id", "count"))

def build_conversion(visits_daily: pd.DataFrame, regs_daily: pd.DataFrame) -> pd.DataFrame:
    """Join daily visits with daily registrations and compute conversion in percent.

    The frames are outer-joined on ``date_group`` and ``platform``; missing
    counts become 0. ``conversion`` is ``registrations / visits * 100`` and is
    0 wherever ``visits`` is 0. The result is sorted by the join keys and
    re-indexed from 0.
    """
    keys = ["date_group", "platform"]
    joined = pd.merge(visits_daily, regs_daily, how="outer", on=keys).fillna(0)
    joined = joined.astype({"visits": int, "registrations": int})
    # Replace 0 visits with NaN so the division yields NaN rather than inf.
    safe_visits = joined["visits"].replace(0, np.nan)
    rate = joined["registrations"] / safe_visits * 100
    joined["conversion"] = rate.fillna(0).astype(float)  # no rounding!
    return joined.sort_values(keys).reset_index(drop=True)

def load_ads_csv(path: str = "./ads.csv") -> pd.DataFrame:
    """Read the ads CSV and sum ``cost`` per calendar day and campaign.

    Column names are stripped and lower-cased first. ``date`` is parsed to
    datetime (unparseable values become NaT). The result has the columns
    ``date_group``, ``utm_campaign`` and ``cost``.
    """
    frame = pd.read_csv(path).rename(columns=lambda name: name.strip().lower())
    parsed = pd.to_datetime(frame["date"], errors="coerce")
    frame = frame.assign(date=parsed, date_group=parsed.dt.date)
    per_campaign = frame.groupby(["date_group", "utm_campaign"], as_index=False)
    return per_campaign["cost"].sum()

def daily_totals(conv: pd.DataFrame) -> pd.DataFrame:
    """Sum ``visits`` and ``registrations`` over all platforms for every ``date_group``."""
    per_day = conv.groupby("date_group", as_index=False)
    metric_cols = ["visits", "registrations"]
    return per_day[metric_cols].sum()

def merge_with_ads(daily: pd.DataFrame, ads_g: pd.DataFrame) -> pd.DataFrame:
    """Left-join ad costs onto the daily totals by ``date_group``.

    Days with no matching ads row get ``utm_campaign == "none"`` and
    ``cost == 0.0``; ``cost`` is always returned as float.
    """
    combined = daily.merge(ads_g, how="left", on="date_group")
    campaign = combined["utm_campaign"].fillna("none")
    spend = combined["cost"].fillna(0).astype(float)
    return combined.assign(utm_campaign=campaign, cost=spend)

def save_json(conv: pd.DataFrame, final: pd.DataFrame):
    """Write the two result frames as JSON files into the current directory.

    ``conv`` goes to ``./conversion.json`` and ``final`` to ``./ads.json``;
    each file contains only the listed columns, with floats written at
    10 decimal places of precision.
    """
    exports = (
        (conv, ["date_group", "platform", "visits", "registrations", "conversion"], "./conversion.json"),
        (final, ["date_group", "visits", "registrations", "cost", "utm_campaign"], "./ads.json"),
    )
    for frame, columns, target in exports:
        frame[columns].to_json(target, double_precision=10)

In [3]:
# ==== Charts ====
from matplotlib.ticker import MaxNLocator

def ensure_charts_dir() -> Path:
    """Create ``./charts`` (and parents) if it is missing and return its path."""
    charts_dir = Path("./charts")
    charts_dir.mkdir(parents=True, exist_ok=True)
    return charts_dir

def bar_with_labels(x, y, title, ylabel, fname):
    """Draw a bar chart with a value label above each bar and save it as a PNG.

    Args:
        x: Bar positions (also used as the label x-coordinates).
        y: Bar heights.
        title: Chart title.
        ylabel: Label of the y axis.
        fname: File name of the image inside the charts directory.
    """
    out_dir = ensure_charts_dir()
    fig, ax = plt.subplots()
    ax.bar(x, y)

    ax.set_title(title)
    ax.set_ylabel(ylabel)
    ax.set_xlabel("date_group")
    ax.grid(True, axis="y", linestyle=":", alpha=.4)
    ax.xaxis.set_tick_params(rotation=45)

    # Labels show the height truncated to an integer.
    for position, height in zip(x, y):
        caption = str(int(height))
        ax.text(position, height, caption, ha="center", va="bottom", fontsize=8)

    fig.tight_layout()
    fig.savefig(out_dir / fname, dpi=150)
    plt.close(fig)

def stacked_by_platform(conv: pd.DataFrame, value_col: str, title: str, fname: str,
                        platforms=("android", "ios", "web")):
    """Save a stacked bar chart of ``value_col`` per ``date_group``, one layer per platform.

    Args:
        conv: Frame with ``date_group``, ``platform`` and ``value_col`` columns.
        value_col: Column to sum and plot; also used as the y-axis label.
        title: Chart title.
        fname: File name of the image inside the charts directory.
        platforms: Platforms to stack, bottom to top. Defaults to the
            previously hard-coded ``("android", "ios", "web")``. Pass ``None``
            to stack every platform present in ``conv`` so that no data is
            silently left out of the chart.
    """
    charts = ensure_charts_dir()
    pivoted = conv.pivot_table(index="date_group", columns="platform", values=value_col, aggfunc="sum").fillna(0)
    if platforms is None:
        platforms = list(pivoted.columns)
    fig, ax = plt.subplots()
    bottom = None
    for plat in platforms:
        # A requested platform that is absent from the data is drawn as zeros.
        vals = pivoted.get(plat, pd.Series(0, index=pivoted.index))
        ax.bar(pivoted.index, vals, bottom=bottom, label=plat)
        # Running total: the next layer starts on top of everything drawn so far.
        bottom = vals if bottom is None else bottom + vals
    ax.set_title(title)
    ax.set_ylabel(value_col)
    ax.set_xlabel("date_group")
    ax.legend(title="platform")
    ax.grid(True, axis="y", linestyle=":", alpha=.4)
    ax.xaxis.set_tick_params(rotation=45)
    fig.tight_layout()
    fig.savefig(charts / fname, dpi=150)
    plt.close(fig)

def conversion_overall(conv: pd.DataFrame, fname: str):
    """Save a line chart of the overall daily conversion (all platforms combined).

    Conversion is ``sum(registrations) / sum(visits) * 100`` per
    ``date_group``; days with 0 visits are plotted as 0. Every point is
    labelled with its rounded percentage.
    """
    out_dir = ensure_charts_dir()
    totals = conv.groupby("date_group")[["registrations", "visits"]].sum()
    # Dividing by NaN instead of 0 keeps the ratio NaN, which is then set to 0.
    rate = totals["registrations"] / totals["visits"].replace(0, np.nan) * 100
    series = rate.fillna(0).sort_index()

    fig, ax = plt.subplots()
    ax.plot(series.index, series.values, marker="o", label="Общая конверсия")
    ax.set_title("Overall Conversion")
    ax.set_xlabel("Date")
    ax.set_ylabel("Conversion (%)")
    ax.grid(True, linestyle=":", alpha=.4)
    ax.legend()
    ax.xaxis.set_tick_params(rotation=45)

    for day, pct in zip(series.index, series.values):
        ax.text(day, pct, f"{round(pct)}%", ha="center", va="bottom", fontsize=8)

    fig.tight_layout()
    fig.savefig(out_dir / fname, dpi=150)
    plt.close(fig)

def conversion_by_platform(conv: pd.DataFrame):
    """Save one conversion line chart per platform as ``conversion_<platform>.png``.

    For each platform, conversion is ``sum(registrations) / sum(visits) * 100``
    per ``date_group``; days with 0 visits are plotted as 0. Every point is
    labelled with its rounded percentage.
    """
    # We save three separate figures (the examples had "three panels", but a
    # file/figure per platform is simpler).
    out_dir = ensure_charts_dir()
    for platform_name, platform_rows in conv.groupby("platform"):
        totals = platform_rows.groupby("date_group")[["registrations", "visits"]].sum()
        # Dividing by NaN instead of 0 keeps the ratio NaN, which is then set to 0.
        rate = totals["registrations"] / totals["visits"].replace(0, np.nan) * 100
        series = rate.fillna(0).sort_index()

        fig, ax = plt.subplots()
        ax.plot(series.index, series.values, marker="o", label=platform_name)
        ax.set_title(f"Conversion {platform_name}")
        ax.set_xlabel("Date")
        ax.set_ylabel("Conversion (%)")
        ax.grid(True, linestyle=":", alpha=.4)
        ax.legend()
        ax.xaxis.set_tick_params(rotation=45)

        for day, pct in zip(series.index, series.values):
            ax.text(day, pct, f"{round(pct)}%", ha="center", va="bottom", fontsize=7)

        fig.tight_layout()
        fig.savefig(out_dir / f"conversion_{platform_name}.png", dpi=150)
        plt.close(fig)

def ads_costs_line(ads_g: pd.DataFrame, fname: str):
    """Save a line chart of total ad cost per ``date_group``.

    Costs are summed over all rows of the same day; each point is labelled
    with the cost truncated to an integer, followed by ``RUB``.
    """
    out_dir = ensure_charts_dir()
    per_day = ads_g.groupby("date_group")["cost"].sum().sort_index()

    fig, ax = plt.subplots()
    ax.plot(per_day.index, per_day.values, marker="o")
    ax.set_title("Aggregated Ad Campaign Costs (by day)")
    ax.set_xlabel("Date")
    ax.set_ylabel("Cost (RUB)")
    ax.grid(True, linestyle=":", alpha=.4)
    ax.xaxis.set_tick_params(rotation=45)

    for day, amount in zip(per_day.index, per_day.values):
        ax.text(day, amount, f"{int(amount)} RUB", ha="center", va="bottom", fontsize=7)

    fig.tight_layout()
    fig.savefig(out_dir / fname, dpi=150)
    plt.close(fig)

In [4]:
# ==== MAIN ====
# End-to-end run: fetch from the API, aggregate per day, join with ad costs,
# export JSON and render the charts.

# Raw visits (bots removed) and registrations for the configured date range.
vis = load_visits(API_URL, PARAMS)
reg = load_regs(API_URL, PARAMS)

# Daily counts per platform, then the per-platform conversion table.
vis_d = aggregate_visits_last_per_uuid(vis)
reg_d = aggregate_regs(reg)
conv  = build_conversion(vis_d, reg_d)

# Totals over all platforms per day, joined with ad costs per day and campaign.
daily = daily_totals(conv)
ads_g = load_ads_csv("./ads.csv")
final = merge_with_ads(daily, ads_g)

# save JSON (full precision, no rounding)
save_json(conv, final)

# charts: less aggregated, as in the examples
bar_with_labels(daily["date_group"], daily["visits"], "Total Visits", "visits", "visits_total.png")
stacked_by_platform(conv, "visits", "Visits by Platform (Stacked)", "visits_stacked.png")
bar_with_labels(daily["date_group"], daily["registrations"], "Total Registrations", "registrations", "registrations_total.png")
# NOTE(review): the title says "Weekly" although conv is grouped by calendar
# day (date_group) — confirm whether this wording is intended.
stacked_by_platform(conv, "registrations", "Weekly Registrations by Platform (Stacked)", "registrations_stacked.png")
conversion_overall(conv, "conversion_overall.png")
conversion_by_platform(conv)
ads_costs_line(ads_g, "ads_costs.png")

# Last expression of the cell: show a preview of both result tables.
conv.head(), final.head()

(   date_group platform  visits  registrations  conversion
 0  2023-03-01  android      75             61   81.333333
 1  2023-03-01      ios      22             18   81.818182
 2  2023-03-01      web     279              8    2.867384
 3  2023-03-02  android      67             59   88.059701
 4  2023-03-02      ios      31             24   77.419355,
    date_group  visits  registrations                utm_campaign   cost
 0  2023-03-01     376             87  advanced_algorithms_series  212.0
 1  2023-03-02     613            106  advanced_algorithms_series  252.0
 2  2023-03-03     683            107  advanced_algorithms_series  202.0
 3  2023-03-04     647            159  advanced_algorithms_series  223.0
 4  2023-03-05     707            115  advanced_algorithms_series  265.0)

In [5]:
from pathlib import Path
# Lists every PNG currently in ./charts, which can include files left over
# from earlier runs, not only the charts written by this notebook run.
print("Saved:", sorted(png.name for png in Path("./charts").glob("*.png")))
# Confirm that both JSON exports exist in the working directory.
print(f"conversion.json: {Path('conversion.json').exists()} ads.json: {Path('ads.json').exists()}")

Saved: ['_smoke.png', 'ads_cost_daily.png', 'ads_costs.png', 'conversion_android.png', 'conversion_average.png', 'conversion_by_platform_daily.png', 'conversion_ios.png', 'conversion_overall.png', 'conversion_web.png', 'registrations_by_platform_total.png', 'registrations_stacked.png', 'registrations_total.png', 'registrations_with_campaigns.png', 'visits_by_platform_total.png', 'visits_stacked.png', 'visits_total.png', 'visits_with_campaigns.png']
conversion.json: True ads.json: True
