In [2]:
from media_impact_monitor.impact import get_impact, ImpactSearch, time_series_regression
from media_impact_monitor.events import get_events, EventSearch
from datetime import date
from media_impact_monitor.trend import get_trend, TrendSearch
import pandas as pd
from great_tables import GT
from IPython.display import display, HTML

# Experiment configuration.
# Three organizers are listed, but only the first one is kept for analysis.
candidate_organizers = ("Fridays for Future", "Extinction Rebellion", "Greenpeace")
organizers = list(candidate_organizers[:1])

# Date window handed to both the trend query and the event query.
start_date = date(year=2022, month=1, day=1)
end_date = date(year=2024, month=1, day=1)

# One dict per (organizer, method, feature configuration); filled by the loop below.
rows = list()
for method in ["time_series_regression"]:
    trend = get_trend(
        TrendSearch(
            trend_type="keywords",
            media_source="news_print",
            topic="climate_change",
            start_date=start_date,
            end_date=end_date,
        ),
        as_json=False,
    )["climate activism"].rename("count")
    for organizer in organizers:
        events = get_events(
            EventSearch(
                source="acled",
                organizers=[organizer],
                start_date=start_date,
                end_date=end_date,
            )
        )
        aggregation = "daily"
        match method:
            case "time_series_regression":
                for lags, ewmas in [
                    ([], []),
                    ([1], []),
                    ([1, 2, 3, 4, 5], []),
                    ([4, 5, 6, 7, 8], []),
                    ([], [1, 7, 28, 84]),
                ]:
                    ts, limitations = time_series_regression(
                        events=events,
                        article_counts=trend,
                        aggregation=aggregation,
                        lags=lags,
                    )
                    row = dict(
                        organizer=organizer,
                        method=method,
                        aggregation=aggregation,
                        lags=lags,
                        ewmas=ewmas,
                    )
                    row["rmse"] = ts["rmse"].loc[7]
                    row["rmse_std"] = ts["rmse_std"].loc[7]
                    row["ame"] = ts["ame"].loc[7]
                    row["ame_std"] = ts["ame_std"].loc[7]
                    row["time_series"] = " ".join(ts["mean"].values.astype(str))
                    rows.append(row)
# Collect all result rows into one frame, then render one HTML table per organizer.
df = pd.DataFrame(rows)
metric_columns = ["rmse", "rmse_std", "ame", "ame_std"]
for organizer in organizers:
    # Keep this organizer's rows; the organizer column is redundant inside
    # its own table, so it is dropped and shown as the heading instead.
    belongs_to_organizer = df["organizer"] == organizer
    df_ = df.loc[belongs_to_organizer].drop(columns="organizer")

    table = GT(df_)
    table = table.fmt_number(columns=metric_columns, decimals=2)
    # restriction in great_tables, only supports one nanoplot column
    table = table.fmt_nanoplot(columns="time_series")
    html = table.render("html")

    # Heading first, then the table itself.
    for fragment in (f"<h1>{organizer}</h1>", html):
        display(HTML(fragment))


0,1,2,3,4,5,6,7,8
time_series_regression,daily,[],[],491.89,180.76,385.97,146.39,14.4−2.560.850.32−0.42−0.57−1.00−1.14−1.62−1.98−2.52−2.35−2.55−2.56−1.58−0.962.437.417.658.959.569.9410.311.712.813.013.413.813.914.4
time_series_regression,daily,[1],[],457.14,116.78,348.89,94.52,11.5−0.824.774.123.232.952.352.031.380.760.025−0.034−0.52−0.82−0.19−4.0E−162.026.666.617.628.008.188.289.5710.510.610.811.111.111.5
time_series_regression,daily,"[1, 2, 3, 4, 5]",[],435.81,124.38,330.72,93.64,11.2−0.0364.653.983.062.802.241.851.250.67−0.036−1.4E−14−2.0E−14−4.9E−15−6.0E−154.9E−151.996.566.477.497.818.018.119.4010.210.310.510.810.811.2
time_series_regression,daily,"[4, 5, 6, 7, 8]",[],468.44,148.94,367.39,118.00,14.9−2.65−0.69−1.12−1.90−1.94−2.30−2.28−2.62−2.62−2.62−2.62−2.65−2.65−1.60−0.982.587.587.919.229.9410.310.712.213.213.513.914.314.314.9
time_series_regression,daily,[],"[1, 7, 28, 84]",491.89,180.76,385.97,146.39,14.4−2.560.850.32−0.42−0.57−1.00−1.14−1.62−1.98−2.52−2.35−2.55−2.56−1.58−0.962.437.417.658.959.569.9410.311.712.813.013.413.813.914.4
method,aggregation,lags,ewmas,rmse,rmse_std,ame,ame_std,time_series
