In [3]:
from media_impact_monitor.impact import get_impact, ImpactSearch, time_series_regression
from media_impact_monitor.events import get_events, EventSearch
from datetime import date
from media_impact_monitor.trend import get_trend, TrendSearch
import pandas as pd
from great_tables import GT
from IPython.display import display, HTML

organizers = ["Fridays for Future", "Extinction Rebellion", "Greenpeace"]
start_date = date(2022, 1, 1)
end_date = date(2024, 1, 1)
rows = []
for method in ["time_series_regression"]:
    trend = get_trend(
        TrendSearch(
            trend_type="keywords",
            media_source="news_print",
            topic="climate_change",
            start_date=start_date,
            end_date=end_date,
        ),
        as_json=False,
    )["climate activism"].rename("count")
    for organizer in organizers:
        events = get_events(
            EventSearch(
                source="acled",
                organizers=[organizer],
                start_date=start_date,
                end_date=end_date,
            )
        )

        match method:
            case "time_series_regression":
                for lags, ewmas in [
                    ([], []),
                    ([1], []),
                    ([1, 2, 3, 4, 5], []),
                    ([4, 5, 6, 7, 8], []),
                    ([], [7]),
                    ([], [1, 7, 28]),
                ]:
                    ts, limitations = time_series_regression(
                        events=events,
                        article_counts=trend,
                        aggregation="daily",
                        lags=lags,
                    )
                    row = dict(
                        organizer=organizer,
                        method=method,
                        lags=lags,
                        ewmas=ewmas,
                    )
                    row["rmse"] = ts["rmse"].loc[7]
                    row["time_series"] = " ".join(ts["mean"].values.astype(str))
                    rows.append(row)
df = pd.DataFrame(rows)
# Show one results table per organizer: a heading followed by the rendered
# great_tables HTML for that organizer's rows.
for organizer in organizers:
    is_current = df["organizer"] == organizer
    df_ = df.loc[is_current].drop(columns="organizer")

    table = GT(df_)
    table = table.fmt_number(columns=["rmse"], decimals=2)
    # restriction in great_tables, only supports one nanoplot column
    table = table.fmt_nanoplot(columns="time_series")
    # Render before emitting anything so a rendering failure leaves no
    # orphaned heading in the output.
    html = table.render("html")

    heading = HTML(f"<h1>{organizer}</h1>")
    display(heading)
    display(HTML(html))


0,1,2,3,4
time_series_regression,[],[],491.89,14.4−2.560.850.32−0.42−0.57−1.00−1.14−1.62−1.98−2.52−2.35−2.55−2.56−1.58−0.962.437.417.658.959.569.9410.311.712.813.013.413.813.914.4
time_series_regression,[1],[],457.14,11.5−0.824.774.123.232.952.352.031.380.760.025−0.034−0.52−0.82−0.19−4.0E−162.026.666.617.628.008.188.289.5710.510.610.811.111.111.5
time_series_regression,"[1, 2, 3, 4, 5]",[],435.81,11.2−0.0364.653.983.062.802.241.851.250.67−0.036−1.4E−14−2.0E−14−4.9E−15−6.0E−154.9E−151.996.566.477.497.818.018.119.4010.210.310.510.810.811.2
time_series_regression,"[4, 5, 6, 7, 8]",[],468.44,14.9−2.65−0.69−1.12−1.90−1.94−2.30−2.28−2.62−2.62−2.62−2.62−2.65−2.65−1.60−0.982.587.587.919.229.9410.310.712.213.213.513.914.314.314.9
time_series_regression,[],[7],491.89,14.4−2.560.850.32−0.42−0.57−1.00−1.14−1.62−1.98−2.52−2.35−2.55−2.56−1.58−0.962.437.417.658.959.569.9410.311.712.813.013.413.813.914.4
time_series_regression,[],"[1, 7, 28]",491.89,14.4−2.560.850.32−0.42−0.57−1.00−1.14−1.62−1.98−2.52−2.35−2.55−2.56−1.58−0.962.437.417.658.959.569.9410.311.712.813.013.413.813.914.4
method,lags,ewmas,rmse,time_series


0,1,2,3,4
time_series_regression,[],[],491.39,74.3−80.3−80.3−61.0−53.8−52.0−46.0−56.6−53.9−42.5−42.7−29.9−22.4−7.70−3.88−7.784.5415.812.314.120.716.723.533.526.727.046.556.261.274.3
time_series_regression,[1],[],454.51,58.0−60.3−60.3−42.2−34.3−30.8−25.6−37.4−35.1−23.4−25.4−14.4−6.056.487.22−1.8E−131.4810.35.544.9910.64.499.7319.111.912.130.139.244.858.0
time_series_regression,"[1, 2, 3, 4, 5]",[],429.63,55.5−30.5−30.5−11.3−3.69−1.421.06−11.2−10.8−2.97−6.54−4.3E−13−4.0E−13−3.2E−13−1.2E−13−1.3E−132.209.926.093.137.742.858.0616.57.678.0426.235.541.755.5
time_series_regression,"[4, 5, 6, 7, 8]",[],462.89,50.0−2.333.0322.026.026.928.714.010.710.710.710.710.710.77.10−2.332.588.283.170.415.68−0.844.4314.04.114.7023.332.437.050.0
time_series_regression,[],[7],491.39,74.3−80.3−80.3−61.0−53.8−52.0−46.0−56.6−53.9−42.5−42.7−29.9−22.4−7.70−3.88−7.784.5415.812.314.120.716.723.533.526.727.046.556.261.274.3
time_series_regression,[],"[1, 7, 28]",491.39,74.3−80.3−80.3−61.0−53.8−52.0−46.0−56.6−53.9−42.5−42.7−29.9−22.4−7.70−3.88−7.784.5415.812.314.120.716.723.533.526.727.046.556.261.274.3
method,lags,ewmas,rmse,time_series


0,1,2,3,4
time_series_regression,[],[],484.53,114−101−93.7−101−100−97.0−92.2−101−97.5−87.8−67.3−44.3−32.7−23.3−3.294.324.5540.451.254.359.662.063.687.796.9106107114104107
time_series_regression,[1],[],449.42,128−117−111−117−113−110−104−110−106−99.3−79.0−55.0−42.7−33.2−11.0−7.1E−144.1142.354.758.765.970.170.696.3107117119128120123
time_series_regression,"[1, 2, 3, 4, 5]",[],428.86,89.4−33.2−11.9−22.7−22.8−24.2−21.4−33.2−32.9−30.2−16.63.2E−132.4E−132.1E−131.3E−137.3E−14−0.8432.940.641.544.146.043.766.474.682.281.889.480.281.9
time_series_regression,"[4, 5, 6, 7, 8]",[],465.41,74.4−3.2644.931.726.321.317.42.32−3.26−3.26−3.26−3.26−3.26−3.2610.310.7−0.3128.835.432.535.032.331.655.063.371.667.774.461.460.3
time_series_regression,[],[7],484.53,114−101−93.7−101−100−97.0−92.2−101−97.5−87.8−67.3−44.3−32.7−23.3−3.294.324.5540.451.254.359.662.063.687.796.9106107114104107
time_series_regression,[],"[1, 7, 28]",484.53,114−101−93.7−101−100−97.0−92.2−101−97.5−87.8−67.3−44.3−32.7−23.3−3.294.324.5540.451.254.359.662.063.687.796.9106107114104107
method,lags,ewmas,rmse,time_series
