In [8]:
from media_impact_monitor.impact import (
    get_impact,
    ImpactSearch,
    correlation,
    time_series_regression,
    interrupted_time_series,
)
from media_impact_monitor.events import get_events, EventSearch
from datetime import date
from media_impact_monitor.trend import get_trend, TrendSearch
import pandas as pd
from great_tables import GT
from IPython.display import display, HTML

organizers = ["Fridays for Future", "Extinction Rebellion", "Greenpeace"]
# organizers = [organizers[0]]
methods = ["correlation", "time_series_regression", "interrupted_time_series"]
# methods = [methods[2]]
start_date = date(2022, 1, 1)
end_date = date(2024, 1, 1)
rows = []
for method in methods:
    trend = get_trend(
        TrendSearch(
            trend_type="keywords",
            media_source="news_print",
            topic="climate_change",
            start_date=start_date,
            end_date=end_date,
        ),
        as_json=False,
    )["climate activism"].rename("count")
    for organizer in organizers:
        events = get_events(
            EventSearch(
                source="acled",
                organizers=[organizer],
                start_date=start_date,
                end_date=end_date,
            )
        )
        aggregation = "daily"
        match method:
            case "correlation":
                row = dict(
                    organizer=organizer,
                    method=method,
                    aggregation=aggregation,
                )
                ts, limitations = correlation(
                    events=events,
                    article_counts=trend,
                )
                row["time_series"] = " ".join(ts["correlation"].values.astype(str))
                rows.append(row)
            case "time_series_regression":
                for lags, ewmas in [
                    ([], []),
                    ([1], []),
                    ([1, 2, 3, 4, 5], []),
                    ([4, 5, 6, 7, 8], []),
                    ([], [1, 7, 28, 84]),
                ]:
                    ts, limitations = time_series_regression(
                        events=events,
                        article_counts=trend,
                        lags=lags,
                    )
                    row = dict(
                        organizer=organizer,
                        method=method,
                        aggregation=aggregation,
                        lags=lags,
                        ewmas=ewmas,
                    )
                    row["rmse"] = ts["rmse"].loc[7]
                    row["rmse_std"] = ts["rmse_std"].loc[7]
                    row["ame"] = ts["ame"].loc[7]
                    row["ame_std"] = ts["ame_std"].loc[7]
                    row["time_series"] = " ".join(ts["mean"].values.astype(str))
                    rows.append(row)
            case "interrupted_time_series":
                for hidden_days_before_protest in [0, 1]:
                    row = dict(
                        organizer=organizer,
                        method=method,
                        aggregation=aggregation,
                        hidden_days_before_protest=hidden_days_before_protest,
                    )
                    ts, lims = interrupted_time_series(
                        events=events,
                        article_counts=trend,
                        horizon=14,
                        hidden_days_before_protest=hidden_days_before_protest,
                        aggregation=aggregation,
                    )
                    row["time_series"] = " ".join(ts["mean"].values.astype(str))
                    rows.append(row)

# Render one HTML table per organizer from the collected result rows.
df = pd.DataFrame(rows)
for organizer in organizers:
    df_ = df[df["organizer"] == organizer].drop(columns="organizer")
    # make "time_series" the last column
    df_ = df_[[c for c in df_.columns if c != "time_series"] + ["time_series"]]
    table = GT(df_)
    # The metric columns only exist when "time_series_regression" was among
    # the methods that ran; format only the ones that are actually present so
    # that running a subset of methods does not break rendering.
    metric_columns = [
        c for c in ["rmse", "rmse_std", "ame", "ame_std"] if c in df_.columns
    ]
    if metric_columns:
        table = table.fmt_number(columns=metric_columns, decimals=2)
    # restriction in great_tables, only supports one nanoplot column
    table = table.fmt_nanoplot(columns="time_series")
    html = table.render("html")
    display(HTML(f"<h1>{organizer}</h1>"))
    display(HTML(html))


  0%|          | 0/61 [00:00<?, ?it/s]

  0%|          | 0/61 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

0,1,2,3,4,5,6,7,8,9
correlation,daily,,,,,,,,0.28−0.0856.0E−304.7E−308.1E−30−0.013−0.020−0.023−0.023−0.027−0.033−0.016−0.067−0.085−0.081−0.0600.140.280.160.160.140.140.140.140.150.120.110.110.110.11
time_series_regression,daily,[],[],491.89,180.76,385.97,146.39,,14.4−2.560.850.32−0.42−0.57−1.00−1.14−1.62−1.98−2.52−2.35−2.55−2.56−1.58−0.962.437.417.658.959.569.9410.311.712.813.013.413.813.914.4
time_series_regression,daily,[1],[],457.14,116.78,348.89,94.52,,11.5−0.824.774.123.232.952.352.031.380.760.025−0.034−0.52−0.82−0.19−4.0E−162.026.666.617.628.008.188.289.5710.510.610.811.111.111.5
time_series_regression,daily,"[1, 2, 3, 4, 5]",[],435.81,124.38,330.72,93.64,,11.2−0.0364.653.983.062.802.241.851.250.67−0.036−1.4E−14−2.0E−14−4.9E−15−6.0E−154.9E−151.996.566.477.497.818.018.119.4010.210.310.510.810.811.2
time_series_regression,daily,"[4, 5, 6, 7, 8]",[],468.44,148.94,367.39,118.00,,14.9−2.65−0.69−1.12−1.90−1.94−2.30−2.28−2.62−2.62−2.62−2.62−2.65−2.65−1.60−0.982.587.587.919.229.9410.310.712.213.213.513.914.314.314.9
time_series_regression,daily,[],"[1, 7, 28, 84]",491.89,180.76,385.97,146.39,,14.4−2.560.850.32−0.42−0.57−1.00−1.14−1.62−1.98−2.52−2.35−2.55−2.56−1.58−0.962.437.417.658.959.569.9410.311.712.813.013.413.813.914.4
interrupted_time_series,daily,,,,,,,0.0,43186.186.1277276318329339337387419418421429426431
interrupted_time_series,daily,,,,,,,1.0,54324.724.7121318322369386401406462500505514527530543
method,aggregation,lags,ewmas,rmse,rmse_std,ame,ame_std,hidden_days_before_protest,time_series


0,1,2,3,4,5,6,7,8,9
correlation,daily,,,,,,,,0.039−0.045−0.034−0.023−0.031−0.033−0.038−0.045−0.039−0.031−0.028−0.040−0.040−0.043−0.040−0.0340.0397.0E−30−1.8E−30−0.0111.8E−309.8E−300.0170.0288.8E−307.5E−300.0110.0180.0250.032
time_series_regression,daily,[],[],491.39,177.79,382.43,141.96,,74.3−80.3−80.3−61.0−53.8−52.0−46.0−56.6−53.9−42.5−42.7−29.9−22.4−7.70−3.88−7.784.5415.812.314.120.716.723.533.526.727.046.556.261.274.3
time_series_regression,daily,[1],[],454.51,117.56,343.99,93.79,,58.0−60.3−60.3−42.2−34.3−30.8−25.6−37.4−35.1−23.4−25.4−14.4−6.056.487.22−1.8E−131.4810.35.544.9910.64.499.7319.111.912.130.139.244.858.0
time_series_regression,daily,"[1, 2, 3, 4, 5]",[],429.63,112.62,331.16,91.32,,55.5−30.5−30.5−11.3−3.69−1.421.06−11.2−10.8−2.97−6.54−4.3E−13−4.0E−13−3.2E−13−1.2E−13−1.3E−132.209.926.093.137.742.858.0616.57.678.0426.235.541.755.5
time_series_regression,daily,"[4, 5, 6, 7, 8]",[],462.89,148.80,367.49,119.84,,50.0−2.333.0322.026.026.928.714.010.710.710.710.710.710.77.10−2.332.588.283.170.415.68−0.844.4314.04.114.7023.332.437.050.0
time_series_regression,daily,[],"[1, 7, 28, 84]",491.39,177.79,382.43,141.96,,74.3−80.3−80.3−61.0−53.8−52.0−46.0−56.6−53.9−42.5−42.7−29.9−22.4−7.70−3.88−7.784.5415.812.314.120.716.723.533.526.727.046.556.261.274.3
interrupted_time_series,daily,,,,,,,0.0,8.03−34.2−0.978.03−6.77−14.0−7.22−16.5−18.1−13.3−23.8−34.2−23.1−13.3−13.4−8.45
interrupted_time_series,daily,,,,,,,1.0,11.8−24.42.092.0511.8−2.32−8.78−1.32−9.91−10.6−5.01−14.8−24.4−12.5−1.97−1.244.68
method,aggregation,lags,ewmas,rmse,rmse_std,ame,ame_std,hidden_days_before_protest,time_series


0,1,2,3,4,5,6,7,8,9
correlation,daily,,,,,,,,0.091−0.086−0.050−0.053−0.059−0.065−0.070−0.083−0.086−0.081−0.061−0.050−0.052−0.054−0.029−0.0100.0410.0910.0770.0620.0580.0570.0590.0790.0740.0700.0630.0620.0550.058
time_series_regression,daily,[],[],484.53,167.95,373.88,140.81,,114−101−93.7−101−100−97.0−92.2−101−97.5−87.8−67.3−44.3−32.7−23.3−3.294.324.5540.451.254.359.662.063.687.796.9106107114104107
time_series_regression,daily,[1],[],449.42,119.18,336.21,100.58,,128−117−111−117−113−110−104−110−106−99.3−79.0−55.0−42.7−33.2−11.0−7.1E−144.1142.354.758.765.970.170.696.3107117119128120123
time_series_regression,daily,"[1, 2, 3, 4, 5]",[],428.86,109.36,324.26,88.74,,89.4−33.2−11.9−22.7−22.8−24.2−21.4−33.2−32.9−30.2−16.63.2E−132.4E−132.1E−131.3E−137.3E−14−0.8432.940.641.544.146.043.766.474.682.281.889.480.281.9
time_series_regression,daily,"[4, 5, 6, 7, 8]",[],465.41,158.02,365.84,131.10,,74.4−3.2644.931.726.321.317.42.32−3.26−3.26−3.26−3.26−3.26−3.2610.310.7−0.3128.835.432.535.032.331.655.063.371.667.774.461.460.3
time_series_regression,daily,[],"[1, 7, 28, 84]",484.53,167.95,373.88,140.81,,114−101−93.7−101−100−97.0−92.2−101−97.5−87.8−67.3−44.3−32.7−23.3−3.294.324.5540.451.254.359.662.063.687.796.9106107114104107
interrupted_time_series,daily,,,,,,,0.0,43.7−2.92−2.9231.635.331.031.525.421.040.841.243.736.338.017.014.8
interrupted_time_series,daily,,,,,,,1.0,4.81−59.1−16.2−25.64.744.81−3.01−6.02−15.6−24.8−9.12−12.5−13.8−25.0−27.1−51.8−59.1
method,aggregation,lags,ewmas,rmse,rmse_std,ame,ame_std,hidden_days_before_protest,time_series
