In [5]:
import logging
import os
import time

import dill
import numpy as np
import pandas as pd
import plotly.io as pio
import pyro
import pyro.distributions as dist
import pyro.optim as optim
import torch
import torch.nn.functional as F
from plotly import express as px
from pyro.infer import SVI, Predictive, Trace_ELBO
from pyro.infer.autoguide import AutoMultivariateNormal, init_to_mean

from collab.foraging import toolkit as ft
from collab.utils import find_repo_root

logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
root = find_repo_root()

smoke_test = "CI" in os.environ
num_svi_iters = 50 if smoke_test else 1000
num_samples = 50 if smoke_test else 1000
keys = [50] if smoke_test else [10, 20, 30, 40, 50, 60, 70, 80]
sampling_rate = 0.01 if smoke_test else 0.01

In [2]:
# this file is generated using `centrap_park_birds_predictors.ipynb`
path = os.path.join(
    root,
    f"data/foraging/central_park_birds_cleaned_2022/central_park_objects_sampling_rate_{sampling_rate}.pkl",
)
with open(path, "rb") as file:
    central_park_objects = dill.load(file)

In [3]:
def cp_prep_data_for_iference(obj):
    df = obj.how_farDF.copy()
    print("Initial dataset size:", len(df))
    df.dropna(inplace=True)
    print("After dropping NAs:", len(df))

    columns_to_normalize = [
        "distance",
        "proximity_standardized",
    ]

    for column in columns_to_normalize:
        df[column] = ft.normalize(df[column])

    return (
        torch.tensor(df["distance"].values),
        torch.tensor(df["proximity_standardized"].values),
        torch.tensor(df["how_far_squared_scaled"].values),
    )

In [4]:
def model_sigmavar_proximity(distance, proximity, how_far):
    d = pyro.sample("d", dist.Normal(0, 0.6))
    p = pyro.sample("p", dist.Normal(0, 0.6))
    b = pyro.sample("b", dist.Normal(0.5, 0.6))

    ds = pyro.sample("ds", dist.Normal(0, 0.6))
    ps = pyro.sample("ps", dist.Normal(0, 0.6))
    bs = pyro.sample("bs", dist.Normal(0.2, 0.6))

    sigmaRaw = bs + ds * distance + ps * proximity
    sigma = pyro.deterministic("sigma", F.softplus(sigmaRaw))
    mean = b + d * distance + p * proximity

    with pyro.plate("data", len(how_far)):
        pyro.sample("obs", dist.Normal(mean, sigma), obs=how_far)

In [5]:
def get_samples(
    distance,
    proximity,
    how_far,
    model=model_sigmavar_proximity,
    num_svi_iters=num_svi_iters,
    num_samples=num_samples,
):
    guide = AutoMultivariateNormal(model, init_loc_fn=init_to_mean)
    svi = SVI(
        model_sigmavar_proximity, guide, optim.Adam({"lr": 0.01}), loss=Trace_ELBO()
    )

    iterations = []
    losses = []

    logging.info(f"Starting SVI inference with {num_svi_iters} iterations.")
    start_time = time.time()
    pyro.clear_param_store()
    for i in range(num_svi_iters):
        elbo = svi.step(distance, proximity, how_far)
        iterations.append(i)
        losses.append(elbo)
        if i % 50 == 0:
            logging.info("Elbo loss: {}".format(elbo))
    end_time = time.time()
    elapsed_time = end_time - start_time
    logging.info("SVI inference completed in %.2f seconds.", elapsed_time)

    fig = px.line(x=iterations, y=losses, title="ELBO loss", template="presentation")
    labels = {"iterations": "iteration", "losses": "loss"}
    fig.update_xaxes(showgrid=False, title_text=labels["iterations"])
    fig.update_yaxes(showgrid=False, title_text=labels["losses"])
    fig.update_layout(width=700)
    fig.show()

    predictive = Predictive(model, guide=guide, num_samples=num_samples)

    proximity_svi = {
        k: v.flatten().reshape(num_samples, -1).detach().cpu().numpy()
        for k, v in predictive(distance, proximity, how_far).items()
        if k != "obs"
    }

    print("SVI-based coefficient marginals:")
    for site, values in ft.summary(proximity_svi, ["d", "p"]).items():
        print("Site: {}".format(site))
        print(values, "\n")

    return {
        "svi_samples": proximity_svi,
        "svi_guide": guide,
        "svi_predictive": predictive,
    }

In [7]:
ducks_objects = central_park_objects[0]
# for ducks starting that low might not make sense
# [19, 46, 85]
duck_outcomes = {}

for key in keys:
    obj = ducks_objects[key]
    print(f"Working on ducks with optimal={key}")
    distance, proximity, how_far = cp_prep_data_for_iference(obj)
    ft.visualise_forager_predictors(
        distance,
        proximity,
        how_far,
        vis_sampling_rate=0.05,
        titles=[f"Distance (ducks)", f"Proximity (ducks, optimal={key})"],
        x_axis_labels=["distance", "proximity"],
    )
    duck_outcomes[key] = get_samples(distance, proximity, how_far)

path = os.path.join(
    root, "data/foraging/central_park_birds_cleaned_2022/duck_outcomes.pkl"
)

if not smoke_test:
    if not os.path.exists(path):
        with open(path, "wb") as file:
            dill.dump(duck_outcomes, file)

Working on ducks with optimal=10
Initial dataset size: 101213
After dropping NAs: 99637


2024-02-28 10:30:52,119 - Starting SVI inference with 1000 iterations.
2024-02-28 10:30:52,147 - Elbo loss: 88392.64030475242
2024-02-28 10:30:53,100 - Elbo loss: -5171.960993208447
2024-02-28 10:30:53,984 - Elbo loss: -72691.11161848641
2024-02-28 10:30:54,862 - Elbo loss: -92226.80065953688
2024-02-28 10:30:55,412 - Elbo loss: -99632.43990517125
2024-02-28 10:30:55,894 - Elbo loss: -93972.42554694571
2024-02-28 10:30:56,393 - Elbo loss: -99536.53282101094
2024-02-28 10:30:56,880 - Elbo loss: -99694.3586460086
2024-02-28 10:30:57,374 - Elbo loss: -96294.13737888995
2024-02-28 10:30:57,855 - Elbo loss: -98029.28366527538
2024-02-28 10:30:58,337 - Elbo loss: -103549.6953052851
2024-02-28 10:30:58,802 - Elbo loss: -105062.36057730847
2024-02-28 10:30:59,250 - Elbo loss: -105136.24444983198
2024-02-28 10:30:59,696 - Elbo loss: -104869.49128524368
2024-02-28 10:31:00,162 - Elbo loss: -101746.67259781924
2024-02-28 10:31:00,607 - Elbo loss: -105341.95241929556
2024-02-28 10:31:01,055 - Elbo

SVI-based coefficient marginals:
Site: d
       mean       std        5%       25%       50%       75%       95%
0 -0.352808  0.021886 -0.388245 -0.367696 -0.353739 -0.338117 -0.316197 

Site: p
       mean       std        5%      25%       50%       75%       95%
0  0.034335  0.027993 -0.011785  0.01553  0.034157  0.052187  0.081856 

Working on ducks with optimal=20
Initial dataset size: 101213
After dropping NAs: 99637


2024-02-28 10:31:06,425 - Starting SVI inference with 1000 iterations.
2024-02-28 10:31:06,447 - Elbo loss: 101192.48307622678
2024-02-28 10:31:07,129 - Elbo loss: 7357.475642933282
2024-02-28 10:31:07,952 - Elbo loss: -33969.9777511019
2024-02-28 10:31:08,426 - Elbo loss: -83428.72717731158
2024-02-28 10:31:08,896 - Elbo loss: -98188.03349420994
2024-02-28 10:31:09,357 - Elbo loss: -98244.58351942251
2024-02-28 10:31:09,829 - Elbo loss: -104463.73303462421
2024-02-28 10:31:10,292 - Elbo loss: -105497.65522791386
2024-02-28 10:31:10,758 - Elbo loss: -102713.54587689802
2024-02-28 10:31:11,221 - Elbo loss: -107699.17613950794
2024-02-28 10:31:11,686 - Elbo loss: -101029.48476960522
2024-02-28 10:31:12,154 - Elbo loss: -107638.92920289887
2024-02-28 10:31:12,622 - Elbo loss: -105385.92915919767
2024-02-28 10:31:13,140 - Elbo loss: -106149.97377449299
2024-02-28 10:31:13,631 - Elbo loss: -104662.19901904708
2024-02-28 10:31:14,146 - Elbo loss: -107753.70032377218
2024-02-28 10:31:14,629 -

SVI-based coefficient marginals:
Site: d
       mean       std        5%       25%       50%      75%       95%
0 -0.340392  0.020116 -0.373184 -0.353521 -0.341299 -0.32665 -0.306833 

Site: p
       mean       std        5%       25%       50%       75%       95%
0  0.106177  0.024567  0.064349  0.090762  0.106607  0.123681  0.144642 

Working on ducks with optimal=30
Initial dataset size: 101213
After dropping NAs: 99637


2024-02-28 10:31:21,935 - Starting SVI inference with 1000 iterations.
2024-02-28 10:31:21,960 - Elbo loss: 72673.55370764148
2024-02-28 10:31:22,659 - Elbo loss: 68.91472434176438
2024-02-28 10:31:23,138 - Elbo loss: -65525.09550507268
2024-02-28 10:31:23,647 - Elbo loss: -86126.29148277435
2024-02-28 10:31:24,123 - Elbo loss: -83592.35043199292
2024-02-28 10:31:24,604 - Elbo loss: -101086.56442735324
2024-02-28 10:31:25,079 - Elbo loss: -100209.61239745034
2024-02-28 10:31:25,553 - Elbo loss: -108273.50419711975
2024-02-28 10:31:26,036 - Elbo loss: -109431.3580081852
2024-02-28 10:31:26,512 - Elbo loss: -107077.98714543303
2024-02-28 10:31:26,986 - Elbo loss: -105182.04039175017
2024-02-28 10:31:27,465 - Elbo loss: -109509.17460956433
2024-02-28 10:31:27,945 - Elbo loss: -107737.79505805337
2024-02-28 10:31:28,422 - Elbo loss: -106679.75277820206
2024-02-28 10:31:28,901 - Elbo loss: -108729.50653268964
2024-02-28 10:31:29,371 - Elbo loss: -103878.69487016085
2024-02-28 10:31:29,850 -

SVI-based coefficient marginals:
Site: d
       mean       std        5%       25%       50%       75%       95%
0 -0.316094  0.024716 -0.356217 -0.332521 -0.316081 -0.298727 -0.276476 

Site: p
       mean      std        5%       25%       50%       75%       95%
0  0.151487  0.02545  0.113137  0.134763  0.151501  0.167875  0.191576 

Working on ducks with optimal=40
Initial dataset size: 101213
After dropping NAs: 99637


2024-02-28 10:31:35,419 - Starting SVI inference with 1000 iterations.
2024-02-28 10:31:35,453 - Elbo loss: 77730.40224544721
2024-02-28 10:31:36,115 - Elbo loss: -5047.742949941205
2024-02-28 10:31:36,822 - Elbo loss: -40301.294752887334
2024-02-28 10:31:37,652 - Elbo loss: -81107.49760201699
2024-02-28 10:31:38,517 - Elbo loss: -78697.98136677561
2024-02-28 10:31:38,980 - Elbo loss: -100207.60292430266
2024-02-28 10:31:39,446 - Elbo loss: -93212.77019197741
2024-02-28 10:31:39,935 - Elbo loss: -104045.83901918704
2024-02-28 10:31:40,398 - Elbo loss: -100581.9983030824
2024-02-28 10:31:40,866 - Elbo loss: -107030.97984673965
2024-02-28 10:31:41,334 - Elbo loss: -109397.85219397188
2024-02-28 10:31:41,805 - Elbo loss: -106064.71725290362
2024-02-28 10:31:42,269 - Elbo loss: -105486.91345149485
2024-02-28 10:31:42,731 - Elbo loss: -110275.04270591204
2024-02-28 10:31:43,201 - Elbo loss: -106249.07670020444
2024-02-28 10:31:43,670 - Elbo loss: -109397.24627492705
2024-02-28 10:31:44,138 

SVI-based coefficient marginals:
Site: d
       mean       std        5%       25%       50%       75%       95%
0 -0.319877  0.026184 -0.362728 -0.337361 -0.320171 -0.301335 -0.276092 

Site: p
       mean       std        5%       25%       50%       75%       95%
0  0.144689  0.023767  0.103896  0.129498  0.145683  0.159698  0.183153 

Working on ducks with optimal=50
Initial dataset size: 101213
After dropping NAs: 99637


2024-02-28 10:31:49,497 - Starting SVI inference with 1000 iterations.
2024-02-28 10:31:49,522 - Elbo loss: 98202.35924524696
2024-02-28 10:31:50,497 - Elbo loss: -624.9860868596154
2024-02-28 10:31:51,322 - Elbo loss: -82296.29929832918
2024-02-28 10:31:52,162 - Elbo loss: -85592.57381522456
2024-02-28 10:31:52,984 - Elbo loss: -96126.80057107258
2024-02-28 10:31:53,777 - Elbo loss: -100172.49955887228
2024-02-28 10:31:54,601 - Elbo loss: -100079.01291566493
2024-02-28 10:31:55,428 - Elbo loss: -102175.39587730117
2024-02-28 10:31:56,253 - Elbo loss: -107788.4661297879
2024-02-28 10:31:56,716 - Elbo loss: -104494.73594007539
2024-02-28 10:31:57,178 - Elbo loss: -107554.74511029175
2024-02-28 10:31:57,636 - Elbo loss: -101156.66052016435
2024-02-28 10:31:58,088 - Elbo loss: -108044.29879574571
2024-02-28 10:31:58,549 - Elbo loss: -95952.98535261641
2024-02-28 10:31:59,009 - Elbo loss: -108435.37746935416
2024-02-28 10:31:59,463 - Elbo loss: -108505.9479042945
2024-02-28 10:31:59,925 - 

SVI-based coefficient marginals:
Site: d
       mean       std        5%      25%       50%       75%       95%
0 -0.349031  0.026702 -0.393108 -0.36681 -0.348551 -0.329365 -0.306144 

Site: p
       mean       std        5%      25%       50%       75%       95%
0  0.096986  0.023864  0.057136  0.08051  0.096588  0.113638  0.134247 

Working on ducks with optimal=60
Initial dataset size: 101213
After dropping NAs: 99637


2024-02-28 10:32:06,841 - Starting SVI inference with 1000 iterations.
2024-02-28 10:32:06,866 - Elbo loss: 73337.20209614414
2024-02-28 10:32:07,854 - Elbo loss: -4442.853828360795
2024-02-28 10:32:08,655 - Elbo loss: -56901.304785373155
2024-02-28 10:32:09,455 - Elbo loss: -83860.66961512495
2024-02-28 10:32:10,259 - Elbo loss: -93654.93324268021
2024-02-28 10:32:11,064 - Elbo loss: -97472.47462355043
2024-02-28 10:32:11,865 - Elbo loss: -87099.0337849627
2024-02-28 10:32:12,667 - Elbo loss: -99076.01401425063
2024-02-28 10:32:13,469 - Elbo loss: -95654.64372877132
2024-02-28 10:32:14,271 - Elbo loss: -101954.77056510405
2024-02-28 10:32:15,072 - Elbo loss: -102967.11715872178
2024-02-28 10:32:15,874 - Elbo loss: -104835.57257899885
2024-02-28 10:32:16,677 - Elbo loss: -97281.94164859943
2024-02-28 10:32:17,479 - Elbo loss: -103104.78971090121
2024-02-28 10:32:18,282 - Elbo loss: -104918.75509103997
2024-02-28 10:32:19,142 - Elbo loss: -105412.70647369204
2024-02-28 10:32:19,976 - El

SVI-based coefficient marginals:
Site: d
       mean       std        5%       25%       50%       75%       95%
0 -0.350039  0.026888 -0.395881 -0.367438 -0.349667 -0.331779 -0.305889 

Site: p
       mean       std        5%      25%       50%       75%      95%
0 -0.009132  0.024512 -0.047832 -0.02579 -0.009962  0.007306  0.03283 

Working on ducks with optimal=70
Initial dataset size: 101213
After dropping NAs: 99637


2024-02-28 10:32:28,517 - Starting SVI inference with 1000 iterations.
2024-02-28 10:32:28,540 - Elbo loss: 78395.44955650457
2024-02-28 10:32:29,323 - Elbo loss: 16090.151967784048
2024-02-28 10:32:29,775 - Elbo loss: -59456.22279945987
2024-02-28 10:32:30,259 - Elbo loss: -78401.51121976055
2024-02-28 10:32:30,706 - Elbo loss: -85778.77496981795
2024-02-28 10:32:31,154 - Elbo loss: -90881.80339862086
2024-02-28 10:32:31,603 - Elbo loss: -98184.4877026621
2024-02-28 10:32:32,044 - Elbo loss: -102320.40187924416
2024-02-28 10:32:32,500 - Elbo loss: -101227.30317508159
2024-02-28 10:32:32,954 - Elbo loss: -101127.5742703616
2024-02-28 10:32:33,409 - Elbo loss: -102591.62182842355
2024-02-28 10:32:33,864 - Elbo loss: -100037.75302214542
2024-02-28 10:32:34,314 - Elbo loss: -104089.34538586812
2024-02-28 10:32:34,769 - Elbo loss: -103741.95029192083
2024-02-28 10:32:35,224 - Elbo loss: -104839.47310984632
2024-02-28 10:32:35,676 - Elbo loss: -101829.35995648707
2024-02-28 10:32:36,125 - E

SVI-based coefficient marginals:
Site: d
       mean       std        5%       25%       50%       75%       95%
0 -0.345531  0.031376 -0.398246 -0.366899 -0.345658 -0.323991 -0.295142 

Site: p
       mean       std        5%       25%      50%       75%       95%
0 -0.082394  0.026575 -0.126991 -0.100156 -0.08257 -0.064374 -0.036732 

Working on ducks with optimal=80
Initial dataset size: 101213
After dropping NAs: 99637


2024-02-28 10:32:41,268 - Starting SVI inference with 1000 iterations.
2024-02-28 10:32:41,307 - Elbo loss: 77322.81433842925
2024-02-28 10:32:42,058 - Elbo loss: -9650.389131070606
2024-02-28 10:32:42,834 - Elbo loss: -72005.38595639745
2024-02-28 10:32:43,636 - Elbo loss: -80794.85435711726
2024-02-28 10:32:44,437 - Elbo loss: -81516.42215882933
2024-02-28 10:32:45,282 - Elbo loss: -98788.58836387884
2024-02-28 10:32:46,131 - Elbo loss: -95411.96655795016
2024-02-28 10:32:46,572 - Elbo loss: -91291.64325717992
2024-02-28 10:32:47,063 - Elbo loss: -96072.05823471281
2024-02-28 10:32:47,511 - Elbo loss: -101050.37599802564
2024-02-28 10:32:47,956 - Elbo loss: -103246.07143470077
2024-02-28 10:32:48,406 - Elbo loss: -96999.61476413943
2024-02-28 10:32:48,853 - Elbo loss: -100603.34505406191
2024-02-28 10:32:49,295 - Elbo loss: -105219.05713246005
2024-02-28 10:32:49,738 - Elbo loss: -105509.51173637483
2024-02-28 10:32:50,182 - Elbo loss: -105249.25927519104
2024-02-28 10:32:50,631 - El

SVI-based coefficient marginals:
Site: d
       mean       std        5%       25%       50%       75%       95%
0 -0.323298  0.029287 -0.370745 -0.343321 -0.323946 -0.302705 -0.275644 

Site: p
       mean       std        5%       25%      50%       75%       95%
0 -0.117752  0.024623 -0.157684 -0.133565 -0.11717 -0.101016 -0.077432 



In [2]:
def plot_coefs(outcomes, title, ann_start_y=100, ann_break_y=50, generate_object=False):
    keys = [10, 20, 30, 40, 50, 60, 70, 80]
    # [19, 46, 85]
    samples = {}

    for key in keys:
        samples[key] = outcomes[key]["svi_samples"]["p"].flatten()

    samples_df = pd.DataFrame(samples)
    # samples_df_medians = samples_df.median(axis=0).tolist()

    fig_coefs = px.histogram(
        samples_df,
        template="presentation",
        opacity=0.4,
        labels={"variable": "preferred proximity", "value": "proximity coefficient"},
        width=700,
        title=title,
    )

    # for i, color in enumerate(['#1f77b4', '#ff7f0e', '#2ca02c']):
    #         fig_coefs.add_vline(x=samples_df_medians[i], line_dash="dash", line_color=color, name=f"Median ({samples_df_medians[i]})")

    #         fig_coefs.add_annotation(
    #         x=samples_df_medians[i],
    #         y= ann_start_y + ann_break_y * i,  # Adjust the vertical position of the label
    #         text=f"{samples_df_medians[i]:.2f}",
    #         bgcolor="white",
    #         showarrow=False,
    #         opacity=0.8,
    #         )

    fig_coefs.update_layout(
        barmode="overlay"
    )  # , yaxis=dict(showticklabels=False, title=None, showgrid=False))

    if generate_object:
        return fig_coefs
    else:
        fig_coefs.show()

In [6]:
duck_outcomes_path = os.path.join(
    root, "data/foraging/central_park_birds_cleaned_2022/duck_outcomes.pkl"
)
duck_outcomes = dill.load(open(duck_outcomes_path, "rb"))

ducks_coefs_plot = plot_coefs(
    duck_outcomes, "Ducks", ann_start_y=350, ann_break_y=50, generate_object=True
)

ducks_coefs_plot.show()

pio.write_image(
    ducks_coefs_plot,
    os.path.join(root, "docs/figures/duck_coefs_plot.png"),
    engine="kaleido",
    width=700,
    scale=5,
)

In [7]:
def calculate_R_squared_prox(distance, proximity, how_far, guide, subsample_size=1000):
    predictive = pyro.infer.Predictive(
        model_sigmavar_proximity, guide=guide, num_samples=1000
    )

    random_indices = np.random.choice(len(distance), size=subsample_size, replace=False)
    distance_sub = distance[random_indices]
    proximity_sub = proximity[random_indices]
    how_far_sub = how_far[random_indices]

    predictions = predictive(distance_sub, proximity_sub, how_far_sub)

    simulated_outcome = (
        predictions["b"] + predictions["p"] * proximity + predictions["d"] * distance
    )

    mean_sim_outcome = simulated_outcome.mean(0).detach().cpu().numpy()

    observed_mean = torch.mean(how_far)

    tss = torch.sum((how_far - observed_mean) ** 2)
    rss = torch.sum((how_far - mean_sim_outcome) ** 2)

    r_squared = 1 - (rss / tss)

    return r_squared.float().item()

In [8]:
for key in keys:
    guide = duck_outcomes[key]["svi_guide"]
    print(
        f"R^2 for ducks with optimal={key}:",
        calculate_R_squared_prox(distance, proximity, how_far, guide),
    )

# interestingly, knowing where they won't go is useful

NameError: name 'distance' is not defined

In [13]:
sps_objects = central_park_objects[1]
keys = [10, 20, 30, 40, 50, 60, 70, 80]  # [19, 46, 85]

sps_outcomes = {}

for key in keys:
    obj = sps_objects[key]
    print(f"Working on sparrows et al. with optimal={key}")
    distance, proximity, how_far = cp_prep_data_for_iference(obj)
    ft.visualise_forager_predictors(
        distance,
        proximity,
        how_far,
        vis_sampling_rate=0.05,
        titles=[
            f"Distance (sparrows et al.)",
            f"Proximity (sparrows et al., optimal={key})",
        ],
        x_axis_labels=["distance", "proximity"],
    )

    sps_outcomes[key] = get_samples(distance, proximity, how_far)

Working on sparrows et al. with optimal=10
Initial dataset size: 61115
After dropping NAs: 60594


2024-02-28 10:35:17,797 - Starting SVI inference with 1000 iterations.
2024-02-28 10:35:17,821 - Elbo loss: 46582.5812314604
2024-02-28 10:35:18,755 - Elbo loss: 1994.2626498134734
2024-02-28 10:35:19,341 - Elbo loss: -40793.88502218755
2024-02-28 10:35:19,758 - Elbo loss: -61447.9668788502
2024-02-28 10:35:20,214 - Elbo loss: -78501.72694635855
2024-02-28 10:35:20,620 - Elbo loss: -78215.55899741781
2024-02-28 10:35:21,051 - Elbo loss: -80341.09862532592
2024-02-28 10:35:21,481 - Elbo loss: -80008.91650999506
2024-02-28 10:35:21,913 - Elbo loss: -84395.52146481432
2024-02-28 10:35:22,339 - Elbo loss: -74951.78724227345
2024-02-28 10:35:22,719 - Elbo loss: -84092.89389111532
2024-02-28 10:35:23,110 - Elbo loss: -83782.12703356871
2024-02-28 10:35:23,492 - Elbo loss: -82297.8263304229
2024-02-28 10:35:23,873 - Elbo loss: -85776.93794738276
2024-02-28 10:35:24,302 - Elbo loss: -85379.72793154125
2024-02-28 10:35:24,730 - Elbo loss: -80409.1761002486
2024-02-28 10:35:25,159 - Elbo loss: -

SVI-based coefficient marginals:
Site: d
       mean       std        5%       25%       50%       75%       95%
0 -0.178317  0.022751 -0.215583 -0.194474 -0.178075 -0.162717 -0.139911 

Site: p
      mean       std        5%       25%       50%       75%       95%
0  0.04981  0.024907  0.008156  0.032668  0.050025  0.066911  0.090015 

Working on sparrows et al. with optimal=20
Initial dataset size: 61115
After dropping NAs: 60594


2024-02-28 10:35:32,204 - Starting SVI inference with 1000 iterations.
2024-02-28 10:35:32,232 - Elbo loss: 54401.432348716444
2024-02-28 10:35:32,799 - Elbo loss: -1121.7086544995968
2024-02-28 10:35:33,304 - Elbo loss: -39304.07943664743
2024-02-28 10:35:33,830 - Elbo loss: -54710.3711423208
2024-02-28 10:35:34,307 - Elbo loss: -70710.92554801106
2024-02-28 10:35:34,779 - Elbo loss: -72633.61451755035
2024-02-28 10:35:35,203 - Elbo loss: -76873.57465864724
2024-02-28 10:35:35,630 - Elbo loss: -80440.82153874246
2024-02-28 10:35:36,048 - Elbo loss: -81345.86403838667
2024-02-28 10:35:36,449 - Elbo loss: -83814.75847486086
2024-02-28 10:35:36,855 - Elbo loss: -83642.89867239035
2024-02-28 10:35:37,280 - Elbo loss: -84368.39868565257
2024-02-28 10:35:37,696 - Elbo loss: -84596.3687616536
2024-02-28 10:35:38,108 - Elbo loss: -84712.82852600836
2024-02-28 10:35:38,521 - Elbo loss: -85703.63627457543
2024-02-28 10:35:38,934 - Elbo loss: -86241.84418608676
2024-02-28 10:35:39,349 - Elbo los

SVI-based coefficient marginals:
Site: d
       mean       std        5%      25%       50%       75%       95%
0 -0.171663  0.021371 -0.207043 -0.18533 -0.170726 -0.156934 -0.139067 

Site: p
       mean       std        5%      25%       50%       75%       95%
0  0.064949  0.022209  0.029314  0.04994  0.064921  0.078794  0.104593 

Working on sparrows et al. with optimal=30
Initial dataset size: 61115
After dropping NAs: 60594


2024-02-28 10:35:45,351 - Starting SVI inference with 1000 iterations.
2024-02-28 10:35:45,378 - Elbo loss: 57039.565350632205
2024-02-28 10:35:46,187 - Elbo loss: -146.61673770464262
2024-02-28 10:35:46,969 - Elbo loss: -44958.24308699733
2024-02-28 10:35:47,718 - Elbo loss: -55243.882038611984
2024-02-28 10:35:48,553 - Elbo loss: -63344.04406076031
2024-02-28 10:35:49,367 - Elbo loss: -82743.34287656541
2024-02-28 10:35:50,195 - Elbo loss: -83946.63770812114
2024-02-28 10:35:51,052 - Elbo loss: -82362.36612602047
2024-02-28 10:35:51,892 - Elbo loss: -84476.72729677585
2024-02-28 10:35:52,732 - Elbo loss: -85075.96576620851
2024-02-28 10:35:53,578 - Elbo loss: -81512.38346819524
2024-02-28 10:35:54,420 - Elbo loss: -86430.17855928822
2024-02-28 10:35:55,262 - Elbo loss: -84578.86131050976
2024-02-28 10:35:55,962 - Elbo loss: -83763.78841567496
2024-02-28 10:35:56,380 - Elbo loss: -85246.87069703382
2024-02-28 10:35:56,829 - Elbo loss: -81894.56314228992
2024-02-28 10:35:57,272 - Elbo 

SVI-based coefficient marginals:
Site: d
       mean       std        5%      25%       50%       75%       95%
0 -0.156768  0.023373 -0.194345 -0.17214 -0.156995 -0.141288 -0.116157 

Site: p
       mean       std        5%      25%      50%       75%       95%
0  0.087347  0.022677  0.049259  0.07266  0.08716  0.102578  0.124449 

Working on sparrows et al. with optimal=40
Initial dataset size: 61115
After dropping NAs: 60594


2024-02-28 10:36:03,725 - Starting SVI inference with 1000 iterations.
2024-02-28 10:36:03,749 - Elbo loss: 50428.9534580028
2024-02-28 10:36:04,603 - Elbo loss: -1499.9686925786502
2024-02-28 10:36:05,436 - Elbo loss: -29901.694972010984
2024-02-28 10:36:06,264 - Elbo loss: -70262.9705202919
2024-02-28 10:36:07,096 - Elbo loss: -22662.387867944642
2024-02-28 10:36:07,920 - Elbo loss: -73307.33404291565
2024-02-28 10:36:08,738 - Elbo loss: -52476.106478267655
2024-02-28 10:36:09,561 - Elbo loss: -84120.7048988914
2024-02-28 10:36:10,378 - Elbo loss: -83481.36294910006
2024-02-28 10:36:11,199 - Elbo loss: -84941.85493430332
2024-02-28 10:36:12,016 - Elbo loss: -84579.97614174514
2024-02-28 10:36:12,847 - Elbo loss: -82563.57175898422
2024-02-28 10:36:13,666 - Elbo loss: -75317.97880868049
2024-02-28 10:36:14,500 - Elbo loss: -83997.26947445744
2024-02-28 10:36:15,333 - Elbo loss: -83446.79554273839
2024-02-28 10:36:16,145 - Elbo loss: -83041.30122594089
2024-02-28 10:36:16,966 - Elbo lo

SVI-based coefficient marginals:
Site: d
       mean       std        5%       25%       50%       75%       95%
0 -0.158545  0.028853 -0.207236 -0.176753 -0.158927 -0.139838 -0.110249 

Site: p
      mean       std        5%     25%       50%       75%       95%
0  0.05673  0.022316  0.020612  0.0411  0.055688  0.072276  0.092604 

Working on sparrows et al. with optimal=50
Initial dataset size: 61115
After dropping NAs: 60594


2024-02-28 10:36:25,466 - Starting SVI inference with 1000 iterations.
2024-02-28 10:36:25,533 - Elbo loss: 50595.02439063825
2024-02-28 10:36:26,564 - Elbo loss: -5882.959228757947
2024-02-28 10:36:27,446 - Elbo loss: -54197.62494786334
2024-02-28 10:36:28,302 - Elbo loss: -72825.66853843445
2024-02-28 10:36:29,174 - Elbo loss: -58825.62142409523
2024-02-28 10:36:30,038 - Elbo loss: -61475.9965180549
2024-02-28 10:36:30,918 - Elbo loss: -65092.87768224945
2024-02-28 10:36:31,919 - Elbo loss: -72472.02783558343
2024-02-28 10:36:32,791 - Elbo loss: -81646.17805635135
2024-02-28 10:36:33,666 - Elbo loss: -81619.66564346326
2024-02-28 10:36:34,533 - Elbo loss: -83319.20596161592
2024-02-28 10:36:35,413 - Elbo loss: -82590.79781770296
2024-02-28 10:36:36,304 - Elbo loss: -85078.51888588311
2024-02-28 10:36:37,215 - Elbo loss: -84884.22290218901
2024-02-28 10:36:38,066 - Elbo loss: -83739.27872369529
2024-02-28 10:36:38,908 - Elbo loss: -85741.10881739538
2024-02-28 10:36:39,766 - Elbo loss

SVI-based coefficient marginals:
Site: d
       mean       std        5%       25%       50%       75%       95%
0 -0.155807  0.029528 -0.203878 -0.176967 -0.155007 -0.135106 -0.109111 

Site: p
       mean      std       5%       25%       50%       75%       95%
0 -0.008233  0.02152 -0.04317 -0.022467 -0.008447  0.005886  0.028662 

Working on sparrows et al. with optimal=60
Initial dataset size: 61115
After dropping NAs: 60594


2024-02-28 10:36:48,446 - Starting SVI inference with 1000 iterations.
2024-02-28 10:36:48,473 - Elbo loss: 43890.83343443862
2024-02-28 10:36:49,464 - Elbo loss: -7890.240066832247
2024-02-28 10:36:50,340 - Elbo loss: -54454.43357220033
2024-02-28 10:36:51,227 - Elbo loss: -24528.290609728974
2024-02-28 10:36:52,099 - Elbo loss: -65319.35462646869
2024-02-28 10:36:52,984 - Elbo loss: -71520.7216049858
2024-02-28 10:36:53,862 - Elbo loss: -77501.82775177927
2024-02-28 10:36:54,718 - Elbo loss: -83018.96383407761
2024-02-28 10:36:55,250 - Elbo loss: -78799.5396938397
2024-02-28 10:36:55,774 - Elbo loss: -81915.86485452486
2024-02-28 10:36:56,292 - Elbo loss: -83329.69814409286
2024-02-28 10:36:56,812 - Elbo loss: -83287.4813963343
2024-02-28 10:36:57,416 - Elbo loss: -85303.59003782485
2024-02-28 10:36:57,944 - Elbo loss: -84739.92986249915
2024-02-28 10:36:58,482 - Elbo loss: -85975.39230150712
2024-02-28 10:36:59,459 - Elbo loss: -84498.70671899935
2024-02-28 10:37:00,287 - Elbo loss:

SVI-based coefficient marginals:
Site: d
       mean       std        5%       25%       50%      75%       95%
0 -0.143852  0.026135 -0.187058 -0.162589 -0.144062 -0.12612 -0.100686 

Site: p
       mean       std        5%       25%       50%       75%       95%
0 -0.051811  0.021569 -0.085612 -0.066398 -0.052266 -0.037716 -0.014672 

Working on sparrows et al. with optimal=70
Initial dataset size: 61115
After dropping NAs: 60594


2024-02-28 10:37:08,198 - Starting SVI inference with 1000 iterations.
2024-02-28 10:37:08,222 - Elbo loss: 51438.90066780672
2024-02-28 10:37:09,223 - Elbo loss: -11498.93312503949
2024-02-28 10:37:09,670 - Elbo loss: -37454.277877164604
2024-02-28 10:37:10,492 - Elbo loss: -57730.24708898092
2024-02-28 10:37:11,321 - Elbo loss: -67801.23209305393
2024-02-28 10:37:12,145 - Elbo loss: -76808.42491135816
2024-02-28 10:37:12,586 - Elbo loss: -81035.64155385859
2024-02-28 10:37:13,369 - Elbo loss: -76243.8529353049
2024-02-28 10:37:14,148 - Elbo loss: -81857.00431410044
2024-02-28 10:37:14,901 - Elbo loss: -80417.58317194565
2024-02-28 10:37:15,711 - Elbo loss: -80896.57756756978
2024-02-28 10:37:16,517 - Elbo loss: -83615.82862322484
2024-02-28 10:37:17,327 - Elbo loss: -81954.75966490194
2024-02-28 10:37:18,098 - Elbo loss: -83633.8634566381
2024-02-28 10:37:18,912 - Elbo loss: -81987.25057121458
2024-02-28 10:37:19,796 - Elbo loss: -82469.2586900416
2024-02-28 10:37:20,590 - Elbo loss:

SVI-based coefficient marginals:
Site: d
       mean       std        5%       25%       50%       75%       95%
0 -0.131649  0.025781 -0.175758 -0.149477 -0.131403 -0.114922 -0.089961 

Site: p
       mean       std       5%       25%       50%       75%       95%
0 -0.085005  0.020407 -0.11895 -0.098762 -0.084167 -0.071978 -0.051602 

Working on sparrows et al. with optimal=80
Initial dataset size: 61115
After dropping NAs: 60594


2024-02-28 10:37:29,373 - Starting SVI inference with 1000 iterations.
2024-02-28 10:37:29,397 - Elbo loss: 63295.476455509226
2024-02-28 10:37:30,314 - Elbo loss: -13070.69503277872
2024-02-28 10:37:31,132 - Elbo loss: -47072.924491365615
2024-02-28 10:37:31,952 - Elbo loss: -55423.679170281335
2024-02-28 10:37:32,781 - Elbo loss: -29183.503105238586
2024-02-28 10:37:33,603 - Elbo loss: -55128.889004753444
2024-02-28 10:37:34,431 - Elbo loss: -62508.41810279181
2024-02-28 10:37:35,262 - Elbo loss: -50715.79201653902
2024-02-28 10:37:35,670 - Elbo loss: -76108.81696613197
2024-02-28 10:37:36,130 - Elbo loss: -80062.98458272323
2024-02-28 10:37:36,580 - Elbo loss: -82133.14774355276
2024-02-28 10:37:36,970 - Elbo loss: -80179.08427749129
2024-02-28 10:37:37,373 - Elbo loss: -83148.11667405601
2024-02-28 10:37:37,785 - Elbo loss: -70836.71444314536
2024-02-28 10:37:38,217 - Elbo loss: -82405.2932508017
2024-02-28 10:37:38,640 - Elbo loss: -84203.28200022085
2024-02-28 10:37:39,046 - Elbo

SVI-based coefficient marginals:
Site: d
       mean      std        5%       25%       50%       75%       95%
0 -0.139388  0.02887 -0.187542 -0.159071 -0.140264 -0.119804 -0.092184 

Site: p
      mean       std        5%       25%       50%       75%       95%
0 -0.09384  0.022398 -0.130905 -0.108524 -0.093531 -0.078533 -0.057525 



In [14]:
path = os.path.join(
    root, "data/foraging/central_park_birds_cleaned_2022/sps_outcomes.pkl"
)

if not smoke_test:
    if not os.path.exists(path):
        with open(path, "wb") as file:
            dill.dump(sps_outcomes, file)

In [9]:
sps_outcomes_path = os.path.join(
    root, "data/foraging/central_park_birds_cleaned_2022/sps_outcomes.pkl"
)
sps_outcomes = dill.load(open(sps_outcomes_path, "rb"))

sps_coefs_plot = plot_coefs(
    sps_outcomes,
    "Sparrows et al.",
    ann_start_y=200,
    ann_break_y=30,
    generate_object=True,
)

sps_coefs_plot.show()
# add title to figure

pio.write_image(
    sps_coefs_plot,
    os.path.join(root, "docs/figures/sps_coefs_plot.png"),
    engine="kaleido",
    width=700,
    scale=5,
)