In [1]:
# Inferential analyses: ANOVA and Logistic Regression
# ANOVA: compare bat_landing_to_food across habit categories
# Logistic: model risk and reward with rat_minutes, season, habit_category

import pandas as pd
from scipy import stats
import statsmodels.formula.api as smf

# load data
df = pd.read_csv("df_combined.csv")

# basic casting for categories
df["season"] = df["season"].astype("category")
df["habit_category"] = df["habit_category"].astype("category")

# ANOVA: landing-to-food time across habit groups
a = df[["bat_landing_to_food", "habit_category"]].dropna()
groups = [g["bat_landing_to_food"].values for _, g in a.groupby("habit_category")]
if len(groups) >= 3:
    F, p = stats.f_oneway(*groups)
    print("ANOVA bat_landing_to_food ~ habit_category")
    print("F =", round(F, 3), "p =", round(p, 4))
else:
    print("ANOVA skipped (need >=3 groups)")

# Logistic regression: risk ~ rat_minutes + season + habit_category
d1 = df[["risk", "rat_minutes", "season", "habit_category"]].dropna().copy()
d1["risk"] = d1["risk"].astype(int)
m1 = smf.logit("risk ~ rat_minutes + C(season) + C(habit_category)", data=d1).fit(disp=False)
print("\nLogistic (risk):")
print(m1.summary())

# Logistic regression: reward ~ rat_minutes + season + habit_category
d2 = df[["reward", "rat_minutes", "season", "habit_category"]].dropna().copy()
d2["reward"] = d2["reward"].astype(int)
m2 = smf.logit("reward ~ rat_minutes + C(season) + C(habit_category)", data=d2).fit(disp=False)
print("\nLogistic (reward):")
print(m2.summary())


ANOVA bat_landing_to_food ~ habit_category
F = 15.716 p = 0.0

Logistic (risk):
                           Logit Regression Results                           
Dep. Variable:                   risk   No. Observations:                  907
Model:                          Logit   Df Residuals:                      899
Method:                           MLE   Df Model:                            7
Date:                Wed, 03 Sep 2025   Pseudo R-squ.:                  0.7034
Time:                        06:13:42   Log-Likelihood:                -186.45
converged:                      False   LL-Null:                       -628.64
Covariance Type:            nonrobust   LLR p-value:                1.128e-186
                                                          coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------------------------------------
Intercept                                       

In [2]:
from scipy.stats import ttest_ind

# Filter data (exclude NaNs or invalid values)
df_clean = df_combined.dropna(subset=['bat_landing_to_food', 'risk'])

# Split by risk
time_risk0 = df_clean[df_clean['risk'] == 0]['bat_landing_to_food']
time_risk1 = df_clean[df_clean['risk'] == 1]['bat_landing_to_food']

# Perform one-tailed t-test
t_stat, p_val = ttest_ind(time_risk1, time_risk0, alternative='greater', equal_var=False)  # Welch's t-test for unequal variances

print(f"T-statistic: {t_stat:.4f}")
print(f"P-value: {p_val:.4f}")

# Interpretation
alpha = 0.05
if p_val < alpha:
    print("Reject H0: Bats take significantly longer to approach food when rats are present, suggesting higher vigilance due to perceived predation risk.")
else:
    print("Fail to reject H0: No significant difference in time to approach food.")

NameError: name 'df_combined' is not defined

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=74c414ef-560d-420d-b4af-809d2b670979' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>