In [None]:
###################################################
# Authors: Fernando Cardoso and Sandro Camargo
# Course: Biostatistics
# Federal University of Pampa - Bagé - PPGCAP
#
# Example 1 - Feed protein x Milk production
# One-way Regression ANOVA (no repetition)
###################################################

import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt

In [None]:
# --- Load the dataset ---
# The file is tab-separated, uses a comma as the decimal mark, and its first
# row holds the column names.
dados = pd.read_csv(
    "https://raw.githubusercontent.com/Sandrocamargo/biostatistics/"
    "refs/heads/master/datasets/rl-leite.txt",
    sep="\t",
    decimal=",",
    header=0,
)

# Column overview (printed), followed by the first rows (cell output).
dados.info()
dados.head()

In [None]:
# --- Ordinary least squares fit of y on prot ---
modelo = smf.ols(formula="y ~ prot", data=dados).fit()

# --- Model summary (coefficients, t-tests, R²) ---
# A single print with a newline separator emits the heading and the summary
# on consecutive lines.
print("\nResumo do modelo:", modelo.summary(), sep="\n")

In [None]:
# --- Analysis of variance for the fitted model (type 2 table) ---
anova_table = sm.stats.anova_lm(modelo, typ=2)

# Heading and table are printed on consecutive lines.
print("\nANOVA do modelo:", anova_table, sep="\n")

In [None]:
# --- Scatterplot with regression line ---
# Plots the observations and the fitted straight line, shows the fitted
# equation and the adjusted R² in the legend, then saves and displays the figure.
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(dados["prot"], dados["y"], color="blue", label="Observações")

# Coefficients are read by position (.iloc): first the intercept, then the
# slope of `prot`, in the order they appear in modelo.params.
intercept = modelo.params.iloc[0]
slope = modelo.params.iloc[1]

# Equation text. The value shown is modelo.rsquared_adj, so it is labelled as
# the adjusted R² rather than plain R².
eq_text = (
    f"y = {intercept:.4f} + {slope:.4f}x\n"
    f"R² ajustado = {modelo.rsquared_adj:.4f}"
)

# Regression line evaluated at the sorted predictor values. The equation is
# attached to the line's own label so the legend pairs it with the red line.
x_vals = pd.Series(sorted(dados["prot"]))
y_vals = intercept + slope * x_vals
ax.plot(x_vals, y_vals, color="red", label=f"Ajuste linear\n{eq_text}")

# Axis limits similar to R version: 5% padding around the data range.
# NOTE(review): the multiplicative padding only widens the range when the
# minimum and maximum are positive - confirm this holds for the dataset.
ax.set_xlim(dados["prot"].min() * 0.95, dados["prot"].max() * 1.05)
ax.set_ylim(dados["y"].min() * 0.95, dados["y"].max() * 1.05)

# Labels
ax.set_xlabel("Proteína na Ração (%)")
ax.set_ylabel("Produção de Leite (kg)")

# Build the legend from the artists' own labels. Passing a bare list of labels
# would relabel the first artist (the scatter) and drop the regression line.
ax.legend(loc="upper left")

# --- Save, then show (saving first keeps the written figure from being empty) ---
fig.savefig("rl_leite_regressao.png", dpi=300, bbox_inches="tight")
plt.show()