Physiology Synthetic Data

In [None]:
import pandas as pd
import numpy as np

# Load the combined measurements, drop the optional timestamp column and
# discard every row that has a missing value in any column.
df = (
    pd.read_csv("data_combined.csv")
    .drop(columns=["timestamp"], errors="ignore")
    .dropna()
    .reset_index(drop=True)
)

# Express mean/max temperature as an offset from the ambient temperature.
df["delta_mean"] = df["mean_temp"].sub(df["ambient_temp"])
df["delta_max"] = df["max_temp"].sub(df["ambient_temp"])

# Default labels for the measured rows: not lactating, healthy, winter.
df = df.assign(lactation=0, health_state="healthy", season="winter")


# Physiology-aware helpers

def apply_cow_variability(row):
    """Return a copy of ``row`` with a random offset and spread change.

    One offset drawn from U(-0.4, 0.4) is added to both ``delta_mean`` and
    ``delta_max``; ``frame_std`` is multiplied by a factor drawn from
    U(0.9, 1.1). The input row is left untouched.
    """
    varied = row.copy()

    # Keep the draw order (offset, then scale) so seeded runs stay identical.
    offset = np.random.uniform(-0.4, 0.4)
    spread_scale = np.random.uniform(0.9, 1.1)

    for column in ("delta_mean", "delta_max"):
        varied[column] = varied[column] + offset
    varied["frame_std"] = varied["frame_std"] * spread_scale

    return varied


def make_lactating(row):
    """Return a copy of ``row`` flagged as lactating with warmer readings.

    ``lactation`` is set to 1. Depending on ``cow_part``:

    * ``"udder"``: ``delta_mean`` += U(1.2, 2.0), ``delta_max`` += U(1.5, 3.0),
      ``frame_std`` *= U(1.2, 1.3)
    * ``"body"``: ``delta_mean`` += U(0.4, 0.8)
    * ``"eye"``: ``delta_mean`` += U(0.2, 0.4)

    Any other body part only receives the ``lactation`` flag. The input row
    is not modified.
    """
    row = row.copy()
    row["lactation"] = 1

    part = row["cow_part"]
    if part == "udder":
        row["delta_mean"] += np.random.uniform(1.2, 2.0)
        row["delta_max"] += np.random.uniform(1.5, 3.0)
        row["frame_std"] *= np.random.uniform(1.2, 1.3)
    elif part == "body":
        row["delta_mean"] += np.random.uniform(0.4, 0.8)
    elif part == "eye":
        row["delta_mean"] += np.random.uniform(0.2, 0.4)
    else:
        # Unaffected body part: leave the thermal values exactly as they were.
        return row

    # The mean was raised independently of the max, so it may have overtaken
    # it. A maximum below the mean is impossible (assuming both describe the
    # same frame), so lift the max to the mean when that happens.
    row["delta_max"] = max(row["delta_max"], row["delta_mean"])
    return row


def apply_season(row, season):
    """Return a copy of ``row`` labelled with ``season`` and warmed accordingly.

    Parameters
    ----------
    row : pandas.Series
        Row with at least ``delta_mean``, ``delta_max`` and ``frame_std``.
    season : str
        Stored in the ``season`` field. ``"spring"`` adds U(0.4, 0.6) to
        ``delta_mean`` and scales ``frame_std`` by 1.1; ``"summer"`` adds
        U(1.0, 1.5) and scales by 1.25. Any other value (including
        ``"winter"``) only sets the label.

    Returns
    -------
    pandas.Series
        The adjusted copy; the input row is not modified.
    """
    # season -> ((low, high) of the delta_mean increase, frame_std factor)
    effects = {
        "spring": ((0.4, 0.6), 1.1),
        "summer": ((1.0, 1.5), 1.25),
    }

    row = row.copy()
    row["season"] = season

    effect = effects.get(season)
    if effect is None:
        return row

    (low, high), std_factor = effect
    row["delta_mean"] += np.random.uniform(low, high)
    row["frame_std"] *= std_factor

    # Only the mean was raised, so it may now exceed the max. A maximum below
    # the mean is impossible (assuming both describe the same frame), so lift
    # the max to the mean when that happens.
    row["delta_max"] = max(row["delta_max"], row["delta_mean"])
    return row


def inject_mastitis(row):
    """Return a copy of ``row`` with a mastitis signature if it is an udder row.

    For ``cow_part == "udder"``: ``delta_mean`` += U(2.0, 4.0),
    ``delta_max`` += U(3.0, 6.0), ``frame_std`` *= U(1.4, 1.6) and
    ``health_state`` becomes ``"mastitis"``. Rows for any other body part are
    returned as an unchanged copy. The input row is not modified.
    """
    row = row.copy()
    if row["cow_part"] != "udder":
        return row

    row["delta_mean"] += np.random.uniform(2.0, 4.0)
    row["delta_max"] += np.random.uniform(3.0, 6.0)
    row["frame_std"] *= np.random.uniform(1.4, 1.6)

    # The two increases are drawn independently, so the mean can gain more
    # than the max. A maximum below the mean is impossible (assuming both
    # describe the same frame), so lift the max to the mean when that happens.
    row["delta_max"] = max(row["delta_max"], row["delta_mean"])

    row["health_state"] = "mastitis"
    return row


def inject_lameness(row):
    """Return a copy of ``row`` with a lameness signature on hoof/leg rows.

    When ``cow_part`` is ``"hoof"`` or ``"leg"``, ``delta_max`` grows by
    U(2.5, 4.0), ``frame_std`` is scaled by U(1.3, 1.5) and ``health_state``
    is set to ``"lameness"``. All other rows come back as an unchanged copy.
    """
    result = row.copy()

    # Guard clause: only limb rows are affected.
    if result["cow_part"] not in ("hoof", "leg"):
        return result

    # Draw order (peak rise, then spread) matches the sequence of updates.
    peak_rise = np.random.uniform(2.5, 4.0)
    result["delta_max"] = result["delta_max"] + peak_rise

    spread_scale = np.random.uniform(1.3, 1.5)
    result["frame_std"] = result["frame_std"] * spread_scale

    result["health_state"] = "lameness"
    return result


def inject_fever(row):
    """Return a copy of ``row`` with a fever signature on eye/body rows.

    When ``cow_part`` is ``"eye"`` or ``"body"``, ``delta_mean`` grows by
    U(1.0, 2.0) and ``health_state`` becomes ``"fever"``. Rows for any other
    body part are returned as an unchanged copy. The input row is not
    modified.
    """
    row = row.copy()
    if row["cow_part"] not in ("eye", "body"):
        return row

    row["delta_mean"] += np.random.uniform(1.0, 2.0)

    # Only the mean was raised, so it may now exceed the max. A maximum below
    # the mean is impossible (assuming both describe the same frame), so lift
    # the max to the mean when that happens.
    row["delta_max"] = max(row["delta_max"], row["delta_mean"])

    row["health_state"] = "fever"
    return row


def generate_synthetic_data(
    base_df,
    n_cows=40,
    seasons=("winter", "spring", "summer"),
    diseases=("mastitis", "lameness", "fever"),
    seed=None,
):
    """Expand ``base_df`` into a synthetic set of lactating-cow rows.

    For each of ``n_cows`` passes over ``base_df``, every row is perturbed
    with ``apply_cow_variability`` and ``make_lactating``. For every season
    one seasonal row is emitted, followed by one row per disease whose
    injector actually altered that seasonal row.

    Note that the variability is drawn once per base row per pass, so the
    rows of one pass do not share a common offset.

    Parameters
    ----------
    base_df : pandas.DataFrame
        Cleaned measurements; must carry the columns the helper functions
        read (``cow_part``, ``delta_mean``, ``delta_max``, ``frame_std``).
    n_cows : int
        Number of passes over ``base_df``.
    seasons : iterable of str
        Season labels handed to ``apply_season``.
    diseases : iterable of str
        Any of ``"mastitis"``, ``"lameness"``, ``"fever"``.
    seed : int or None
        When given, seeds NumPy's global random state before generating so
        the run is reproducible. The global state is used because the helper
        functions draw from ``np.random`` directly.

    Returns
    -------
    pandas.DataFrame
        One row per generated sample. Row labels repeat those of
        ``base_df``; reset the index if unique labels are needed.

    Raises
    ------
    ValueError
        If ``diseases`` contains a name with no matching injector.
    """
    injectors = {
        "mastitis": inject_mastitis,
        "lameness": inject_lameness,
        "fever": inject_fever,
    }

    # Materialise once: both sequences are re-iterated for every base row.
    seasons = tuple(seasons)
    diseases = tuple(diseases)

    unknown = [name for name in diseases if name not in injectors]
    if unknown:
        raise ValueError(
            f"Unknown disease(s) {unknown}; expected a subset of {sorted(injectors)}"
        )

    if seed is not None:
        np.random.seed(seed)

    synthetic = []

    for _ in range(n_cows):
        for _, row in base_df.iterrows():
            lactating = make_lactating(apply_cow_variability(row))

            for season in seasons:
                seasonal = apply_season(lactating, season)
                synthetic.append(seasonal)

                for disease in diseases:
                    diseased = injectors[disease](seasonal)
                    # An injector returns an untouched copy when the disease
                    # does not affect this body part. Appending it would only
                    # add an exact duplicate of the seasonal row.
                    if not diseased.equals(seasonal):
                        synthetic.append(diseased)

    if not synthetic:
        # Keep the column layout so callers can still select columns.
        return pd.DataFrame(columns=base_df.columns)

    return pd.DataFrame(synthetic)



# Expand the cleaned measurements into the synthetic population.
synthetic_df = generate_synthetic_data(df)

# The measured rows are kept as the non-lactating, healthy, winter baseline.
baseline_df = df.assign(lactation=0, health_state="healthy", season="winter")

final_df = pd.concat([baseline_df, synthetic_df], ignore_index=True)

# Rebuild absolute temperatures for every row (baseline and synthetic alike)
# from the ambient-relative deltas.
final_df["mean_temp"] = final_df["delta_mean"].add(final_df["ambient_temp"])
final_df["max_temp"] = final_df["delta_max"].add(final_df["ambient_temp"])

final_df.to_csv("final_cow_thermal_dataset.csv", index=False)

print("Final dataset size:", final_df.shape)


In [None]:
# Number of synthetic rows per health_state label.
synthetic_df["health_state"].value_counts()



In [None]:
# Number of synthetic rows per lactation flag value.
synthetic_df["lactation"].value_counts()


In [None]:
# Average delta_mean of the synthetic rows for each cow_part.
synthetic_df.groupby("cow_part")["delta_mean"].mean()