In [16]:
import pandas as pd
import numpy as np
from sklearn.mixture import GaussianMixture
from sklearn.preprocessing import StandardScaler


In [17]:
df = pd.read_csv("final_cow_thermal_dataset.csv")
df = df.drop(columns=["sample_folder"], errors="ignore")

print(df.shape)
df.head()


(51948, 11)


Unnamed: 0,cow_part,ambient_temp,humidity,mean_temp,max_temp,frame_std,delta_mean,delta_max,lactation,health_state,season
0,etc,17.6,83.2,24.34,26.05,3.2,6.74,8.45,0,healthy,winter
1,etc,17.0,82.9,23.66,26.15,3.02,6.66,9.15,0,healthy,winter
2,etc,16.9,83.7,22.76,26.13,2.82,5.86,9.23,0,healthy,winter
3,etc,16.8,84.2,22.34,24.84,2.81,5.54,8.04,0,healthy,winter
4,etc,16.8,84.2,22.14,25.42,2.9,5.34,8.62,0,healthy,winter


In [18]:
train_df = df[
    (df["health_state"] == "healthy") &
    (df["lactation"] == 1)
].reset_index(drop=True)

print("Training samples:", train_df.shape)


Training samples: (42120, 11)


In [19]:
FEATURES = [
    "delta_mean",
    "delta_max",
    "frame_std",
    "humidity"
]


In [20]:
gmm_models = {}
scalers = {}


In [21]:
for part in train_df["cow_part"].unique():

    part_df = train_df[train_df["cow_part"] == part][FEATURES]

    # Standardize features
    scaler = StandardScaler()
    X = scaler.fit_transform(part_df)

    # Gaussian Mixture Model
    gmm = GaussianMixture(
        n_components=2,          # 2 is enough; 3 max
        covariance_type="full",
        random_state=42
    )

    gmm.fit(X)

    gmm_models[part] = gmm
    scalers[part] = scaler

    print(f"✔ Trained GMM for {part} | Samples: {len(part_df)}")


✔ Trained GMM for etc | Samples: 12960
✔ Trained GMM for eye | Samples: 9000
✔ Trained GMM for hoof | Samples: 5760
✔ Trained GMM for leg | Samples: 5400
✔ Trained GMM for udder | Samples: 9000


In [22]:
def compute_gmm_score(row):
    part = row["cow_part"]
    if part not in gmm_models:
        return np.nan

    x = pd.DataFrame([row[FEATURES]], columns=FEATURES)
    x_scaled = scalers[part].transform(x)


    return gmm_models[part].score_samples(x_scaled)[0]


In [23]:
df["gmm_score"] = df.apply(compute_gmm_score, axis=1)

df[["cow_part", "health_state", "gmm_score"]].head()


Unnamed: 0,cow_part,health_state,gmm_score
0,etc,healthy,-3.732525
1,etc,healthy,-4.22783
2,etc,healthy,-5.287482
3,etc,healthy,-4.024456
4,etc,healthy,-4.150791


In [24]:
thresholds = {}

for part in gmm_models.keys():

    scores = df[
        (df["cow_part"] == part) &
        (df["health_state"] == "healthy") &
        (df["lactation"] == 1)
    ]["gmm_score"]

    thresholds[part] = np.percentile(scores, 5)

    print(f"{part} threshold = {thresholds[part]:.2f}")


etc threshold = -6.91
eye threshold = -5.87
hoof threshold = -5.49
leg threshold = -6.50
udder threshold = -6.90


In [25]:
def flag_abnormal(row):
    part = row["cow_part"]
    if part not in thresholds:
        return 0
    return int(row["gmm_score"] < thresholds[part])


In [26]:
df["abnormal"] = df.apply(flag_abnormal, axis=1)

df[["cow_part", "health_state", "abnormal"]].head()


Unnamed: 0,cow_part,health_state,abnormal
0,etc,healthy,0
1,etc,healthy,0
2,etc,healthy,0
3,etc,healthy,0
4,etc,healthy,0


In [27]:
pd.crosstab(
    df["health_state"],
    df["abnormal"],
    normalize="index"
)


abnormal,0,1
health_state,Unnamed: 1_level_1,Unnamed: 2_level_1
fever,0.449667,0.550333
healthy,0.949796,0.050204
lameness,0.008065,0.991935
mastitis,0.111,0.889


In [28]:
def infer_disease_from_part(row):
    """
    Rule-based disease inference from a single body-part observation.
    Works without cow ID or time information.
    """

    # -------------------------
    # Normal case
    # -------------------------
    if row["abnormal"] == 0:
        return {
            "predicted_condition": "normal",
            "confidence": 0.0,
            "advice": "No abnormal thermal pattern detected."
        }

    # Strength of anomaly (normalized)
    score_strength = min(1.0, abs(row["gmm_score"]) / 6)

    # -------------------------
    # Mastitis (udder-specific)
    # -------------------------
    if row["cow_part"] == "udder":
        return {
            "predicted_condition": "mastitis_suspected",
            "confidence": round(score_strength, 2),
            "advice": (
                "High udder temperature detected. Possible mastitis. "
                "Inspect udder and milk; increase milking frequency and "
                "consult a veterinarian if the condition persists."
            )
        }

    # -------------------------
    # Lameness (hoof / leg)
    # -------------------------
    if row["cow_part"] in ["hoof", "leg"]:
        return {
            "predicted_condition": "lameness_suspected",
            "confidence": round(score_strength, 2),
            "advice": (
                "Abnormal hoof or leg temperature detected. Possible lameness. "
                "Inspect hooves for injury or infection, reduce movement, "
                "and consult a veterinarian if needed."
            )
        }

    # -------------------------
    # Fever / systemic infection (CONSERVATIVE RULE)
    # -------------------------
    if row["cow_part"] in ["eye", "body"] and score_strength > 0.4:
        return {
            "predicted_condition": "fever_or_infection_suspected",
            "confidence": round(score_strength, 2),
            "advice": (
                "Elevated eye or body temperature detected. Possible fever or "
                "systemic infection. Monitor the cow closely and consult "
                "a veterinarian if temperature remains high."
            )
        }

    # -------------------------
    # Fallback (abnormal but non-specific)
    # -------------------------
    return {
        "predicted_condition": "abnormal_unspecified",
        "confidence": round(score_strength, 2),
        "advice": (
            "Abnormal thermal pattern detected. Monitor the cow closely "
            "and consider further inspection."
        )
    }


In [29]:
disease_results = df.apply(
    lambda r: pd.Series(infer_disease_from_part(r)),
    axis=1
)

df = pd.concat(
    [df.drop(columns=["predicted_condition", "confidence", "advice"], errors="ignore"),
     disease_results],
    axis=1
)
df[["cow_part", "health_state", "abnormal", "predicted_condition", "confidence"]].head()


Unnamed: 0,cow_part,health_state,abnormal,predicted_condition,confidence
0,etc,healthy,0,normal,0.0
1,etc,healthy,0,normal,0.0
2,etc,healthy,0,normal,0.0
3,etc,healthy,0,normal,0.0
4,etc,healthy,0,normal,0.0


In [30]:
pd.crosstab(
    df["health_state"],
    df["predicted_condition"],
    normalize="index"
)


predicted_condition,abnormal_unspecified,fever_or_infection_suspected,lameness_suspected,mastitis_suspected,normal
health_state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
fever,0.0,0.550333,0.0,0.0,0.449667
healthy,0.015369,0.01068,0.013238,0.010917,0.949796
lameness,0.0,0.0,0.991935,0.0,0.008065
mastitis,0.0,0.0,0.0,0.889,0.111


In [31]:
def assign_severity(row):
    if row["predicted_condition"] == "normal":
        return {
            "severity": "NONE",
            "action": "No action needed."
        }

    conf = row["confidence"]

    if conf < 0.3:
        return {
            "severity": "LOW",
            "action": (
                "Mild abnormality detected. Monitor the cow and "
                "recheck during the next routine inspection."
            )
        }

    elif conf < 0.6:
        return {
            "severity": "MEDIUM",
            "action": (
                "Moderate abnormality detected. Inspect the affected body part "
                "today and monitor closely."
            )
        }

    else:
        return {
            "severity": "HIGH",
            "action": (
                "Strong abnormality detected. Immediate inspection recommended "
                "and consult a veterinarian if the condition persists."
            )
        }


In [32]:
severity_results = df.apply(
    lambda r: pd.Series(assign_severity(r)),
    axis=1
)

df = pd.concat(
    [df.drop(columns=["severity", "action"], errors="ignore"),
     severity_results],
    axis=1
)


In [33]:
df[[
    "cow_part",
    "predicted_condition",
    "confidence",
    "severity",
    "action"
]].head(10)


Unnamed: 0,cow_part,predicted_condition,confidence,severity,action
0,etc,normal,0.0,NONE,No action needed.
1,etc,normal,0.0,NONE,No action needed.
2,etc,normal,0.0,NONE,No action needed.
3,etc,normal,0.0,NONE,No action needed.
4,etc,normal,0.0,NONE,No action needed.
5,etc,normal,0.0,NONE,No action needed.
6,etc,normal,0.0,NONE,No action needed.
7,etc,normal,0.0,NONE,No action needed.
8,etc,normal,0.0,NONE,No action needed.
9,etc,normal,0.0,NONE,No action needed.


In [34]:
pd.crosstab(
    df["predicted_condition"],
    df["severity"],
    normalize="index"
)


severity,HIGH,NONE
predicted_condition,Unnamed: 1_level_1,Unnamed: 2_level_1
abnormal_unspecified,1.0,0.0
fever_or_infection_suspected,1.0,0.0
lameness_suspected,1.0,0.0
mastitis_suspected,1.0,0.0
normal,0.0,1.0


In [35]:
import os

os.makedirs("artifacts/gmm_models", exist_ok=True)
os.makedirs("artifacts/scalers", exist_ok=True)
os.makedirs("artifacts/config", exist_ok=True)


In [36]:
import joblib

for part in gmm_models.keys():
    joblib.dump(
        gmm_models[part],
        f"artifacts/gmm_models/gmm_{part}.joblib"
    )
    joblib.dump(
        scalers[part],
        f"artifacts/scalers/scaler_{part}.joblib"
    )

print("✅ Saved all GMM models and scalers")


✅ Saved all GMM models and scalers


In [37]:
import json

with open("artifacts/config/thresholds.json", "w") as f:
    json.dump(thresholds, f, indent=2)

print("✅ Saved thresholds")


✅ Saved thresholds


In [38]:
feature_config = {
    "features": FEATURES,
    "model_type": "GMM",
    "n_components": 2,
    "threshold_percentile": 5
}

with open("artifacts/config/model_config.json", "w") as f:
    json.dump(feature_config, f, indent=2)

print("✅ Saved model configuration")


✅ Saved model configuration


In [39]:
# Test reload
test_part = list(gmm_models.keys())[0]

gmm_test = joblib.load(f"artifacts/gmm_models/gmm_{test_part}.joblib")
scaler_test = joblib.load(f"artifacts/scalers/scaler_{test_part}.joblib")

print("Reload test OK for:", test_part)


Reload test OK for: etc
