In [1]:
import numpy as np
import pandas as pd

# =========================================================
#          ABSOLUTE SENSOR RANGES (LOCKED)
# =========================================================
# Each range is a (min, max) pair. generate_dataset() clips the noisy readings
# of the matching column to these bounds before saving.
PPM_RANGE = (0, 1000)
# The scenario generators treat HIGH turbidity readings as SAFE and LOW
# readings as DANGEROUS, hence "inverted".
TURB_RANGE = (0, 1000)     # inverted sensor
PH_RANGE = (0, 14)
TEMP_RANGE = (0, 150)

# =========================================================
#         SCENARIOS GUARANTEED BY CLASS
# =========================================================

# ---------- SAFE ----------
def safe_scenario():
    """Draw one reading in which every sensor value lies in its SAFE band.

    Values come from NumPy's global RNG, drawn in the order
    ppm, turbidity, ph, temperature.

    Returns:
        tuple: ``(ppm, turbidity, ph, temperature)`` with
            ppm in [50, 250), turbidity in [700, 1000),
            ph in [6.8, 8.2) and temperature in [22, 26).
    """
    ppm = np.random.uniform(50, 250)
    # Inverted sensor: the high end of the turbidity scale is the SAFE band.
    turbidity = np.random.uniform(700, 1000)
    ph = np.random.uniform(6.8, 8.2)
    temperature = np.random.uniform(22, 26)
    return ppm, turbidity, ph, temperature

# ---------- WARNING ----------
def warning_scenario():
    """Draw one reading with exactly one sensor in its WARNING band.

    The affected sensor is picked uniformly at random; the other three
    values are drawn from the same SAFE bands used by ``safe_scenario``.

    WARNING bands:
        ppm          [300, 600)
        turbidity    [400, 700)
        ph           [5.5, 6.5) or [8.5, 9.5)
        temperature  [15, 20)   or [28, 32)

    Returns:
        tuple: ``(ppm, turbidity, ph, temperature)``.
    """
    affected = np.random.choice(["ppm", "turbidity", "ph", "temperature"])

    # Start from an all-SAFE reading; insertion order is also the return order.
    reading = {
        "ppm": np.random.uniform(50, 250),
        "turbidity": np.random.uniform(700, 1000),
        "ph": np.random.uniform(6.8, 8.2),
        "temperature": np.random.uniform(22, 26),
    }

    # Overwrite only the affected sensor with a WARNING-band value.
    if affected == "ppm":
        reading["ppm"] = np.random.uniform(300, 600)
    elif affected == "turbidity":
        reading["turbidity"] = np.random.uniform(400, 700)
    elif affected == "ph":
        # Two-sided band: draw one candidate per side, then keep one of them.
        acidic = np.random.uniform(5.5, 6.5)
        alkaline = np.random.uniform(8.5, 9.5)
        reading["ph"] = np.random.choice([acidic, alkaline])
    elif affected == "temperature":
        cool = np.random.uniform(15, 20)
        warm = np.random.uniform(28, 32)
        reading["temperature"] = np.random.choice([cool, warm])

    return tuple(reading.values())

# ---------- DANGEROUS ----------
def dangerous_scenario():
    """Draw one reading with exactly one sensor in its DANGEROUS band.

    The affected sensor is picked uniformly at random; the remaining three
    values are drawn from the SAFE bands.

    DANGEROUS bands:
        ppm          [600, 1000)
        turbidity    [0, 400)
        ph           [0, 5.4)  or [9.6, 14)
        temperature  [0, 14)   or [32, 150)

    Returns:
        tuple: ``(ppm, turbidity, ph, temperature)``.
    """
    PPM, TURBIDITY, PH, TEMPERATURE = range(4)  # positions in the result

    target = np.random.choice(["ppm", "turbidity", "ph", "temperature"])

    # All-SAFE baseline, drawn in result order.
    values = [
        np.random.uniform(50, 250),
        np.random.uniform(700, 1000),
        np.random.uniform(6.8, 8.2),
        np.random.uniform(22, 26),
    ]

    # Replace only the targeted sensor with a DANGEROUS-band value.
    if target == "ppm":
        values[PPM] = np.random.uniform(600, 1000)
    elif target == "turbidity":
        # Inverted sensor: the low end of the scale is the dangerous one.
        values[TURBIDITY] = np.random.uniform(0, 400)
    elif target == "ph":
        # Two-sided band: one candidate per side is drawn, then one is kept.
        candidates = [np.random.uniform(0, 5.4), np.random.uniform(9.6, 14)]
        values[PH] = np.random.choice(candidates)
    elif target == "temperature":
        candidates = [np.random.uniform(0, 14), np.random.uniform(32, 150)]
        values[TEMPERATURE] = np.random.choice(candidates)

    return tuple(values)

# =========================================================
#           DATASET GENERATION (BALANCED)
# =========================================================
def generate_dataset(
    samples_per_class=100_000,
    filename="fish_water_balanced_dataset.csv",
    seed=None,
):
    """Generate a class-balanced synthetic water-quality dataset and save it as CSV.

    For each of ``samples_per_class`` iterations one SAFE, one WARNING and one
    DANGEROUS row is produced, so the result has ``3 * samples_per_class``
    rows. Gaussian noise is then added to every sensor column, the values are
    clipped to the absolute sensor ranges, the rows are shuffled and the frame
    is written to ``filename``.

    NOTE: noise is added *after* labelling, so a reading drawn close to a band
    edge (e.g. WARNING turbidity just below 700) can end up slightly inside a
    neighbouring class's nominal band.

    Args:
        samples_per_class: Number of rows to generate for each label.
        filename: Path of the CSV file to write (no index column).
        seed: Optional seed for NumPy's global RNG. The scenario helpers and
            the noise both draw from that global RNG, so passing a seed makes
            the generated values repeatable. ``None`` (default) leaves the
            RNG state untouched, which is the previous behaviour.

    Returns:
        pandas.DataFrame: The shuffled dataset that was written to disk, with
        columns ``ppm``, ``turbidity``, ``ph``, ``temperature``, ``label``.
    """
    # Only the shuffle below had a fixed seed; without this the sensor values
    # themselves differ on every run.
    if seed is not None:
        np.random.seed(seed)

    data = []

    # One row per class per iteration keeps the classes exactly balanced.
    for _ in range(samples_per_class):
        data.append([*safe_scenario(), "SAFE"])
        data.append([*warning_scenario(), "WARNING"])
        data.append([*dangerous_scenario(), "DANGEROUS"])

    df = pd.DataFrame(
        data,
        columns=["ppm", "turbidity", "ph", "temperature", "label"]
    )

    # -----------------------------------------------------
    # Add zero-mean Gaussian sensor noise (per-column std dev)
    # -----------------------------------------------------
    df["ppm"] += np.random.normal(0, 2, len(df))
    df["turbidity"] += np.random.normal(0, 5, len(df))
    df["ph"] += np.random.normal(0, 0.02, len(df))
    df["temperature"] += np.random.normal(0, 0.1, len(df))

    # Clip to absolute limits so noise cannot push a value off the sensor scale
    df["ppm"] = df["ppm"].clip(*PPM_RANGE)
    df["turbidity"] = df["turbidity"].clip(*TURB_RANGE)
    df["ph"] = df["ph"].clip(*PH_RANGE)
    df["temperature"] = df["temperature"].clip(*TEMP_RANGE)

    # Shuffle (fixed random_state, so the permutation itself is repeatable)
    df = df.sample(frac=1, random_state=42).reset_index(drop=True)

    # Save
    df.to_csv(filename, index=False)

    print(f"\n✅ Dataset generated: {filename}")
    print("Class distribution:")
    print(df["label"].value_counts())

    # Returned so callers can use the data without re-reading the CSV.
    return df


# When executed as the main module, build and save the dataset:
# 3 classes x 100_000 rows each, written to generate_dataset()'s default file.
if __name__ == "__main__":
    generate_dataset(samples_per_class=100_000)


KeyboardInterrupt: 

In [None]:
pip install numpy pandas

In [None]:
!pip install -q pandas scikit-learn joblib


In [None]:
import pandas as pd

# Load the CSV under generate_dataset()'s default filename and preview the
# first rows.
df = pd.read_csv("fish_water_balanced_dataset.csv")
df.head()


In [None]:
# Features are the four sensor columns; the target is the class label.
# NOTE(review): "JSON" below presumably refers to a payload consumed outside
# this notebook whose keys must match these column names — confirm.
X = df[["ppm", "turbidity", "ph", "temperature"]]   # ✅ same names as JSON
y = df["label"]

from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing, stratified on the label, with a fixed
# random_state so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42,
    stratify=y
)


In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random forest with 400 trees, no depth limit, and a fixed random_state.
# n_jobs=-1 asks scikit-learn to parallelise across all available cores.
model = RandomForestClassifier(
    n_estimators=400,
    max_depth=None,
    random_state=42,
    n_jobs=-1
)

# Fit on the training split only; X_test / y_test stay unseen until evaluation.
model.fit(X_train, y_train)
print("✅ Model trained successfully")


In [None]:
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

# Predict labels for the held-out test split.
y_pred = model.predict(X_test)

# Report overall accuracy, the per-class classification report, and the
# confusion matrix for the test split.
print("✅ Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))


In [None]:
import joblib

# Persist the fitted classifier to disk with joblib.
joblib.dump(model, "fish_classifier.pkl")
print("✅ Saved model: fish_classifier.pkl")
