In [1]:
!pip install numpy pandas scikit-learn tensorflow joblib



[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m26.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [2]:
import numpy as np
import pandas as pd
import joblib
import tensorflow as tf

from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split

In [3]:
df = pd.read_csv("pond_dataset.csv")

print(df.head())
print(df.columns)

       id     created_date  water_pH  TDS  water_temp
0  181740  1/26/2023 10:38      7.94  329       23.69
1  181741  1/26/2023 10:38      7.79  329       23.69
2  181747  1/26/2023 10:39      5.98  233       23.75
3  181748  1/26/2023 10:39      7.32  233       23.75
4  181754  1/26/2023 10:39      7.90  233       23.75
Index(['id', 'created_date', 'water_pH', 'TDS', 'water_temp'], dtype='str')


In [6]:
# normalize column names
df.columns = df.columns.str.lower()

# rename for simplicity
df = df.rename(columns={
    "water_ph": "ph",
    "tds": "tds",
    "water_temp": "temp"
})

In [7]:
df["created_date"] = pd.to_datetime(df["created_date"])
df = df.sort_values("created_date")

# =========================
# features
# =========================
X = df[["ph", "tds", "temp"]].values

# =========================
# create labels (safe/unsafe)
# =========================
def create_label(row):
    if 6.5 <= row[0] <= 8.5 and row[1] <= 500 and 24 <= row[2] <= 27:
        return 1
    return 0

y = np.array([create_label(r) for r in X])

print("Safe:", sum(y))
print("Unsafe:", len(y)-sum(y))

Safe: 118286
Unsafe: 387444


In [14]:
# ================================
# IMPORTS
# ================================
import numpy as np
import pandas as pd
import joblib

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.utils import class_weight

import tensorflow as tf
from tensorflow.keras import Sequential, Input
from tensorflow.keras.layers import Conv1D, MaxPooling1D, LSTM, Dense, Dropout


# ================================
# LOAD DATASET
# ================================
df = pd.read_csv("pond_dataset.csv")


# ================================
# SELECT FEATURES
# (based on your dataset columns)
# ================================
X = df[["water_pH", "TDS", "water_temp"]].values


# ================================
# CREATE LABELS (rule based)
# ================================
def create_label(row):
    if (
        6.5 <= row[0] <= 8.5 and
        row[1] <= 500 and
        24 <= row[2] <= 27
    ):
        return 1
    return 0

y = np.array([create_label(r) for r in X])


# ================================
# CLEANING
# ================================
imputer = SimpleImputer()
scaler = StandardScaler()

X = imputer.fit_transform(X)
X = scaler.fit_transform(X)


# ================================
# CREATE SEQUENCES
# ================================
WINDOW = 20

X_seq = []
y_seq = []

for i in range(len(X) - WINDOW):
    X_seq.append(X[i:i+WINDOW])
    y_seq.append(y[i+WINDOW])

X_seq = np.array(X_seq)
y_seq = np.array(y_seq)

print("Shape:", X_seq.shape)


# ================================
# SPLIT (STRATIFIED)
# ================================
X_train, X_test, y_train, y_test = train_test_split(
    X_seq,
    y_seq,
    test_size=0.2,
    stratify=y_seq,
    shuffle=True,
    random_state=42
)


# ================================
# CLASS WEIGHTS
# ================================
weights = class_weight.compute_class_weight(
    class_weight="balanced",
    classes=np.unique(y_train),
    y=y_train
)

class_weights = {0: weights[0], 1: weights[1]}
print("Class weights:", class_weights)


# ================================
# CNN + LSTM MODEL
# ================================
model = Sequential([
    Input(shape=(WINDOW, 3)),

    Conv1D(32, kernel_size=3, activation="relu"),
    MaxPooling1D(2),

    LSTM(32),

    Dropout(0.3),

    Dense(16, activation="relu"),
    Dense(1, activation="sigmoid")
])


model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"]
)

model.summary()


# ================================
# TRAIN
# ================================
model.fit(
    X_train,
    y_train,
    epochs=15,
    batch_size=128,
    validation_split=0.2,
    class_weight=class_weights
)


# ================================
# EVALUATE
# ================================
loss, acc = model.evaluate(X_test, y_test)
print("Test accuracy:", acc)


# ================================
# SAVE
# ================================
model.save("pond_cnn_lstm_model.keras")

joblib.dump(scaler, "pond_scaler.pkl")
joblib.dump(imputer, "pond_imputer.pkl")

Shape: (505710, 20, 3)
Class weights: {0: np.float64(0.6526448240816919), 1: np.float64(2.137788909673124)}


Epoch 1/15
[1m2529/2529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 3ms/step - accuracy: 0.8163 - loss: 0.3094 - val_accuracy: 0.8177 - val_loss: 0.3372
Epoch 2/15
[1m2529/2529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3ms/step - accuracy: 0.8209 - loss: 0.2941 - val_accuracy: 0.8335 - val_loss: 0.3135
Epoch 3/15
[1m2529/2529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3ms/step - accuracy: 0.8219 - loss: 0.2923 - val_accuracy: 0.8190 - val_loss: 0.3162
Epoch 4/15
[1m2529/2529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3ms/step - accuracy: 0.8229 - loss: 0.2910 - val_accuracy: 0.8293 - val_loss: 0.3171
Epoch 5/15
[1m2529/2529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3ms/step - accuracy: 0.8238 - loss: 0.2899 - val_accuracy: 0.8097 - val_loss: 0.3364
Epoch 6/15
[1m2529/2529[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3ms/step - accuracy: 0.8247 - loss: 0.2892 - val_accuracy: 0.8274 - val_loss: 0.3235
Epoch 7/15
[1m2

['pond_imputer.pkl']

In [15]:
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np

y_pred = (model.predict(X_test) > 0.5).astype(int)

print("Confusion Matrix")
print(confusion_matrix(y_test, y_pred))

print("\nClassification Report")
print(classification_report(y_test, y_pred))

[1m3161/3161[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 435us/step
Confusion Matrix
[[59861 17625]
 [  453 23203]]

Classification Report
              precision    recall  f1-score   support

           0       0.99      0.77      0.87     77486
           1       0.57      0.98      0.72     23656

    accuracy                           0.82    101142
   macro avg       0.78      0.88      0.79    101142
weighted avg       0.89      0.82      0.83    101142

