In [3]:
pip install pandas numpy scikit-learn tensorflow


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m26.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [5]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, LSTM, Dense, Dropout

# -----------------------------
# 1. Load dataset
# -----------------------------
df = pd.read_csv("water_potability.csv")

# -----------------------------
# 2. Separate features & target
# -----------------------------
X = df.drop("Potability", axis=1)
y = df["Potability"]

# -----------------------------
# 3. Handle missing values
# -----------------------------
imputer = SimpleImputer(strategy="mean")
X = imputer.fit_transform(X)

# -----------------------------
# 4. Normalize
# -----------------------------
scaler = StandardScaler()
X = scaler.fit_transform(X)

# -----------------------------
# 5. Convert to time-series windows
# (important for CNN-LSTM)
# -----------------------------
WINDOW = 5   # 5 consecutive readings = 1 sample

X_seq = []
y_seq = []

for i in range(len(X) - WINDOW):
    X_seq.append(X[i:i+WINDOW])
    y_seq.append(y[i+WINDOW])

X_seq = np.array(X_seq)
y_seq = np.array(y_seq)

# shape -> (samples, timesteps, features)
print(X_seq.shape)

# -----------------------------
# 6. Train test split
# -----------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X_seq, y_seq, test_size=0.2, random_state=42
)

# -----------------------------
# 7. CNN-LSTM model
# -----------------------------
model = Sequential([
    Conv1D(64, 2, activation='relu', input_shape=(WINDOW, X_seq.shape[2])),
    MaxPooling1D(2),

    LSTM(64, return_sequences=False),

    Dropout(0.3),

    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')
])

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

# -----------------------------
# 8. Train
# -----------------------------
model.fit(
    X_train,
    y_train,
    epochs=40,
    batch_size=32,
    validation_split=0.2
)

# -----------------------------
# 9. Evaluate
# -----------------------------
loss, acc = model.evaluate(X_test, y_test)
print("Test Accuracy:", acc)

# -----------------------------
# 10. Save model
# -----------------------------
model.save("cnn_lstm_water_model.h5")

(3271, 5, 9)
Epoch 1/40


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5953 - loss: 0.6721 - val_accuracy: 0.6775 - val_loss: 0.6419
Epoch 2/40
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6974 - loss: 0.6227 - val_accuracy: 0.7443 - val_loss: 0.5339
Epoch 3/40
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7571 - loss: 0.5111 - val_accuracy: 0.7805 - val_loss: 0.4867
Epoch 4/40
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8001 - loss: 0.4270 - val_accuracy: 0.7748 - val_loss: 0.4859
Epoch 5/40
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8224 - loss: 0.4029 - val_accuracy: 0.7920 - val_loss: 0.4487
Epoch 6/40
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8378 - loss: 0.3609 - val_accuracy: 0.7901 - val_loss: 0.4337
Epoch 7/40
[1m66/66[0m [32m━━━━━━━━━━━━━━━━━━━━



Test Accuracy: 0.7984732985496521


In [None]:
import numpy as np
import tensorflow as tf

# load trained model
model = tf.keras.models.load_model("cnn_lstm_water_model.h5")

# -------------------------------------------------
# MANUAL POTABLE SENSOR READINGS (5 timesteps × 9)
# -------------------------------------------------
sample = np.array([
    [7.2,120,320,6.5,180,280,6.0,35,1.2],
    
])

# ---------- remove NaN if any ----------
col_mean = np.nanmean(sample, axis=0)
inds = np.where(np.isnan(sample))
sample[inds] = np.take(col_mean, inds[1])

# ---------- safe normalization ----------
mean = sample.mean(axis=0)
std = sample.std(axis=0)
std[std == 0] = 1

sample = (sample - mean) / std

# reshape for CNN-LSTM
sample = sample.reshape(1, 5, 9)

# predict
p = model.predict(sample)[0][0]


if p < 0.30:
    status = "UNSAFE"
elif p <= 0.70:
    status = "MODERATE"
else:
    status = "SAFE"

print("Probability:", p)
print("Status:", status)



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
Probability: 0.9997874
Status: SAFE
