In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import tensorflow as tf
from tensorflow.keras import layers, models

# --- Data loading -------------------------------------------------------------
df = pd.read_csv("/content/water_system_dataset.csv")

# Turn the textual 'Anomaly' categories into integer ids, kept in a new column
# so the original text column stays available.
le = LabelEncoder()
df['Anomaly_Label'] = le.fit_transform(df['Anomaly'])

# Show which integer id was assigned to each category.
print("Label Mapping:", dict(zip(le.classes_, le.transform(le.classes_))))

# --- Features and target ------------------------------------------------------
features = ['Pressure', 'Flow_Rate', 'Water_Quality', 'Temperature']
X, y = df[features], df['Anomaly_Label']

# 80/20 hold-out split with a fixed seed; stratify=y is passed so the split is
# stratified on the encoded label.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Standardise the features. The scaler is fitted on the training part only and
# the same fitted transformation is then applied to both parts.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# One output unit will be needed per encoded category.
num_classes = len(le.classes_)
print("Number of classes:", num_classes)

# Seed the Python, NumPy and TensorFlow random generators before any layer is
# created, so weight initialisation and batch shuffling (and therefore the
# metrics printed below) repeat from one run of this cell to the next.
tf.keras.utils.set_random_seed(42)

# Build a simple feed-forward network for multi-class classification:
# one input per feature column, two small ReLU hidden layers, and one softmax
# output unit per encoded class.
model = models.Sequential([
    layers.Input(shape=(X_train_scaled.shape[1],)),
    layers.Dense(16, activation='relu'),
    layers.Dense(8, activation='relu'),
    layers.Dense(num_classes, activation='softmax')  # softmax for multi-class classification
])

# Compile with sparse_categorical_crossentropy: the targets passed to fit()
# below are the integer ids produced by the LabelEncoder, not one-hot vectors.
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Display the model architecture
model.summary()

# Train the model; 10% of the training rows are held back by Keras for
# per-epoch validation metrics.
history = model.fit(
    X_train_scaled,
    y_train,
    epochs=20,          # Adjust the number of epochs as needed
    batch_size=16,      # Experiment with different batch sizes
    validation_split=0.1
)

# Evaluate the model on the held-out test set
loss, accuracy = model.evaluate(X_test_scaled, y_test)
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)


Label Mapping: {'Normal': np.int64(0), 'Pipe Leak': np.int64(1), 'Temperature Anomaly': np.int64(2), 'Water Quality Issue': np.int64(3)}
Number of classes: 4


Epoch 1/20
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.6885 - loss: 1.2805 - val_accuracy: 0.8625 - val_loss: 1.0709
Epoch 2/20
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8698 - loss: 0.9815 - val_accuracy: 0.8625 - val_loss: 0.7245
Epoch 3/20
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8508 - loss: 0.6433 - val_accuracy: 0.8500 - val_loss: 0.4741
Epoch 4/20
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8589 - loss: 0.4183 - val_accuracy: 0.8500 - val_loss: 0.3442
Epoch 5/20
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8645 - loss: 0.3231 - val_accuracy: 0.8750 - val_loss: 0.2760
Epoch 6/20
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8863 - loss: 0.2418 - val_accuracy: 0.8750 - val_loss: 0.2333
Epoch 7/20
[1m45/45[0m [32m━━━━━━━━━━

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler, LabelEncoder
import tensorflow as tf
from tensorflow.keras import layers, models, regularizers
from tensorflow.keras.callbacks import EarlyStopping

# -------------------------
# 1. Load and Preprocess Data
# -------------------------
# Read the raw CSV into a DataFrame.
df = pd.read_csv("/content/water_system_dataset.csv")

# Integer-encode the 'Anomaly' text column and report the resulting mapping.
le = LabelEncoder()
df['Anomaly_Label'] = le.fit_transform(df['Anomaly'])
print("Label Mapping:", dict(zip(le.classes_, le.transform(le.classes_))))

# Model inputs are the four sensor columns; the target is the encoded label.
features = ['Pressure', 'Flow_Rate', 'Water_Quality', 'Temperature']
y = df['Anomaly_Label']
X = df[features]

# Hold out 20% of the rows for testing (stratified split to maintain class distribution).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.2,
    random_state=42,
)

# Fit the scaler on the training rows only, then apply that same fitted
# transformation to the training and test rows.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Count of distinct encoded classes.
num_classes = len(le.classes_)
print("Number of classes:", num_classes)

# -------------------------
# 2. Define a Function to Build the Model
# -------------------------
def build_model(input_shape, num_classes, l2_reg=0.001, dropout_rate=0.5,
                hidden_units=(16, 8)):
    """Build and compile a small fully connected softmax classifier.

    The network is an input layer, then for every entry of ``hidden_units`` a
    ReLU ``Dense`` layer with L2 kernel regularisation followed by a
    ``Dropout`` layer, and finally a softmax ``Dense`` layer with
    ``num_classes`` units.

    Args:
        input_shape: Shape tuple of one sample, e.g. ``(n_features,)``.
        num_classes: Number of units in the softmax output layer.
        l2_reg: L2 factor applied to the kernel of every hidden Dense layer.
        dropout_rate: Rate passed to the Dropout layer after each hidden layer.
        hidden_units: Widths of the hidden Dense layers, in order. The default
            ``(16, 8)`` reproduces the original fixed architecture.

    Returns:
        A ``Sequential`` model compiled with the ``adam`` optimizer,
        ``sparse_categorical_crossentropy`` loss and the ``accuracy`` metric.
    """
    stack = [layers.Input(shape=input_shape)]
    for width in hidden_units:
        # A new regularizer object is created per layer, as in the original.
        stack.append(layers.Dense(width, activation='relu',
                                  kernel_regularizer=regularizers.l2(l2_reg)))
        stack.append(layers.Dropout(dropout_rate))
    stack.append(layers.Dense(num_classes, activation='softmax'))

    model = models.Sequential(stack)
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model

# -------------------------
# 3. Train the Model with Early Stopping
# -------------------------
# Seed the Python, NumPy and TensorFlow random generators before the model is
# built, so weight initialisation, dropout masks and batch shuffling repeat
# from run to run and the reported metrics are reproducible.
tf.keras.utils.set_random_seed(42)

# Early stopping watches the validation loss, allows 3 epochs without
# improvement, and restores the best weights seen when it stops training.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Build the model; the input width is the number of scaled feature columns.
model = build_model(input_shape=(X_train_scaled.shape[1],), num_classes=num_classes)

# Train the model, holding back 10% of the training rows as validation data
# for the early-stopping callback.
history = model.fit(
    X_train_scaled,
    y_train,
    epochs=50,          # Start with a higher number and let early stopping decide when to stop
    batch_size=16,
    validation_split=0.1,
    callbacks=[early_stopping],
    verbose=1
)

# Evaluate the model on the held-out test set
loss, accuracy = model.evaluate(X_test_scaled, y_test)
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)

# -------------------------
# 4. Optional: Cross-Validation for Further Assessment
# -------------------------
# (This step is optional but useful for getting an average estimate of generalization performance.)
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
cv_accuracies = []

# The folds are taken from the *unscaled* training frame. Slicing the already
# scaled array would let every validation fold contribute to the mean/std used
# to scale its own training data, which leaks information into the estimate.
for train_index, val_index in skf.split(X_train, y_train):
    y_fold_train, y_fold_val = y_train.iloc[train_index], y_train.iloc[val_index]

    # Fit a scaler on this fold's training rows only, then apply it to both parts.
    fold_scaler = StandardScaler()
    X_fold_train = fold_scaler.fit_transform(X_train.iloc[train_index])
    X_fold_val = fold_scaler.transform(X_train.iloc[val_index])

    fold_model = build_model(input_shape=(X_fold_train.shape[1],), num_classes=num_classes)

    # Use a fresh callback per fold so no stopping state is shared between fits.
    fold_early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

    # Early stopping monitors an inner 10% split of the fold's training rows
    # (the same protocol as the main training run). The validation fold is
    # therefore never used to choose the weights that are scored on it.
    fold_model.fit(X_fold_train, y_fold_train,
                   epochs=50,
                   batch_size=16,
                   validation_split=0.1,
                   callbacks=[fold_early_stopping],
                   verbose=0)

    loss_fold, acc_fold = fold_model.evaluate(X_fold_val, y_fold_val, verbose=0)
    cv_accuracies.append(acc_fold)

# Report the spread as well as the mean so fold-to-fold variation is visible.
print("Cross-Validation Accuracy:", np.mean(cv_accuracies))
print("Cross-Validation Accuracy Std:", np.std(cv_accuracies))

# -------------------------
# 5. Save the Model for Download
# -------------------------
from IPython.display import FileLink

# Write the trained model to an HDF5 file in the current working directory.
model.save("water_system_model.h5")
print("Model saved as water_system_model.h5")

# In a Jupyter Notebook this trailing expression is rendered as a download link.
FileLink("water_system_model.h5")


Label Mapping: {'Normal': np.int64(0), 'Pipe Leak': np.int64(1), 'Temperature Anomaly': np.int64(2), 'Water Quality Issue': np.int64(3)}
Number of classes: 4
Epoch 1/50
[1m900/900[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.7301 - loss: 0.9729 - val_accuracy: 0.9894 - val_loss: 0.1167
Epoch 2/50
[1m900/900[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9101 - loss: 0.3642 - val_accuracy: 0.9969 - val_loss: 0.0692
Epoch 3/50
[1m900/900[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.9248 - loss: 0.3031 - val_accuracy: 0.9969 - val_loss: 0.0569
Epoch 4/50
[1m900/900[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9334 - loss: 0.2573 - val_accuracy: 0.9987 - val_loss: 0.0480
Epoch 5/50
[1m900/900[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9350 - loss: 0.2425 - val_accuracy: 0.9994 - val_loss: 0.0444
Epoch 6/50
[1m900/900[0m [32m



Cross-Validation Accuracy: 1.0
Model saved as water_system_model.h5
