
# Training a Neural Network for Water Quality Classification

This notebook demonstrates the process of training a neural network to classify water quality into 'healthy' or 'unhealthy' categories based on specific parameters.

## Steps Covered:
1. Loading and exploring the dataset
2. Preparing the data (separating the feature columns from the `Health_Status` label; note that no missing-value handling, encoding, or scaling is applied in this notebook)
3. Splitting the data into training, validation, and testing sets
4. Building and training a neural network
5. Evaluating the model performance


In [None]:
import tensorflow as tf
import pandas as pd
# train_test_split is used later to create the training, validation, and testing sets
from sklearn.model_selection import train_test_split
import numpy as np

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation

In [None]:
# Read the water-quality CSV into a DataFrame and preview its first five rows.
# NOTE(review): the path is absolute (looks like a Colab working directory) — adjust when running elsewhere.
data = pd.read_csv(filepath_or_buffer='/content/training_data.csv')
print(data.head(n=5))

     PH     TH    CA      MG  CHLORIDE  SULPHATE  NITRATE  FLUORIDE      TDS  \
0  8.34  130.0  22.0  18.225    17.725      3.21     3.63      0.37  217.748   
1  8.46  120.0  14.0  20.655    38.995     46.22     2.46      0.24  360.745   
2  8.11  160.0  12.0  31.590    17.725     30.46     0.00      0.96  239.246   
3  7.89  200.0  22.0  35.235    17.725     13.34     0.00      1.02  253.220   
4  8.01  125.0  12.0  23.085    14.180     13.37     0.00      0.74  185.361   

   Health_Status  
0              1  
1              1  
2              0  
3              0  
4              1  


In [None]:
# Feature matrix: every column except the label.
X = data.drop(columns=['Health_Status'])
# Target vector: the Health_Status label column on its own.
y = data.loc[:, 'Health_Status']


In [None]:
# Split the dataset into training, validation, and testing sets (60% / 20% / 20%).
#
# Both splits live in a single cell and the intermediate train+validation pool
# has its own name, so re-running the cell always starts from X / y instead of
# re-splitting an already reduced X_train (which silently shrank the training
# set on every re-run).
X_trainval, X_test, y_trainval, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# 25% of the remaining 80% equals 20% of the full dataset.
# Stratifying on the labels keeps the class balance the same in every split.
X_train, X_val, y_train, y_val = train_test_split(
    X_trainval, y_trainval, test_size=0.25, random_state=42, stratify=y_trainval
)


In [None]:
# Report the dimensions of each split: the three feature matrices first,
# then the three label vectors, one shape per line.
print(
    *(part.shape for part in (X_train, X_val, X_test, y_train, y_val, y_test)),
    sep="\n",
)

(11466, 9)
(3822, 9)
(3822, 9)
(11466,)
(3822,)
(3822,)


In [None]:
# Define the architecture of the neural network.

# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation (and the shuffling done later during training) is repeatable
# across kernel restarts, matching the fixed random_state used for the splits.
tf.keras.utils.set_random_seed(42)

# Fully connected binary classifier: four ReLU hidden layers (64 -> 32 -> 16 -> 8)
# followed by a single sigmoid unit that outputs a probability for the label.
model = Sequential([
    # An explicit Input layer replaces `input_shape=` on the first Dense layer,
    # which Keras warns about. The width is taken from the training features
    # rather than hard-coded. `model.layers[0]` is still the first Dense layer,
    # because Sequential does not list the input layer in `.layers`.
    tf.keras.Input(shape=(X_train.shape[1],)),
    Dense(64),
    Activation('relu'),
    Dense(32),
    Activation('relu'),
    Dense(16),
    Activation('relu'),
    Dense(8),
    Activation('relu'),
    Dense(1),
    Activation('sigmoid')
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# Write the model to an HDF5 file in the current working directory.
# NOTE(review): in notebook order this runs before `model.compile` and
# `model.fit`, so the file written here holds the freshly initialised
# (untrained) network. Save again after training if the trained weights are
# what should be kept.
model.save('model.h5')



In [None]:
# Configure training: binary cross-entropy matches the single sigmoid output,
# Adam is the optimizer, and accuracy is tracked alongside the loss.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Train the model with the training data and validate on the validation set.
# Early stopping watches the validation loss (named explicitly here; it is also
# the Keras default), halts after 5 epochs without improvement, and asks Keras
# to restore the weights from the best epoch.
history = model.fit(
    X_train, y_train,
    epochs=100,
    batch_size=32,
    validation_data=(X_val, y_val),
    callbacks=[
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=5,
            restore_best_weights=True,
        )
    ]
)

# Persist the trained model. Saving here ensures model.h5 contains the weights
# produced by training rather than whatever was written to the file before
# training started.
model.save('model.h5')


Epoch 1/100
[1m359/359[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - accuracy: 0.7220 - loss: 4.2535 - val_accuracy: 0.8859 - val_loss: 0.2502
Epoch 2/100
[1m359/359[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.8891 - loss: 0.2526 - val_accuracy: 0.8605 - val_loss: 0.3047
Epoch 3/100
[1m359/359[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8935 - loss: 0.2461 - val_accuracy: 0.9194 - val_loss: 0.1949
Epoch 4/100
[1m359/359[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 0.9208 - loss: 0.1912 - val_accuracy: 0.9058 - val_loss: 0.2138
Epoch 5/100
[1m359/359[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.9188 - loss: 0.1937 - val_accuracy: 0.9320 - val_loss: 0.1794
Epoch 6/100
[1m359/359[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9213 - loss: 0.1863 - val_accuracy: 0.9160 - val_loss: 0.1894
Epoch 7/100
[1m359/35

In [None]:
# Score the model on the held-out test split, which took no part in training or
# early stopping, to estimate how well it generalizes. The call's result is
# unpacked into the loss followed by the accuracy metric set at compile time.
test_loss, test_accuracy = model.evaluate(x=X_test, y=y_test, verbose=1)
print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")

[1m120/120[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9346 - loss: 0.1531
Test Loss: 0.15181416273117065
Test Accuracy: 0.9379906058311462


In [None]:
# Collect the parameter arrays of every layer in the model.
# NOTE(review): `weights` is not read again in the visible cells — confirm it
# is still needed.
weights = model.get_weights()



In [None]:
# Take the first array returned by the first layer's get_weights().
# The printed shape is (9, 64): one row per input feature and one column per
# unit of the first Dense layer.
input_weights = model.layers[0].get_weights()[0]
print(input_weights.shape)

(9, 64)


In [None]:
# Per-feature importance score: the mean absolute first-layer weight, averaged
# across the columns of `input_weights` (axis=1), giving one value per row.
# NOTE(review): no feature scaling is visible in this notebook, so weight
# magnitudes also reflect each feature's numeric range — treat these scores as
# a rough heuristic.
feature_importance = np.abs(input_weights).mean(axis=1)
print(feature_importance.shape)

(9,)


In [None]:

# Rescale the scores so they sum to 1, making each one a share of the total.
normalized_importance = feature_importance / feature_importance.sum()
print(normalized_importance)


[0.16951798 0.07292499 0.08530092 0.0764336  0.06886995 0.06573547
 0.07829428 0.31485346 0.06806942]


In [None]:
# Sanity check: the normalized shares should add up to 1.
print(normalized_importance.sum())

1.0
