In [None]:
import tensorflow as tf
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
import shap

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation

In [None]:
# Read the training table from the Colab working directory and preview the
# first five rows as plain text.
data = pd.read_csv('/content/training_data.csv')
print(data.head(n=5))

     PH     TH    CA      MG  CHLORIDE  SULPHATE  NITRATE  FLUORIDE      TDS  \
0  8.34  130.0  22.0  18.225    17.725      3.21     3.63      0.37  217.748   
1  8.46  120.0  14.0  20.655    38.995     46.22     2.46      0.24  360.745   
2  8.11  160.0  12.0  31.590    17.725     30.46     0.00      0.96  239.246   
3  7.89  200.0  22.0  35.235    17.725     13.34     0.00      1.02  253.220   
4  8.01  125.0  12.0  23.085    14.180     13.37     0.00      0.74  185.361   

   Health_Status  
0              1  
1              1  
2              0  
3              0  
4              1  


In [None]:
# Pull the label column out as the target and keep every remaining column
# as a predictor.
y = data['Health_Status']
X = data.drop(columns='Health_Status')


In [None]:
# Split once into a held-out test set (20%) and a train+validation pool, then
# carve the validation set out of that pool: 0.25 of the remaining 80% yields
# a 60/20/20 train/val/test partition. Both splits are stratified on the label
# and seeded so the partition is reproducible.
#
# The intermediate pool has its own name (X_trainval / y_trainval) rather than
# being written to X_train / y_train and then overwritten by the second split.
# With the overwrite, re-running the second split on its own would shrink the
# training set again on every execution; this version is idempotent.
X_trainval, X_test, y_trainval, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)
X_train, X_val, y_train, y_val = train_test_split(
    X_trainval, y_trainval, test_size=0.25, random_state=42, stratify=y_trainval
)


In [None]:
# Report the shape of each split: the three feature frames first, then the
# three label series, in train / val / test order.
for split in (X_train, X_val, X_test, y_train, y_val, y_test):
    print(split.shape)

(11466, 9)
(3822, 9)
(3822, 9)
(11466,)
(3822,)
(3822,)


In [None]:
# Seed Python, NumPy and TensorFlow before any weights are created so that
# weight initialisation and the rest of the run are repeatable.
tf.keras.utils.set_random_seed(42)

# Fully connected classifier: four ReLU hidden layers narrowing
# 64 -> 32 -> 16 -> 8, followed by a single sigmoid output unit.
#
# The input width is declared with an explicit Input layer and read from the
# training frame instead of passing `input_shape=(9,)` to the first Dense
# layer. Passing `input_shape` to a layer inside a Sequential model is what
# makes recent Keras versions emit a UserWarning when the model is built.
model = Sequential([
    tf.keras.Input(shape=(X_train.shape[1],)),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(16, activation='relu'),
    Dense(8, activation='relu'),
    Dense(1, activation='sigmoid')
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# Training configuration: Adam optimiser, binary cross-entropy loss, and
# accuracy as the only reported metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Stop training after 5 epochs without improvement in the callback's monitored
# quantity (left at its default) and roll the model back to its best weights.
early_stopping = tf.keras.callbacks.EarlyStopping(
    patience=5,
    restore_best_weights=True,
)

# Train for at most 100 epochs in mini-batches of 32, validating on the
# validation split after every epoch.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    batch_size=32,
    epochs=100,
    callbacks=[early_stopping],
)


Epoch 1/100
[1m359/359[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.7806 - loss: 0.6837 - val_accuracy: 0.8773 - val_loss: 0.2972
Epoch 2/100
[1m359/359[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8883 - loss: 0.2497 - val_accuracy: 0.9021 - val_loss: 0.2410
Epoch 3/100
[1m359/359[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9073 - loss: 0.2137 - val_accuracy: 0.9137 - val_loss: 0.2193
Epoch 4/100
[1m359/359[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9211 - loss: 0.1916 - val_accuracy: 0.9349 - val_loss: 0.1789
Epoch 5/100
[1m359/359[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9155 - loss: 0.2059 - val_accuracy: 0.9364 - val_loss: 0.1836
Epoch 6/100
[1m359/359[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9293 - loss: 0.1718 - val_accuracy: 0.9032 - val_loss: 0.2178
Epoch 7/100
[1m359/35

In [None]:
# Score the trained model on the held-out test split. The result unpacks into
# the loss followed by the single compiled metric (accuracy).
test_metrics = model.evaluate(X_test, y_test, verbose=1)
test_loss, test_accuracy = test_metrics
print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")

[1m120/120[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9450 - loss: 0.1341
Test Loss: 0.13524885475635529
Test Accuracy: 0.9463631510734558


In [None]:
# Sample the SHAP background set: `background_size` distinct training rows,
# returned as a plain NumPy array.
#
# A dedicated, seeded Generator is used instead of the unseeded global
# np.random.choice, so the same rows are selected on every run and the SHAP
# values computed from them are reproducible.
background_size = 100
background_rng = np.random.default_rng(42)
random_indices = background_rng.choice(
    X_train.shape[0], size=background_size, replace=False
)
background = X_train.iloc[random_indices].values


In [None]:
# Initialize the DeepExplainer for the trained model, using the sampled
# training rows in `background` as the reference data set.
explainer = shap.DeepExplainer(model, background)




In [None]:
# Convert the test features from a DataFrame to a NumPy array, the same form
# the background set was given in.
X_test_np = X_test.values

In [None]:
# Compute SHAP values for every row of the test set (passed as the NumPy
# array X_test_np). In the recorded run the result had shape (3822, 9, 1).
shap_values = explainer.shap_values(X_test_np)


In [None]:
# Sanity-check the attribution array. The recorded run printed (3822, 9, 1):
# samples x features x a trailing axis of length 1 (presumably one entry per
# model output) -- not a plain 2-D (num_samples, num_features) array.
print("SHAP values shape:", shap_values.shape)


SHAP values shape: (3822, 9, 1)


In [None]:
# Inspect the SHAP attributions of the first test sample: one value per input
# feature.
print(shap_values[0])

[[ 0.00254369]
 [-0.34711566]
 [-0.01161325]
 [ 0.00525697]
 [ 0.01854808]
 [-0.00386402]
 [ 0.00235991]
 [-0.00185822]
 [ 0.091455  ]]


In [None]:
# Global importance per feature: the mean of |SHAP| taken over the sample
# axis (axis 0). Any trailing axes of shap_values are kept as they are.
avg_shap = np.abs(shap_values).mean(axis=0)

In [None]:
# Show the mean absolute SHAP value of each feature (one row per input column).
print(avg_shap)

[[0.01520171]
 [0.27919419]
 [0.01474817]
 [0.02690506]
 [0.01434205]
 [0.00865683]
 [0.0337026 ]
 [0.02548623]
 [0.06027751]]


In [None]:
# Rescale the mean |SHAP| scores so they sum to 1, turning them into each
# feature's share of the total attribution.
normalized_relevance = avg_shap / avg_shap.sum()
print(normalized_relevance)

[[0.03176855]
 [0.58346045]
 [0.03082074]
 [0.05622624]
 [0.02997204]
 [0.01809105]
 [0.07043174]
 [0.05326117]
 [0.12596802]]


In [None]:
# Hard-coded signed relevance scores, nine values in total.
# NOTE(review): presumably LRP output, one score per input feature in the
# column order of X -- the source of these numbers is not shown here; confirm.
lrp_val = [
    -0.15988433,
    0.3479544,
    0.04085304,
    0.01621986,
    -0.04715924,
    -0.03599263,
    -0.04263536,
    0.02462474,
    0.06745085,
]

In [None]:
# Drop the sign of each hard-coded relevance score, then rescale the
# magnitudes so they sum to 1.
lrp_val_abs = np.absolute(lrp_val)
normalized_lrp = lrp_val_abs / lrp_val_abs.sum()
print(normalized_lrp)

[0.20425338 0.44451425 0.05219005 0.02072099 0.06024627 0.04598084
 0.05446698 0.03145828 0.08616895]


In [None]:
# Input feature names in the column order printed by data.head(). The original
# cell held this header as bare text, which is a SyntaxError in a code cell;
# keeping it as a list makes the cell runnable and lets the per-feature score
# arrays be labelled.
feature_names = ['PH', 'TH', 'CA', 'MG', 'CHLORIDE', 'SULPHATE', 'NITRATE', 'FLUORIDE', 'TDS']