In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import set_random_seed

# Load dataset
df = pd.read_csv("northeast_water_disease_500plus.csv")

# Features and target
features = ["pH", "Turbidity", "Temperature", "Conductivity", "Symptom_Count", "Disease_Cases"]
X = df[features].values
y = df["Risk"].values

# Split data BEFORE scaling. Fitting the scaler on the full matrix would let
# the held-out rows influence the mean/std used for training (data leakage)
# and make the test metrics optimistic. The row assignment is the same as
# before because the split depends only on the row count and random_state.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale features: learn mean/std from the training rows only, then apply that
# same transform to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Fully scaled matrix, kept so any later code that refers to X_scaled still
# works; it is not used for training or evaluation.
X_scaled = scaler.transform(X)

# Seed the Python, NumPy and TensorFlow RNGs so weight initialisation, dropout
# masks and batch shuffling repeat from run to run; train_test_split is
# already seeded with random_state=42, but the network was not.
set_random_seed(42)

# Model (silent training, minimal epochs for clarity)
# Small fully connected binary classifier: 32 -> 16 -> 8 -> 1 (sigmoid), with
# dropout after the first two hidden layers.
model = Sequential([
    # Declare the input with an explicit Input layer; passing `input_shape=`
    # to the first Dense layer of a Sequential model triggers a Keras
    # UserWarning.
    Input(shape=(X_train.shape[1],)),
    Dense(32, activation='relu'),
    Dropout(0.3),
    Dense(16, activation='relu'),
    Dropout(0.2),
    Dense(8, activation='relu'),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# validation_split=0.1 holds part of the training data out for per-epoch validation.
model.fit(X_train, y_train, epochs=40, batch_size=32, validation_split=0.1, verbose=0)  # verbose=0 for no epoch output

# Evaluation (print only required outputs)
loss, accuracy = model.evaluate(X_test, y_test, verbose=0)

# Run inference once and reuse the predicted probabilities for both the
# thresholded class labels and the ROC AUC, instead of predicting twice on
# the same data.
y_prob = model.predict(X_test, verbose=0)
y_pred = (y_prob > 0.5).astype('int32')  # 0.5 decision threshold on the sigmoid output

print(f"Test Accuracy: {accuracy*100:.2f}%")
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))
print("Classification Report:")
print(classification_report(y_test, y_pred))
# ROC AUC is computed from the raw probabilities, not the thresholded labels.
print("ROC AUC Score:", roc_auc_score(y_test, y_prob))

# Example prediction (west sikkim)
# One row with six values, passed through the same fitted scaler as the
# training data; presumably in the same column order as `features`.
sample = np.array([[7.5, 9.1, 28.5, 520, 13, 8]])  # Replace with any valid input
sample_scaled = scaler.transform(sample)
pred = model.predict(sample_scaled, verbose=0)

# The model emits a single sigmoid output per row; apply the 0.5 cut-off to it.
risk_probability = pred[0][0]
if risk_probability > 0.5:
    verdict = "Risk"
else:
    verdict = "Safe"
print("Sample prediction (west sikkim):", verdict)



  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Test Accuracy: 88.28%
Confusion Matrix:
[[53  6]
 [ 9 60]]
Classification Report:
              precision    recall  f1-score   support

           0       0.85      0.90      0.88        59
           1       0.91      0.87      0.89        69

    accuracy                           0.88       128
   macro avg       0.88      0.88      0.88       128
weighted avg       0.88      0.88      0.88       128

ROC AUC Score: 0.9358879882092852
Sample prediction (west sikkim): Risk
