**CNN-Based**

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Conv1D, Flatten, MaxPooling1D, BatchNormalization
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Load the dataset
file_path = "C:/Users/KIIT/Desktop/AqualQ/src/data/generated_water_quality_data_with_dates.csv"  # Replace with your file path
df = pd.read_csv(file_path)

# Encode target variables
water_quality_mapping = {'Good': 2, 'Needs Treatment': 1, 'Poor': 0}
raw_water_quality = df['Water Quality']
encoded_water_quality = raw_water_quality.map(water_quality_mapping)

# Series.map yields NaN for any label that is not a key of the mapping.
# Fail here with the offending labels instead of training on NaN targets.
unmapped_rows = encoded_water_quality.isna()
if unmapped_rows.any():
    unexpected_labels = sorted(raw_water_quality[unmapped_rows].astype(str).unique())
    raise ValueError(f"Unexpected 'Water Quality' labels: {unexpected_labels}")
df['Water Quality'] = encoded_water_quality.astype(int)

# Features and target
X = df.drop(columns=['Water Quality', 'Drinking Water', 'Date'])
y = df['Water Quality']

# Train-Test Split (80-20)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features for the neural network.
# The scaler is fitted on the training rows only and then applied to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Convert targets to one-hot encoding for multi-class classification.
# Indexing an identity matrix always yields one column per known class, in
# encoded order (0, 1, 2), even when a class is absent from one of the splits.
num_classes = len(water_quality_mapping)
class_identity = np.eye(num_classes, dtype='float32')
y_train_onehot = class_identity[y_train.to_numpy()]
y_test_onehot = class_identity[y_test.to_numpy()]

# Reshape the input data for CNN: (samples, features) -> (samples, features, 1)
X_train_cnn = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test_cnn = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

# Assemble the 1D-CNN classifier layer by layer.
cnn_model = Sequential()

# Convolutional block 1: 32 filters over windows of 3 adjacent features.
cnn_model.add(Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=(X_train_cnn.shape[1], 1)))
cnn_model.add(BatchNormalization())
# NOTE(review): pool_size=1 looks like a pass-through (no downsampling) - confirm this is intended.
cnn_model.add(MaxPooling1D(pool_size=1))
cnn_model.add(Dropout(0.3))

# Convolutional block 2: 64 filters over windows of 2.
cnn_model.add(Conv1D(filters=64, kernel_size=2, activation='relu'))
cnn_model.add(BatchNormalization())
cnn_model.add(MaxPooling1D(pool_size=1))
cnn_model.add(Dropout(0.3))

# Classification head: flatten, one hidden dense layer, then a softmax over
# the 3 classes (Poor, Needs Treatment, Good).
cnn_model.add(Flatten())
cnn_model.add(Dense(128, activation='relu'))
cnn_model.add(Dropout(0.4))
cnn_model.add(Dense(3, activation='softmax'))

# Configure training: Adam optimizer with cross-entropy on one-hot targets.
cnn_model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit the network, holding out 20% of the training arrays for per-epoch validation.
history_cnn = cnn_model.fit(
    X_train_cnn,
    y_train_onehot,
    validation_split=0.2,
    epochs=50,
    batch_size=32,
    verbose=1,
)

# Score the fitted model on the held-out test split.
test_loss_cnn, test_accuracy_cnn = cnn_model.evaluate(
    X_test_cnn,
    y_test_onehot,
    verbose=0,
)
print(f"Test Accuracy (CNN): {test_accuracy_cnn:.2f}")

# Class probabilities for each test row, then the index of the most likely class.
y_pred_quality_onehot_cnn = cnn_model.predict(X_test_cnn)
y_pred_quality_cnn = y_pred_quality_onehot_cnn.argmax(axis=1)
# Derive Drinking Water predictions from Water Quality predictions
def derive_drinking_water(quality_pred):
    """Translate encoded water-quality classes into drinking-water labels.

    Args:
        quality_pred: Iterable of encoded water-quality class values.

    Returns:
        A list, in input order, holding 'Yes' for every element equal to 2
        and 'No' for every other element.
    """
    drinking_labels = []
    for quality in quality_pred:
        if quality == 2:
            drinking_labels.append('Yes')
        else:
            drinking_labels.append('No')
    return drinking_labels

# Drinking-water labels for predictions and ground truth, both produced by the
# same helper so the two sides cannot drift apart.
y_pred_drinking_cnn = derive_drinking_water(y_pred_quality_cnn)
actual_drinking_water = derive_drinking_water(y_test)

# Encoded class ids and their display names, in matching order. Passing the ids
# explicitly keeps the report rows and matrix axes aligned with the names even
# when a class is absent from the test split or the predictions.
class_ids = [0, 1, 2]
class_names = ['Poor', 'Needs Treatment', 'Good']

# Classification Report for Water Quality (CNN)
print("\nWater Quality Classification Report (CNN):")
print(classification_report(y_test, y_pred_quality_cnn, labels=class_ids, target_names=class_names))

# Confusion Matrix for Water Quality (CNN)
conf_matrix_cnn = confusion_matrix(y_test, y_pred_quality_cnn, labels=class_ids)
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix_cnn, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
plt.title("Confusion Matrix for Water Quality Prediction (CNN)")
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.show()

# Accuracy for Drinking Water Prediction (CNN): share of test rows whose derived
# label matches the label derived from the true class.
drinking_water_accuracy_cnn = np.mean(np.array(actual_drinking_water) == np.array(y_pred_drinking_cnn))
print(f"\nDrinking Water Prediction Accuracy (CNN): {drinking_water_accuracy_cnn:.2f}")

# Plot training vs. validation curves, one figure per metric (accuracy, then loss).
# Each spec: (training key, validation key, training legend, validation legend, title, y-axis label)
curve_specs = (
    ('accuracy', 'val_accuracy', 'Training Accuracy', 'Validation Accuracy',
     "Training vs Validation Accuracy (CNN)", "Accuracy"),
    ('loss', 'val_loss', 'Training Loss', 'Validation Loss',
     "Training vs Validation Loss (CNN)", "Loss"),
)
for train_key, val_key, train_legend, val_legend, curve_title, y_axis_label in curve_specs:
    plt.figure(figsize=(8, 6))
    plt.plot(history_cnn.history[train_key], label=train_legend)
    plt.plot(history_cnn.history[val_key], label=val_legend)
    plt.title(curve_title)
    plt.xlabel("Epochs")
    plt.ylabel(y_axis_label)
    plt.legend()
    plt.show()

# Write the trained CNN to disk, then confirm on stdout.
saved_model_path = 'C:/Users/KIIT/Desktop/AqualQ/src/SavedModels/water_quality_model_cnn.h5'
cnn_model.save(saved_model_path)
print("Model saved successfully!")


  super().__init__(


Epoch 1/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step - accuracy: 0.5588 - loss: 1.1309 - val_accuracy: 0.6917 - val_loss: 0.8178
Epoch 2/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7017 - loss: 0.7616 - val_accuracy: 0.7000 - val_loss: 0.7466
Epoch 3/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7229 - loss: 0.6903 - val_accuracy: 0.6958 - val_loss: 0.6978
Epoch 4/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7540 - loss: 0.5991 - val_accuracy: 0.7521 - val_loss: 0.6646
Epoch 5/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7348 - loss: 0.6002 - val_accuracy: 0.7771 - val_loss: 0.5853
Epoch 6/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7387 - loss: 0.5763 - val_accuracy: 0.7937 - val_loss: 0.5209
Epoch 7/50
[1m60/60[0m [32m━━━━━━━━━━

KeyboardInterrupt: 