In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.models import Sequential

# ---------------------------------------------------------------------------
# End-to-end script: load 'River.csv', preprocess the selected columns, train
# a small feed-forward binary classifier on 'Target', then report accuracy,
# a confusion matrix, a classification report and the training curves.
# ---------------------------------------------------------------------------

# Load dataset
df = pd.read_csv('River.csv')

# Data preprocessing: keep only the modelling columns and drop incomplete rows.
# .copy() makes df_filtered an independent frame so the in-place column
# assignments below never write into (or warn about) a view of df.
df_filtered = df[['pH', 'Nitrate', 'Color', 'Turbidity', 'Odor', 'Chlorine', 'Total Dissolved Solids', 'Water Temperature', 'Target']]
df_filtered = df_filtered.dropna().copy()

# Encode categorical features.
# One encoder per column: re-fitting a single shared LabelEncoder would
# discard the mapping learned for every column except the last one, making it
# impossible to encode new data consistently later.
# The encoders are fitted on all rows (before the split) so that every
# category present in the data has a code.
# NOTE(review): LabelEncoder assigns arbitrary integer codes and these columns
# are not scaled afterwards (same as the original script) — confirm that an
# ordinal encoding is really what is wanted for 'Color' and 'Odor'.
categorical_cols = ['Color', 'Odor']
label_encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    df_filtered[col] = le.fit_transform(df_filtered[col])
    label_encoders[col] = le

# Split dataset into training and testing sets (stratified on the target so
# both splits keep the same class proportions).
X = df_filtered.drop('Target', axis=1)
y = df_filtered['Target']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)
# Work on independent copies: the split returns slices of X, and assigning
# scaled columns into a slice can raise pandas' SettingWithCopyWarning.
X_train = X_train.copy()
X_test = X_test.copy()

# Scale numerical features.
# The scaler is fitted on the training split only and then applied to the
# test split, so the test-set mean/variance never influence training
# (fitting on the full dataset before splitting leaks test statistics).
scaler = StandardScaler()
numerical_cols = ['pH', 'Nitrate', 'Turbidity', 'Chlorine', 'Total Dissolved Solids', 'Water Temperature']
X_train[numerical_cols] = scaler.fit_transform(X_train[numerical_cols])
X_test[numerical_cols] = scaler.transform(X_test[numerical_cols])

# Define the ANN model.
# The input shape is declared with an explicit Input layer rather than
# `input_dim=` on the first Dense layer; newer Keras versions warn when
# `input_dim`/`input_shape` is passed to a layer inside a Sequential model.
n_features = X_train.shape[1]
model = Sequential([
    Input(shape=(n_features,)),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(32, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')  # Use 'softmax' if there are more than 2 classes
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])  # Use 'categorical_crossentropy' for multi-class

# Train the model; 20% of the training split is held out for validation.
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2, verbose=1)

# Evaluate the model: threshold the sigmoid output at 0.5 to get class labels.
y_pred = (model.predict(X_test) > 0.5).astype(int)  # Adjust threshold for multi-class if needed
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

# Confusion matrix.
# Passing `labels=` pins the row/column order to the same array used for the
# tick labels, so the heatmap axes always line up with the matrix even if a
# class happens to be absent from the test predictions.
class_labels = np.unique(y)
conf_matrix = confusion_matrix(y_test, y_pred, labels=class_labels)
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=class_labels, yticklabels=class_labels)
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
plt.show()

# Classification report
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Plot training history: loss on the left, accuracy on the right.
plt.figure(figsize=(12, 4))
plt.subplot(1, 2, 1)
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Loss Over Epochs')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(history.history['accuracy'], label='Train Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.title('Accuracy Over Epochs')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()

plt.show()



Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m2300/2300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - accuracy: 0.6634 - loss: 782.4320 - val_accuracy: 0.7705 - val_loss: 0.5422
Epoch 2/50
[1m2300/2300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 908us/step - accuracy: 0.7515 - loss: 1.2293 - val_accuracy: 0.7705 - val_loss: 0.5387
Epoch 3/50
[1m1043/2300[0m [32m━━━━━━━━━[0m[37m━━━━━━━━━━━[0m [1m0s[0m 739us/step - accuracy: 0.7632 - loss: 0.6598

In [None]:
import joblib

# Persist the trained network (`model` from the training cell) to disk as
# 'ann.pkl' using joblib's pickle-based serialization.
# NOTE(review): Keras also offers its own `model.save(...)` format — confirm
# that a joblib pickle is the intended artifact for downstream loaders.
joblib.dump(
    value=model,
    filename='ann.pkl',
)
