In [None]:
# Mount Google Drive inside the Colab runtime; the dataset is read from
# '/content/drive/...' further down, so this cell must run first.
from google.colab import drive
drive.mount('/content/drive')

**IMPORTING LIBRARIES**

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import cross_val_score
import tensorflow as tf
from imblearn.over_sampling import SMOTE

In [None]:
# Read the water-quality table from the mounted Drive folder.
csv_path = '/content/drive/My Drive/WATERQUALITY.csv'
df = pd.read_csv(csv_path)

**PRE-PROCESSING**

In [None]:
# First look at the raw frame: column names, dtypes and non-null counts.
df.info()

In [None]:
# Cells holding the '#NUM!' marker are treated as missing values, and every
# row that contains a missing value is discarded.
df = df.replace('#NUM!', np.nan).dropna()
# Displayed result: per-column count of remaining missing values.
df.isna().sum()

In [None]:
# Class distribution of the target column 'is_safe'.
df['is_safe'].value_counts()

In [None]:
# Re-inspect row counts and dtypes after the incomplete rows were removed.
df.info()

In [None]:
# Convert both columns to numeric dtypes in a single call.
df = df.astype({'ammonia': float, 'is_safe': int})

In [None]:
# Confirm that 'ammonia' and 'is_safe' now carry numeric dtypes.
df.info()

**Separating the data into features (independent variables) and the target (dependent variable)**

In [None]:
# The label is 'is_safe'; every other column is used as a feature.
y = df['is_safe']
X = df.drop(columns=['is_safe'])

**Using SMOTE to oversample the minority class by generating synthetic samples interpolated between existing minority-class neighbours**

In [None]:
from sklearn.model_selection import train_test_split

# Hold out the test set BEFORE oversampling. SMOTE builds synthetic rows by
# interpolating between existing minority-class rows; if it runs on the full
# data set, information from rows that later land in the test split leaks
# into training and the test split itself contains synthetic rows, which
# inflates every reported metric.
# `stratify=y` keeps the original class ratio in both splits.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0, stratify=y
)

# Balance the classes of the training split only; the test split stays real.
sm = SMOTE(random_state=27)
X_train, y_train = sm.fit_resample(X_train, y_train)

In [None]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only, then apply the very same
# transformation to both splits.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

# Building the ANN
**Initializing the ANN**

In [None]:
# Empty sequential container; layers are appended in the following cells.
ann = tf.keras.Sequential()

**Adding the input layer and the first hidden layer**

In [None]:
# First hidden layer: 6 ReLU units. No input shape is declared here.
ann.add(tf.keras.layers.Dense(units=6, activation='relu'))

**Adding the second hidden layer**

In [None]:
# Second hidden layer, same width and activation as the first.
ann.add(tf.keras.layers.Dense(units=6, activation='relu'))

**Adding the output layer**

In [None]:
# Output layer: one sigmoid unit; later cells threshold its value at 0.5.
ann.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

**Training the ANN** (optimizer: 'adam', loss: 'binary_crossentropy', metrics: 'accuracy')

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, and
# accuracy reported as the tracked metric.
ann.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])

In [None]:
# Train for 100 epochs on the scaled training split, 32 samples per batch.
ann.fit(x=X_train, y=y_train, batch_size=32, epochs=100)

In [None]:
# One hand-written sample (20 feature values). It is wrapped in a DataFrame
# that carries the training column names because the scaler was fitted on a
# DataFrame: passing a bare list makes scikit-learn warn about missing
# feature names and skips its column-name/order validation.
# NOTE(review): the values are assumed to be listed in the column order of X.
sample = pd.DataFrame(
    [[1.36, 11.33, 0.04, 2.96, 0.001, 7.23, 0.03, 1.66, 1.08, 0.71,
      0.71, 0.016, 1.41, 1.29, 0.004, 9.12, 1.72, 0.02, 0.45, 0.05]],
    columns=X.columns,
)
# Prints [[True]] when the predicted probability exceeds 0.5.
print(ann.predict(sc.transform(sample)) > 0.5)

**Predicting the Test set results**

In [None]:
# Threshold the sigmoid outputs at 0.5 to obtain boolean class predictions.
y_pred = ann.predict(X_test) > 0.5

**Classification report and confusion matrix**

In [None]:
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    f1_score,
    precision_score,
    recall_score,
)

# Per-class precision, recall and F1 on the held-out split.
print(classification_report(y_test, y_pred))

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix

# Confusion matrix of true vs. predicted labels for the ANN.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)
# The overall accuracy is the value displayed by this cell.
accuracy_score(y_true=y_test, y_pred=y_pred)

In [None]:
# Display the confusion-matrix array computed in the previous cell.
cm

In [None]:
# Heat-map of the ANN confusion matrix; fmt='.0f' prints the counts
# without decimals.
sns.heatmap(cm,annot=True, fmt='.0f')

# Classification with CNN
**IMPORTING LIBRARIES**

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Conv1D, Flatten, MaxPooling1D
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from numpy import unique

from sklearn.model_selection import GridSearchCV

from keras.layers import  Activation, Embedding,  LeakyReLU, BatchNormalization, Dropout
from keras.activations import relu, sigmoid

In [None]:
# Start again from the cleaned frame: label column vs. feature columns.
y = df['is_safe']
x = df.drop(columns=['is_safe'])

**Using SMOTE to oversample the minority class by generating synthetic samples interpolated between existing minority-class neighbours**

In [None]:
# Split first, oversample second: running SMOTE on the full data set would
# put synthetic rows into the test split and leak test information into
# training. `random_state` makes the split reproducible across runs and
# `stratify=y` preserves the original class ratio in both splits.
train_x, test_x, train_y, test_y = train_test_split(
    x, y, test_size=0.30, random_state=0, stratify=y
)

# Balance the classes of the training split only.
sm = SMOTE(random_state=27)
train_x, train_y = sm.fit_resample(train_x, train_y)

In [None]:
from sklearn.preprocessing import StandardScaler

# Scaling statistics come from the training split and are reused unchanged
# for the test split.
sc = StandardScaler().fit(train_x)
train_x = sc.transform(train_x)
test_x = sc.transform(test_x)

**Build the model**

In [None]:
# CNN with a single convolution; each sample is declared as 20 steps x 1 channel.
model1 = Sequential([
    Conv1D(64, 2, activation="relu", input_shape=(20, 1)),
    MaxPooling1D(),
    Flatten(),
    # Fully connected output layer: a single sigmoid node for the binary label.
    Dense(1, kernel_initializer='glorot_normal', activation='sigmoid'),
])
# Binary cross-entropy loss, Adam optimizer, accuracy as the tracked metric.
model1.compile(
    loss='binary_crossentropy',
    optimizer="adam",
    metrics=['accuracy'],
)
model1.summary()

In [None]:
# Silent training run (verbose=0): 100 epochs, 16 samples per batch.
model1.fit(x=train_x, y=train_y, batch_size=16, epochs=100, verbose=0)

**Evaluating the performance of the trained CNN model on the test set**

In [None]:
# evaluate() returns [loss, accuracy] for the metrics set at compile time.
acc = model1.evaluate(test_x, test_y)
test_loss, test_accuracy = acc
print("Loss:", test_loss, " Accuracy:", test_accuracy)

In [None]:
# One hand-written sample (20 feature values). It is wrapped in a DataFrame
# that carries the training column names because the scaler was fitted on a
# DataFrame: passing a bare list makes scikit-learn warn about missing
# feature names and skips its column-name/order validation.
# NOTE(review): the values are assumed to be listed in the column order of x.
sample = pd.DataFrame(
    [[1.36, 11.33, 0.04, 2.96, 0.001, 7.23, 0.03, 1.66, 1.08, 0.71,
      0.71, 0.016, 1.41, 1.29, 0.004, 9.12, 1.72, 0.02, 0.45, 0.05]],
    columns=x.columns,
)
# Prints [[True]] when the predicted probability exceeds 0.5.
print(model1.predict(sc.transform(sample)) > 0.5)

**Predicting the Test set results**

In [None]:
# Boolean predictions for the test split: probability above 0.5 -> True.
yp = model1.predict(test_x) > 0.5

**Classification report and confusion matrix**

In [None]:
# Per-class precision / recall / F1 for CNN model 1.
# `classification_report` is imported in the ANN evaluation cell earlier in
# this notebook, so that cell must have been run.
print(classification_report(test_y, yp))

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix

# Confusion matrix of true vs. predicted labels for CNN model 1.
cm1 = confusion_matrix(y_true=test_y, y_pred=yp)
print(cm1)
# The overall accuracy is the value displayed by this cell.
accuracy_score(y_true=test_y, y_pred=yp)

In [None]:
# Heat-map of the confusion matrix for CNN model 1.
# fmt='d' prints the integer counts in full; seaborn's default annotation
# format ('.2g') would render counts of 100 or more in scientific notation.
sns.heatmap(cm1, annot=True, fmt='d')

# **CNN MODEL 2**

In [None]:
# Rebuild features and label from the cleaned frame for the second CNN.
y = df['is_safe']
x = df.drop(columns=['is_safe'])

In [None]:
# Hold out the test split before any oversampling so that it contains only
# real rows and nothing derived from it reaches the training data.
# `random_state` makes the split reproducible; `stratify=y` keeps the
# original class ratio in both splits.
train_x, test_x, train_y, test_y = train_test_split(
    x, y, test_size=0.30, random_state=0, stratify=y
)

# Use SMOTE to oversample the minority class of the training split only.
sm = SMOTE(random_state=27)
train_x, train_y = sm.fit_resample(train_x, train_y)

In [None]:
from sklearn.preprocessing import StandardScaler

# Fit on the training split, then transform both splits with those statistics.
sc = StandardScaler().fit(train_x)
train_x = sc.transform(train_x)
test_x = sc.transform(test_x)

In [None]:
# CNN with two convolution + pooling stages followed by a sigmoid output.
model2 = Sequential()
# Only the first layer declares the input shape: 20 steps x 1 channel.
model2.add(Conv1D(32, 2, activation="relu", input_shape=(20, 1)))
model2.add(MaxPooling1D())
# The second convolution takes the pooled feature map of the previous layer
# as input, so it must not carry an `input_shape` of its own (the original
# repeated (20, 1) here, which does not describe this layer's real input).
model2.add(Conv1D(64, 2, activation="relu"))
model2.add(MaxPooling1D())
model2.add(Flatten())
# Fully connected output layer: a single sigmoid node for the binary label.
model2.add(Dense(1, kernel_initializer='glorot_uniform', activation='sigmoid'))
# Binary cross-entropy loss, Adam optimizer, accuracy as the tracked metric.
model2.compile(
    loss='binary_crossentropy',
    optimizer="adam",
    metrics=['accuracy'],
)
model2.summary()

In [None]:
# Silent training run (verbose=0): 100 epochs, 16 samples per batch.
model2.fit(x=train_x, y=train_y, batch_size=16, epochs=100, verbose=0)

In [None]:
# evaluate() returns [loss, accuracy] for the metrics set at compile time.
acc = model2.evaluate(test_x, test_y)
test_loss, test_accuracy = acc
print("Loss:", test_loss, " Accuracy:", test_accuracy)

In [None]:
# One hand-written sample (20 feature values). It is wrapped in a DataFrame
# that carries the training column names because the scaler was fitted on a
# DataFrame: passing a bare list makes scikit-learn warn about missing
# feature names and skips its column-name/order validation.
# NOTE(review): the values are assumed to be listed in the column order of x.
sample = pd.DataFrame(
    [[1.36, 11.33, 0.04, 2.96, 0.001, 7.23, 0.03, 1.66, 1.08, 0.71,
      0.71, 0.016, 1.41, 1.29, 0.004, 9.12, 1.72, 0.02, 0.45, 0.05]],
    columns=x.columns,
)
# Prints [[True]] when the predicted probability exceeds 0.5.
print(model2.predict(sc.transform(sample)) > 0.5)

**Predicting the Test set results**

In [None]:
# Boolean predictions for the test split: probability above 0.5 -> True.
yp = model2.predict(test_x) > 0.5

**Classification report and confusion matrix**

In [None]:
# Per-class precision / recall / F1 for CNN model 2.
# `classification_report` is imported in the ANN evaluation cell earlier in
# this notebook, so that cell must have been run.
print(classification_report(test_y, yp))

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix

# Confusion matrix of true vs. predicted labels for CNN model 2.
cm2 = confusion_matrix(y_true=test_y, y_pred=yp)
print(cm2)
# The overall accuracy is the value displayed by this cell.
accuracy_score(y_true=test_y, y_pred=yp)

In [None]:
# Heat-map of the confusion matrix for CNN model 2.
# fmt='d' prints the integer counts in full; seaborn's default annotation
# format ('.2g') would render counts of 100 or more in scientific notation.
sns.heatmap(cm2, annot=True, fmt='d')