In [None]:
# Install the hpelm package, which is imported further down and provides the ELM model.
# The original note marks this install as temporary (it has to be repeated per session).
!pip install hpelm



In [None]:
%tensorflow_version 1.x
from google.colab import drive
drive.mount('/content/drive')
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn import metrics
import hpelm

TensorFlow 1.x selected.
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Read the water-quality CSV from the mounted Drive into a DataFrame.
# index_col=0 turns the first CSV column into the DataFrame index.
df = pd.read_csv(
    '/content/drive/My Drive/1_gecco2019_water_quality.csv',
    index_col=0,
)
df
# Colab Notebooks/
# NOTE(review): the fragment above looks like an alternative Drive sub-folder for the CSV - confirm.

Unnamed: 0,Time,Tp,pH,Cond,Turb,SAC,PFM,Event
1,2017-07-01 00:00:00,6.94,8.60774,0.020954,0.125931,3.58683,43.7559,False
2,2017-07-01 00:01:00,6.93,8.60589,0.020965,0.127219,3.59025,43.4366,False
3,2017-07-01 00:02:00,6.94,8.60220,0.020968,0.126482,3.58318,43.5994,False
4,2017-07-01 00:03:00,6.94,8.60220,0.020972,0.126184,3.58769,43.3704,False
5,2017-07-01 00:04:00,6.94,8.60405,0.020974,0.127908,3.58287,43.1656,False
...,...,...,...,...,...,...,...,...
132476,2017-09-30 23:55:00,10.30,8.56593,0.020724,0.126518,4.53577,56.4686,False
132477,2017-09-30 23:56:00,10.30,8.56593,0.020727,0.126575,4.53008,56.3567,False
132478,2017-09-30 23:57:00,10.30,8.56593,0.020723,0.126512,4.53512,55.0477,False
132479,2017-09-30 23:58:00,10.30,8.56228,0.020720,0.126477,4.54084,55.4052,False


In [None]:
#Map True and False to values 0 and 1 respectively

#0 represents Anomaly
#1 represents Normalcy

encode_map ={
    False : 1,
    True : 0 }

# Assign the encoded column back instead of calling replace(inplace=True) on
# df['Event']: mutating a column selection in place is a chained assignment that
# newer pandas versions deprecate and that does nothing under Copy-on-Write.
# The dtype guard keeps the cell idempotent: once the labels are integers,
# re-running would otherwise flip them (1 == True and 0 == False as dict keys).
if not pd.api.types.is_integer_dtype(df['Event']):
    df['Event'] = df['Event'].map(encode_map).astype('int64')

In [None]:
#Count of values of 0 and 1
# After the encoding step, 1 marks normal rows and 0 marks anomalies,
# so this shows how the two classes are balanced.

df['Event'].value_counts()

1    132268
0       212
Name: Event, dtype: int64

In [None]:
#Wherever some of the fields are not filled with data,fill those fields with the mean value

# Columns whose gaps are replaced by their own column mean.
sensor_cols = ['pH', 'Tp', 'Cond', 'Turb', 'SAC', 'PFM']

# DataFrame.fillna with a Series fills each column using the value stored under
# that column's label, so every sensor column receives its own mean.
# The result is assigned back rather than using fillna(inplace=True) on df[col]:
# that form is a chained assignment, which newer pandas versions deprecate and
# which does nothing under Copy-on-Write.
df = df.fillna(df[sensor_cols].mean())

In [None]:
#Data Type conversion

# Time: parse the text timestamps, then keep them as int64 values.
df['Time'] = pd.to_datetime(df['Time']).astype(np.int64)

# Cast every sensor column to float32 in one call; other columns are untouched.
df = df.astype(dict.fromkeys(['Tp', 'pH', 'Cond', 'Turb', 'SAC', 'PFM'], 'float32'))

# Print the resulting schema (dtypes, non-null counts, memory usage).
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 132480 entries, 1 to 132480
Data columns (total 8 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   Time    132480 non-null  int64  
 1   Tp      132480 non-null  float32
 2   pH      132480 non-null  float32
 3   Cond    132480 non-null  float32
 4   Turb    132480 non-null  float32
 5   SAC     132480 non-null  float32
 6   PFM     132480 non-null  float32
 7   Event   132480 non-null  int64  
dtypes: float32(6), int64(2)
memory usage: 6.1 MB


In [None]:
#Define Input Columns(X) and Output Columns(y)

# Features: the columns at positions 1..6; position 0 (Time) is left out.
X = df[df.columns[1:7]]
# Target: the last column of the frame.
y = df[df.columns[-1]]

In [None]:
#Normalise the values

from sklearn.preprocessing import MinMaxScaler

# Learn each feature's min/max on the full feature matrix, then rescale it.
# NOTE(review): the scaler is fitted before the train/test split, so test rows
# contribute to the learned range - confirm this is acceptable.
scaler = MinMaxScaler()
X = scaler.fit(X).transform(X)

In [None]:
#Split Train and Test Data
# 20% of the rows go to the test set; random_state=0 fixes the shuffle so the split is repeatable.
# NOTE(review): no stratify= argument is passed, so the share of the rare class 0
# in each split is left to chance - confirm this is intended.
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [None]:
#Over Sample the Data using SMOTE

from imblearn.over_sampling import SMOTE
# Only the training split is resampled; x_test / y_test are not touched here.
# random_state=42 fixes the sampler's seed.
sm = SMOTE(random_state=42)
x_train, y_train = sm.fit_resample(x_train, y_train)



In [None]:
#Encode output

# One-hot encode the class labels of both splits.
y_train = tf.keras.utils.to_categorical(y_train)
y_test = tf.keras.utils.to_categorical(y_test)

#Reshape Train Data
# Add a trailing axis of length 1: (rows, features) -> (rows, features, 1),
# matching the input_shape the Conv1D model is built with.
x_train = x_train.reshape(x_train.shape[:2] + (1,))
x_test = np.asarray(x_test).reshape(x_test.shape[:2] + (1,))

# One shape per line, in the order x_train, y_train, x_test, y_test.
print(x_train.shape, y_train.shape, x_test.shape, y_test.shape, sep='\n')

(211610, 6, 1)
(211610, 2)
(26496, 6, 1)
(26496, 2)


In [None]:
# Training settings; generate_cnn reads epochs and batch_size.
verbose = 0
epochs = 100
batch_size = 64

# Sizes taken from the prepared arrays; generate_cnn reads n_outputs.
# NOTE: despite its name, n_timesteps holds x_train.shape[0], i.e. the number of training rows.
n_timesteps = x_train.shape[0]
n_features = x_train.shape[1]
n_outputs = y_train.shape[1]

#Define CNN Model

def generate_cnn():
  """Build, compile and fit the 1-D CNN on the training split.

  Reads the notebook-level names x_train, y_train, x_test, y_test, epochs,
  batch_size and n_outputs. Prints the layer summary before training starts.

  Returns:
    The fitted tf.keras.Sequential model.
  """
  model = tf.keras.Sequential()
  model.add(tf.keras.layers.Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=(x_train.shape[1], 1)))
  model.add(tf.keras.layers.Conv1D(filters=64, kernel_size=2, activation='relu'))
  model.add(tf.keras.layers.Dropout(0.5))
  model.add(tf.keras.layers.MaxPooling1D(pool_size=2))
  # Explicit name: Keras otherwise auto-numbers layers (flatten_1, flatten_2, ...)
  # when several models are built in one session, which breaks any later lookup
  # of this layer by the name 'flatten'.
  model.add(tf.keras.layers.Flatten(name='flatten'))
  model.add(tf.keras.layers.Dense(200, activation='relu'))
  model.add(tf.keras.layers.Dense(n_outputs, activation='softmax'))
  opt = tf.keras.optimizers.SGD(lr=0.01)
  model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['acc'])
  # summary() prints the table itself and returns None, so wrapping it in
  # print() only added a stray "None" line to the output.
  model.summary()
  # NOTE(review): the test split is also passed as validation data here.
  model.fit(x_train, y_train, epochs=epochs,validation_data=(x_test,y_test), batch_size=batch_size, verbose=1)
  return model

In [None]:
#Define Hidden Layer
#The Hidden Layer is the layer between CNN and ELM

def hidden_layer_generate(cnn_model, data=None):
    """Create a feature extractor that ends at the CNN's Flatten layer.

    Args:
        cnn_model: Keras model that contains a Flatten layer.
        data: inputs to push through the extractor. When omitted, the
            notebook-level x_train is used (the original behaviour).

    Returns:
        A tuple (hidden_layer_model, hidden_result): the truncated Keras model
        and its predictions for ``data``.

    Raises:
        ValueError: if cnn_model has no Flatten layer.
    """
    # Locate the layer by type rather than by name: auto-generated Keras layer
    # names receive a numeric suffix once more than one model has been built in
    # the session, so a fixed name lookup fails on re-runs.
    flatten_layer = next(
        (layer for layer in cnn_model.layers
         if isinstance(layer, tf.keras.layers.Flatten)),
        None,
    )
    if flatten_layer is None:
        raise ValueError("cnn_model does not contain a Flatten layer")

    hidden_layer_model = tf.keras.Model(inputs=cnn_model.input, outputs=flatten_layer.output)
    if data is None:
        data = x_train
    hidden_result = hidden_layer_model.predict(data)
    return hidden_layer_model, hidden_result


In [None]:
#Define ELM model
#ELM receives input from the hidden layer and produces the final output
def elm_model_generate(data_train, target_train):
    """Train an ELM classifier on the given features and targets.

    Args:
        data_train: 2-D feature matrix (rows x features); its column count
            sets the ELM input size.
        target_train: 2-D one-hot target matrix (rows x classes); its column
            count sets the ELM output size.

    Returns:
        The trained hpelm ELM with 1000 sigmoid hidden neurons.
    """
    # Size the output from the targets actually passed in instead of a fixed 2.
    elm_model = hpelm.elm.ELM(data_train.shape[1], target_train.shape[1])
    elm_model.add_neurons(1000, func='sigm')
    # Train on the target_train argument; the previous code ignored it and
    # silently used the notebook-level y_train. 'c' selects classification.
    elm_model.train(data_train, target_train, 'c')

    return elm_model

In [None]:
#The result of CNN is fed as input to the ELM
#ELM produces the final output
#Based on the CNN-ELM's predictions and the expected outputs, the metrics are evaluated
def cnn_elm_evaluation(cnn_part, elm_part, data_test, target_test):
    """Evaluate the CNN feature extractor + ELM pipeline and print a report.

    Class 0 (anomaly) is treated as the positive class in the printed table.

    Args:
        cnn_part: model whose predict() turns data_test into ELM input features.
        elm_part: trained ELM whose predict() returns one score per class.
        data_test: inputs to evaluate.
        target_test: one-hot encoded true labels for data_test.

    Returns:
        The 2x2 confusion matrix: rows are true labels, columns are predicted
        labels, both in the order [0, 1].
    """
    # Use the arguments; the previous code ignored them and read the
    # notebook-level x_test / y_test instead.
    cnn_result = cnn_part.predict(data_test)
    elm_result = elm_part.predict(cnn_result)
    true_labels = target_test.argmax(axis=1)
    predicted_labels = elm_result.argmax(axis=1)
    # labels=[0, 1] keeps the matrix 2x2 even if one class never occurs.
    con_mat=metrics.confusion_matrix(true_labels, predicted_labels, labels=[0, 1])
    print("\n\n")
    print("\t\t\t     CONFUSION MATRIX")
    print("\t\t\t+------------------------+")
    # Row 0 holds the true class-0 samples: predicted 0 -> TP, predicted 1 -> FN.
    print("\t\t\t|\tTP  |\tFN\t |")
    print("\t\t\t+------------------------+")
    print("\t\t\t|\t",con_mat[0][0],"|\t",con_mat[0][1],"\t |")
    print("\t\t\t+------------------------+")
    print("\t\t\t|\t",con_mat[1][0],"|\t",con_mat[1][1],"\t |")
    print("\t\t\t+------------------------+")
    # Row 1 holds the true class-1 samples: predicted 0 -> FP, predicted 1 -> TN.
    print("\t\t\t|\tFP  |\tTN\t |")
    print("\t\t\t+------------------------+")
    print("\n\n")
    print("\t\t\t     METRICS")
    print("\n\n")
    print(metrics.classification_report(true_labels, predicted_labels))
    return con_mat


In [None]:
# Build and train the CNN; generate_cnn prints the model summary and the per-epoch training log.
cnn = generate_cnn()

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d (Conv1D)              (None, 5, 64)             192       
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 4, 64)             8256      
_________________________________________________________________
dropout (Dropout)            (None, 4, 64)             0         
_________________________________________________________________
max_pooling1d (MaxPooling1D) (None, 2, 64)             0         
_________________________________________________________________
flatten (Flatten)            (None, 128)               0         
_________________________________________________________________
dense (Dense)                (None, 200)               25800     
_________________________________________

In [None]:
# Truncate the trained CNN at its flatten layer; elm_input is that layer's output for x_train.
hidden_model, elm_input = hidden_layer_generate(cnn)

In [None]:
# Train the ELM on the flattened CNN features together with the one-hot training labels.
elm = elm_model_generate(elm_input, y_train)

In [None]:
# Evaluate the CNN feature extractor + ELM on the test split; cm is the returned confusion matrix.
cm=cnn_elm_evaluation(hidden_model, elm, x_test, y_test)




			     CONFUSION MATRIX
			+------------------------+
			|	TP  |	FP	 |
			+------------------------+
			|	 31 |	 2 	 |
			+------------------------+
			|	 43 |	 26420 	 |
			+------------------------+
			|	FN  |	TN	 |
			+------------------------+



			     METRICS



              precision    recall  f1-score   support

           0       0.42      0.94      0.58        33
           1       1.00      1.00      1.00     26463

    accuracy                           1.00     26496
   macro avg       0.71      0.97      0.79     26496
weighted avg       1.00      1.00      1.00     26496



CALCULATING METRICS MANUALLY 

In [None]:
#Accuracy = (TP+TN)/(TP+TN+FP+FN)

In [None]:
# The diagonal of cm holds the correctly classified samples of both classes;
# cm.sum() is the total number of evaluated samples.
print("Accuracy",(cm[0][0]+cm[1][1])/cm.sum())

Accuracy 0.9983016304347826


In [None]:
#Precision = TP / (TP+FP)

In [None]:
# Positive class is 0 (anomaly). In scikit-learn's confusion matrix rows are the
# true labels and columns the predicted labels, so cm[0][0] is TP and cm[1][0]
# (true 1, predicted 0) is FP. The previous denominator used cm[0][1], which is
# FN, and therefore reported recall under the name precision.
prec=cm[0][0]/(cm[0][0]+cm[1][0])
print("Precision",prec)

Precision 0.9393939393939394


In [None]:
#Recall = TP / (TP+FN)

In [None]:
# Positive class is 0 (anomaly). In scikit-learn's confusion matrix rows are the
# true labels and columns the predicted labels, so cm[0][0] is TP and cm[0][1]
# (true 0, predicted 1) is FN. The previous denominator used cm[1][0], which is
# FP, and therefore reported precision under the name recall.
rec=cm[0][0]/(cm[0][0]+cm[0][1])
print("Recall",rec)

Recall 0.4189189189189189


In [None]:
#F1-score = 2*[(Precision*Recall)/(Precision+Recall)]

In [None]:
# F1 for the positive class 0, computed straight from the confusion matrix:
# 2*TP / (2*TP + FP + FN), which is algebraically equal to
# 2*(Precision*Recall)/(Precision+Recall). Using cm directly means this cell no
# longer depends on the prec / rec variables set in other cells.
# cm[0][1] and cm[1][0] are the two off-diagonal error counts (FN and FP).
print("F1 score",2*cm[0][0]/(2*cm[0][0]+cm[0][1]+cm[1][0]))

F1 score 0.5794392523364487
