## Deep learning network (using KERAS) – DDM & EDDM

### Keras
Chollet, F., & others. (2015). Keras. GitHub. Retrieved from https://github.com/fchollet/keras

### DDM (Drift Detection Method)
- Link to the tool: https://github.com/scikit-multiflow/scikit-multiflow/blob/a7e316d/src/skmultiflow/drift_detection/ddm.py#L6
- Documentation: https://scikit-multiflow.readthedocs.io/en/stable/api/generated/skmultiflow.drift_detection.DDM.html
- Paper reference: https://link.springer.com/chapter/10.1007/978-3-540-28645-5_29

### EDDM (Early Drift Detection Method)
- Link to the tool: https://github.com/scikit-multiflow/scikit-multiflow/blob/a7e316d/src/skmultiflow/drift_detection/eddm.py#L6
- Documentation: https://scikit-multiflow.readthedocs.io/en/stable/api/generated/skmultiflow.drift_detection.EDDM.html#skmultiflow.drift_detection.EDDM
- Paper reference: https://www.researchgate.net/profile/Albert-Bifet/publication/245999704_Early_Drift_Detection_Method/links/53e582cd0cf21cc29fd06017/Early-Drift-Detection-Method.pdf

In [None]:
# Install scikit-multiflow; the DDM and EDDM cells import their detectors from `skmultiflow`.
!pip install scikit-multiflow

In [None]:
# multi-class classification with Keras
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline
 
# Google Drive access (Colab only): the dataset CSV files are read from the mounted drive.
from google.colab import drive

# Dataset selection -- keep exactly one `url` assignment active.
#url = '/content/drive/MyDrive/Dataset/Dataset_without0/FinalDataBM_bcp.csv'
#url = '/content/drive/MyDrive/Dataset/Dataset_without0/FinalDataGM_bcp.csv'
#url = '/content/drive/MyDrive/Dataset/Dataset_without0/FinalDataBM_gender.csv'
#url = '/content/drive/MyDrive/Dataset/Dataset_without0/FinalDataGM_gender.csv'
#url = '/content/drive/MyDrive/Dataset/Dataset_without0/FinalDataBM_sm.csv'
url = '/content/drive/MyDrive/Dataset/Dataset_without0/FinalDataGM_sm.csv'

# Mount (or re-mount) the drive before touching any path below /content/drive.
drive.mount('/content/drive', force_remount=True)

# The CSV files use ';' as the field separator.
data_frame = pd.read_csv(url, sep=';')
def clean_dataset(df):
    """Return a float64 copy of ``df`` without rows containing NaN or +/-inf.

    The frame passed in is filtered (not a module-level global), and it is
    left unmodified; callers should use the returned frame.

    Args:
        df: The ``pd.DataFrame`` to clean. Every remaining value must be
            convertible to ``float64``.

    Returns:
        A new ``pd.DataFrame`` holding only the rows of ``df`` that contain
        no missing and no infinite value, cast to ``np.float64``. The index
        labels of the kept rows are preserved.

    Raises:
        TypeError: If ``df`` is not a ``pd.DataFrame``.
    """
    # Explicit raise instead of `assert`, which is stripped under `python -O`.
    if not isinstance(df, pd.DataFrame):
        raise TypeError("df needs to be a pd.DataFrame")

    # Non-inplace dropna so the caller's frame is not mutated as a side effect.
    without_missing = df.dropna()

    # `axis=1` must be passed by keyword: recent pandas rejects positional `any(1)`.
    has_infinite = without_missing.isin([np.inf, -np.inf]).any(axis=1)
    return without_missing[~has_infinite].astype(np.float64)
 
# Replace the raw frame with the output of clean_dataset().
data_frame = clean_dataset(data_frame)

# Separate the feature columns from the target column.
# Alternative targets for the other dataset files:
#X, Y = data_frame.drop(columns="bcp"), data_frame.bcp
#X, Y = data_frame.drop(columns="gender"), data_frame.gender
X = data_frame.drop(columns="smoker")
Y = data_frame.smoker

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
train_X, test_X, train_Y, test_Y = train_test_split(X, Y, test_size=0.2, random_state=42)

# Fit the label encoder on the training labels, then encode them as integers.
encoder = LabelEncoder().fit(train_Y)
encoded_Y = encoder.transform(train_Y)

# One-hot encode the integer labels.
dummy_y = np_utils.to_categorical(encoded_Y)

# define baseline model
def baseline_model(input_dim=1589, n_classes=4):
    """Build and compile the fully connected multi-class classifier.

    Args:
        input_dim: Number of input features expected by the first layer.
            Defaults to 1589, the value that used to be hard-coded.
        n_classes: Number of units in the output layer, one per class.
            Defaults to 4, the value that used to be hard-coded.

    Returns:
        A compiled ``Sequential`` model using categorical cross-entropy
        loss, the Adam optimizer and the accuracy metric.
    """
    model = Sequential()
    # Only the first layer declares the input size; later layers infer
    # theirs from the previous layer, so no `input_dim` is repeated below.
    model.add(Dense(150, input_dim=input_dim, activation='relu'))
    # Hidden layers use ReLU. Softmax here would squash every hidden
    # representation into a probability vector and hamper training.
    model.add(Dense(50, activation='relu'))
    model.add(Dense(150, activation='relu'))
    model.add(Dense(80, activation='relu'))
    model.add(Dense(33, activation='relu'))
    # Softmax output: categorical cross-entropy expects one probability
    # distribution over the classes per sample, which sigmoid does not give.
    model.add(Dense(n_classes, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
 
# Wrap the model builder in the scikit-learn style KerasClassifier.
estimator = KerasClassifier(
    build_fn=baseline_model,
    epochs=3000,
    batch_size=5,
    verbose=True,
)

# Fit on the training split, then predict for the held-out test rows.
train_model = estimator.fit(train_X, train_Y)
predicted = estimator.predict(test_X)

In [None]:
#DDM
from skmultiflow.drift_detection.ddm import DDM
ddm = DDM()

# DDM tracks the classifier's error rate, so each stream element must be a
# 0/1 flag (1 = misclassified sample) rather than the raw predicted label.
ddm_errors = (np.asarray(predicted).ravel() != np.asarray(test_Y).ravel()).astype(int)

# Adding stream elements to DDM and verifying if drift occurred
for i, error in enumerate(ddm_errors):
    ddm.add_element(error)
    if ddm.detected_warning_zone():
        print(f'Warning zone has been detected in data: {error} - of index: {i}')
    if ddm.detected_change():
        print(f'Change detected in data: {error} - at index: {i}')


In [None]:
#EDDM
from skmultiflow.drift_detection.eddm import EDDM
eddm = EDDM()

# EDDM tracks the distance between classification errors, so each stream
# element must be a 0/1 flag (1 = misclassified sample) rather than the raw
# predicted label.
eddm_errors = (np.asarray(predicted).ravel() != np.asarray(test_Y).ravel()).astype(int)

# Adding stream elements to EDDM and verifying if drift occurred
for i, error in enumerate(eddm_errors):
    eddm.add_element(error)
    if eddm.detected_warning_zone():
        print(f'Warning zone has been detected in data: {error} - of index: {i}')
    if eddm.detected_change():
        print(f'Change detected in data: {error} - at index: {i}')