BLOK 1 -> Load the required packages

In [None]:
import os
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split, GridSearchCV

from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Input, concatenate 
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.models import Model
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier

BLOK 2 -> Define the global variables

In [None]:
# Candidate values for each hyper-parameter explored by the grid search.
activation = [
    'relu',
    'tanh',
]
dropout_rate = [
    0.1,
    0.2,
]
batch_size = [
    16,
    32,
    64,
]
optimizer = [
    'SGD',
    'RMSprop',
    'Adam',
]

# Per-class weights handed to fit(): label 1 is weighted 6, label 0 is weighted 2.
class_weight = {
    1: 6,
    0: 2,
}

BLOK 3 -> Define a path

In [None]:
# Root folder that is searched recursively for the input data files.
# Placeholder value: replace it with the real location before running.
path = 'PATH-TO-SVMDAT-FOLDER'

BLOK 4 -> Data preprocessing functions

In [None]:
def loadFileNames(filePath, prefix):
    """Recursively collect files whose name contains ``'.' + prefix``.

    Args:
        filePath: Root directory handed to ``os.walk``.
        prefix: Text that must appear in the file name directly after a
            dot. This is a substring match, not anchored to the end of
            the name.

    Returns:
        List of matching paths (walked directory joined with the file
        name), in the order ``os.walk`` yields them.
    """
    return [
        os.path.join(root, name)
        for root, _dirs, names in os.walk(filePath)
        for name in names
        if ('.' + prefix) in name
    ]

def load_file(filepath):
    """Read a header-less, whitespace-delimited text file into a DataFrame.

    Args:
        filepath: Path of the file to read.

    Returns:
        DataFrame with integer column labels (``header=None``), one row
        per line of the file.
    """
    # ``delim_whitespace=True`` is deprecated since pandas 2.2;
    # ``sep=r'\s+'`` is the documented equivalent.
    return pd.read_csv(filepath, header=None, sep=r'\s+')

def frameFunc(filepath):
    """Parse one data file into a DataFrame with one row per input line.

    Each line is expected to look like
    ``label idx:value ... # <metadata tokens, containing one more '#'>``.
    The ``idx:`` prefixes are stripped, the values before the first ``#``
    become the feature matrix, and the tokens after it (minus the second
    ``#``) become the metadata columns.

    Args:
        filepath: Path of the file to parse; its base name up to the first
            dot is stored in the ``name`` column of every row.

    Returns:
        DataFrame with columns ``x`` (20x4 float array), ``y`` (label),
        ``longitude1``, ``latitude1``, ``neviem1``, ``neviem2``,
        ``longitude2``, ``latitude2``, ``id``, ``magnitude``, ``distance``
        and ``name``.

    Raises:
        ValueError: If a line does not contain two ``#`` tokens or a
            feature value cannot be converted to float.
        IndexError: If fewer than nine metadata tokens follow the ``#``.
    """
    dataframe = load_file(filepath)

    # Take the name from the base name only, so dots in directory names
    # (e.g. "./data/ABC.eq") or a path without "/" no longer break it.
    station = os.path.basename(filepath).split(".", 1)[0]

    # 'neviem1'/'neviem2' are kept as-is; presumably placeholders for
    # fields whose meaning is unknown -- TODO confirm.
    columns = ['x', 'y', 'longitude1', 'latitude1', 'neviem1', 'neviem2',
               'longitude2', 'latitude2', 'id', 'magnitude', 'distance', 'name']

    rows = []
    for _, row in dataframe.iterrows():
        label = row.iloc[0]

        # Drop everything up to and including ':' in each token; tokens
        # without ':' (such as '#') are kept whole because find() gives -1.
        tokens = [str(value) for value in row.iloc[1:]]
        tokens = [token[token.find(":") + 1:] for token in tokens]

        separator = tokens.index("#")
        features = np.array([float(token) for token in tokens[:separator]])
        # In-place resize: zero-pads or truncates to exactly 20x4 values.
        features.resize(20, 4)

        meta = tokens[separator + 1:]
        meta.remove("#")  # discard the second separator inside the metadata

        rows.append((
            features,
            label,
            float(meta[0]),
            float(meta[1]),
            float(meta[2]),
            meta[3],
            float(meta[4]),
            float(meta[5]),
            meta[6],
            meta[7],
            meta[8],
            station,
        ))

    return pd.DataFrame(rows, columns=columns)

def createFrame(prefix):
    """Parse every file under ``path`` matching ``prefix`` into one DataFrame.

    Args:
        prefix: File-name marker forwarded to ``loadFileNames``.

    Returns:
        The per-file frames from ``frameFunc`` stacked vertically with
        their original row indices. When no file matches, an empty frame
        with the columns ``x`` and ``y`` is returned.
    """
    frames = [frameFunc(file_name) for file_name in loadFileNames(path, prefix)]
    if not frames:
        return pd.DataFrame(columns=['x', 'y'])
    # DataFrame.append was removed in pandas 2.0; a single concat also
    # avoids re-copying the accumulated frame on every iteration.
    return pd.concat(frames)

BLOK 5 -> Call the data preprocessing function

In [None]:
# Stack the frames parsed from the 'eq' files and from the 'ex' files.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
parsed_data = pd.concat([createFrame('eq'), createFrame('ex')])

BLOK 6 -> Divide data into train and test

In [None]:
# Keep only the columns used downstream and give them shorter names.
orig = pd.DataFrame(parsed_data)[['id','name','distance','latitude1','longitude1','x','y']]
orig = orig.rename(columns={"id": "qid", "name": "station", "distance":"dist", "latitude1":"lat", "longitude1":"long", "x":"x", "y":"y"})
# Recode label -1 as 0. Assign the result back instead of calling
# replace(..., inplace=True) on the column selection: the chained in-place
# form warns in pandas 2.x and does not modify `orig` under Copy-on-Write.
orig['y'] = orig['y'].replace(-1, 0)
orig['dist'] = pd.to_numeric(orig['dist'],downcast = 'float')    
orig.info()

print()
print(orig.head())

# Split on the unique qid values so that all rows sharing a qid end up in
# the same subset (80 % of the ids for training, 20 % for testing).
uniqueIDs = orig['qid'].unique()
trainID, testID = train_test_split(uniqueIDs, test_size = 0.2, random_state = 42)
train = orig[orig['qid'].isin(trainID)]
test = orig[orig['qid'].isin(testID)]

BLOK 7 -> Function to initialize the CNN model for the grid search -> Output: model

In [None]:
def initializeModel(dropout_rate=0.1, activation='relu', optimizer='adam'):
    """Build and compile the three-branch binary classifier.

    The 20x4x1 input feeds two identically shaped dense towers and one
    convolutional branch; their outputs are concatenated, passed through
    dropout and a 64-unit dense layer, and reduced to one sigmoid unit.

    Args:
        dropout_rate: Rate of the dropout layer after the concatenation.
        activation: Activation used by every hidden dense/conv layer.
        optimizer: Optimizer passed to ``model.compile``.

    Returns:
        The compiled Keras ``Model`` (binary cross-entropy loss, accuracy
        metric).
    """
    def dense_tower(tensor):
        # Flatten, then an hourglass stack of fully connected layers.
        tensor = Flatten()(tensor)
        for units in (128, 64, 32, 64, 128, 256):
            tensor = Dense(units, activation=activation)(tensor)
        return tensor

    inputs = Input(shape=(20, 4, 1))

    # Branches are created in the order dense, conv, dense.
    first_dense = dense_tower(inputs)

    conv = Conv2D(64, kernel_size=(4, 4), padding='same', activation=activation)(inputs)
    conv = MaxPooling2D(pool_size=(2, 2), padding='same')(conv)
    conv = Flatten()(conv)

    second_dense = dense_tower(inputs)

    merged = concatenate([first_dense, conv, second_dense])
    merged = Dropout(dropout_rate)(merged)
    merged = Dense(64, activation=activation)(merged)
    output = Dense(1, activation='sigmoid')(merged)

    model = Model(inputs=inputs, outputs=output)
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model

BLOK 8 -> MAIN FUNCTION FOR GRID SEARCH

In [None]:
# Stack the per-row 20x4 arrays and add a trailing channel axis so the
# inputs match the model's (20, 4, 1) input shape.
X_train = np.expand_dims(np.stack(train['x']), axis = 3)
y_train = np.stack(train['y'])
X_test = np.expand_dims(np.stack(test['x']), axis = 3)
y_test = np.stack(test['y'])

# Use scikit-learn to grid search 
model = KerasClassifier(build_fn = initializeModel, epochs=40) 

# Every combination of these values is evaluated with 5-fold cross-validation.
param_grid = dict(batch_size=batch_size, activation=activation, dropout_rate=dropout_rate, optimizer=optimizer)
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=5)
grid_result = grid.fit(X_train, y_train, class_weight=class_weight)

# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means_acc = grid_result.cv_results_['mean_test_score']
stds_acc = grid_result.cv_results_['std_test_score']
params_acc = grid_result.cv_results_['params']

# Use separate loop names so the result arrays above are not overwritten
# by the last (mean, std, params) triple.
for mean_acc, std_acc, param_set in zip(means_acc, stds_acc, params_acc):
    print("%f (%f) with: %r" % (mean_acc, std_acc, param_set))