In [99]:
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import SGD
from keras.utils import np_utils
from sklearn.model_selection import GridSearchCV
from keras.wrappers.scikit_learn import KerasClassifier
from keras import regularizers
# from . import create_model
import keras.losses

In [100]:
data_root = "/home/cmlare/Data/RF Data/Processed/"

In [101]:
print("Loading Data")
rainfall_classified_1 = pd.read_csv(data_root+"2018-05-08 to 2018-05-15_integrated_classified.csv")
rainfall_classified_2 = pd.read_csv(data_root+"2018-05-16 to 2018-05-23_integrated_classified.csv")
rainfall_classified_3 = pd.read_csv(data_root+"2018-05-24 to 2018-05-31_integrated_classified.csv")
rainfall_classified_4 = pd.read_csv(data_root+"2018-06-01 to 2018-06-10_integrated_classified.csv")
rainfall_classified = pd.concat([rainfall_classified_1, rainfall_classified_2, rainfall_classified_3,rainfall_classified_3,rainfall_classified_4])

Loading Data


In [105]:
#specific_data_frame = rainfall_classified.loc[rainfall_classified["precipitation(mm)"]!=0]
rainfall_classified["class"] = rainfall_classified["class"].map({"A":"DRY","B":"MILD","C":"MILD","D":"MILD","E":"WET","F":"WET","G":"WET","H":"WET"})
# specific_data_frame["Baseline_ATTN"] = specific_data_frame["Baseline"].sub(specific_data_frame["RSL_MIN"])

Index(['date_time', 'ID', 'PrecipStation', 'Frequency', 'FrequencyBand',
       'PAttAvg', 'PRAvg', 'PRmax', 'PRmin', 'PTAvg', 'PTmax', 'PTmin',
       'PathLength', 'RSL_AVG', 'RSL_MAX', 'RSL_MIN', 'SLAttn', 'TSL_AVG',
       'TSL_MAX', 'TSL_MIN', 'XEnd', 'XStart', 'YEnd', 'YStart', 'distance',
       'Baseline', 'precipitation(mm)', 'class'],
      dtype='object')

In [106]:
selected_feature_frame = rainfall_classified[["FrequencyBand","class","PathLength","RSL_MIN", "RSL_MAX",
"TSL_MAX","RSL_AVG"]]
X = selected_feature_frame.drop('class',axis = 1)
y = selected_feature_frame['class']

In [94]:
X.shape

(32408, 3)

In [95]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
# Fit only to the training data
print("Fitting data..")
# scaler.fit(X_train)
scaler.fit(X)
print("Data fitting finished.")
StandardScaler(copy=True, with_mean=True, with_std=True)
X = scaler.transform(X)

Fitting data..
Data fitting finished.


In [None]:
def create_model(dropout_rate=0.0,learning_rate=0.1,neurons=1,activation='relu',momentum=0):

#     optimizer = SGD(lr=learning_rate, decay=1e-6, momentum=momentum,nesterov=True)
    
#     model.compile(loss=loss_function, optimizer=optimizer, metrics=['accuracy'])
    model = Sequential()
    model.add(Dense(neurons, input_dim=3, activation=activation))
    model.add(Dropout(dropout_rate))
    model.add(Dense(neurons, input_dim=neurons, activation='linear'))
    model.add(Dense(12, input_dim=neurons, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    # Compile model
    optimizer = SGD(lr=learning_rate, momentum=momentum)
    model.compile(loss='binary_crossentropy', optimizer=optimizer)#, metrics=['accuracy'])
    
    return model

In [96]:
print("Starting Grid Search.........")

model = KerasClassifier(build_fn=create_model)

epochs = [10,50,100]
learning_rate = [0.001, 0.01, 0.1, 0.2, 0.3]
momentum = [0.0, 0.2, 0.4, 0.6, 0.8, 0.9]
batch_size = [10,20,30,32,50,64,100]
parallel_threads = 1
neurons = [5,10,13,15,20,25,30,50,60,80,90]
dropout_rate = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
activation = ['softmax', 'softplus', 'softsign', 'relu', 'tanh', 'sigmoid', 'hard_sigmoid', 'linear']

param_grid = dict(dropout_rate=dropout_rate, batch_size=batch_size, epochs=epochs,learning_rate=learning_rate,activation=activation,neurons=neurons,momentum=momentum)

grid = GridSearchCV(estimator=model,param_grid=param_grid, n_jobs=parallel_threads,scoring="precision_weighted")

grid_result = grid.fit(X,y)
grid_result.classes_
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Starting Grid Search.........
Epoch 1/10
Epoch 2/10
 1230/21605 [>.............................] - ETA: 4s - loss: -2.0799

KeyboardInterrupt: 

In [80]:
from sklearn.metrics import classification_report,confusion_matrix

predictions = grid_result.predict(X)
print(classification_report(y,predictions))
print(confusion_matrix(y,predictions))

             precision    recall  f1-score   support

          A       0.00      0.00      0.00         0
          B       0.09      0.01      0.02     12636
          C       0.00      0.00      0.00      6058
          D       0.00      0.00      0.00      5726
          E       0.00      0.00      0.00      3924
          F       0.00      0.00      0.00      1262
          G       0.00      0.00      0.00      1074
          H       0.00      0.00      0.00      1728

avg / total       0.04      0.01      0.01     32408

[[    0     0     0     0     0     0     0     0]
 [12467   169     0     0     0     0     0     0]
 [ 5948   110     0     0     0     0     0     0]
 [ 5422   304     0     0     0     0     0     0]
 [ 3579   345     0     0     0     0     0     0]
 [ 1022   240     0     0     0     0     0     0]
 [  889   185     0     0     0     0     0     0]
 [ 1294   434     0     0     0     0     0     0]]


  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


In [37]:
print("Saving the best model..")
import pickle

filename = "/home/cmlare/Data/Models/"+'mlp_gridsearch_best.sav'
print("Saving model to "+ filename)
pickle.dump(model, open(filename, 'wb'))
print("Model Saved!")
print("saving configs")
filename2 = "/home/cmlare/Data/Models/"+'mlp_gridsearch_wet_dry_best_configs.sav'
print("Saving configd to "+ filename2filename2)
pickle.dump(model, open(filename2, 'wb'))
print("Configd Saved!")

array([['A'],
       ['A'],
       ['A'],
       ...,
       ['A'],
       ['A'],
       ['A']], dtype=object)