### Project-Breast-Cancer (Deep Learning)
https://www.kaggle.com/uciml/breast-cancer-wisconsin-data  
https://archive.ics.uci.edu/ml/datasets/Breast+Cancer+Wisconsin+%28Diagnostic%29

In [1]:
# Import required libraries
%matplotlib inline
import pandas as pd
import numpy as np

# Plotting
import matplotlib.pyplot as plt  
from plotnine import *

# Deep learning
import keras
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import LabelEncoder
from keras.callbacks import TensorBoard
from tensorflow.contrib.tensorboard.plugins import projector

  from pandas.core import datetools
  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Feature matrix: read the pre-scaled training table and discard the
# identifier and target columns so only predictor columns are left.
X_train = (
    pd.read_csv('../output/data_train_scaled.csv', index_col=0)
    .drop(['id', 'diagnosis'], axis='columns')
)

# Target vector: take the 'diagnosis' column of the raw training table and
# let the encoder turn its class labels into integer codes.
label_encoder = LabelEncoder()
y_train = label_encoder.fit_transform(
    pd.read_csv('../data/training_data.csv', index_col=0)['diagnosis'].values
)

### Simple Sequential model
hidden layer (12) + relu activation  
hidden layer (8) + relu activation  
output layer (1) + sigmoid activation

In [None]:
# Feed-forward binary classifier, declared as a single list of layers:
#   30 input features -> Dense(12, relu) -> Dense(8, relu) -> Dense(1, sigmoid)
## input_dim on the first layer = number of input features
## the sigmoid on the last layer keeps the output between 0 and 1
model = Sequential([
    Dense(12, input_dim=30, activation='relu'),
    Dense(8, activation='relu'),
    Dense(1, activation='sigmoid'),
])

# Training configuration
## loss: binary cross-entropy (logarithmic loss)
## optimizer: Adam
## metric: accuracy (this is a classification problem)
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# TensorBoard callback: logs (including the graph) go to ./keras_logs
tbCallBack = TensorBoard(log_dir='./keras_logs', write_graph=True)

In [None]:
# Train the network
## epochs=10: ten passes over the training data (hyperparameter)
## batch_size=32: 32 samples per gradient update (hyperparameter)
## validation_split=0.25: a quarter of the rows is held out for validation
## callbacks: the TensorBoard callback defined above is run during training
model.fit(
    X_train,
    y_train,
    epochs=10,
    batch_size=32,
    validation_split=0.25,
    callbacks=[tbCallBack],
)

In [None]:
# View tensorboard
# tensorboard --logdir=./keras_logs --port=8008 --host localhost

### Model 2: Cross-validation


In [None]:
# Model factory handed to KerasClassifier as build_fn
def create_model():
    """Build and compile the baseline classifier.

    Architecture: 30 inputs -> Dense(12, relu) -> Dense(8, relu)
    -> Dense(1, sigmoid). Compiled with binary cross-entropy loss, the
    Adam optimizer and accuracy as the reported metric.

    Returns:
        The compiled Sequential model.
    """
    network = Sequential([
        Dense(12, input_dim=30, activation='relu'),
        Dense(8, activation='relu'),
        Dense(1, activation='sigmoid'),
    ])
    network.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return network

In [None]:
# Wrap the model factory so scikit-learn can drive it as an estimator
model = KerasClassifier(
    build_fn=create_model,
    epochs=10,
    batch_size=10,
    validation_split=0.25,
    verbose=1,
)

# Stratified 10-fold cross-validation, shuffled with a fixed seed
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=30)
results = cross_val_score(estimator=model, X=X_train, y=y_train, cv=kfold)

In [None]:
# Average of the per-fold scores returned by cross_val_score
print(np.mean(results))

### Model 3: GridSearch

In [None]:
# Model factory handed to KerasClassifier as build_fn (tunable variant)
def create_model(optimizer='rmsprop', init='glorot_uniform'):
    """Build and compile the classifier with a tunable optimizer and initializer.

    Args:
        optimizer: Passed straight to ``compile`` as the optimizer.
        init: Passed to every Dense layer as ``kernel_initializer``.

    Returns:
        The compiled Sequential model: 30 inputs -> Dense(12, relu)
        -> Dense(8, relu) -> Dense(1, sigmoid), using binary cross-entropy
        loss and accuracy as the reported metric.
    """
    network = Sequential()
    # Only the first layer declares the input width.
    network.add(Dense(12, input_dim=30, kernel_initializer=init, activation='relu'))
    for units, activation in ((8, 'relu'), (1, 'sigmoid')):
        network.add(Dense(units, kernel_initializer=init, activation=activation))

    network.compile(
        optimizer=optimizer,
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return network

In [None]:
# Grid search over optimizer, weight initializer, epochs and batch size.

# GridSearchCV is not among the notebook's top-level imports, so it is
# imported here to keep this cell runnable on its own.
from sklearn.model_selection import GridSearchCV

# Build a fresh wrapper around the current create_model(optimizer, init).
# The `model` wrapper from the cross-validation section was created before
# create_model was redefined, so it still points at the older no-argument
# factory and cannot accept the `optimizer` / `init` grid parameters.
# validation_split mirrors that earlier wrapper; epochs and batch_size are
# supplied by the grid. verbose=0 because the grid triggers many fits and
# per-epoch logging would flood the output.
grid_model = KerasClassifier(build_fn=create_model, validation_split=0.25, verbose=0)

# Candidate values for each hyperparameter
optimizers = ['rmsprop', 'adam']
init = ['glorot_uniform', 'normal', 'uniform']
epochs = [50, 100, 150]
batches = [5, 10, 20]
param_grid = dict(optimizer=optimizers, epochs=epochs, batch_size=batches, init=init)

# Every combination in param_grid is fitted and scored with
# GridSearchCV's default cross-validation.
grid = GridSearchCV(estimator=grid_model, param_grid=param_grid)
grid_result = grid.fit(X_train, y_train)

In [None]:
# Headline: best score and the parameter combination that achieved it
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

# Detail: mean score, its standard deviation and the parameters for every
# candidate, pulled from cv_results_ in one pass
means, stds, params = (
    grid_result.cv_results_[key]
    for key in ('mean_test_score', 'std_test_score', 'params')
)
for row in zip(means, stds, params):
    print("%f (%f) with: %r" % row)