## ARTIFICIAL NEURAL NETWORK FOR SONAR DATASET
In this dataset, we had 60 features to predict whether the coming obstacle would be a rock or mine. We used sklearn and keras to build ANN. 

In [12]:
import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from keras.optimizers import SGD

In [2]:
seed = 7                    #this seed value is selected so that we have the same distribution of data everytime.
numpy.random.seed(seed)

In [3]:
import pandas as pd
dataframe = pd.read_csv("sonar.csv")
dataset = dataframe.values
# split into input (X) and output (Y) variables
X = dataset[:,0:60].astype(float)
Y = dataset[:,60]

#### our dependent variable Y is string and we can't process it so we have to convert it into binary classification.

In [4]:
encoder = LabelEncoder()        
encoder.fit(Y)
encoded_Y = encoder.transform(Y)

In [24]:
encoded_Y 

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int64)

In [13]:
def create_baseline():            #This function will create our model.
    model=Sequential()            #Create model object from Sequential.
    model.add(Dense(60,activation="relu",input_dim= 60)) # We are going to use dense layer with activation function relu.
    model.add(Dropout(0.2))                              #The dropout regularization is used in order to discard the neural networks 
                                                          #with large weights in order to get better accuracu.
    model.add(Dense(30,activation="relu"))
    model.add(Dropout(0.2))
    model.add(Dense(1,activation="sigmoid"))              #This layer will give us the probability that whther the sample belongs to "Rock" or "Mine"
    sgd = SGD(lr=0.1, momentum=0.8, decay=0.002, nesterov=False)
    model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])
    return model


#### We will first use train the model without standadization of data. We are using Kfold so that the data is divided into portions. The data is trained on K-1 portion and the last portion is used to test the set in order to avoid information leakage.

In [14]:
estimator = KerasClassifier(build_fn=create_baseline, epochs=50, batch_size=10, verbose=0)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(estimator, X, encoded_Y, cv=kfold)
print("Results: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))


Results: 82.09% (8.60%)


#### Now we are going to use standardized dataset. Pipeline is used in order to standardize just the training dataset so that the test dataset remains "unseen" by the  neural network.

In [15]:
numpy.random.seed(seed)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=create_baseline, epochs=50, batch_size=10, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Standardized: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Standardized: 86.00% (4.52%)


In [8]:
results

array([0.90909091, 0.95238094, 0.76190476, 0.80952382, 0.80952379,
       0.85714285, 0.89999998, 0.84999999, 0.90000001, 0.90000001])