# Step 1: Description of the Dataset

**The dataset we will use in this tutorial is the Sonar dataset. This is a dataset that describes sonar chirp returns bouncing off different services. The 60 input variables are the strength of the returns at different angles. It is a binary classification problem that requires a model to differentiate rocks from metal cylinders. **

# Step 2. Baseline Neural Network Model Performance

Let’s create a **baseline model** and result for this project.

We will start off by *importing* all of the classes and functions we will need:

In [4]:
# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)

In [5]:
# load dataset
dataframe = pandas.read_csv("sonar.csv", header=None)
dataset = dataframe.values
# split into input (X) and output (Y) variables
X = dataset[:,0:60].astype(float)
Y = dataset[:,60]

In [6]:
label_encoder = LabelEncoder()
label_encoder.fit(Y)
encoded_Y = label_encoder.transform(Y)

In [8]:
def create_baseline():
    import keras
    model = Sequential()
    model.add(Dense(32, activation='relu', input_shape=(60,)))
    model.add(Dense(1, activation='sigmoid'))
    
    model.compile(optimizer = 'rmsprop',
             loss='mse' ,
             metrics= ['accuracy'])
    return model

In [9]:
# evaluate model with standardized dataset
estimator = KerasClassifier(build_fn=create_baseline, epochs=100, batch_size=5, verbose=0)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(estimator, X, encoded_Y, cv=kfold)
print("Results: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Results: 80.28% (4.56%)


# Step 3. Re-Run The Baseline Model With Data Preparation

We can achieve this in *scikit-learn* using a **Pipeline**

In [10]:
# evaluate baseline model with standardized dataset
numpy.random.seed(seed)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=create_baseline, epochs=100, batch_size=5, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Standardized: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Standardized: 84.56% (6.50%)


# Step 4. Tuning Layers and Number of Neurons in The Model

# 4.1. Evaluate a Smaller Network

In [8]:
# smaller model
def create_smaller():
    import keras
	# create model
    model=Sequential()                        
    model.add(Dense(32,activation='relu',input_shape=(60,)))
    model.add(Dense(1,activation='sigmoid'))
    
	# Compile model
    model.compile(optimizer='rmsprop',
                 loss='mse',
                 metrics=['accuracy'])
    return model
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=create_smaller, epochs=100, batch_size=5, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, Y, cv=kfold)
print("Smaller: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Smaller: 86.59% (7.71%)


# Step 4.2. Evaluate a Larger Network

In [None]:
# larger model
def create_larger():
	# create model
    model = Sequential()
    model.add(Dense(60,activation='relu',input_shape=(60,)))
    model.add(Dense(30,activation='relu'))
    model.add(Dense(1,activation='sigmoid'))
    model.compile(optimizer='rmsprop',loss='mse',metrics=['accuracy'])
    
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=create_larger, epochs=100, batch_size=5, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, Y, cv=kfold)
print("Larger: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

# Step 5: Really Scaling up: developing a model that overfits

Once you’ve obtained a model that has statistical power, the question becomes, is your
model sufficiently *powerful*? Does it have enough *layers* and *parameters* to properly
model the **problem at hand**? 

In [14]:
#really scalling up
def create_big():
    model = Sequential()
    model.add(Dense(200,activation='relu',input_shape=(60,)))
    model.add(Dense(100,activation='relu'))
    model.add(Dense(1,activation='sigmoid'))
    model.compile(optimizer='rmsprop',loss='mse',metrics=['acc'])
    return model

In [12]:
 estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=create_big, epochs=200, batch_size=5, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, Y, cv=kfold)
print("Overfitting: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Overfitting: 84.11% (7.13%)


# Step 6: Tuning the Model

In [13]:
 estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=create_big, epochs=150, batch_size=5, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, Y, cv=kfold)
print("Tuning: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Tuning: 84.06% (8.49%)


# Step 7: Rewriting the code using the Keras Functional API

In [18]:
def fuctional_api():
    inputs = keras.Input(shape=(60))
    x = layers.Dense(60,activation='relu')(x)
    x = layers.Dense(30,activation='relu')(x)
    
    outputs = layers.Dense(1,activation='softmax')(x)
    model = Keras.Model(inputs,outputs)
    model.compile(optimizer='rmsprop',
                 loss='mse',
                 metrics=['accuracy'])
    return model
estimator = KerasClassifier(build_fn=create_baseline, epochs=100, batch_size=5, verbose=0)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(estimator, X, encoded_Y, cv=kfold)
print("Using Functional API: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Using Functional API: 80.73% (4.48%)


# Step 8: Rewriting the code by doing Model Subclassing

In [43]:
import keras
from keras import layers
import tensorflow as tf

class MyModel(tf.keras.Model):
    
    def _init_(self):
        
        super(MyModel, self)._init_()
        self.dense1 = layers.Dense(60,activation = 'relu')
        self.dense2 = layers.Dense(30,activation = 'relu')
        self.dense3 = layers.Dense(1,activation = 'softmax')
    
    def call(self,inputs):
        x = self.dense1(x)
        x = sel.dense2(x)
        return self.dense3(x)
    
       

model = MyModel()
    
model.compile(loss='binary_crossentropy', optimizer = 'adam',metrics = 'accuracy')

estimator = KerasClassifier(build_fn=create_baseline, epochs=100, batch_size=5, verbose=0)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(estimator, X, encoded_Y, cv=kfold)
print("Using Model Sub-Classing: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))
#model.fit(X, encoded_Y, epochs = 145, batch_size = 4, verbose = False)

Using Model Sub-Classing: 82.68% (4.98%)


# Step 9: Rewriting the code without using scikit-learn

In [50]:
def build_model():
    
    model = Sequential()
    model.add(Dense(60, activation = 'relu', input_shape = (60,)))
    model.add(Dense(1, activation = 'sigmoid'))

    #Compile the mode
    
    model.compile(optimizer = 'adam', 
                  loss = 'binary_crossentropy' ,
                  metrics = ['accuracy'])
    
    return model


In [51]:
import numpy as np
k =15
num_val_samples = len(X) // k
num_epochs = 100
all_scores = []

#following lists have mean values at every k fold
acc=[]
loss=[]
val_acc=[]
val_loss=[]

#following 2 lists have the accuracies for every epoch
acc_epoch=[]
val_acc_epoch=[]

for i in range(k):
    print('processing fold #', i)
    val_data = X[i * num_val_samples: (i + 1) * num_val_samples]
    val_targets = encoded_Y[i * num_val_samples: (i + 1) * num_val_samples]
    partial_train_data = np.concatenate(
    [X[:i * num_val_samples],
    X[(i + 1) * num_val_samples:]],
    axis=0)
    partial_train_targets = np.concatenate(
    [encoded_Y[:i * num_val_samples],
    encoded_Y[(i + 1) * num_val_samples:]],
    axis=0)
    model = build_model()
    hist=model.fit(partial_train_data, partial_train_targets,
    epochs=num_epochs, batch_size=1,validation_data=(val_data,val_targets),verbose=0)
  
    #taking mean of acc and losses for plotting
    acc.append(np.mean(hist.history["acc"]))
    loss.append(np.mean(hist.history["loss"]))

    val_acc.append(np.mean(hist.history["val_acc"]))
    val_loss.append(np.mean(hist.history["val_loss"]))
  
    acc_epoch.append(hist.history["acc"])
    val_acc_epoch.append(hist.history["val_acc"])

processing fold # 0
processing fold # 1
processing fold # 2
processing fold # 3
processing fold # 4
processing fold # 5
processing fold # 6
processing fold # 7
processing fold # 8
processing fold # 9
processing fold # 10
processing fold # 11
processing fold # 12
processing fold # 13
processing fold # 14
