## MLP for Binary Classification

In [1]:
# IMPORT PACKAGES
from numpy import asarray
from pandas import read_csv
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense


### The Data 

We will use the Ionosphere binary (two-class) classification dataset to demonstrate an MLP for binary classification.

This dataset involves predicting, given radar returns, whether or not a structure is present in the atmosphere.

In [2]:

# Read the Ionosphere CSV directly from its GitHub URL.
# header=None: the first line is treated as data, so columns get integer labels.
path = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/ionosphere.csv'
df = read_csv(filepath_or_buffer=path, header=None)


In [3]:
# Dimensions of the loaded frame as (rows, columns)
df.shape

(351, 35)

In [4]:
# Preview the first five rows to sanity-check the parsed values
df.head(n=5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,25,26,27,28,29,30,31,32,33,34
0,1,0,0.99539,-0.05889,0.85243,0.02306,0.83398,-0.37708,1.0,0.0376,...,-0.51171,0.41078,-0.46168,0.21266,-0.3409,0.42267,-0.54487,0.18641,-0.453,g
1,1,0,1.0,-0.18829,0.93035,-0.36156,-0.10868,-0.93597,1.0,-0.04549,...,-0.26569,-0.20468,-0.18401,-0.1904,-0.11593,-0.16626,-0.06288,-0.13738,-0.02447,b
2,1,0,1.0,-0.03365,1.0,0.00485,1.0,-0.12062,0.88965,0.01198,...,-0.4022,0.58984,-0.22145,0.431,-0.17365,0.60436,-0.2418,0.56045,-0.38238,g
3,1,0,1.0,-0.45161,1.0,1.0,0.71216,-1.0,0.0,0.0,...,0.90695,0.51613,1.0,1.0,-0.20099,0.25682,1.0,-0.32382,1.0,b
4,1,0,1.0,-0.02401,0.9414,0.06531,0.92106,-0.23255,0.77152,-0.16399,...,-0.65158,0.1329,-0.53206,0.02431,-0.62197,-0.05707,-0.59573,-0.04608,-0.65697,g


In [5]:
# Count null entries in each column (isna is the canonical spelling of isnull)
df.isna().sum()

0     0
1     0
2     0
3     0
4     0
5     0
6     0
7     0
8     0
9     0
10    0
11    0
12    0
13    0
14    0
15    0
16    0
17    0
18    0
19    0
20    0
21    0
22    0
23    0
24    0
25    0
26    0
27    0
28    0
29    0
30    0
31    0
32    0
33    0
34    0
dtype: int64

We don't have any missing data: every column reports zero null values.

In [6]:
# Summary statistics, transposed so each dataset column becomes one row
df.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
0,351.0,0.891738,0.311155,0.0,1.0,1.0,1.0,1.0
1,351.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,351.0,0.641342,0.497708,-1.0,0.472135,0.87111,1.0,1.0
3,351.0,0.044372,0.441435,-1.0,-0.064735,0.01631,0.194185,1.0
4,351.0,0.601068,0.519862,-1.0,0.41266,0.8092,1.0,1.0
5,351.0,0.115889,0.46081,-1.0,-0.024795,0.0228,0.334655,1.0
6,351.0,0.550095,0.492654,-1.0,0.21131,0.72873,0.96924,1.0
7,351.0,0.11936,0.52075,-1.0,-0.05484,0.01471,0.445675,1.0
8,351.0,0.511848,0.507066,-1.0,0.08711,0.68421,0.95324,1.0
9,351.0,0.181345,0.483851,-1.0,-0.048075,0.01829,0.534195,1.0


In [7]:
# Features: every column except the last, cast to float32 for the network
X = df.iloc[:, :-1].to_numpy(dtype="float32")

# Target: the last column holds string class labels; map them to integer codes
y = LabelEncoder().fit_transform(df.iloc[:, -1])

In [8]:
# split into train and test datasets (33% held out for testing)
# random_state pins the shuffle so the same rows land in each partition on every
# run; stratify=y keeps the class ratio the same in the train and test sets.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42, stratify=y
)

print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

# determine the number of input features (columns of X); used as the model's input width
n_features = X_train.shape[1]


(235, 34) (116, 34) (235,) (116,)


In [9]:
# Build the MLP: two ReLU hidden layers (He-normal initialisation) feeding a
# single sigmoid unit that outputs the probability of the class encoded as 1.
model = Sequential([
    Dense(10, activation='relu', kernel_initializer='he_normal', input_shape=(n_features,)),
    Dense(8, activation='relu', kernel_initializer='he_normal'),
    Dense(1, activation='sigmoid'),
])


In [10]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy as the reported metric
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)


In [12]:
# fit the model
# NOTE: fit() resumes from the model's current weights, so re-running this cell
# without first re-running the model-definition cell trains further instead of
# starting over.
# Keeping the returned History object exposes the per-epoch loss/accuracy through
# history.history and stops its repr from being echoed as the cell's output.
history = model.fit(X_train, y_train, epochs=150, batch_size=32, verbose=2)


Train on 235 samples
Epoch 1/150
235/235 - 0s - loss: 0.0486 - accuracy: 0.9872
Epoch 2/150
235/235 - 0s - loss: 0.0480 - accuracy: 0.9872
Epoch 3/150
235/235 - 0s - loss: 0.0477 - accuracy: 0.9872
Epoch 4/150
235/235 - 0s - loss: 0.0471 - accuracy: 0.9872
Epoch 5/150
235/235 - 0s - loss: 0.0467 - accuracy: 0.9872
Epoch 6/150
235/235 - 0s - loss: 0.0465 - accuracy: 0.9872
Epoch 7/150
235/235 - 0s - loss: 0.0463 - accuracy: 0.9872
Epoch 8/150
235/235 - 0s - loss: 0.0452 - accuracy: 0.9872
Epoch 9/150
235/235 - 0s - loss: 0.0453 - accuracy: 0.9915
Epoch 10/150
235/235 - 0s - loss: 0.0450 - accuracy: 0.9915
Epoch 11/150
235/235 - 0s - loss: 0.0443 - accuracy: 0.9915
Epoch 12/150
235/235 - 0s - loss: 0.0442 - accuracy: 0.9915
Epoch 13/150
235/235 - 0s - loss: 0.0437 - accuracy: 0.9915
Epoch 14/150
235/235 - 0s - loss: 0.0435 - accuracy: 0.9872
Epoch 15/150
235/235 - 0s - loss: 0.0429 - accuracy: 0.9915
Epoch 16/150
235/235 - 0s - loss: 0.0425 - accuracy: 0.9915
Epoch 17/150
235/235 - 0s - 

Epoch 137/150
235/235 - 0s - loss: 0.0194 - accuracy: 0.9957
Epoch 138/150
235/235 - 0s - loss: 0.0190 - accuracy: 0.9957
Epoch 139/150
235/235 - 0s - loss: 0.0196 - accuracy: 0.9957
Epoch 140/150
235/235 - 0s - loss: 0.0189 - accuracy: 0.9957
Epoch 141/150
235/235 - 0s - loss: 0.0189 - accuracy: 0.9957
Epoch 142/150
235/235 - 0s - loss: 0.0184 - accuracy: 0.9957
Epoch 143/150
235/235 - 0s - loss: 0.0187 - accuracy: 0.9957
Epoch 144/150
235/235 - 0s - loss: 0.0177 - accuracy: 0.9957
Epoch 145/150
235/235 - 0s - loss: 0.0189 - accuracy: 0.9957
Epoch 146/150
235/235 - 0s - loss: 0.0193 - accuracy: 0.9957
Epoch 147/150
235/235 - 0s - loss: 0.0181 - accuracy: 0.9957
Epoch 148/150
235/235 - 0s - loss: 0.0191 - accuracy: 0.9957
Epoch 149/150
235/235 - 0s - loss: 0.0180 - accuracy: 0.9957
Epoch 150/150
235/235 - 0s - loss: 0.0179 - accuracy: 0.9957


<tensorflow.python.keras.callbacks.History at 0x1d7563d3c50>

In [14]:
# Score the trained network on the held-out split; evaluate() returns the loss
# followed by the compiled metric (accuracy).
loss, acc = model.evaluate(
    X_test,
    y_test,
    verbose=0,
)
print('Test Accuracy: %.3f' % (acc,))


Test Accuracy: 0.922


In [15]:
# Predict for a single hand-entered sample (the values match row 0 of the
# dataset as far as the preview above shows).
row = [1,0,0.99539,-0.05889,0.85243,0.02306,0.83398,-0.37708,1,0.03760,0.85243,-0.17755,0.59755,-0.44945,0.60536,-0.38223,0.84356,-0.38542,0.58212,-0.32192,0.56971,-0.29674,0.36946,-0.47357,0.56811,-0.51171,0.41078,-0.46168,0.21266,-0.34090,0.42267,-0.54487,0.18641,-0.45300]
# predict() works on batches: wrap the sample as a (1, n_features) float32 array
# instead of a nested Python list of mixed ints/floats, which newer Keras data
# adapters refuse to handle.
result = model.predict(asarray([row], dtype='float32'))
# result has shape (1, 1); index out the scalar before %-formatting, because
# formatting a multi-dimensional array as a float is deprecated in recent NumPy.
# The value is the sigmoid output, i.e. the probability of the class encoded as 1
# (presumably 'g', since LabelEncoder assigns codes in sorted label order).
print('Predicted: %.3f' % result[0, 0])

Predicted: 1.000
