# Introduction

# objective
The task is to train a network to discriminate between sonar signals bounced off a metal cylinder and those bounded off a roughly cylinder rock

In [1]:
import pandas as pd
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier

In [2]:
# read data
dat = pd.read_csv("Sonar.csv")
dat.head()

Unnamed: 0.1,Unnamed: 0,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V52,V53,V54,V55,V56,V57,V58,V59,V60,Class
0,1,0.02,0.0371,0.0428,0.0207,0.0954,0.0986,0.1539,0.1601,0.3109,...,0.0027,0.0065,0.0159,0.0072,0.0167,0.018,0.0084,0.009,0.0032,R
1,2,0.0453,0.0523,0.0843,0.0689,0.1183,0.2583,0.2156,0.3481,0.3337,...,0.0084,0.0089,0.0048,0.0094,0.0191,0.014,0.0049,0.0052,0.0044,R
2,3,0.0262,0.0582,0.1099,0.1083,0.0974,0.228,0.2431,0.3771,0.5598,...,0.0232,0.0166,0.0095,0.018,0.0244,0.0316,0.0164,0.0095,0.0078,R
3,4,0.01,0.0171,0.0623,0.0205,0.0205,0.0368,0.1098,0.1276,0.0598,...,0.0121,0.0036,0.015,0.0085,0.0073,0.005,0.0044,0.004,0.0117,R
4,5,0.0762,0.0666,0.0481,0.0394,0.059,0.0649,0.1209,0.2467,0.3564,...,0.0031,0.0054,0.0105,0.011,0.0015,0.0072,0.0048,0.0107,0.0094,R


In [3]:
# dat.head()
print(dat.shape)

(208, 62)


In [4]:
# convert 'Class' column to 'category' data type
X = dat.drop('Class', 1)
dat['Class'] = dat['Class'].astype('category')
y = dat['Class']

In [5]:
# split data into train / test data sets
X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.3, random_state=42)

In [6]:
# fit the logistic regression model
logReg = LogisticRegression(C=0.01, solver='liblinear').fit(X_train,y_train)

In [7]:
# evaluate the model on test data set
y_pred = logReg.predict(X_test)
print(confusion_matrix(y_test, y_pred))

[[35  0]
 [13 15]]


In [8]:
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))

Accuracy: 0.7936507936507936


In [9]:
# run the naive bayes model
nb = GaussianNB()
nb.fit(X_train, y_train)

GaussianNB()

In [10]:
nb.score(X_test, y_test)

0.9523809523809523

In [11]:
y_pred = nb.predict(X_test)
print(confusion_matrix(y_test, y_pred))

[[33  2]
 [ 1 27]]


In [12]:
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))

Accuracy: 0.9523809523809523


In [13]:
# Support vector machine model

In [14]:
svc = SVC(random_state=42, probability=True)
svc.fit(X_train, y_train)

SVC(probability=True, random_state=42)

In [24]:
svc.score(X_test, y_test)

1.0

In [25]:
y_pred = svc.predict(X_test)
print(confusion_matrix(y_test, y_pred))

[[35  0]
 [ 0 28]]


In [17]:
# random forest model

In [18]:
from sklearn.ensemble import RandomForestClassifier

In [19]:
rf = RandomForestClassifier(random_state=42)

In [20]:
rf.fit(X_train, y_train)

RandomForestClassifier(random_state=42)

In [21]:
y_pred = rf.predict(X_test)
print(confusion_matrix(y_test, y_pred))

[[35  0]
 [ 0 28]]


In [26]:
rf.score(X_test, y_test)

1.0

In [23]:
# add a knn model

In [27]:
knc = KNeighborsClassifier()
knc.fit(X_train, y_train)

KNeighborsClassifier()

In [28]:
knc.score(X_test, y_test)

1.0

In [29]:
# add a Gradient Boosting Classifier

In [30]:
from sklearn.ensemble import GradientBoostingClassifier

In [31]:
gbc = GradientBoostingClassifier()
gbc.fit(X_train, y_train)

GradientBoostingClassifier()

In [32]:
gbc.score(X_test, y_test)

0.9841269841269841