In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn import cross_validation
from sklearn.cross_validation import cross_val_score
from sklearn.model_selection import train_test_split

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC



In [2]:
# Load the dataset from the CSV file in the working directory.
df = pd.read_csv('breast-cancer-wisconsin.csv')
# '?' placeholders (presumably missing values -- confirm against the data
# dictionary) are swapped for a large negative sentinel so every remaining
# cell can be parsed as a number.
df.replace('?', -99999, inplace=True)
# Drop the 'id' column so it is not used as a feature. `axis` is passed by
# keyword: the positional form is deprecated and rejected by pandas >= 2.0.
df.drop(['id'], axis=1, inplace=True)

# Feature matrix: every column except the label. Any column that contained '?'
# is read as strings, so convert explicitly to float instead of handing
# downstream estimators an object-dtype array of mixed strings and ints.
X = np.array(df.drop(['class'], axis=1), dtype=float)
# Target vector: the 'class' column.
y = df['class']

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.20, random_state=42)

In [3]:
# Spot-check a handful of baseline classifiers, each with default settings.
models = [
    ('LR', LogisticRegression()),
    ('LDA', LinearDiscriminantAnalysis()),
    ('KNN', KNeighborsClassifier()),
    ('CART', DecisionTreeClassifier()),
    ('NB', GaussianNB()),
    ('SVM', SVC()),
]

# Per-model fold scores and the matching labels, kept as parallel lists.
results, names = [], []

# Score every candidate with 5-fold cross-validation on the training split and
# print the mean and standard deviation of its fold scores.
for label, estimator in models:
    fold_scores = cross_val_score(estimator, X_train, y_train, cv=5)
    names.append(label)
    results.append(fold_scores)
    print("%s: %f (%f)" % (label, fold_scores.mean(), fold_scores.std()))


LR: 0.951687 (0.021686)
LDA: 0.946298 (0.015099)
KNN: 0.960680 (0.010575)
CART: 0.935583 (0.018302)
NB: 0.953441 (0.017502)
SVM: 0.955179 (0.016295)


In [4]:

# Fit a default k-nearest-neighbours classifier on the training split, then
# predict labels for the held-out split.
knn = KNeighborsClassifier().fit(X_train, y_train)
predictions = knn.predict(X_test)

print("knn testing")
# Print accuracy, the confusion matrix and the per-class report, in that order.
for metric in (accuracy_score, confusion_matrix, classification_report):
    print(metric(y_test, predictions))

knn testing
0.9857142857142858
[[94  1]
 [ 1 44]]
             precision    recall  f1-score   support

          2       0.99      0.99      0.99        95
          4       0.98      0.98      0.98        45

avg / total       0.99      0.99      0.99       140



In [5]:
# Train a small feed-forward neural network on the same features.
from keras.layers import Dense
from keras.models import Sequential
from keras.utils import to_categorical
from sklearn.preprocessing import scale

# One-hot encode the label column: one indicator column per distinct class value.
y2 = pd.get_dummies(df['class'])

# NOTE: this rebinds X_train / X_test / y_train / y_test from the earlier split;
# from here on y_train and y_test hold the one-hot targets, not the raw labels.
X_train, X_test, y_train, y_test = train_test_split(X, y2, test_size=.20, random_state=42)

# Hand Keras plain float32 arrays. Object-dtype features and bool/uint8 dummy
# columns are not reliably convertible to tensors, so convert explicitly.
# The split variables themselves are left untouched.
train_inputs = np.asarray(X_train, dtype='float32')
test_inputs = np.asarray(X_test, dtype='float32')
train_targets = np.asarray(y_train, dtype='float32')
test_targets = np.asarray(y_test, dtype='float32')

# Layer widths at the network's edges come from the data rather than being
# hard-coded, so the cell keeps working if columns or classes change.
n_features = train_inputs.shape[1]
n_classes = train_targets.shape[1]

# Three ReLU hidden layers (9 -> 5 -> 3 units) feeding a softmax output with
# one unit per class.
model = Sequential()
model.add(Dense(9, input_dim=n_features, activation='relu'))
model.add(Dense(5, activation='relu'))
model.add(Dense(3, activation='relu'))
model.add(Dense(n_classes, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

# `epochs` replaces the Keras 1 spelling `nb_epoch`, which is deprecated in
# Keras 2 and rejected by later releases.
history = model.fit(train_inputs, train_targets, epochs=10, verbose=1, batch_size=5)

# With metrics=['accuracy'], evaluate() returns [loss, accuracy].
[test_loss, test_acc] = model.evaluate(test_inputs, test_targets, batch_size=5)
print("Evaluation result on Test Data : Loss = {}, accuracy = {}".format(test_loss, test_acc))

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Evaluation result on Test Data : Loss = 0.14974378318791942, accuracy = 0.9500000029802322
