# Load Libraries

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings('ignore')

# load Data

In [2]:
df=pd.read_csv('breast_cancer.csv')

In [3]:
df.head()

Unnamed: 0.1,Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,target
0,0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,0.0
1,1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,0.0
2,2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,0.0
3,3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,0.0
4,4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,0.0


In [4]:
df.isnull().sum()

Unnamed: 0                 0
mean radius                0
mean texture               0
mean perimeter             0
mean area                  0
mean smoothness            0
mean compactness           0
mean concavity             0
mean concave points        0
mean symmetry              0
mean fractal dimension     0
radius error               0
texture error              0
perimeter error            0
area error                 0
smoothness error           0
compactness error          0
concavity error            0
concave points error       0
symmetry error             0
fractal dimension error    0
worst radius               0
worst texture              0
worst perimeter            0
worst area                 0
worst smoothness           0
worst compactness          0
worst concavity            0
worst concave points       0
worst symmetry             0
worst fractal dimension    0
target                     0
dtype: int64

In [5]:
X=df.drop('target',axis=1)
y=df[['target']]

# Feature Scale

In [6]:
sc=StandardScaler()
X=sc.fit_transform(X)
X

array([[-1.72900945,  1.09706398, -2.07333501, ...,  2.29607613,
         2.75062224,  1.93701461],
       [-1.72292139,  1.82982061, -0.35363241, ...,  1.0870843 ,
        -0.24388967,  0.28118999],
       [-1.71683333,  1.57988811,  0.45618695, ...,  1.95500035,
         1.152255  ,  0.20139121],
       ...,
       [ 1.71683333,  0.70228425,  2.0455738 , ...,  0.41406869,
        -1.10454895, -0.31840916],
       [ 1.72292139,  1.83834103,  2.33645719, ...,  2.28998549,
         1.91908301,  2.21963528],
       [ 1.72900945, -1.80840125,  1.22179204, ..., -1.74506282,
        -0.04813821, -0.75120669]])

# Spliting Data

In [7]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2)
X_train.shape,X_test.shape,y_train.shape,y_test.shape

((455, 31), (114, 31), (455, 1), (114, 1))

# Model Fitting

In [8]:
# Model libraries
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

#Accuracy measuring library
from sklearn.metrics import accuracy_score

In [9]:
#1. Logistic Regression - used because this model is best suited for binary classification
LR_model = LogisticRegression()

#Fitting training set to the model
LR_model.fit(X_train,y_train)

#Predicting the test set results based on the model
# accuracy on training data
lr_y_pred = LR_model.predict(X_train)
score = accuracy_score(y_train,lr_y_pred)
print('Training Accuracy of LR model is ', score)

# accuracy on testing data
lr_y_pred_test = LR_model.predict(X_test)
score = accuracy_score(y_test,lr_y_pred_test)
print('Testing Accuracy of LR model is ', score)

Training Accuracy of LR model is  0.9868131868131869
Testing Accuracy of LR model is  0.9912280701754386


In [10]:
#2. Linear Regression - used because this model is best suited for Linear classification
Linear_model = LinearRegression()

#Fitting training set to the model
Linear_model.fit(X_train,y_train)

#Predicting the test set results based on the model
# accuracy on training data
linear_y_pred = Linear_model.predict(X_train)
score = accuracy_score(y_train,lr_y_pred)
print('Training Accuracy of LR model is ', score)

# accuracy on testing data
linear_y_pred_test = Linear_model.predict(X_test)
score = accuracy_score(y_test,lr_y_pred_test)
print('Testing Accuracy of LR model is ', score)

Training Accuracy of LR model is  0.9868131868131869
Testing Accuracy of LR model is  0.9912280701754386


In [11]:
#3. Support Vector Machine(SVM) 
svm_model = SVC(kernel='linear')

#Fitting training set to the model
svm_model.fit(X_train,y_train)

#Predicting the test set results based on the model
# accuracy on training data
train_svm_y_pred = svm_model.predict(X_train)
score = accuracy_score(y_train,train_svm_y_pred)
print('Training Accuracy of SVM model is ', score)

# accuracy on testing data
test_svm_y_pred = svm_model.predict(X_test)
score = accuracy_score(y_test,test_svm_y_pred)
print('Testing Accuracy of SVM model is ', score)

Training Accuracy of SVM model is  0.989010989010989
Testing Accuracy of SVM model is  0.9912280701754386


In [12]:
#rbf Kernal use
model_r=SVC(kernel='rbf')
model_r.fit(X_train,y_train)
model_r.score(X_test,y_test)

0.9912280701754386

In [13]:
#Linear Kernal use
model_l=SVC(kernel='linear')
model_l.fit(X_train,y_train)
model_l.score(X_test,y_test)

0.9912280701754386

In [14]:
y_predict=model_l.predict(X_test)
for i in range(0,10):
    if y_predict[i]==0:
        print('Patient has Cancer (malignant tumor)')
    else:
        print('Patient has no Cancer (malignant benign)')
y_predict[:10],y_test[:10]

Patient has Cancer (malignant tumor)
Patient has no Cancer (malignant benign)
Patient has Cancer (malignant tumor)
Patient has no Cancer (malignant benign)
Patient has Cancer (malignant tumor)
Patient has no Cancer (malignant benign)
Patient has no Cancer (malignant benign)
Patient has no Cancer (malignant benign)
Patient has no Cancer (malignant benign)
Patient has no Cancer (malignant benign)


(array([0., 1., 0., 1., 0., 1., 1., 1., 1., 1.]),
      target
 300     0.0
 145     1.0
 236     0.0
 496     1.0
 17      0.0
 450     1.0
 316     1.0
 410     1.0
 539     1.0
 398     1.0)

In [15]:
if y_predict[0] == 0:
  print('Patient has Cancer (malignant tumor)')
else:
  print('Patient has no Cancer (malignant benign)')

Patient has Cancer (malignant tumor)
