# Load libraries 

In [1]:
import numpy as np
import pandas as pd

# Load dataset

In [2]:
df = pd.read_csv("breast-cancer-wisconsin.data.csv",header=0)

# Row and Cloumn of dataset

In [3]:
df.shape

(698, 11)

# Missing values

In [4]:
df['1.3'].unique()


array(['10', '2', '4', '1', '3', '9', '7', '?', '5', '8', '6'],
      dtype=object)

In [5]:
df=df.replace('?',np.nan)

In [6]:
df['1.3'].unique()

array(['10', '2', '4', '1', '3', '9', '7', nan, '5', '8', '6'],
      dtype=object)

# Drop missing values

In [7]:
df = df.dropna(how='any',axis=0)

In [8]:
df.shape

(682, 11)

# Count Malignent and Benigen

In [9]:
df['2.1'].value_counts()

2    443
4    239
Name: 2.1, dtype: int64

In [10]:
df.dtypes

1000025     int64
5           int64
1           int64
1.1         int64
1.2         int64
2           int64
1.3        object
3           int64
1.4         int64
1.5         int64
2.1         int64
dtype: object

# Dividing dataset into independent X and dependent Y

In [11]:
X = df.iloc[:,1:10].values
Y = df.iloc[:,-1].values

# Split dataset into training and testing

In [12]:
from sklearn.model_selection import train_test_split

In [13]:
X_train, X_test, Y_train, Y_test = train_test_split( X, Y, test_size=0.25, random_state=0)

# Scale the data

In [14]:
from sklearn.preprocessing import StandardScaler

In [15]:
sc = StandardScaler()

In [16]:
X_train = sc.fit_transform(X_train)

In [17]:
X_test = sc.fit_transform(X_test)

# 1. Support Vector Machine Classifier

In [18]:
from sklearn.svm import SVC

In [19]:
sv = SVC()

In [20]:
sv.fit(X_train, Y_train)

SVC()

In [21]:
pred=sv.predict (X_test)

In [22]:
from sklearn.metrics import accuracy_score

In [23]:
accuracy_score(Y_test,pred)

0.9766081871345029

In [24]:
from sklearn.metrics import classification_report

In [25]:
print(classification_report(Y_test,pred))

              precision    recall  f1-score   support

           2       0.99      0.97      0.98       112
           4       0.95      0.98      0.97        59

    accuracy                           0.98       171
   macro avg       0.97      0.98      0.97       171
weighted avg       0.98      0.98      0.98       171



# Compare Hyperparameters 

In [26]:
from sklearn.model_selection import GridSearchCV

In [27]:
param_grid = {'C': [0.1,1, 10, 100], 'gamma': [1,0.1,0.01,0.001],'kernel': ['rbf', 'poly', 'sigmoid']}

In [28]:
grid = GridSearchCV(SVC(),param_grid,refit=True,verbose=2)

In [29]:
grid.fit(X_train, Y_train)

Fitting 5 folds for each of 48 candidates, totalling 240 fits
[CV] C=0.1, gamma=1, kernel=rbf ......................................
[CV] ....................... C=0.1, gamma=1, kernel=rbf, total=   0.0s
[CV] C=0.1, gamma=1, kernel=rbf ......................................
[CV] ....................... C=0.1, gamma=1, kernel=rbf, total=   0.0s
[CV] C=0.1, gamma=1, kernel=rbf ......................................
[CV] ....................... C=0.1, gamma=1, kernel=rbf, total=   0.0s
[CV] C=0.1, gamma=1, kernel=rbf ......................................
[CV] ....................... C=0.1, gamma=1, kernel=rbf, total=   0.0s
[CV] C=0.1, gamma=1, kernel=rbf ......................................
[CV] ....................... C=0.1, gamma=1, kernel=rbf, total=   0.0s
[CV] C=0.1, gamma=1, kernel=poly .....................................
[CV] ...................... C=0.1, gamma=1, kernel=poly, total=   0.0s
[CV] C=0.1, gamma=1, kernel=poly .....................................
[CV] ..........

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s


[CV] ..................... C=1, gamma=1, kernel=sigmoid, total=   0.0s
[CV] C=1, gamma=1, kernel=sigmoid ....................................
[CV] ..................... C=1, gamma=1, kernel=sigmoid, total=   0.0s
[CV] C=1, gamma=0.1, kernel=rbf ......................................
[CV] ....................... C=1, gamma=0.1, kernel=rbf, total=   0.0s
[CV] C=1, gamma=0.1, kernel=rbf ......................................
[CV] ....................... C=1, gamma=0.1, kernel=rbf, total=   0.0s
[CV] C=1, gamma=0.1, kernel=rbf ......................................
[CV] ....................... C=1, gamma=0.1, kernel=rbf, total=   0.0s
[CV] C=1, gamma=0.1, kernel=rbf ......................................
[CV] ....................... C=1, gamma=0.1, kernel=rbf, total=   0.0s
[CV] C=1, gamma=0.1, kernel=rbf ......................................
[CV] ....................... C=1, gamma=0.1, kernel=rbf, total=   0.0s
[CV] C=1, gamma=0.1, kernel=poly .....................................
[CV] .

[Parallel(n_jobs=1)]: Done 240 out of 240 | elapsed:    0.4s finished


GridSearchCV(estimator=SVC(),
             param_grid={'C': [0.1, 1, 10, 100], 'gamma': [1, 0.1, 0.01, 0.001],
                         'kernel': ['rbf', 'poly', 'sigmoid']},
             verbose=2)

In [30]:
print(grid.best_estimator_)

SVC(C=0.1, gamma=1, kernel='sigmoid')


In [31]:
grid_predictions = grid.predict(X_test)

In [32]:
print(classification_report(Y_test,grid_predictions))

              precision    recall  f1-score   support

           2       1.00      0.97      0.99       112
           4       0.95      1.00      0.98        59

    accuracy                           0.98       171
   macro avg       0.98      0.99      0.98       171
weighted avg       0.98      0.98      0.98       171



# 2. Gaussian Naive Bayes Classifier

In [33]:
from sklearn.naive_bayes import GaussianNB

In [34]:
gnb = GaussianNB()

In [35]:
gnb.fit(X_train, Y_train)

GaussianNB()

In [36]:
pred=gnb.predict (X_test)

In [37]:
from sklearn.metrics import accuracy_score

In [38]:
accuracy_score(Y_test,pred)

0.9707602339181286

In [39]:
from sklearn.metrics import classification_report

In [40]:
print(classification_report(Y_test,pred))

              precision    recall  f1-score   support

           2       1.00      0.96      0.98       112
           4       0.92      1.00      0.96        59

    accuracy                           0.97       171
   macro avg       0.96      0.98      0.97       171
weighted avg       0.97      0.97      0.97       171



# 3. Decision Tree Classifier

In [41]:
from sklearn.tree import DecisionTreeClassifier

In [42]:
dt = DecisionTreeClassifier(random_state=0, max_depth=2)

In [43]:
dt.fit(X_train, Y_train)

DecisionTreeClassifier(max_depth=2, random_state=0)

In [44]:
pred=dt.predict (X_test)

In [45]:
accuracy_score(Y_test,pred)

0.9473684210526315

In [46]:
from sklearn.metrics import classification_report

In [47]:
print(classification_report(Y_test,pred))

              precision    recall  f1-score   support

           2       0.95      0.97      0.96       112
           4       0.95      0.90      0.92        59

    accuracy                           0.95       171
   macro avg       0.95      0.94      0.94       171
weighted avg       0.95      0.95      0.95       171

