In [1]:
import pandas as pd
import numpy as np
from sklearn import preprocessing, svm
from sklearn.model_selection import train_test_split

### Import Dataset

In [2]:
# Column labels for the data file, in file order. They are passed to
# read_csv through `names=` instead of being taken from the file itself.
# NOTE(review): 'Miroses' looks like a misspelling of 'Mitoses'; it is kept
# unchanged here because renaming it would change the column label.
HEADER_ROW = [
    'ID',
    'Clump_Thickness',
    'Unif_Cell_Size',
    'Unif_Cell_Shape',
    'Marginal_Adhesion',
    'Single_Epit_Cell_Size',
    'Bare_Nuclei',
    'Bland_Chromatin',
    'Normal_Nucleoli',
    'Miroses',
    'Label',
]

# Load the data set into a DataFrame whose columns are labelled by HEADER_ROW.
DATA = pd.read_csv(
    'Dataset/Breast-Cancer-Dataset.txt',
    names=HEADER_ROW,
)

### Checking Dataset

In [3]:
# Count the non-null entries in each column; a column whose count is lower
# than the number of rows contains missing values.
print(DATA.count(axis=0))

ID                       699
Clump_Thickness          699
Unif_Cell_Size           699
Unif_Cell_Shape          699
Marginal_Adhesion        699
Single_Epit_Cell_Size    699
Bare_Nuclei              699
Bland_Chromatin          699
Normal_Nucleoli          699
Miroses                  699
Label                    699
dtype: int64


In [4]:
# Count the null (NaN/None) entries in each column.
# Only real nulls are counted: a textual placeholder such as "?" is an
# ordinary string value and is not reported here.
print(DATA.isna().sum())

ID                       0
Clump_Thickness          0
Unif_Cell_Size           0
Unif_Cell_Shape          0
Marginal_Adhesion        0
Single_Epit_Cell_Size    0
Bare_Nuclei              0
Bland_Chromatin          0
Normal_Nucleoli          0
Miroses                  0
Label                    0
dtype: int64


In [5]:
# Replace the "?" placeholder with 0 and drop the 'ID' column.
#
# * `columns=` replaces the positional axis argument (`drop(['ID'], 1)`),
#   which pandas deprecated in 1.3 and rejects with a TypeError from 2.0.
# * DATA is rebound instead of mutated in place, and errors='ignore' lets the
#   drop succeed when 'ID' is already gone, so re-running this cell no longer
#   raises KeyError.
#
# NOTE(review): 0 is used as the stand-in for "?" entries; confirm that this
# fill value is acceptable for the affected feature(s).
DATA = DATA.replace('?', 0).drop(columns=['ID'], errors='ignore')

In [6]:
# Feature matrix: every column except the target 'Label'.
# `columns=` is used instead of the positional axis argument
# (`drop(['Label'], 1)`), which pandas deprecated in 1.3 and rejects with a
# TypeError from 2.0. drop() returns a new frame, so DATA itself is unchanged.
X = np.array(DATA.drop(columns=['Label']))
# Target vector: the 'Label' column.
Y = np.array(DATA['Label'])

# Split the data: 30% is held out for testing; random_state fixes the
# shuffle so the split is the same on every run.
X_TRAIN, X_TEST, Y_TRAIN, Y_TEST = train_test_split(
    X, Y, test_size=0.3, random_state=42
)

### Classifier

In [7]:
# Fit a support-vector classifier on the training split.
# gamma is pinned to 'auto' (1 / n_features) rather than left at the library
# default: the default used when this notebook was run ('auto_deprecated')
# behaved like 'auto', but newer scikit-learn releases default to 'scale'.
# Pinning it keeps the model the same across versions and avoids the
# deprecation warning.
CLASSIFIER = svm.SVC(gamma='auto')
CLASSIFIER.fit(X_TRAIN, Y_TRAIN)



SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

In [8]:
# Mean accuracy of the fitted classifier on the held-out test split.
ACCURACY = CLASSIFIER.score(X=X_TEST, y=Y_TEST)
print(ACCURACY)

0.9666666666666667


In [28]:
# Print the first few test samples next to the classifier's predictions.
# The preview size is capped at the size of the test split so a small split
# cannot cause an IndexError, and all previewed rows are predicted in a single
# call instead of invoking the model once per row inside the loop.
N_PREVIEW = min(10, len(X_TEST))
PREVIEW_PREDICTIONS = CLASSIFIER.predict(X_TEST[:N_PREVIEW]) if N_PREVIEW else []
for i in range(N_PREVIEW):
    # A length-1 slice is printed (not a scalar) so each prediction is still
    # shown in the bracketed array form, e.g. "[2]".
    print('Test Data Number:', i, '|| Label :', Y_TEST[i],
          'Prediction :', PREVIEW_PREDICTIONS[i:i + 1])

Test Data Number: 0 || Label : 2 Prediction : [2]
Test Data Number: 1 || Label : 2 Prediction : [2]
Test Data Number: 2 || Label : 2 Prediction : [2]
Test Data Number: 3 || Label : 4 Prediction : [4]
Test Data Number: 4 || Label : 2 Prediction : [2]
Test Data Number: 5 || Label : 2 Prediction : [2]
Test Data Number: 6 || Label : 2 Prediction : [4]
Test Data Number: 7 || Label : 4 Prediction : [4]
Test Data Number: 8 || Label : 4 Prediction : [4]
Test Data Number: 9 || Label : 4 Prediction : [4]
