## Cancer Predictions

In [1]:
#Import scikit-learn dataset library
from sklearn import datasets

The dataset comprises 30 features (mean radius, mean texture, mean perimeter, mean area, mean smoothness, mean compactness, mean concavity, mean concave points, mean symmetry, mean fractal dimension, radius error, texture error, perimeter error, area error, smoothness error, compactness error, concavity error, concave points error, symmetry error, fractal dimension error, worst radius, worst texture, worst perimeter, worst area, worst smoothness, worst compactness, worst concavity, worst concave points, worst symmetry, and worst fractal dimension) and a target (type of cancer).

The data has two classes: malignant (harmful) and benign (not harmful). Here, we build a model to classify the type of cancer. The dataset is available in the scikit-learn library, or you can download it from the UCI Machine Learning Repository.

In [2]:
# Load the breast cancer dataset bundled with scikit-learn.
# The returned object exposes the fields used below: .data, .target,
# .feature_names and .target_names.
cancer = datasets.load_breast_cancer()

In [3]:
# print the names of the 30 features
print("Features: ", cancer.feature_names)

# print the class label names ('malignant', 'benign')
print("Labels: ", cancer.target_names)

Features:  ['mean radius' 'mean texture' 'mean perimeter' 'mean area'
 'mean smoothness' 'mean compactness' 'mean concavity'
 'mean concave points' 'mean symmetry' 'mean fractal dimension'
 'radius error' 'texture error' 'perimeter error' 'area error'
 'smoothness error' 'compactness error' 'concavity error'
 'concave points error' 'symmetry error' 'fractal dimension error'
 'worst radius' 'worst texture' 'worst perimeter' 'worst area'
 'worst smoothness' 'worst compactness' 'worst concavity'
 'worst concave points' 'worst symmetry' 'worst fractal dimension']
Labels:  ['malignant' 'benign']


In [4]:
import numpy as np

In [5]:
# Class balance of the full dataset: returns (unique labels, count per label).
# Label indices follow cancer.target_names, i.e. 0 = malignant, 1 = benign.
np.unique(cancer.target,return_counts=True)

(array([0, 1]), array([212, 357], dtype=int64))

In [None]:
# Import train_test_split function
from sklearn.model_selection import train_test_split
import numpy as np

In [None]:
# Split dataset into training set and test set (70% training, 30% test).
# random_state pins the shuffle so the split is reproducible; stratify keeps
# the malignant/benign ratio the same in both subsets, which matters because
# the classes are imbalanced (212 vs. 357 samples).
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data,
    cancer.target,
    test_size=0.3,
    random_state=109,
    stratify=cancer.target,
)

In [None]:
# Class counts in the held-out test set: (unique labels, count per label).
np.unique(y_test,return_counts=True)

In [None]:
#Import svm model
from sklearn.svm import SVC

In [None]:
# Baseline model: support vector classifier with a linear kernel.
# All other hyperparameters are left at their defaults.
clf = SVC(kernel="linear")

In [None]:
# Train the linear-kernel classifier on the training split only;
# the test split is kept aside for evaluation.
clf.fit(X_train, y_train)

In [None]:
# Predict class labels for the held-out test set with the linear-kernel model.
y_pred = clf.predict(X_test)

In [None]:
#Import scikit-learn metrics module for accuracy calculation
from sklearn.metrics import confusion_matrix, accuracy_score

In [None]:
# Test-set accuracy of the linear-kernel model: the fraction of test samples
# whose predicted label matches the true label.
linear_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", linear_accuracy)

In [None]:
# Confusion matrix for the linear-kernel model on the test set.
# scikit-learn convention: rows are true labels, columns are predicted labels.
confusion_matrix(y_test,y_pred)

## Apply SVC with Polynomial kernel

In [None]:
# Support vector classifier with a polynomial kernel; degree, C and the
# remaining hyperparameters stay at their defaults until the grid search below.
svm_poly = SVC(kernel="poly")

In [None]:
# Train the polynomial-kernel classifier on the training split.
svm_poly.fit(X_train, y_train)

In [None]:
# Predict class labels for the held-out test set with the polynomial-kernel model.
y_pred_poly = svm_poly.predict(X_test)

In [None]:
# Test-set accuracy of the untuned polynomial-kernel model.
poly_accuracy = accuracy_score(y_test, y_pred_poly)
print("Accuracy:", poly_accuracy)

In [None]:
# Confusion matrix for the untuned polynomial-kernel model on the test set
# (rows: true labels, columns: predicted labels).
confusion_matrix(y_test,y_pred_poly)

In [None]:
# Search grid for tuning the polynomial kernel:
#   C      - regularization strength candidates
#   degree - polynomial degree candidates
# 3 x 3 = 9 combinations are evaluated by the grid search.
parameters = {
    "C": [1, 10, 100],
    "degree": [2, 4, 5],
}

In [None]:
from sklearn.model_selection import GridSearchCV

In [None]:
# Exhaustive grid search over `parameters` for the polynomial-kernel SVC,
# scoring each combination with 5-fold cross-validation.
svm_poly_tune = GridSearchCV(estimator=svm_poly, param_grid=parameters, cv=5)

In [None]:
# Run the grid search on the training split only, so the test set stays
# unseen during hyperparameter selection.
svm_poly_tune.fit(X_train,y_train)

In [None]:
# Hyperparameter combination that achieved the best cross-validated score.
svm_poly_tune.best_params_

In [None]:
# Mean cross-validated score (over the 5 folds) of the best combination.
# This is measured on training-split folds, not on the test set.
svm_poly_tune.best_score_

In [None]:
# Predict test-set labels with the tuned polynomial model. With the default
# refit=True, GridSearchCV predicts using the best estimator refit on the
# whole training split.
pred_poly_tune=svm_poly_tune.predict(X_test)

In [None]:
# Confusion matrix for the tuned polynomial-kernel model on the test set
# (rows: true labels, columns: predicted labels).
confusion_matrix(y_test,pred_poly_tune)

In [None]:
# Test-set accuracy of the tuned polynomial-kernel model.
accuracy_score(y_test,pred_poly_tune)

## Apply SVC with Radial kernel

In [None]:
# Support vector classifier with a radial basis function (RBF) kernel;
# C and gamma stay at their defaults until the grid search below.
svm_rbf = SVC(kernel="rbf")

In [None]:
# Train the RBF-kernel classifier on the training split.
svm_rbf.fit(X_train,y_train)

In [None]:
# Baseline (untuned) RBF accuracy. Training accuracy alone is an optimistic
# estimate, and the linear and polynomial sections report test-set accuracy,
# so show the held-out score next to the training score.
print("Train accuracy:", svm_rbf.score(X_train, y_train))
print("Test accuracy:", svm_rbf.score(X_test, y_test))

In [None]:
# Search grid for tuning the RBF kernel:
#   C     - regularization strength candidates
#   gamma - kernel coefficient candidates
# 3 x 6 = 18 combinations are evaluated by the grid search.
# Note: this rebinds `parameters`, replacing the polynomial grid defined
# earlier, so re-run that earlier cell before repeating the polynomial search.
parameters = {
    "C": [1, 10, 100],
    "gamma": [0.0001, 0.001, 0.01, 1, 10, 100],
}

In [None]:
# Exhaustive grid search over `parameters` for the RBF-kernel SVC,
# scoring each combination with 5-fold cross-validation.
svm_rbf_tune = GridSearchCV(estimator=svm_rbf, param_grid=parameters, cv=5)

In [None]:
# Run the grid search on the training split only, so the test set stays
# unseen during hyperparameter selection.
svm_rbf_tune.fit(X_train,y_train)

In [None]:
# Hyperparameter combination that achieved the best cross-validated score.
svm_rbf_tune.best_params_

In [None]:
# Mean cross-validated score (over the 5 folds) of the best combination.
# This is measured on training-split folds, not on the test set.
svm_rbf_tune.best_score_

In [None]:
# Predict test-set labels with the tuned RBF model. With the default
# refit=True, GridSearchCV predicts using the best estimator refit on the
# whole training split.
pred_rbf_tune=svm_rbf_tune.predict(X_test)

In [None]:
# Confusion matrix for the tuned RBF-kernel model on the test set
# (rows: true labels, columns: predicted labels).
confusion_matrix(y_test,pred_rbf_tune)

In [None]:
# Test-set accuracy of the tuned RBF-kernel model.
accuracy_score(y_test,pred_rbf_tune)