In [20]:
from sklearn.datasets import load_breast_cancer
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler

# Load the breast cancer dataset that ships with scikit-learn.
cancer = load_breast_cancer()

# Show the fields exposed by the dataset object and the raw feature matrix.
print("cancer dataset features:\n{}".format(cancer.keys()))
print("cancer datapoints:\n{}".format(cancer.data))

# Split into training and test portions; the fixed random_state makes the
# split repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data,
    cancer.target,
    random_state=0,
)

# Report the dimensions of every split.
print("shape of X_train, y_train: {},{}".format(X_train.shape, y_train.shape))
print("shape of X_test, y_test: {},{}\n".format(X_test.shape, y_test.shape))

# Baseline: train the SVC directly on the unscaled features.
# fit() returns the estimator itself, so construction and fitting are chained.
svm = SVC(C=100).fit(X_train, y_train)

# Scores on the raw data, kept as a reference point for the scaled runs.
raw_train_score = svm.score(X_train, y_train)
raw_test_score = svm.score(X_test, y_test)
print("Training set score before scaling: {:.3f}".format(raw_train_score))
print("Test set score before scaling: {:.3f}\n".format(raw_test_score))

# Rescale the features with MinMaxScaler.
# The scaler is fitted on the training data only; the same fitted scaler is
# then applied to the test data so both sets get an identical transformation.
minmax_scaler = MinMaxScaler()
X_train_scaled = minmax_scaler.fit(X_train).transform(X_train)
X_test_scaled = minmax_scaler.transform(X_test)

# Refit the existing SVC on the min-max scaled training data.
svm.fit(X_train_scaled, y_train)

# Scores of the refitted model on the scaled training and test sets.
minmax_train_score = svm.score(X_train_scaled, y_train)
minmax_test_score = svm.score(X_test_scaled, y_test)
print("Training set score after scaling: {:.3f}".format(minmax_train_score))
print("Test set score after scaling: {:.3f}\n".format(minmax_test_score))
# Repeat the experiment with StandardScaler.
# As before, the scaler is fitted on the training data only and then reused
# unchanged to transform the test data.
std_scaler = StandardScaler()
X_train_scaled_std = std_scaler.fit(X_train).transform(X_train)
X_test_scaled_std = std_scaler.transform(X_test)

# Refit the existing SVC on the standardized training data.
svm.fit(X_train_scaled_std, y_train)

# Scores of the refitted model on the standardized training and test sets.
std_train_score = svm.score(X_train_scaled_std, y_train)
std_test_score = svm.score(X_test_scaled_std, y_test)
print("Training set score after scaling using StandardScaler: {:.3f}".format(std_train_score))
print("Test set score after scaling using StandardScaler: {:.3f}".format(std_test_score))

cancer dataset features:
dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename', 'data_module'])
cancer datapoints:
[[1.799e+01 1.038e+01 1.228e+02 ... 2.654e-01 4.601e-01 1.189e-01]
 [2.057e+01 1.777e+01 1.329e+02 ... 1.860e-01 2.750e-01 8.902e-02]
 [1.969e+01 2.125e+01 1.300e+02 ... 2.430e-01 3.613e-01 8.758e-02]
 ...
 [1.660e+01 2.808e+01 1.083e+02 ... 1.418e-01 2.218e-01 7.820e-02]
 [2.060e+01 2.933e+01 1.401e+02 ... 2.650e-01 4.087e-01 1.240e-01]
 [7.760e+00 2.454e+01 4.792e+01 ... 0.000e+00 2.871e-01 7.039e-02]]
shape of X_train, y_train: (426, 30),(426,)
shape of X_test, y_test: (143, 30),(143,)

Training set score before scaling: 0.941
Test set score before scaling: 0.944

Training set score after scaling: 1.000
Test set score after scaling: 0.965

Training set score after scaling using StandardScaler: 1.000
Test set score after scaling using StandardScaler: 0.958
