In [None]:
import pandas as pd
import numpy as np

# Location of the raw Wisconsin Diagnostic Breast Cancer table on the UCI
# repository (split across two literals only to keep the line short).
WDBC_URL = (
    'https://archive.ics.uci.edu/ml/machine-learning-databases/'
    'breast-cancer-wisconsin/wdbc.data'
)

# header=None tells pandas not to treat the first line as column names, so the
# columns receive integer labels.
cancer_set = pd.read_csv(WDBC_URL, header=None)

# Sanity check: (rows, columns) of what was downloaded.
print(cancer_set.shape)

In [None]:
# Keep every column from position 2 onward; the first two columns are left
# out of the feature table.
feature_columns = slice(2, None)
cancer_features = cancer_set.iloc[:, feature_columns]

# Show the dimensions and the container type of the selection, one per line.
print(cancer_features.shape, type(cancer_features), sep='\n')

In [None]:
# Convert the feature columns to a NumPy array.
#
# The array is derived from `cancer_set` instead of from `cancer_features`
# itself: rebinding a name to its own `.values` makes the cell fail on a second
# run (an ndarray has no `.values` attribute). Built this way the cell is
# idempotent and gives the same result however often it is executed.
cancer_features = cancer_set.iloc[:, 2:].to_numpy()

print(type(cancer_features))
print(cancer_features.shape)

In [None]:
# The 30 feature names are three statistics ("mean ...", "... error",
# "worst ...") applied, in that order, to the same ten base measurements.
_base_measurements = (
    'radius', 'texture', 'perimeter', 'area', 'smoothness',
    'compactness', 'concavity', 'concave points', 'symmetry',
    'fractal dimension',
)

cancer_features_names = (
    [f'mean {name}' for name in _base_measurements]
    + [f'{name} error' for name in _base_measurements]
    + [f'worst {name}' for name in _base_measurements]
)


In [None]:
import sklearn.preprocessing as preprocessing
from sklearn.datasets import load_breast_cancer

In [None]:
# Load scikit-learn's bundled breast cancer dataset. The preprocessing cells
# below read its `.data` matrix and its `.target_names`.
breast_cancer = load_breast_cancer()

In [None]:
# Fit a StandardScaler on the feature matrix and apply it to that same matrix.
standardizer = preprocessing.StandardScaler().fit(breast_cancer.data)
breast_cancer_standardized = standardizer.transform(breast_cancer.data)

# Per-feature (column-wise) statistics of the rescaled data.
standardized_means = breast_cancer_standardized.mean(axis=0)
standardized_stds = breast_cancer_standardized.std(axis=0)

print('Mean of each feature after Standardization :\n\n')
print(standardized_means)
print('\nStd. of each feature after Standardization :\n\n')
print(standardized_stds)

In [None]:
# Min-max scaling with an explicit target range of (0, 10).
min_max_scaler = preprocessing.MinMaxScaler(feature_range=(0, 10))
min_max_scaler.fit(breast_cancer.data)  # fits in place

breast_cancer_minmaxscaled10 = min_max_scaler.transform(breast_cancer.data)

In [None]:
# Max-abs scaling: fit on the feature matrix, then transform that same matrix.
max_abs_scaler = preprocessing.MaxAbsScaler()
max_abs_scaler.fit(breast_cancer.data)  # fits in place

breast_cancer_maxabsscaled = max_abs_scaler.transform(breast_cancer.data)

In [None]:
# Normalizer configured with the L1 norm, fitted on and applied to the
# feature matrix.
normalizer = preprocessing.Normalizer(norm='l1')
normalizer.fit(breast_cancer.data)  # fits in place

breast_cancer_normalized = normalizer.transform(breast_cancer.data)

In [None]:
# Binarize the feature matrix using a threshold of 3.0.
binarizer = preprocessing.Binarizer(threshold=3.0)
binarizer.fit(breast_cancer.data)  # fits in place
breast_cancer_binarized = binarizer.transform(breast_cancer.data)

# Preview only the top-left 5x5 corner instead of dumping the whole array.
binarized_preview = breast_cancer_binarized[:5, :5]
print(binarized_preview)

In [None]:
# Toy single-column sample containing the two category values 1 and 2.
category_samples = [[1], [1], [1], [2], [2], [1]]
onehotencoder = preprocessing.OneHotEncoder().fit(category_samples)

# Transforming category values 1 and 2 to one-hot vectors
for category in (1, 2):
    print(onehotencoder.transform([[category]]).toarray())

In [None]:
from sklearn.impute import SimpleImputer

# Mean imputation: entries equal to NaN are the ones treated as missing.
imputer = SimpleImputer(strategy='mean', missing_values=np.nan).fit(breast_cancer.data)

breast_cancer_imputed = imputer.transform(breast_cancer.data)

In [None]:
# Min-max scaling with the scaler's default settings. This rebinds the
# `min_max_scaler` name used by the earlier (0, 10) example.
min_max_scaler = preprocessing.MinMaxScaler()
min_max_scaler.fit(breast_cancer.data)  # fits in place

breast_cancer_minmaxscaled = min_max_scaler.transform(breast_cancer.data)

In [None]:
# Example string labels used to fit the encoder.
labels = ['malignant', 'benign', 'malignant', 'benign']

# Fit on the example labels, then encode the dataset's class names.
labelencoder = preprocessing.LabelEncoder().fit(labels)
bc_labelencoded = labelencoder.transform(breast_cancer.target_names)

In [None]:
import sklearn.datasets as datasets

from sklearn.model_selection import train_test_split

from sklearn.neighbors import KNeighborsClassifier

In [None]:
# Load the breast cancer dataset again under the name `cancer`; the modelling
# cells below read `cancer.data` and `cancer.target`.
cancer = datasets.load_breast_cancer()

In [None]:
# Split features and targets into train and test parts. The split is
# stratified on the target and seeded so it is the same on every run.
X_train, X_test, Y_train, Y_test = train_test_split(
    cancer.data,
    cancer.target,
    random_state=42,
    stratify=cancer.target,
)

In [None]:
# k-nearest-neighbours classifier with default settings, fitted on the
# training split.
knn_classifier = KNeighborsClassifier().fit(X_train, Y_train)

# Score on both splits so the train/test gap is visible.
knn_train_accuracy = knn_classifier.score(X_train, Y_train)
knn_test_accuracy = knn_classifier.score(X_test, Y_test)

print('Accuracy of Train Data :', knn_train_accuracy)
print('Accuracy of Test Data :', knn_test_accuracy)

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Decision tree limited to depth 2, fitted on the training split.
# random_state is pinned (same seed as the train/test split) so the fitted tree,
# and therefore the reported accuracies, are identical on every run.
dt_classifier = DecisionTreeClassifier(max_depth=2, random_state=42)
dt_classifier = dt_classifier.fit(X_train, Y_train)

In [None]:
# Score the fitted decision tree on both splits.
dt_train_accuracy = dt_classifier.score(X_train, Y_train)
dt_test_accuracy = dt_classifier.score(X_test, Y_test)

print('Accuracy of Train Data :', dt_train_accuracy)
print('Accuracy of Test Data :', dt_test_accuracy)

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random forest with default hyper-parameters, fitted on the training split.
# The forest is built with randomness, so random_state is pinned (same seed as
# the train/test split) to make the reported accuracies reproducible.
rf_classifier = RandomForestClassifier(random_state=42)

rf_classifier = rf_classifier.fit(X_train, Y_train)

print('Accuracy of Train Data :', rf_classifier.score(X_train, Y_train))

print('Accuracy of Test Data :', rf_classifier.score(X_test, Y_test))

In [None]:
from sklearn.svm import SVC

# Support vector classifier with default settings, trained on the raw
# (unscaled) training features.
svm_classifier = SVC().fit(X_train, Y_train)

# Score on both splits.
svm_train_accuracy = svm_classifier.score(X_train, Y_train)
svm_test_accuracy = svm_classifier.score(X_test, Y_test)

print('Accuracy of Train Data :', svm_train_accuracy)
print('Accuracy of Test Data :', svm_test_accuracy)

In [None]:
from sklearn.pipeline import make_pipeline

# Standardized copy of the complete feature matrix. These two names are kept
# for any cell that relies on them; the classifier below does NOT use them.
standardizer = preprocessing.StandardScaler()
standardizer = standardizer.fit(cancer.data)
cancer_standardized = standardizer.transform(cancer.data)

# Train the SVM on standardized features.
#
# Previously this cell computed `cancer_standardized` and then fitted SVC on
# the unscaled X_train, so the scaling never reached the model. Putting the
# scaler and the classifier in one pipeline fixes that:
#   * the scaler is fitted on the training split only, so no test-set
#     statistics leak into training;
#   * the same scaling is applied automatically inside `.score()` and
#     `.predict()`, so other cells can keep passing the raw X_train / X_test.
svm_classifier = make_pipeline(preprocessing.StandardScaler(), SVC())

svm_classifier = svm_classifier.fit(X_train, Y_train)

In [None]:
# Score the most recently fitted `svm_classifier` on both splits.
svm_train_accuracy = svm_classifier.score(X_train, Y_train)
svm_test_accuracy = svm_classifier.score(X_test, Y_test)

print('Accuracy of Train Data :', svm_train_accuracy)
print('Accuracy of Test Data :', svm_test_accuracy)

In [None]:
from sklearn import metrics

# Predict the test split with the current `svm_classifier`, then build the
# text report comparing true labels against those predictions.
Y_pred = svm_classifier.predict(X_test)
svm_report = metrics.classification_report(Y_test, Y_pred)

print('Classification report : \n', svm_report)

In [None]:
from sklearn.cluster import KMeans

# Two-cluster k-means fitted on the training features (no labels involved).
# random_state is pinned (same seed as the train/test split) so the cluster
# assignments, and every metric computed from them, are reproducible.
kmeans_cluster = KMeans(n_clusters=2, random_state=42)

kmeans_cluster = kmeans_cluster.fit(X_train)

# Cluster index assigned to each test sample (shown as the cell output).
kmeans_cluster.predict(X_test)

In [None]:
from sklearn import metrics

# Assign the test samples to clusters once and reuse the result for every metric.
kmeans_test_labels = kmeans_cluster.predict(X_test)

# scikit-learn's clustering metrics take (labels_true, labels_pred), in that
# order. Homogeneity and completeness swap values when the two arguments are
# swapped, so the ground truth must be passed first. V-measure and adjusted
# Rand index are symmetric, but use the same order for consistency.
print(metrics.homogeneity_score(Y_test, kmeans_test_labels))

print(metrics.completeness_score(Y_test, kmeans_test_labels))

print(metrics.v_measure_score(Y_test, kmeans_test_labels))

print(metrics.adjusted_rand_score(Y_test, kmeans_test_labels))