In [2]:
import pandas as pd
import numpy as np
from sklearn.feature_selection import SelectFromModel
from sklearn import datasets
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn import metrics
from sklearn.metrics import classification_report
from sklearn.datasets import make_classification
from sklearn.metrics import confusion_matrix
from sklearn import datasets
cancer = datasets.load_breast_cancer()
print("Features: ", cancer.feature_names)
print("Labels: ", cancer.target_names)
feat_labels =['mean radius', 'mean texture', 'mean perimeter', 'mean area',
 'mean smoothness', 'mean compactness', 'mean concavity',
 'mean concave points', 'mean symmetry' ,'mean fractal dimension',
 'radius error' ,'texture error' ,'perimeter error' ,'area error',
 'smoothness error', 'compactness error', 'concavity error',
 'concave points error', 'symmetry error' ,'fractal dimension error',
 'worst radius', 'worst texture', 'worst perimeter', 'worst area',
 'worst smoothness' ,'worst compactness', 'worst concavity',
 'worst concave points', 'worst symmetry' ,'worst fractal dimension']
print("Total No.of Instances",cancer.data.shape)
X_train, X_test, y_train, y_test = train_test_split(cancer.data, cancer.target, test_size=0.3,random_state=109)
print ('The train data has {0} rows and {1} columns'.format(X_train.shape[0],X_train.shape[1]))
print ('----------------------------')
print ('The test data has {0} rows and {1} columns'.format(X_test.shape[0],X_test.shape[1]))
clf=RandomForestClassifier(n_estimators=10000, random_state=0,n_jobs=-1)
clf.fit(X_train,y_train)
for feature in zip(feat_labels,clf.feature_importances_):
    print(feature)
sfm = SelectFromModel(clf,threshold=0.1)
sfm.fit(X_train,y_train)
for feature_list_index in sfm.get_support(indices=True):
    print(feat_labels[feature_list_index])
X_important_train=sfm.transform(X_train)
X_important_test=sfm.transform(X_test)
model = svm.SVC(kernel='linear', C=1, gamma=1) 
model.fit(X_train,y_train)
predicted= model.predict(X_test)
model.score(X_train, y_train)

Features:  ['mean radius' 'mean texture' 'mean perimeter' 'mean area'
 'mean smoothness' 'mean compactness' 'mean concavity'
 'mean concave points' 'mean symmetry' 'mean fractal dimension'
 'radius error' 'texture error' 'perimeter error' 'area error'
 'smoothness error' 'compactness error' 'concavity error'
 'concave points error' 'symmetry error' 'fractal dimension error'
 'worst radius' 'worst texture' 'worst perimeter' 'worst area'
 'worst smoothness' 'worst compactness' 'worst concavity'
 'worst concave points' 'worst symmetry' 'worst fractal dimension']
Labels:  ['malignant' 'benign']
Total No.of Instances (569, 30)
The train data has 398 rows and 30 columns
----------------------------
The test data has 171 rows and 30 columns
('mean radius', 0.031027406298215054)
('mean texture', 0.01585780024538709)
('mean perimeter', 0.04026314935128548)
('mean area', 0.03830898128680701)
('mean smoothness', 0.007018004000881229)
('mean compactness', 0.014220852175901095)
('mean concavity', 0

0.9623115577889447

In [4]:
print("Score of all Features")
print('Train Accuracy: \n', model.score(X_train, y_train))
print('Test Accuracy: \n', model.score(X_test, y_test))
print(confusion_matrix(y_test, predicted))
print(classification_report(y_test, predicted))
print("precision: ", metrics.precision_score(y_test, predicted))  
print("recall: ", metrics.recall_score(y_test, predicted)) 
model2= svm.SVC(kernel='linear', C=1, gamma=1) 
model_important=model2.fit(X_important_train,y_train)
important_predicted=model2.predict(X_important_test)

Score of all Features
Train Accuracy: 
 0.9623115577889447
Test Accuracy: 
 0.9649122807017544
[[ 61   2]
 [  4 104]]
             precision    recall  f1-score   support

          0       0.94      0.97      0.95        63
          1       0.98      0.96      0.97       108

avg / total       0.97      0.96      0.97       171

precision:  0.9811320754716981
recall:  0.9629629629629629


In [5]:
print("Score of selected Features")
model2.score(X_important_train, y_train)
print('Train Accuracy: \n', model2.score(X_important_train, y_train))
print('Test Accuracy: \n', model2.score(X_important_test,y_test))
print(confusion_matrix(y_test, predicted))
print(classification_report(y_test, important_predicted))
print("precision: ", metrics.precision_score(y_test, important_predicted))  
print("recall: ", metrics.recall_score(y_test, important_predicted))
print(confusion_matrix(y_test, important_predicted))


Score of selected Features
Train Accuracy: 
 0.9095477386934674
Test Accuracy: 
 0.9473684210526315
[[ 61   2]
 [  4 104]]
             precision    recall  f1-score   support

          0       0.94      0.92      0.93        63
          1       0.95      0.96      0.96       108

avg / total       0.95      0.95      0.95       171

precision:  0.9541284403669725
recall:  0.9629629629629629
[[ 58   5]
 [  4 104]]
