# Importing Modules

In [None]:
import pandas as pd
import numpy as np
from termcolor import colored

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns 

In [None]:
%matplotlib inline

In [None]:
from sklearn.model_selection import train_test_split

# Loading the data set

In [None]:
from sklearn.datasets import load_breast_cancer

In [None]:
# Load scikit-learn's built-in breast cancer dataset object.
cancer = load_breast_cancer()

In [None]:
# Uncomment the line below to print the full description of the data set
#print(cancer['DESCR'])

In [None]:
# Show which keys the loaded dataset object exposes.
cancer.keys()

In [None]:
# Wrap the raw feature matrix in a DataFrame whose columns carry the
# dataset's feature names, then print a structural summary of the frame.
df_feat = pd.DataFrame(
    data=cancer['data'],
    columns=cancer['feature_names'],
)
df_feat.info()

# Creating the train/test split

In [None]:
# Features come from the DataFrame; labels from the dataset's target array.
X, y = df_feat, cancer['target']

# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=101,
)

# Importing the Support Vector Machines classifier

In [None]:
from sklearn.svm import SVC

In [None]:
# Baseline classifier: SVC with gamma='auto'; every other hyperparameter keeps its default.
model = SVC(gamma='auto')

In [None]:
# Train the baseline model on the training split.
model.fit(X_train,y_train)

In [None]:
# Predict labels for the held-out test split with the baseline model.
predictions = model.predict(X_test)

In [None]:
from sklearn.metrics import classification_report,confusion_matrix

In [None]:
# Compute the baseline confusion matrix once and index into it, instead of
# recomputing the same matrix for every individual count.
# Rows are the true classes, columns the predicted classes.
baseline_cm = confusion_matrix(y_test, predictions)

# True class 0 predicted as class 1, and the total number of true class-0 samples.
FP = baseline_cm[0][1]
C0T = baseline_cm[0][1] + baseline_cm[0][0]

# True class 1 predicted as class 0, and the total number of true class-1 samples.
FN = baseline_cm[1][0]
C1T = baseline_cm[1][0] + baseline_cm[1][1]

In [None]:
def Model_analysis1(y_true=None, y_pred=None):
    """Print the evaluation summary for the baseline SVC model.

    The confusion-matrix counts are derived inside the function from the
    labels it is given, rather than read from the notebook-level
    ``FP``/``C0T``/``FN``/``C1T`` variables. Those variables are reassigned
    later in the notebook, so reading them here would make a re-run of this
    function report counts that do not match the classification report.

    Parameters
    ----------
    y_true : array-like, optional
        True labels. Defaults to the notebook-level ``y_test``.
    y_pred : array-like, optional
        Predicted labels. Defaults to the notebook-level ``predictions``.

    Returns
    -------
    None
        The summary is printed to standard output.
    """
    # Fall back to the baseline results so a bare `Model_analysis1()` call
    # keeps working exactly as before.
    if y_true is None:
        y_true = y_test
    if y_pred is None:
        y_pred = predictions

    # labels=[0, 1] pins the matrix to 2x2 (rows = true class, columns =
    # predicted class) even if one class is missing from both label arrays.
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
    false_pos, class0_total = cm[0][1], cm[0][0] + cm[0][1]
    false_neg, class1_total = cm[1][0], cm[1][0] + cm[1][1]

    print(colored('Model Evaluation 1 \nClasses', 'blue', attrs=['bold','underline']))
    print('\t0 = {} \n\t1 = {}\n'.format(cancer['target_names'][0].capitalize()
                               ,cancer['target_names'][1].capitalize()))
    print(colored('Confusion Matrix analysis', 'blue', attrs=['bold','underline']))
    print('\tClass 0: {} of {} = False positives\n\tClass 1: {} of {} = False Negatives'.format(
        false_pos, class0_total, false_neg, class1_total))
    print(colored('\nClasicfication Report\n','blue',attrs=['bold','underline']))
    print(classification_report(y_true, y_pred))


In [None]:
# Print the evaluation summary for the baseline model.
Model_analysis1()

**NOTE:**
The model predicted class 1 for every sample.

This happens because the model's parameters (`C` and `gamma`) have not been tuned yet.
We can use a grid search to find the optimal values for the parameters of the model.


It would also help to normalize the data before using it in the model.

# Importing gridsearch model from sklearn

In [None]:
from sklearn.model_selection import GridSearchCV

# Creating a parameter grid for C and gamma values

In [None]:
# Candidate values the grid search will try for each SVC hyperparameter.
param_grid = dict(
    C=[0.1, 1, 10, 100, 1000],
    gamma=[1, 0.1, 0.01, 0.001, 0.0001],
)

In [None]:
# Grid search over `param_grid` using a fresh SVC as the base estimator;
# verbose=3 makes the fit report its progress.
grid = GridSearchCV(
    estimator=SVC(),
    param_grid=param_grid,
    verbose=3,
)

In [None]:
# Run the grid search over every C/gamma combination on the training split.
grid.fit(X_train,y_train)

# Getting the optimal parameters and estimator from the grid search, as well as the highest score

In [None]:
# Report the winning parameters, the fitted best estimator and its score.
# The three-newline separator reproduces the blank lines that separate
# `print('\n')` calls would emit between the values.
print(
    grid.best_params_,
    grid.best_estimator_,
    grid.best_score_,
    sep='\n\n\n',
)

# Re-running the model based on the grid search values

In [None]:
# Predict on the held-out test split with the fitted grid search object
# (per scikit-learn, this delegates to the refit best estimator — confirm for your version).
grid_predictions = grid.predict(X_test)

# Evaluating the model

In [None]:
# Compute the grid-search confusion matrix once and index into it, instead of
# recomputing the same matrix for every individual count.
# Rows are the true classes, columns the predicted classes.
grid_cm = confusion_matrix(y_test, grid_predictions)

# True class 0 predicted as class 1, and the total number of true class-0 samples.
FP = grid_cm[0][1]
C0T = grid_cm[0][1] + grid_cm[0][0]

# True class 1 predicted as class 0, and the total number of true class-1 samples.
FN = grid_cm[1][0]
C1T = grid_cm[1][0] + grid_cm[1][1]

In [None]:
def Model_analysis2(y_true=None, y_pred=None):
    """Print the evaluation summary for the grid-searched SVC model.

    The confusion-matrix counts are derived inside the function from the
    labels it is given, rather than read from the notebook-level
    ``FP``/``C0T``/``FN``/``C1T`` variables. Those names are shared with the
    baseline evaluation, so reading them here would tie the output to
    whichever cell assigned them last.

    Parameters
    ----------
    y_true : array-like, optional
        True labels. Defaults to the notebook-level ``y_test``.
    y_pred : array-like, optional
        Predicted labels. Defaults to the notebook-level ``grid_predictions``.

    Returns
    -------
    None
        The summary is printed to standard output.
    """
    # Fall back to the grid-search results so a bare `Model_analysis2()` call
    # keeps working exactly as before.
    if y_true is None:
        y_true = y_test
    if y_pred is None:
        y_pred = grid_predictions

    # labels=[0, 1] pins the matrix to 2x2 (rows = true class, columns =
    # predicted class) even if one class is missing from both label arrays.
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
    false_pos, class0_total = cm[0][1], cm[0][0] + cm[0][1]
    false_neg, class1_total = cm[1][0], cm[1][0] + cm[1][1]

    print(colored('Model Evaluation 2 \nClasses', 'blue', attrs=['bold','underline']))
    print('\t0 = {} \n\t1 = {}\n'.format(cancer['target_names'][0].capitalize()
                               ,cancer['target_names'][1].capitalize()))
    print(colored('Confusion Matrix analysis', 'blue', attrs=['bold','underline']))
    print('\tClass 0: {} of {} = False positives\n\tClass 1: {} of {} = False Negatives'.format(
        false_pos, class0_total, false_neg, class1_total))

    print(colored('\nClasicfication Report\n','blue',attrs=['bold','underline']))
    print(classification_report(y_true, y_pred))

    print(colored('Code Completed!!','blue',attrs=['bold','underline']))

In [None]:
# Print the evaluation summary for the grid-searched model.
Model_analysis2()
# NOTE(review): leftover reference link (PDF creation tutorial); nothing in the
# visible cells uses it — confirm before removing.
#https://realpython.com/creating-modifying-pdf/#creating-a-pdf-file-from-scratch