In [None]:
### Importation des bibliothèques nécessaires
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report

<h1>Importation des données:</h1>

In [None]:
### Load the heart-disease data set from a header-less CSV file
# (an earlier variant of this cell read 'heart_statlog.csv' instead)
heartData = pd.read_csv('HeartData.csv', header=None)

# The file carries no header row, so the column names are assigned by hand.
column_names = [
    'age', 'sex', 'chest_pain', 'blood_press',
    'serum_chol', 'blood_sugar', 'electrocard',
    'max_heart_rate', 'induced_ang', 'oldpeak',
    'peak_st_seg', 'major_ves', 'thal', 'presence',
]
heartData.columns = column_names

# Separate the label column ('presence') from the remaining feature columns.
y = heartData['presence'].copy()
x = heartData.drop(columns='presence')

heartData.head(3)

<h1>Découpage et normalisation des données:</h1>

In [None]:
from sklearn.model_selection import train_test_split
# Split the data into a training part and a test part (25 % for testing).
# NOTE: shuffle=False keeps the rows in their original order, so the split is
# a plain head/tail cut and random_state is not used by scikit-learn here.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.25,
    shuffle=False,
    random_state=0,
)


In [None]:
from sklearn.preprocessing import StandardScaler
# Standardise the features with StandardScaler: the scaler is fitted on the
# training rows only, then the same fitted scaler transforms both splits.
meanS_x = StandardScaler().fit(x_train)
x_train = meanS_x.transform(x_train)
x_test = meanS_x.transform(x_test)

In [None]:
def plot_boundary(classifier, X, y, feat_x=4, feat_y=7, step=0.01,
                  xlabel='Cholesterol', ylabel='Max_heart_rate'):
    """Plot a classifier's decision regions over two features of ``X``.

    A regular grid is built over the two selected feature columns (padded by
    1 on each side). Every other feature is held at 0 for the grid points,
    the classifier predicts a label for each grid point, and the predictions
    are drawn as filled contours with the samples of ``X`` scattered on top.

    Parameters
    ----------
    classifier : fitted estimator exposing ``predict`` and ``score``.
    X : 2-D array-like of features (converted with ``np.asarray``).
    y : labels for the rows of ``X``; used for point colour/marker and score.
    feat_x, feat_y : column indices of ``X`` drawn on the x and y axes.
        The defaults (4 and 7) are the columns the original code plotted.
    step : grid spacing along both axes.
    xlabel, ylabel : axis labels; the defaults describe columns 4 and 7.

    Notes
    -----
    Holding the remaining features at 0 corresponds to their mean only when
    ``X`` has been standardised beforehand -- confirm this at the call site.
    """
    # Imported locally, as in the original function, so the function does not
    # depend on a later cell having imported seaborn.
    import seaborn as sns

    X = np.asarray(X)

    axis_x = np.arange(X[:, feat_x].min() - 1, X[:, feat_x].max() + 1, step)
    axis_y = np.arange(X[:, feat_y].min() - 1, X[:, feat_y].max() + 1, step)
    X1, X2 = np.meshgrid(axis_x, axis_y)

    # The grid values must land in the SAME columns that are plotted;
    # all remaining columns stay at 0.
    Xpred = np.zeros((X1.size, X.shape[1]))
    Xpred[:, feat_x] = X1.ravel()
    Xpred[:, feat_y] = X2.ravel()

    # One predicted label per grid point, reshaped back onto the grid.
    pred = classifier.predict(Xpred).reshape(X1.shape)
    plt.contourf(X1, X2, pred, alpha=0.75, cmap='winter')

    # x/y are keyword-only in recent seaborn releases, and hue colours are
    # selected with `palette`, not `cmap`.
    sns.scatterplot(x=X[:, feat_x],
                    y=X[:, feat_y],
                    hue=y,
                    style=y,
                    palette='coolwarm',
                    s=30)
    plt.title('Score : ' + str(classifier.score(X, y)))
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)

<h1>GridSearchCV RBF estimation of best model</h1>

In [None]:
### Fit an SVM with an RBF kernel for several values of the penalty C and
### several kernel widths (gamma), selecting the best combination with a
### cross-validated grid search (GridSearchCV).

from sklearn import svm
parameters = dict(
    kernel=['rbf'],
    gamma=np.logspace(-2, 1, 4),
    C=np.logspace(0, 4, 5),
)
svcRBF = svm.SVC(kernel='rbf', class_weight='balanced')
grid = GridSearchCV(
    estimator=svcRBF,
    param_grid=parameters,
    refit=True,
    cv=10,
)

grid.fit(x_train, y_train)
# The score printed below is evaluated on the training data itself.
print("Accuracy est : ", grid.score(x_train, y_train))
print("Les meilleurs parametres sont : ", grid.best_params_)



In [None]:
# Draw the decision regions of the tuned RBF search result over the training
# points, write the figure to "RBF.png", then display it.
plot_boundary(classifier=grid, X=x_train, y=y_train)
plt.savefig("RBF.png")
plt.show()

In [None]:
# NOTE: the former `from sklearn.metrics import plot_confusion_matrix` line was
# dropped: that function was removed in scikit-learn 1.2 (the import raises
# ImportError there) and nothing in this cell used it.
from sklearn.metrics import confusion_matrix
import seaborn as sns

# Predict the held-out test rows with the tuned RBF search result.
y_pred = grid.predict(x_test)

# Build the confusion matrix once and reuse it for the counts, the printout
# and the heatmap. For a binary problem ravel() yields tn, fp, fn, tp.
cf_matrix = confusion_matrix(y_test, y_pred)
tn, fp, fn, tp = cf_matrix.ravel()
print ("tn :" ,tn," fp :",fp," fn: ",fn," tp :",tp)

# Per-class precision / recall / f1 report, then the overall error rate.
print(classification_report(y_test, y_pred))
print("Le taux d'erreur est : ",(fp+fn)/(tn+tp+fp+fn))

print("Confusion matrix")
print(cf_matrix)
# fmt="d" prints the integer counts as-is (the default format would show
# counts of 100 or more in scientific notation).
sns.heatmap(cf_matrix, annot=True, fmt="d")


<h1>GridSearchCV POLY estimation of best model</h1>

In [None]:
from sklearn import svm

# Search grid for the polynomial-kernel SVM: polynomial degree 1..7 and
# five log-spaced values of the penalty C.
parameters = dict(
    kernel=['poly'],
    degree=list(range(1, 8)),
    C=np.logspace(-2, 2, 5),
)

svcPoly = svm.SVC(kernel='poly', class_weight='balanced')

grid_poly = GridSearchCV(
    estimator=svcPoly,
    param_grid=parameters,
    refit=True,
    cv=5,
    return_train_score=True,
    verbose=4,
)

grid_poly.fit(x_train, y_train)



In [None]:
# Report the score of the polynomial search result on the training data and
# the hyper-parameter combination the search selected.
poly_train_score = grid_poly.score(x_train, y_train)
poly_best_params = grid_poly.best_params_
print("Accuracy of train est : ", poly_train_score)
print("Le meilleur parametre est : ", poly_best_params)


In [None]:
# Draw the decision regions of the tuned polynomial search result over the
# training points, write the figure to "POLY.png", then display it.
plot_boundary(classifier=grid_poly, X=x_train, y=y_train)
plt.savefig("POLY.png")
plt.show()


In [None]:
# NOTE: the former `from sklearn.metrics import plot_confusion_matrix` line was
# dropped: that function was removed in scikit-learn 1.2 (the import raises
# ImportError there) and nothing in this cell used it.
from sklearn.metrics import confusion_matrix
# Imported here so the cell does not rely on an earlier cell for `sns`.
import seaborn as sns

# Predict the held-out test rows with the tuned polynomial search result.
y_pred = grid_poly.predict(x_test)

# Build the confusion matrix once and reuse it for the counts, the printout
# and the heatmap. For a binary problem ravel() yields tn, fp, fn, tp.
cf_matrix = confusion_matrix(y_test, y_pred)
tn, fp, fn, tp = cf_matrix.ravel()
print ("tn :" ,tn," fp :",fp," fn: ",fn," tp :",tp)

# Per-class precision / recall / f1 report, then the overall error rate.
print(classification_report(y_test, y_pred))
print("Le taux d'erreur est : ",(fp+fn)/(tn+tp+fp+fn))

print("Confusion matrix")
print(cf_matrix)
# fmt="d" prints the integer counts as-is (the default format would show
# counts of 100 or more in scientific notation).
sns.heatmap(cf_matrix, annot=True, fmt="d")