In [85]:
import numpy as np
from scipy.optimize import minimize, Bounds, LinearConstraint
import matplotlib.pyplot as plt
import pandas as pd


data=pd.read_csv('../input/heartdisease-2/heart_disease.csv')
labels=data.values[:,-1]
labels[labels>1]=1
labels=labels.astype(int)

data=data.values[:,:-1]

data=np.hstack((np.ones((len(data),1)),data))
data=(data-np.mean(data))/np.std(data)

In [86]:
def polynomial_kernel(A, B):
    degree=3
    gamma=1 / (A.shape[1] * np.var(A))
    coef0=0.0
    return (gamma * np.dot(A, B.T) + coef0) ** degree
def rbf_kernel(A, B):
    gamma=1 / (A.shape[1] * np.var(A))
    return np.exp(-gamma * ((A[:,np.newaxis] - B) ** 2 ).sum(axis=2))
def linear_kernel(A, B):
    return np.dot(A, B.T)

from sklearn.svm import SVC

#you can use your custom kernel as an input like this --> svm=SVC(kernel=polynomial_kernel)

In [87]:
from sklearn.model_selection import train_test_split
from sklearn import metrics
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.3, random_state=109)

In [88]:
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_validate
cv = KFold(n_splits=5)

In [89]:
svm_my_linear = SVC(kernel=linear_kernel)
svm_my_linear.fit(X_train, y_train)
y_pred_ml = svm_my_linear.predict(X_test)

svm_linear = SVC(kernel='linear')
svm_linear.fit(X_train, y_train)
y_pred_l = svm_linear.predict(X_test)

print("Accuracy SVM with my linear:",metrics.accuracy_score(y_test, y_pred_ml))
print("Accuracy SVM with original linear:",metrics.accuracy_score(y_test, y_pred_l))

Accuracy SVM with my linear: 0.7142857142857143
Accuracy SVM with original linear: 0.7142857142857143


In [90]:
svm_my_poly = SVC(kernel=polynomial_kernel)
svm_my_poly.fit(X_train, y_train)
y_pred_mp = svm_my_poly.predict(X_test)

svm_poly = SVC(kernel='poly')
svm_poly.fit(X_train, y_train)
y_pred_p = svm_poly.predict(X_test)

print("Accuracy SVM with my poly:",metrics.accuracy_score(y_test, y_pred_mp))
print("Accuracy SVM with original poly:",metrics.accuracy_score(y_test, y_pred_p))

Accuracy SVM with my poly: 0.6923076923076923
Accuracy SVM with original poly: 0.7142857142857143


In [91]:
svm_my_rbf = SVC(kernel=rbf_kernel)
svm_my_rbf.fit(X_train, y_train)
y_pred_mrbf = svm_my_rbf.predict(X_test)

svm_rbf = SVC(kernel='rbf')
svm_rbf.fit(X_train, y_train)
y_pred_rbf = svm_rbf.predict(X_test)

print("Accuracy SVM with my rbf:",metrics.accuracy_score(y_test, y_pred_mrbf))
print("Accuracy SVM with original rbf:",metrics.accuracy_score(y_test, y_pred_rbf))

Accuracy SVM with my rbf: 0.6923076923076923
Accuracy SVM with original rbf: 0.6923076923076923


In [92]:
# svm_my_linear = SVC(kernel=linear_kernel)
# ml_scores = cross_validate(svm_my_linear, data, labels, scoring='accuracy', cv=cv, return_train_score=True)
# print('%i fold SVM with my linear Accuracy train score: %.3f (%.3f)' % (5, np.mean(ml_scores['train_score']), np.std(ml_scores['train_score'])))
# print('%i fold SVM with my linear Accuracy test score: %.3f (%.3f)' % (5, np.mean(ml_scores['test_score']), np.std(ml_scores['test_score'])))

# svm_linear = SVC(kernel='linear')
# l_scores = cross_validate(svm_linear, data, labels, scoring='accuracy', cv=cv, return_train_score=True)
# print('%i fold SVM with original linear Accuracy train score: %.3f (%.3f)' % (5, np.mean(l_scores['train_score']), np.std(l_scores['train_score'])))
# print('%i fold SVM with original linear Accuracy test score: %.3f (%.3f)' % (5, np.mean(l_scores['test_score']), np.std(l_scores['test_score'])))

5 fold SVM with my linear Accuracy train score: 0.716 (0.015)
5 fold SVM with my linear Accuracy test score: 0.706 (0.058)
5 fold SVM with original linear Accuracy train score: 0.716 (0.015)
5 fold SVM with original linear Accuracy test score: 0.706 (0.058)


In [93]:
# svm_my_poly = SVC(kernel=polynomial_kernel)
# mp_scores = cross_validate(svm_my_poly, data, labels, scoring='accuracy', cv=cv, return_train_score=True)
# print('%i fold SVM with my poly Accuracy train score: %.3f (%.3f)' % (5, np.mean(mp_scores['train_score']), np.std(mp_scores['train_score'])))
# print('%i fold SVM with my poly Accuracy test score: %.3f (%.3f)' % (5, np.mean(mp_scores['test_score']), np.std(mp_scores['test_score'])))

# svm_poly = SVC(kernel='poly')
# p_scores = cross_validate(svm_poly, data, labels, scoring='accuracy', cv=cv, return_train_score=True)
# print('%i fold SVM with original poly Accuracy train score: %.3f (%.3f)' % (5, np.mean(p_scores['train_score']), np.std(p_scores['train_score'])))
# print('%i fold SVM with original poly Accuracy test score: %.3f (%.3f)' % (5, np.mean(p_scores['test_score']), np.std(p_scores['test_score'])))

5 fold SVM with my poly Accuracy train score: 0.679 (0.011)
5 fold SVM with my poly Accuracy test score: 0.647 (0.074)
5 fold SVM with original poly Accuracy train score: 0.679 (0.011)
5 fold SVM with original poly Accuracy test score: 0.670 (0.069)


In [94]:
# svm_my_rbf = SVC(kernel=rbf_kernel)
# mrbf_scores = cross_validate(svm_my_rbf, data, labels, scoring='accuracy', cv=cv, return_train_score=True)
# print('%i fold SVM with my poly Accuracy train score: %.3f (%.3f)' % (5, np.mean(mrbf_scores['train_score']), np.std(mrbf_scores['train_score'])))
# print('%i fold SVM with my poly Accuracy test score: %.3f (%.3f)' % (5, np.mean(mrbf_scores['test_score']), np.std(mrbf_scores['test_score'])))

# svm_rbf = SVC(kernel='rbf')
# rbf_scores = cross_validate(svm_rbf, data, labels, scoring='accuracy', cv=cv, return_train_score=True)
# print('%i fold SVM with original poly Accuracy train score: %.3f (%.3f)' % (5, np.mean(rbf_scores['train_score']), np.std(rbf_scores['train_score'])))
# print('%i fold SVM with original poly Accuracy test score: %.3f (%.3f)' % (5, np.mean(rbf_scores['test_score']), np.std(rbf_scores['test_score'])))

5 fold SVM with my poly Accuracy train score: 0.673 (0.014)
5 fold SVM with my poly Accuracy test score: 0.653 (0.084)
5 fold SVM with original poly Accuracy train score: 0.673 (0.014)
5 fold SVM with original poly Accuracy test score: 0.653 (0.084)


In [96]:
C_values = [0.001,0.01,0.1,0.25,0.5,0.9,1,5,10,25,42,50,100,200,500,1000,10000]
for c in C_values:
    svm_my_linear_c = SVC(C=c,kernel=linear_kernel)
    mlc_scores = cross_validate(svm_my_linear_c, data, labels, scoring='accuracy', cv=cv, return_train_score=True)
    print('%i fold, C=%.2f, SVM with my linear Accuracy train score: %.3f' % (5, c, np.mean(mlc_scores['train_score'])))
    print('%i fold, C=%.2f, SVM with my linear Accuracy test score: %.3f' % (5, c, np.mean(mlc_scores['test_score'])))

5 fold, C=0.00, SVM with my linear Accuracy train score: 0.541
5 fold, C=0.00, SVM with my linear Accuracy test score: 0.541
5 fold, C=0.01, SVM with my linear Accuracy train score: 0.541
5 fold, C=0.01, SVM with my linear Accuracy test score: 0.541
5 fold, C=0.10, SVM with my linear Accuracy train score: 0.659
5 fold, C=0.10, SVM with my linear Accuracy test score: 0.663
5 fold, C=0.25, SVM with my linear Accuracy train score: 0.688
5 fold, C=0.25, SVM with my linear Accuracy test score: 0.676
5 fold, C=0.50, SVM with my linear Accuracy train score: 0.703
5 fold, C=0.50, SVM with my linear Accuracy test score: 0.676
5 fold, C=0.90, SVM with my linear Accuracy train score: 0.712
5 fold, C=0.90, SVM with my linear Accuracy test score: 0.703
5 fold, C=1.00, SVM with my linear Accuracy train score: 0.716
5 fold, C=1.00, SVM with my linear Accuracy test score: 0.706
5 fold, C=5.00, SVM with my linear Accuracy train score: 0.752
5 fold, C=5.00, SVM with my linear Accuracy test score: 0.729
