# Hard SVM (Support vector machine) using convex Quadratic program 

## Step 1: Import All required Libraries

In [102]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.datasets.samples_generator import make_blobs
from cvxopt import matrix as cvxopt_matrix
from cvxopt import solvers as cvxopt_solvers
import matplotlib.pyplot as plt

## Step 2: Read CSV file (For this i have uploded the file on google colab) 

In [103]:
df = pd.read_csv("diabetes.csv")
print(df.head(),df.shape)

   Pregnancies  Glucose  BloodPressure  ...  DiabetesPedigreeFunction  Age  Outcome
0            6      148             72  ...                     0.627   50        1
1            1       85             66  ...                     0.351   31        0
2            8      183             64  ...                     0.672   32        1
3            1       89             66  ...                     0.167   21        0
4            0      137             40  ...                     2.288   33        1

[5 rows x 9 columns] (768, 9)


## Step 3: Store the features and target in X , Y seperately and since target has value 0,1 change label 0 to -1 

In [104]:
X = df.iloc[:,:-2].to_numpy()
y = df.iloc[:,-1:].to_numpy()
print(X.shape,X)
y[y==0]=-1
print(y.shape,y)


(768, 7) [[  6.    148.     72.    ...   0.     33.6     0.627]
 [  1.     85.     66.    ...   0.     26.6     0.351]
 [  8.    183.     64.    ...   0.     23.3     0.672]
 ...
 [  5.    121.     72.    ... 112.     26.2     0.245]
 [  1.    126.     60.    ...   0.     30.1     0.349]
 [  1.     93.     70.    ...   0.     30.4     0.315]]
(768, 1) [[ 1]
 [-1]
 [ 1]
 [-1]
 [ 1]
 [-1]
 [ 1]
 [-1]
 [ 1]
 [ 1]
 [-1]
 [ 1]
 [-1]
 [ 1]
 [ 1]
 [ 1]
 [ 1]
 [ 1]
 [-1]
 [ 1]
 [-1]
 [-1]
 [ 1]
 [ 1]
 [ 1]
 [ 1]
 [ 1]
 [-1]
 [-1]
 [-1]
 [-1]
 [ 1]
 [-1]
 [-1]
 [-1]
 [-1]
 [-1]
 [ 1]
 [ 1]
 [ 1]
 [-1]
 [-1]
 [-1]
 [ 1]
 [-1]
 [ 1]
 [-1]
 [-1]
 [ 1]
 [-1]
 [-1]
 [-1]
 [-1]
 [ 1]
 [-1]
 [-1]
 [ 1]
 [-1]
 [-1]
 [-1]
 [-1]
 [ 1]
 [-1]
 [-1]
 [ 1]
 [-1]
 [ 1]
 [-1]
 [-1]
 [-1]
 [ 1]
 [-1]
 [ 1]
 [-1]
 [-1]
 [-1]
 [-1]
 [-1]
 [ 1]
 [-1]
 [-1]
 [-1]
 [-1]
 [-1]
 [ 1]
 [-1]
 [-1]
 [-1]
 [ 1]
 [-1]
 [-1]
 [-1]
 [-1]
 [ 1]
 [-1]
 [-1]
 [-1]
 [-1]
 [-1]
 [ 1]
 [ 1]
 [-1]
 [-1]
 [-1]
 [-1]
 [-1]
 [-1]
 [-1

## Step 4: Processing the Data and Splitting


In [105]:
from sklearn import preprocessing
min_max_scaler = preprocessing.MinMaxScaler()
X_scale = min_max_scaler.fit_transform(X)
X_scale

array([[0.35294118, 0.74371859, 0.59016393, ..., 0.        , 0.50074516,
        0.23441503],
       [0.05882353, 0.42713568, 0.54098361, ..., 0.        , 0.39642325,
        0.11656704],
       [0.47058824, 0.91959799, 0.52459016, ..., 0.        , 0.34724292,
        0.25362938],
       ...,
       [0.29411765, 0.6080402 , 0.59016393, ..., 0.13238771, 0.390462  ,
        0.07130658],
       [0.05882353, 0.63316583, 0.49180328, ..., 0.        , 0.4485842 ,
        0.11571307],
       [0.05882353, 0.46733668, 0.57377049, ..., 0.        , 0.45305514,
        0.10119556]])

A. Split the data in 70,30 for training and testing the model 

In [106]:
X_train, X_test, y_train, y_test = train_test_split(X_scale, y, test_size=0.3, random_state=1)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(537, 7) (231, 7) (537, 1) (231, 1)


B. Split the data in 80,20 for training and testing the model 

In [107]:
X_train, X_test, y_train, y_test = train_test_split(X_scale, y, test_size=0.2, random_state=1)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(614, 7) (154, 7) (614, 1) (154, 1)


C. Split the data in 90,10 for training and testing the model 

In [108]:
X_train, X_test, y_train, y_test = train_test_split(X_scale, y, test_size=0.1, random_state=1)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(691, 7) (77, 7) (691, 1) (77, 1)


## Step 5: Optimizing the weight vector w using the cvxopt_solvers.
### Create H where Hi,j=y(i)y(j)<x(i)x(j)>
### Calculate w=∑y(i)α(i)x(i)
### Determine the set of support vectors S by finding the indices such that αi>0
### For each new point x′ classify according to y′=sign(wTx′+b)
### as this is hard svm there is no need for y*alpha=0

In [109]:
# Initializing values of m,n and computing gram matrix.
samples,features = X_train.shape
y_train = y_train.reshape(-1,1)
H = np.dot((y_train*X_train),(y_train*X_train).T)

# Converting into cvxopt format
P = cvxopt_matrix(H)
q = cvxopt_matrix(np.repeat([-1.0], samples)[..., None])
G = cvxopt_matrix(np.negative(np.eye(samples)))
h = cvxopt_matrix(np.zeros(samples))
A = cvxopt_matrix(y_train.reshape(1, -1)*1.0)
b = cvxopt_matrix(0.0)

## Step 6: Running the solver and printing the values of solution 

In [110]:
sol = cvxopt_solvers.qp(P, q, G, h, A, b)
alphas = np.array(sol['x'])
print(sol,alphas.flatten()) 

     pcost       dcost       gap    pres   dres
 0: -4.4314e+02 -1.5109e+03  5e+03  5e+01  3e+00
 1: -2.0094e+03 -4.3406e+03  4e+03  3e+01  2e+00
 2: -6.4312e+03 -7.9758e+03  2e+03  2e+01  1e+00
 3: -1.3108e+04 -1.5644e+04  3e+03  2e+01  1e+00
 4: -2.7692e+04 -3.2112e+04  4e+03  2e+01  1e+00
 5: -1.2785e+05 -1.3806e+05  1e+04  2e+01  1e+00
 6: -6.1251e+05 -6.4769e+05  4e+04  2e+01  1e+00
 7: -5.4756e+06 -5.6541e+06  2e+05  2e+01  1e+00
 8: -1.3827e+08 -1.3932e+08  1e+06  2e+01  1e+00
 9: -1.7425e+10 -1.7432e+10  7e+06  2e+01  1e+00
10: -1.9981e+12 -1.9989e+12  8e+08  2e+01  1e+00
Terminated (singular KKT matrix).
{'x': <691x1 matrix, tc='d'>, 'y': <1x1 matrix, tc='d'>, 's': <691x1 matrix, tc='d'>, 'z': <691x1 matrix, tc='d'>, 'status': 'unknown', 'gap': 821061350.0206683, 'relative gap': 0.00041091896479184865, 'primal objective': -1998110139395.921, 'dual objective': -1998931200745.7966, 'primal infeasibility': 15.934059192561486, 'dual infeasibility': 1.0004119528373405, 'primal slac

## Calculating W and b Displaying results

In [111]:
# w parameter using alphas 
w = np.dot((y_train * alphas).T,X_train)[0]

# Selecting the set of indices S corresponding to non zero parameters
S = (alphas > 1e-4).flatten()

#Computing b of non zero paprameters
b = y_train[S] - np.dot(X_train[S], w.reshape(-1,1))
b = sum(b)/len(b)

# Display results
print('Alphas = ',alphas[alphas > 1e-4])
print('w = ', w.flatten())
print('b = ', b)

Alphas =  [2.45923734e+09 1.52831485e+09 2.27807541e+09 2.47602079e+09
 2.10146453e+09 4.06583400e+09 5.01050663e+09 2.01947490e+09
 5.48642982e+09 2.49299776e+09 2.83498234e+09 2.32431238e+09
 2.00086577e+09 1.45199712e+09 1.29654856e+09 1.94196965e+09
 1.56137872e+09 2.38948028e+09 2.95692967e+09 1.96065748e+09
 1.91615852e+09 1.77976143e+09 1.63246830e+09 1.90663744e+09
 1.91814413e+09 2.49261226e+09 1.93601766e+09 1.63365405e+09
 1.61954190e+09 2.96204544e+09 3.11301186e+09 6.00923949e+09
 1.50327555e+09 4.30775924e+09 1.30685989e+09 2.49743018e+09
 1.77515357e+09 5.80731625e+10 1.31902977e+09 1.79967204e+09
 4.55364756e+09 2.09478343e+09 4.02415218e+09 1.80351911e+09
 1.67602394e+09 3.98076515e+09 2.26901467e+09 1.61421973e+09
 1.94648973e+09 2.07879688e+09 3.41647505e+09 3.39312662e+09
 2.11622979e+09 1.47408382e+09 1.34804223e+09 3.50207512e+09
 2.42281332e+09 1.72926853e+09 2.00523316e+09 1.71615490e+09
 4.18627453e+09 9.24064748e+09 1.93177234e+09 1.36603528e+09
 1.64260474e+0

## Printing all support vectors

In [112]:
X_Support_vectors = X_train[S]
y_support_vectors = y_train[S]
print(alphas,'\n',"Support vectors are : = ",X_Support_vectors,'\n','\n',"Number of Support vectors are: ",len(X_Support_vectors))

[[2.45923734e+09]
 [1.52831485e+09]
 [2.27807541e+09]
 [2.47602079e+09]
 [2.10146453e+09]
 [4.06583400e+09]
 [5.01050663e+09]
 [2.01947490e+09]
 [5.48642982e+09]
 [2.49299776e+09]
 [2.83498234e+09]
 [2.32431238e+09]
 [2.00086577e+09]
 [1.45199712e+09]
 [1.29654856e+09]
 [1.94196965e+09]
 [1.56137872e+09]
 [2.38948028e+09]
 [2.95692967e+09]
 [1.96065748e+09]
 [1.91615852e+09]
 [1.77976143e+09]
 [1.63246830e+09]
 [1.90663744e+09]
 [1.91814413e+09]
 [2.49261226e+09]
 [1.93601766e+09]
 [1.63365405e+09]
 [1.61954190e+09]
 [2.96204544e+09]
 [3.11301186e+09]
 [6.00923949e+09]
 [1.50327555e+09]
 [4.30775924e+09]
 [1.30685989e+09]
 [2.49743018e+09]
 [1.77515357e+09]
 [5.80731625e+10]
 [1.31902977e+09]
 [1.79967204e+09]
 [4.55364756e+09]
 [2.09478343e+09]
 [4.02415218e+09]
 [1.80351911e+09]
 [1.67602394e+09]
 [3.98076515e+09]
 [2.26901467e+09]
 [1.61421973e+09]
 [1.94648973e+09]
 [2.07879688e+09]
 [3.41647505e+09]
 [3.39312662e+09]
 [2.11622979e+09]
 [1.47408382e+09]
 [1.34804223e+09]
 [3.502075

## Step 7: Creating target Array and calculating the target values.

In [113]:
target = np.where(np.dot(X_test,w)+b>=0.0,1,-1)
t = np.where(np.dot(X_train,w)+b>=0.0,1,-1)

## Step 8: Checking for Accuracy. 

In [114]:
v1 = 0  
v2 = 0
c = 0
i=0    
for c in range( np.size( target ) ) :  
  if y_test[c] == target[c] :            
    v1 = v1 + 1
for i in range( np.size( t ) ) :
  if y_train[i] == t[i] :            
    v2 = v2 + 1
  i = i + 1
print("Accuracy of Defined Model on test data :",(v1/c)*100)     
print("Accuracy of Defined Model on train data:",(v2/i)*100)   

Accuracy of Defined Model on test data : 63.1578947368421
Accuracy of Defined Model on train data: 65.41244573082489
