In [2]:
# Import all the necessary libraries for support vector

import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split

In [3]:
# datafiles = ['../project/dataset1.csv', '../project/dataset2.csv', '../project/dataset3.csv', '../project/dataset4.csv', '../project/dataset5.csv']
datafiles = ['../project/dataset1.csv']
n_samples = 50000           # Number of samples
n_dataset = len(datafiles)  # Number of datasets
train = 0.9   # 245000 (77.777...%)
test = 0.1    # 35000 (10%)

# Split all datasets
datasets = np.empty((int(n_samples*n_dataset),5))
print(datasets.shape)
for i,datafile in enumerate(datafiles):
  dataset = np.loadtxt(datafile, delimiter=',')
  datasets[int(i*n_samples):int(n_samples*(i+1)),:] = dataset
print(datasets[0,0])
# Split into input and output
Cr = datasets[:,0:4]   # Input: relative entropy of coherence
ES = np.array([float(y) for y in datasets[:,4]])  # Output: entangled/separable

# Count number of entangled and separable states
n_entangled = len([es for es in ES if es == 1])
print(f'Number of entangled states: {n_entangled}\n')
print(f'Number of separable states: {n_samples*n_dataset-n_entangled}\n')
# Split into train-and-validation and test
#break datasets into train and test

Cr_train, Cr_test, ES_train, ES_test = train_test_split(Cr, ES, test_size=test,
                                                      random_state=42)

(50000, 5)
0.195132062642601
Number of entangled states: 25624

Number of separable states: 24376



In [3]:
from sklearn.model_selection import GridSearchCV

# defining parameter range
param_grid = {'C': [0.1, 1, 10, 100, 1000],
			'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
			'kernel': ['linear']}

grid = GridSearchCV(SVC(), param_grid, refit = True, verbose = 3)

# fitting the model for grid search
grid.fit(Cr_train, ES_train)


Fitting 5 folds for each of 25 candidates, totalling 125 fits
[CV 1/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.968 total time=   3.6s
[CV 2/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.970 total time=   3.6s
[CV 3/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.972 total time=   3.7s
[CV 4/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.974 total time=   3.8s
[CV 5/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.972 total time=   3.8s
[CV 1/5] END ...C=0.1, gamma=0.1, kernel=linear;, score=0.968 total time=   3.8s
[CV 2/5] END ...C=0.1, gamma=0.1, kernel=linear;, score=0.970 total time=   3.8s
[CV 3/5] END ...C=0.1, gamma=0.1, kernel=linear;, score=0.972 total time=   3.9s
[CV 4/5] END ...C=0.1, gamma=0.1, kernel=linear;, score=0.974 total time=   3.9s
[CV 5/5] END ...C=0.1, gamma=0.1, kernel=linear;, score=0.972 total time=   4.2s
[CV 1/5] END ..C=0.1, gamma=0.01, kernel=linear;, score=0.968 total time=   4.2s
[CV 2/5] END ..C=0.1, gamma=0.01, kernel=linear

In [4]:
# print best parameter after tuning
print(grid.best_params_)

# print how our model looks after hyper-parameter tuning
print(grid.best_estimator_)


{'C': 100, 'gamma': 1, 'kernel': 'linear'}
SVC(C=100, gamma=1, kernel='linear')


In [14]:
# datafiles = ['../project/dataset1.csv', '../project/dataset2.csv', '../project/dataset3.csv', '../project/dataset4.csv', '../project/dataset5.csv']
datafiles = ['../project/dataset1.csv', '../project/dataset2.csv', '../project/dataset3.csv', '../project/dataset4.csv', '../project/dataset5.csv']
n_samples = 50000           # Number of samples
n_dataset = len(datafiles)  # Number of datasets
train = 0.9   # 245000 (77.777...%)
test = 0.1    # 35000 (10%)

# Split all datasets
datasets = np.empty((int(n_samples*n_dataset),5))
print(datasets.shape)
for i,datafile in enumerate(datafiles):
  dataset = np.loadtxt(datafile, delimiter=',')
  datasets[int(i*n_samples):int(n_samples*(i+1)),:] = dataset
print(datasets[0,0])
# Split into input and output
Cr = datasets[:,0:4]   # Input: relative entropy of coherence
ES = np.array([float(y) for y in datasets[:,4]])  # Output: entangled/separable

# Count number of entangled and separable states
n_entangled = len([es for es in ES if es == 1])
print(f'Number of entangled states: {n_entangled}\n')
print(f'Number of separable states: {n_samples*n_dataset-n_entangled}\n')
# Split into train-and-validation and test
#break datasets into train and test

Cr_train, Cr_test, ES_train, ES_test = train_test_split(Cr, ES, test_size=test,
                                                      random_state=42, shuffle=True)

(250000, 5)
0.195132062642601
Number of entangled states: 127330

Number of separable states: 122670



In [10]:
model = SVC(C=100, gamma=1, kernel='linear')
model.fit(Cr_train, ES_train)

In [11]:
# Evaluate the model
accuracy = model.score(Cr_test, ES_test)
print(f'Accuracy: {accuracy}\n')

Accuracy: 0.9892666666666666



In [12]:
W = model.coef_[0]
I = model.intercept_
print(W)
print(I)

[ 76.66673997 -62.35154383  73.01357178  37.16411006]
[-16.33855603]


In [13]:
print('w = ',model.coef_)
print('b = ',model.intercept_)
print('Indices of support vectors = ', model.support_)
print('Support vectors = ', model.support_vectors_)
print('Number of support vectors for each class = ', model.n_support_)
print('Coefficients of the support vector in the decision function = ', np.abs(model.dual_coef_))


w =  [[ 76.66673997 -62.35154383  73.01357178  37.16411006]]
b =  [-16.33855603]
Indices of support vectors =  [   127    190    212 ... 134733 134783 134959]
Support vectors =  [[1.37574850e-01 1.09112013e-01 1.37574850e-01 7.28455051e-02]
 [4.77219385e-01 8.94479733e-01 4.77219385e-01 8.05329893e-04]
 [4.14841205e-01 7.51627556e-01 4.14841205e-01 8.04158526e-03]
 ...
 [2.27780606e-01 3.00931044e-01 2.27780606e-01 5.39460376e-02]
 [2.58562844e-01 3.79431568e-01 2.58562844e-01 5.61643659e-02]
 [3.01283260e-01 4.73584843e-01 3.01283260e-01 4.08960779e-02]]
Number of support vectors for each class =  [1827 1827]
Coefficients of the support vector in the decision function =  [[100. 100. 100. ... 100. 100. 100.]]
