In [10]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sympy as sp
from sklearn import svm

from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import classification_report

In [11]:
# Load the Seattle weather dataset.
# The path is a raw string: a normal literal would contain the invalid escape
# sequences \S, \P and \s, which emit a DeprecationWarning/SyntaxWarning.
# NOTE(review): this is a machine-specific absolute path — consider a path
# relative to the notebook so the cell runs on other machines.
dataset = pd.read_csv(r'D:\SEM3\PYTHON\Pml files\seattle-weather.csv')

# Binary target: +1 when the weather label is 'rain', -1 for every other label.
dataset['weather'] = dataset['weather'].apply(lambda x: 1 if x == 'rain' else -1)

# 'date' is not used downstream; reassign instead of mutating in place.
dataset = dataset.drop(columns=['date'])

print(dataset.shape)
print(np.unique(dataset['weather']))

(1461, 5)
[-1  1]


$
\min_{\omega,\,\gamma,\,\xi}\left(\frac{1}{2}\omega^{T}\omega + c\sum_{i=1}^{n}\xi_{i}\right)
$

$
\text{subject to}\quad d_{i}\left[\omega^{T}x_{i}-\gamma\right]+\xi_{i}\ge 1,\qquad \xi_{i}\ge 0,\qquad i=1,\dots,n
$

In [6]:
# Positional split of the prepared frame: columns 0-3 are the inputs,
# column 4 is the target, stored as float64.
x_i = dataset.iloc[:, :4].values
d_i = dataset.iloc[:, 4].values.astype(np.float64)

# Show which label values are present.
print(np.unique(d_i))

[-1.  1.]


In [7]:
import cvxpy as cp

# Penalty weight on the total slack (the `c` in the objective).
c = 0.6

n_samples, n_features = x_i.shape

# Optimization variables
omega = cp.Variable(n_features)  # weight vector, one entry per feature
gamma = cp.Variable()            # scalar offset of the separating hyperplane
xi = cp.Variable(n_samples)      # slack, one entry per sample

# Objective: 0.5 * ||omega||^2 + c * sum(xi)
obj = cp.Minimize(0.5 * cp.sum_squares(omega) + c * cp.sum(xi))

# Constraints, written once for all samples instead of one Python-level
# constraint object per row:
#   d_i * (x_i @ omega - gamma) + xi >= 1   (margin, elementwise)
#   xi >= 0
constraints = [
    cp.multiply(d_i, x_i @ omega - gamma) + xi >= 1,
    xi >= 0,
]

# Define the problem and solve it
prob = cp.Problem(obj, constraints)
prob.solve()

# Stop here with a clear message if the solver did not produce a solution;
# otherwise omega.value / gamma.value are None and later cells fail obscurely.
if prob.status not in ("optimal", "optimal_inaccurate"):
    raise RuntimeError(f"SVM problem was not solved (status: {prob.status})")

# Print the optimal omega
print("The optimal omega is")
print(omega.value)

# Print the dual variable of the first sample's margin constraint
# (the margin constraint is now a vector, so take its first entry).
print("A dual variable is")
print(constraints[0].dual_value[0])

# Print the status of the problem
print("The status of the problem is", prob.status)


The optimal omega is
[ 8.69565841e-01  1.07594277e-07 -8.79511035e-08 -1.68444917e-07]
A dual variable is
0.4456947245378142
The status of the problem is optimal


In [None]:
# Decision score of every sample under the fitted hyperplane.
scores = x_i @ omega.value - gamma.value

# Map scores to the labels {-1, +1}. np.sign would return 0 for a sample lying
# exactly on the hyperplane, introducing a third label that makes the binary
# precision/recall/F1 calls below fail; such ties are assigned to +1 here.
y_pred = np.where(scores >= 0, 1.0, -1.0)
print(y_pred)

# NOTE: every metric below is computed on the same samples the model was
# fitted on, so these are training-set scores.

# find the accuracy
print("Accuracy:", accuracy_score(d_i, y_pred))

# find the confusion matrix
print("Confusion Matrix:", confusion_matrix(d_i, y_pred))

# find the precision
print("Precision:", precision_score(d_i, y_pred))

# find the recall
print("Recall:", recall_score(d_i, y_pred))

# find the f1 score
print("F1 Score:", f1_score(d_i, y_pred))


[-1.  1. -1. ... -1. -1. -1.]
Accuracy: 0.8583162217659137
Confusion Matrix: [[797  23]
 [184 457]]
Precision: 0.9520833333333333
Recall: 0.7129485179407177
F1 Score: 0.8153434433541481


In [9]:
from sklearn import svm

# Reference model: scikit-learn's linear-kernel SVC.
# C is set to the same penalty weight `c` used in the cvxpy formulation so both
# models solve the same soft-margin problem (SVC's default would be C=1.0).
clf = svm.SVC(kernel='linear', C=c)

# Fit on the full dataset (there is no separate train/test split in this notebook).
clf.fit(x_i, d_i)

# Predict on the same samples the model was fitted on.
y_pred = clf.predict(x_i)

# Training-set accuracy, for comparison with the cvxpy solution.
print("Accuracy:",accuracy_score(d_i, y_pred))




Accuracy: 0.8583162217659137
