In [57]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# scratch-1

In [60]:
# loading the data from csv file to pandas dataframe
diabetes_data = pd.read_csv('diabetes.csv')
features = diabetes_data.drop(columns='Outcome', axis=1)
target = diabetes_data['Outcome']

scaler = StandardScaler()
features = scaler.fit_transform(features)

X_train, X_test, Y_train, Y_test = train_test_split(features, target, test_size=0.2, random_state = 2)


In [61]:
print(X_train.shape)
# print(type(Y_train))
# to_numpy(Y_test)
Y_train = Y_train.to_numpy()
Y_test = Y_test.to_numpy()


(614, 8)


In [62]:
class SVM_classifier():


  # initiating the hyperparameters
  def __init__(self, learning_rate, no_of_iterations, lambda_parameter):

    self.learning_rate = learning_rate
    self.no_of_iterations = no_of_iterations
    self.lambda_parameter = lambda_parameter


  
  # fitting the dataset to SVM Classifier
  def fit(self, X, Y):

    # m  --> number of Data points --> number of rows
    # n  --> number of input features --> number of columns
    self.m, self.n = X.shape

    self.w = np.zeros(self.n)
    self.b = 0
    self.X = X
    self.Y = Y

    for i in range(self.no_of_iterations):
      self.update_weights()


  def update_weights(self):

    y_label = np.where(self.Y <= 0, -1, 1)
    # print(y_label)

    for index, x_i in enumerate(self.X):

      # print(index, x_i)
      condition = y_label[index] * (np.dot(x_i, self.w) - self.b) >= 1

      if (condition == True):
        dw = 2 * self.lambda_parameter * self.w
        db = 0

      else:
        dw = 2 * self.lambda_parameter * self.w - np.dot(x_i, y_label[index])
        db = y_label[index]

      self.w = self.w - self.learning_rate * dw
      self.b = self.b - self.learning_rate * db

  # predict the label for a given input value
  def predict(self, X):

    output = np.dot(X, self.w) - self.b
    # print(output)
    predicted_labels = np.sign(output)
    # print(predicted_labels)
    y_hat = np.where(predicted_labels <= -1, 0, 1)
    # print(y_hat)
      
    y_hat = np.where(output < 0, 0, 1)
    return y_hat  



In [63]:
classifier = SVM_classifier(learning_rate=0.001, no_of_iterations=1000, lambda_parameter=0.01)

classifier.fit(X_train, Y_train)
X_train_prediction = classifier.predict(X_train)
training_data_accuracy = accuracy_score(Y_train, X_train_prediction)
training_data_accuracy

0.7768729641693811

In [64]:
# accuracy on training data
X_test_prediction = classifier.predict(X_test)
test_data_accuracy = accuracy_score(Y_test, X_test_prediction)
test_data_accuracy

0.7532467532467533

# sratch-2

In [65]:
class SVM:

    def __init__(self, C = 1.0):
        # C = error term
        self.C = C
        self.w = 0
        self.b = 0

    # Hinge Loss Function / Calculation
    def hingeloss(self, w, b, x, y):
        # Regularizer term
        reg = 0.5 * (w * w)

        for i in range(x.shape[0]):
            # Optimization term
            opt_term = y[i] * ((np.dot(w, x[i])) + b)

            # calculating loss
            loss = reg + self.C * max(0, 1-opt_term)
        return loss[0][0]

    def fit(self, X, Y, batch_size=100, learning_rate=0.001, epochs=1000):
        # The number of features in X
        number_of_features = X.shape[1]

        # The number of Samples in X
        number_of_samples = X.shape[0]

        c = self.C

        # Creating ids from 0 to number_of_samples - 1
        ids = np.arange(number_of_samples)

        # Shuffling the samples randomly
        np.random.shuffle(ids)

        # creating an array of zeros
        w = np.zeros((1, number_of_features))
        b = 0
        losses = []

        # Gradient Descent logic
        for i in range(epochs):
            # Calculating the Hinge Loss
            l = self.hingeloss(w, b, X, Y)

            # Appending all losses 
            losses.append(l)
            
            # Starting from 0 to the number of samples with batch_size as interval
            for batch_initial in range(0, number_of_samples, batch_size):
                gradw = 0
                gradb = 0

                for j in range(batch_initial, batch_initial+ batch_size):
                    if j < number_of_samples:
                        x = ids[j]
                        ti = Y[x] * (np.dot(w, X[x].T) + b)

                        if ti > 1:
                            gradw += 0
                            gradb += 0
                        else:
                            # Calculating the gradients

                            #w.r.t w 
                            gradw += c * Y[x] * X[x]
                            # w.r.t b
                            gradb += c * Y[x]

                # Updating weights and bias
                w = w - learning_rate * w + learning_rate * gradw
                b = b + learning_rate * gradb
        
        self.w = w
        self.b = b

        return self.w, self.b, losses

    def predict(self, X):
        
        prediction = np.dot(X, self.w[0]) + self.b # w.x + b
        return np.sign(prediction)

In [66]:
# from sklearn import datasets
# import matplotlib.pyplot as plt
# import numpy as np
# from sklearn.metrics import accuracy_score
# from sklearn.model_selection import train_test_split
# # from svm import SVM

# # Creating dataset
# X, y = datasets.make_blobs(

#         n_samples = 100, # Number of samples
#         n_features = 2, # Features
#         centers = 2,
#         cluster_std = 1,
#         random_state=40
#     )

# # Classes 1 and -1
# y = np.where(y == 0, -1, 1)
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=42)


# print(X_train.shape)
# print(y_train.shape)
# type(y_train)

svm = SVM()
w, b, losses = svm.fit(X_train, Y_train)
prediction = svm.predict(X_test)
lss = losses.pop()

print("Loss:", lss)
print("Prediction:", prediction)
print("Accuracy:", accuracy_score(prediction, Y_test))
print("w, b:", [w, b])


Loss: 1.0000000038423387
Prediction: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
Accuracy: 0.2922077922077922
w, b: [array([[ 8.70504930e-05,  9.12105684e-05, -4.10998963e-05,
         1.88501133e-06, -5.22089253e-05,  9.40720649e-05,
         3.00003915e-05,  6.56184708e-05]]), 1.180999999999997]


In [56]:
# losses

# sklearn

In [70]:
from sklearn.svm import LinearSVC
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_classification

clf = make_pipeline(
                    LinearSVC(random_state=0, tol=1e-5))

clf.fit(X_train, Y_train)




In [71]:
pred = clf.predict(X_test)
print(clf.score(X_test, Y_test))


0.7662337662337663
