# Soft SVM (Support vector machine) using Stochastic Gradient Descent
# min(w) (||w||^2 + hinge_loss(w)) 
# where hinge_loss((w, b),(x, y)) = 1/m(summation_over_m(max{0, 1 − y(hw, xi + b)}))


## Step 1: Import All required Libraries

In [38]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

## Step 2: Read CSV file (For this i have uploded the file on google colab) 

In [39]:
df = pd.read_csv('file.csv')
print(df.shape,df)

(2665, 6)       Temperature   Humidity       Light          CO2  HumidityRatio  Occupancy
0       23.700000  26.272000  585.200000   749.200000       0.004764          1
1       23.718000  26.290000  578.400000   760.400000       0.004773          1
2       23.730000  26.230000  572.666667   769.666667       0.004765          1
3       23.722500  26.125000  493.750000   774.750000       0.004744          1
4       23.754000  26.200000  488.600000   779.000000       0.004767          1
...           ...        ...         ...          ...            ...        ...
2660    24.290000  25.700000  808.000000  1150.250000       0.004829          1
2661    24.330000  25.736000  809.800000  1129.200000       0.004848          1
2662    24.330000  25.700000  817.000000  1125.800000       0.004841          1
2663    24.356667  25.700000  813.000000  1123.000000       0.004849          1
2664    24.408333  25.681667  798.000000  1124.000000       0.004860          1

[2665 rows x 6 columns]


## Step 3: Store the features and target in X , Y seperately and since target has value 0,1 change label 0 to -1 

In [40]:
X = df.iloc[:,:-1].values
Y = df.iloc[:,-1:].values
print(X.shape,X)
Y[Y==0]=-1
print(Y.shape,Y)

(2665, 5) [[2.37000000e+01 2.62720000e+01 5.85200000e+02 7.49200000e+02
  4.76416302e-03]
 [2.37180000e+01 2.62900000e+01 5.78400000e+02 7.60400000e+02
  4.77266099e-03]
 [2.37300000e+01 2.62300000e+01 5.72666667e+02 7.69666667e+02
  4.76515255e-03]
 ...
 [2.43300000e+01 2.57000000e+01 8.17000000e+02 1.12580000e+03
  4.84075873e-03]
 [2.43566667e+01 2.57000000e+01 8.13000000e+02 1.12300000e+03
  4.84855928e-03]
 [2.44083333e+01 2.56816667e+01 7.98000000e+02 1.12400000e+03
  4.86020770e-03]]
(2665, 1) [[1]
 [1]
 [1]
 ...
 [1]
 [1]
 [1]]


## Step 4: Processing the Data and Splitting


In [41]:
from sklearn import preprocessing
min_max_scaler = preprocessing.MinMaxScaler()
X_scale = min_max_scaler.fit_transform(X)
X_scale

array([[0.83168317, 0.44513204, 0.34479305, 0.33003334, 0.70421197],
       [0.8359604 , 0.44705255, 0.34078657, 0.34152347, 0.70830847],
       [0.83881188, 0.44065084, 0.33740855, 0.35103018, 0.70468898],
       ...,
       [0.98138614, 0.38410243, 0.48136692, 0.71638882, 0.74113545],
       [0.98772277, 0.38410243, 0.47901016, 0.71351629, 0.74489576],
       [1.        , 0.38214635, 0.47017234, 0.71454219, 0.75051096]])

A. Split the data in 70,30 for training and testing the model 

In [42]:
X_train, X_test, y_train, y_test = train_test_split(X_scale, Y, test_size=0.3, random_state=1)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(1865, 5) (800, 5) (1865, 1) (800, 1)


B. Split the data in 80,20 for training and testing the model 

In [43]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=1)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(2132, 5) (533, 5) (2132, 1) (533, 1)


C. Split the data in 90,10 for training and testing the model 

In [44]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.1, random_state=1)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(2398, 5) (267, 5) (2398, 1) (267, 1)


## Step 5: Defining class for optimizing the weight vector w using the stochastic gradient descent.
### In stochastic gradient descent algorithm we minimise the true risk by creating a function that calculates the hinge loss for examples and then we update w using the sub gradient.
### As hinge loss is given by :
## hinge_loss((w, b),(x, y)) = max{0, 1 − y(hw, xi + b)}


In [45]:
# Defining the class my SVM.
class mySVM:
  # Defining the constructor and assigning the values C,b,W
  def __init__(self, C=1.0):
    self.C = C
    self.W = 0
    self.b = 0

  # Defining the hinge loss  in this we calculate the loss. 
  def hingeLoss_for_otimization(self, W, b, X, Y):
    loss = 0.0
    loss += .5*np.dot(W, W.T)
    m = X.shape[0]
    for i in range(m):
      ti = Y[i] * (np.dot(W, X[i].T) + b)
      loss += self.C *max(0, (1-ti))
    return loss[0][0]

  # Defining the fit function to optimise the W using stochastic gradient descent by using batch_size = 500 and learning_rate=0.001 and maximum number of iterations maxItr=500.
  def fit_model(self, X, Y, batch_size=5000, learning_rate=0.001, maxItr=500):
    no_of_features = X.shape[1]
    no_of_samples = X.shape[0]
    n = learning_rate
    c = self.C
  #Initialize the model parameters
    W = np.zeros((1, no_of_features))
    bias = 0
  #Initial Loss
    losses = []
    for i in range(maxItr):
  #Training Loop
      l = self.hingeLoss_for_otimization(W, bias, X, Y)
      losses. append(l)
      ids = np.arange(no_of_samples)
      np.random.shuffle(ids)
  #Batch Gradient Descent with random shuffling of data samples
      for batch_start in range(0, no_of_samples, batch_size):
  #Assume 0 gradients for the batch
        gradw = 0
        gradb = 0
  #Iterate over all examples in the mini batch to calculate the sub gradients
        for j in range(batch_start, batch_start + batch_size):
          if j < no_of_samples: 
            i = ids[j] 
            ti = Y[i] * (np.dot(W, X[i].T) + bias)
            if ti > 1:
              gradw += 0
              gradb += 0
            else:
            # Updating the gradients.  
              gradw += c * Y[i] * X[i]
              gradb += c * Y[i]
  # Updating Gradient for the batch Update W, B
        W = W - n*W + n*gradw
        bias = bias + n*gradb
    self.W = W
    self.b = bias
    return W, bias, losses
  # Defining the predict function
  # Here yx calculates the X*W^T+b
  def yx_input(self, X):
      return np.dot(X,self.W.T) + self.b
  def predict(self, X):
      return np.where(self.yx_input(X) >= 0.0, 1, -1)

## Step 6: Calculating optimized value of w using perceptron rule that is w[i+1] = w[i] + yi*xi by deifning the above declared model 

In [46]:
model = mySVM(C=1000)
W, b, losses = model.fit_model(X_train, y_train, maxItr=100)
print(W,b,losses)

[[-9.18609815e+04 -8.79769884e+04  4.86842921e+05 -1.26640482e+05
  -1.20324721e+01]] [-4961.] [2398000.0, 56615047296166.19, 522993411903575.56, 113985645904601.1, 294657549957694.6, 166959673911872.88, 110395921306296.47, 33536538619536.453, 29611791694958.336, 25705217109762.55, 21855944372223.926, 18943477416750.12, 16984949634536.803, 15030336944754.447, 13540705012901.676, 14453241878803.15, 13870560892417.182, 16500676204008.826, 14547031568274.367, 12916108233352.818, 13363365623913.838, 14721172093732.654, 17475807515950.55, 15520213586582.576, 13568528884873.725, 12637339266774.74, 15142057799869.678, 13191129026301.105, 12403242956284.035, 15069421703165.744, 13118638124356.879, 11647605833577.43, 13099864622985.285, 11327418357385.713, 11448492759203.982, 13591709301048.623, 11645901287923.066, 12952964240802.12, 15886277096028.648, 13933860610623.793, 11985347001167.576, 10622386948728.312, 12584377552729.623, 10644975182055.16, 13222869033672.094, 15675277362759.842, 1296

## Step 7: Creating target Array and calculating the target values.

In [47]:
target = model.predict(X_test)
t = model.predict(X_train)
print(target,t,y_test)

[[ 1]
 [-1]
 [-1]
 [-1]
 [-1]
 [-1]
 [-1]
 [ 1]
 [ 1]
 [ 1]
 [-1]
 [ 1]
 [-1]
 [-1]
 [-1]
 [-1]
 [ 1]
 [ 1]
 [ 1]
 [-1]
 [ 1]
 [-1]
 [ 1]
 [-1]
 [-1]
 [ 1]
 [-1]
 [-1]
 [ 1]
 [-1]
 [ 1]
 [-1]
 [ 1]
 [ 1]
 [ 1]
 [ 1]
 [-1]
 [-1]
 [-1]
 [ 1]
 [-1]
 [-1]
 [-1]
 [-1]
 [ 1]
 [-1]
 [-1]
 [-1]
 [ 1]
 [-1]
 [-1]
 [-1]
 [ 1]
 [-1]
 [ 1]
 [ 1]
 [-1]
 [ 1]
 [-1]
 [ 1]
 [ 1]
 [-1]
 [-1]
 [-1]
 [ 1]
 [-1]
 [-1]
 [ 1]
 [-1]
 [-1]
 [ 1]
 [ 1]
 [-1]
 [-1]
 [ 1]
 [-1]
 [-1]
 [ 1]
 [ 1]
 [-1]
 [-1]
 [ 1]
 [-1]
 [ 1]
 [-1]
 [-1]
 [-1]
 [-1]
 [ 1]
 [-1]
 [ 1]
 [-1]
 [-1]
 [-1]
 [ 1]
 [-1]
 [-1]
 [ 1]
 [-1]
 [-1]
 [-1]
 [-1]
 [-1]
 [-1]
 [-1]
 [-1]
 [ 1]
 [ 1]
 [-1]
 [-1]
 [ 1]
 [ 1]
 [ 1]
 [ 1]
 [-1]
 [-1]
 [-1]
 [ 1]
 [-1]
 [-1]
 [-1]
 [-1]
 [ 1]
 [ 1]
 [-1]
 [-1]
 [-1]
 [-1]
 [-1]
 [ 1]
 [-1]
 [ 1]
 [-1]
 [-1]
 [ 1]
 [-1]
 [-1]
 [-1]
 [ 1]
 [-1]
 [ 1]
 [-1]
 [-1]
 [ 1]
 [ 1]
 [ 1]
 [ 1]
 [-1]
 [-1]
 [-1]
 [-1]
 [-1]
 [-1]
 [-1]
 [-1]
 [ 1]
 [-1]
 [-1]
 [ 1]
 [-1]
 [-1]
 [-1]
 [-1]
 [-1]
 [ 1]
 [ 1]
 [ 1

## Step 8: Checking for Accuracy. 

In [48]:
v1 = 0  
v2 = 0
c = 0
i=0    
for c in range( np.size( target ) ) :  
  if y_test[c] == target[c] :            
    v1 = v1 + 1
for i in range( np.size( t ) ) :
  if y_train[i] == t[i] :            
    v2 = v2 + 1
  i = i + 1
print("Accuracy of Defined Model on test data :",(v1/c)*100)     
print("Accuracy of Defined Model on train data:",(v2/i)*100)   

Accuracy of Defined Model on test data : 97.36842105263158
Accuracy of Defined Model on train data: 97.99833194328608
