In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.model_selection import train_test_split

In [2]:
# Load the prepared insurance data set and drop the row identifier, which is not a feature.
orig_data = pd.read_csv(r"datasets/Insurance/final_data.csv").drop(columns=["Id"])

# Every remaining column except the target is used as a model input.
listOfX = [col for col in orig_data.columns if col != "CarInsurance"]  # List of parameters

print(orig_data.head())
inputData = orig_data[listOfX]  # Getting parameters
# Re-code the target from {0, 1} to {-1, +1}, the label convention the hinge loss expects.
# A new Series is built instead of replacing in place, so `orig_data` (printed above)
# is never silently modified and the cell gives the same result when re-run.
outputData = orig_data["CarInsurance"].replace(0, -1)  # Getting answers

# Split the data; each part gets a fresh 0..n-1 index without mutating the split outputs in place.
X_train, X_test, y_train, y_test = (
    part.reset_index(drop=True)
    for part in train_test_split(inputData, outputData, random_state=228, test_size=0.2)
)


   CallTime  LastContactMonth  PrevAttempts  DaysPassed  HHInsurance  \
0        70                 0             0          -1            1   
1       185                 1             0          -1            1   
2       340                 2             1         119            1   
3       819                 1             0          -1            1   
4       192                 2             0          -1            0   

   NoOfContacts  CarInsurance  
0             2             0  
1             5             0  
2             1             1  
3             2             1  
4             1             0  


In [3]:
def add_bias_feature(a):
    """Return a float64 copy of the 2-D array ``a`` with one extra trailing column of ones.

    The appended constant column lets a linear model learn its intercept as an
    ordinary weight.
    """
    n_rows = a.shape[0]
    n_cols = a.shape[1]
    # Start from an all-ones matrix so the last column is already the bias term,
    # then overwrite the leading columns with the original features.
    with_bias = np.ones((n_rows, n_cols + 1))
    with_bias[:, :n_cols] = a
    return with_bias

In [40]:
class MySVM(object):
    """Linear soft-margin SVM trained by per-sample sub-gradient descent on the hinge loss.

    Labels are expected to be -1 / +1.  A bias column is appended to the inputs
    with ``add_bias_feature``, so the intercept is the last weight.

    Attributes populated by ``fit``:
        history_w:    array of shape (1 + epochs * n_train, 1, n_features + 1) with the
                      initial weights followed by a snapshot after every update.
                      NOTE: this grows with epochs * n_train and can become very large.
        train_errors: per-epoch count of training samples whose margin was < 1.
        val_errors:   per-epoch count of validation samples whose margin is < 1.
        train_loss:   per-epoch sum of the regularised hinge loss over training samples.
        val_loss:     per-epoch sum of the regularised hinge loss over validation samples.
        All four per-epoch arrays are 1-D with length ``epochs``.
    """

    def __init__(self, etha=0.01, alpha=0.1, epochs=500):
        """Store the hyper-parameters.

        Args:
            etha: learning rate (step size) of each update.
            alpha: regularisation coefficient.
            epochs: number of full passes over the training data.
        """
        self._epochs = epochs
        self._etha = etha
        self._alpha = alpha
        self._w = None
        self.history_w = []
        self.train_errors = None
        self.val_errors = None
        self.train_loss = None
        self.val_loss = None

    def soft_hinge_loss(self, x, y):
        """Return ``max(0, 1 - y * <w, x>) + alpha * <w, w>`` as a plain float.

        Args:
            x: one sample, already extended with the bias feature.
            y: its label (-1 or +1).
        """
        # Work on flat views so the result is a scalar rather than a (1, 1) array.
        w = np.ravel(self._w)
        hinge = max(0.0, 1.0 - float(y * np.dot(w, np.ravel(x))))
        return hinge + self._alpha * float(np.dot(w, w))

    def fit(self, X_tr, X_tt, y_tr, y_tt):
        """Train on ``(X_tr, y_tr)`` and record per-epoch statistics on both data sets.

        ``(X_tt, y_tt)`` is used for monitoring only; it never influences the weights.
        Calling ``fit`` again restarts training from zero weights.

        Args:
            X_tr: 2-D array of training features (without bias column).
            X_tt: 2-D array of validation features (without bias column).
            y_tr: training labels in {-1, +1}, one per row of ``X_tr``.
            y_tt: validation labels in {-1, +1}, one per row of ``X_tt``.

        Raises:
            ValueError: if a label vector's length differs from its feature matrix.
        """
        X_tr = add_bias_feature(X_tr)
        X_tt = add_bias_feature(X_tt)
        y_tr = np.ravel(y_tr)
        y_tt = np.ravel(y_tt)
        if len(y_tr) != len(X_tr) or len(y_tt) != len(X_tt):
            # zip() would silently drop the unmatched tail; fail loudly instead.
            raise ValueError("X and y must contain the same number of samples")

        n_train, n_features = X_tr.shape
        self._w = np.zeros((1, n_features))

        # Pre-allocated snapshot buffer.  Writing into it copies the current values,
        # whereas appending `self._w` itself would store many references to one array
        # that the in-place updates below keep overwriting.
        history = np.empty((1 + self._epochs * n_train, 1, n_features))
        history[0] = self._w
        step = 1

        train_errors = []
        val_errors = []
        train_loss_epoch = []
        val_loss_epoch = []

        for _ in range(self._epochs):
            tr_err = 0
            tr_loss = 0.0
            for x, y in zip(X_tr, y_tr):
                margin = y * np.dot(self._w, x).item()
                # Regularisation part of the step (scaled by 1/epochs, as in the
                # original update rule).
                shrink = self._alpha * self._w / self._epochs
                if margin >= 1:
                    # Sample is outside the margin: only the regulariser acts.
                    self._w -= self._etha * shrink
                else:
                    # Margin violated: also step along the hinge sub-gradient.
                    self._w -= self._etha * (shrink - y * x)
                    tr_err += 1
                # Loss is measured with the weights obtained after this update.
                tr_loss += self.soft_hinge_loss(x, y)
                history[step] = self._w
                step += 1

            # Validation statistics with the weights reached at the end of the epoch.
            w_flat = self._w[0]
            val_margins = y_tt * (X_tt @ w_flat)
            reg_term = self._alpha * float(np.dot(w_flat, w_flat))
            val_loss = float(np.sum(np.maximum(0.0, 1.0 - val_margins))) + len(y_tt) * reg_term
            val_err = int(np.count_nonzero(val_margins < 1))

            train_errors.append(tr_err)
            val_errors.append(val_err)
            train_loss_epoch.append(tr_loss)
            val_loss_epoch.append(val_loss)

        self.history_w = history
        self.train_errors = np.array(train_errors)
        self.val_errors = np.array(val_errors)
        self.train_loss = np.array(train_loss_epoch)
        self.val_loss = np.array(val_loss_epoch)

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Return the sign of the decision function for every row of ``X``.

        Args:
            X: 2-D array of features (without bias column).

        Returns:
            Array of shape (n_samples, 1) holding -1.0 / +1.0 (0.0 for a sample lying
            exactly on the decision boundary).

        Raises:
            RuntimeError: if the model has not been fitted yet.
        """
        if self._w is None:
            raise RuntimeError("MySVM.predict() called before fit()")
        X = add_bias_feature(X)
        return np.sign(X @ self._w.T)


In [41]:
# Train the hand-written SVM; the test split is passed only so that fit() can
# record validation loss/error per epoch.
sv = MySVM(
    etha=0.005,
    alpha=0.005,
    epochs=10000,
)
sv.fit(
    X_tr=X_train.to_numpy(),
    X_tt=X_test.to_numpy(),
    y_tr=y_train.to_numpy(),
    y_tt=y_test.to_numpy(),
)

In [42]:
# Score the hand-written SVM on the held-out split: precision and recall are
# printed, accuracy is shown as the cell's output value.
preds = sv.predict(X_test.to_numpy())
print(
    precision_score(y_test, preds),
    recall_score(y_test, preds),
    sep="\n",
)
accuracy_score(y_test, preds)

0.6064814814814815
0.8534201954397395


0.7185863874345549

In [10]:
# Baseline for comparison: scikit-learn's LinearSVC on the same train/test split.
# (`svm` and `accuracy_score` come from the import cell at the top of the notebook.)
C = 0.05  # regularisation parameter; in LinearSVC the penalty strength is inversely proportional to C
model1 = svm.LinearSVC(C=C, max_iter=10000)

model1.fit(X_train, y_train)
y_predict = model1.predict(X_test)
print(accuracy_score(y_test, y_predict))



0.7879581151832461
