### Loading libraries

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

### Step 1: Loading dataset

In [15]:
# Read the feature table from the CSV in the working directory.
# The bare `head()` expression at the end of the cell renders the first five rows.
data1 = pd.read_csv('titanic_X_train.csv')
data1.head()

Unnamed: 0,ID,Pclass,Sex,Age,SibSp,Parch,Fare,Embarced
0,0,3,0,22.0,1,0,7.25,1
1,1,1,1,38.0,1,0,71.2833,0
2,2,3,1,26.0,0,0,7.925,1
3,3,1,1,35.0,1,0,53.1,1
4,4,3,0,35.0,0,0,8.05,1


In [16]:
# Read the label table from the CSV in the working directory.
# NOTE(review): rows are presumably in the same order as the feature table,
# since the two frames are later paired positionally — confirm against the data source.
data2 = pd.read_csv('titanic_y_train.csv')
data2.head()

Unnamed: 0,ID,Survived
0,0,0
1,1,1
2,2,1
3,3,1
4,4,0


### Step 2: Define features (X) and labels (y)

In [31]:
# Features: every column of the feature table after the first one
# (the first column is the ID shown in the preview above).
X = data1.iloc[:, 1:].values
# Labels: the second column of the label table, as a 1-D array.
y = data2.iloc[:, 1].values

# Sanity check: both arrays should have the same number of rows.
print(X.shape, y.shape, sep='\n')

(891, 7)
(891,)


### Step 3: Split the data into train and test data

In [32]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

# Shapes of the training pair first, then the test pair.
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape, sep='\n')

(712, 7)
(712,)
(179, 7)
(179,)


### Step 4: Model fit (Cost and Gradient descent)

In [33]:
def SVM(X, y, iterations, lambda_param, lr):
    """Fit a linear SVM with per-sample sub-gradient descent.

    The decision function is ``np.dot(x, w) - b``. Every sample triggers one
    update: if it satisfies the margin (``label * f(x) >= 1``) only the
    regularisation term shrinks ``w``; otherwise the hinge term also moves
    ``w`` and ``b`` towards classifying the sample correctly.

    Parameters
    ----------
    X : 2-D array of shape (n_samples, n_features)
        Training features.
    y : 1-D array
        Class labels; values ``<= 0`` are treated as class -1, all others as +1.
    iterations : int
        Number of full passes over the rows of ``X``.
    lambda_param : float
        Regularisation strength (weight of the ``2 * lambda_param * w`` term).
    lr : float
        Step size applied to every update.

    Returns
    -------
    (w, b) : tuple
        The weight vector (length ``n_features``) and the bias.
    """
    _, n_features = X.shape

    # Map the labels onto {-1, +1} as required by the margin condition.
    signed_labels = np.where(y <= 0, -1, 1)

    # Initialise parameters.
    w = np.zeros(n_features)
    b = 0

    for _epoch in range(iterations):
        for row, sample in enumerate(X):
            label = signed_labels[row]
            reg_grad = 2 * lambda_param * w

            if label * (np.dot(sample, w) - b) >= 1:
                # Outside the margin: only weight decay applies.
                w = w - lr * reg_grad
                continue

            # Inside the margin or misclassified: add the hinge-loss term.
            w = w - lr * (reg_grad - np.dot(sample, label))
            b = b - lr * label

    return w, b

In [34]:
# Hyperparameters handed to SVM():
#   iterations   - number of full passes over the training rows
#   lambda_param - regularisation strength (scales the 2 * lambda_param * w term)
#   lr           - step size applied to every per-sample update
iterations = 10000
lambda_param = 0.01
lr = 0.001

# Train on the training split only; w is the weight vector and b the bias.
# Note: SVM() updates once per sample in a Python loop, so this cell performs
# iterations * len(X_train) updates and can take a while to run.
w, b = SVM(X_train, y_train, iterations, lambda_param, lr)

### Step 5: Prediction

In [35]:
def prediction(X, y, w, b):
    """Classify ``X`` with a linear model and print the accuracy against ``y``.

    A sample is assigned class 1 when ``np.dot(x, w) - b`` is strictly
    positive and class 0 otherwise. The accuracy (in percent) is printed;
    nothing is returned.

    Parameters
    ----------
    X : 2-D array of shape (n_samples, n_features)
        Samples to classify.
    y : array-like of 0/1 labels
        Ground truth, either 1-D or an (n_samples, 1) column vector.
    w : 1-D array of length n_features
        Weight vector of the linear model.
    b : float
        Bias of the linear model.
    """
    # Flatten the labels: an (n, 1) column vector would otherwise broadcast
    # against the 1-D predictions into an (n, n) matrix and silently corrupt
    # the error count below.
    y_true = np.asarray(y).ravel()

    # Positive side of the hyperplane -> class 1, everything else -> class 0.
    y_pred = (np.dot(X, w) - b > 0).astype('int64')

    # With 0/1 labels, the summed absolute difference is the number of errors.
    acc = (1-np.sum(np.absolute(y_pred-y_true))/y_true.shape[0])*100
    print('Accuracy of the model is :', acc, '%')
    

In [36]:
# Evaluate the trained weights on the held-out split; the accuracy is printed by prediction().
prediction(X_test,y_test, w, b)

Accuracy of the model is : 75.97765363128491 %
