In [225]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import svm

In [226]:
dataframe = pd.read_csv("./heart.csv")
dataframe.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1025 entries, 0 to 1024
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       1025 non-null   int64  
 1   sex       1025 non-null   int64  
 2   cp        1025 non-null   int64  
 3   trestbps  1025 non-null   int64  
 4   chol      1025 non-null   int64  
 5   fbs       1025 non-null   int64  
 6   restecg   1025 non-null   int64  
 7   thalach   1025 non-null   int64  
 8   exang     1025 non-null   int64  
 9   oldpeak   1025 non-null   float64
 10  slope     1025 non-null   int64  
 11  ca        1025 non-null   int64  
 12  thal      1025 non-null   int64  
 13  target    1025 non-null   int64  
dtypes: float64(1), int64(13)
memory usage: 112.2 KB


In [227]:
dataframe["target"] = dataframe["target"].replace(0, -1)
dataframe.head(5)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,52,1,0,125,212,0,1,168,0,1.0,2,2,3,-1
1,53,1,0,140,203,1,0,155,1,3.1,0,0,3,-1
2,70,1,0,145,174,0,1,125,1,2.6,0,0,3,-1
3,61,1,0,148,203,0,1,161,0,0.0,2,1,3,-1
4,62,0,0,138,294,1,1,106,0,1.9,1,3,2,-1


In [228]:
def scalerTranfrom(features, index):
    new_features = features
    for i in index:
        new_features[:,i] = new_features[:, i] * np.power(10, 20)
    
    return new_features

In [229]:
features = np.array(dataframe.drop(columns="target"))
labels = np.array(dataframe["target"])

# print(features.shape)
features = scalerTranfrom(features, [0, 3, 4, 7, 9])

features_train, features_test, labels_train, labels_test = train_test_split(features, labels)
print(features_train[0])

[1.01381571e+11 1.00000000e+00 0.00000000e+00 2.32679014e+11
 3.44032543e+11 0.00000000e+00 0.00000000e+00 2.29355028e+11
 1.00000000e+00 3.15778662e+09 2.00000000e+00 1.00000000e+00
 3.00000000e+00]


In [230]:
import time
# Support Vector Machine classifiers by kernel which is linear.
class SVM:
    def __init__(self, c=1, learning_rate=1):
        self.cost = c
        self.learning_rate = learning_rate
    
    def fit(self, features_train, labels_train):
        
        #store info
        self.d = features_train.shape[1]
        self.n_record = features_train.shape[0]
        
        # set up features and labels
        full_one = np.full((self.n_record, 1), 1)
        self.features = np.append(features_train, full_one, axis=1)
        self.lables = labels_train
        
        # set vector of weight by d+1 dimensions
        self.weight = np.zeros(self.d + 1)
        
        # self.weight = self.computeWeight(self.weight, self.features, self.lables, self.n_record) # compute to get weight's value
        
        array_weight = np.array([])
        array_margin = np.array([])
        
        for i in range(10):
            # Generating Permutation of feature and label
            np.random.seed(i)
            new_feature = np.random.permutation(self.features)
            np.random.seed(i)
            new_label = np.random.permutation(self.lables)
            
            # calculate weight
            new_weight = self.computeWeight(self.weight, new_feature, new_label, self.n_record) # compute to get weight's value
            
            # calculate margin
            cal_margin = np.inner(new_weight, new_weight)
            # append weight and margin
            array_weight = np.append(array_weight, new_weight, axis=0)
            array_margin = np.append(array_margin, cal_margin)
            
        
        array_weight = array_weight.reshape(10, 14)
        self.weight = array_weight[array_margin.argmin(axis=0)]
        
        

        
        
        
    
    def computeWeight(self, weight, feature_train, labels_train, n_record):
        while True:
            m = 0 # the number of missclassifier
            for i in range(n_record):
                check = labels_train[i] * (np.inner(weight, feature_train[i]))
                if check < 1:
                    m += 1
                    weight += weight + labels_train[i] * feature_train[i]    
            if m == 0:
                break
        
        return weight
    
    def predict(self, feature_test):
        check = np.inner(self.weight, feature_test)
        if check > 1:
            # positive class
            return 1 
        else:
            # negative class
            return -1
    
    def hinge_loss(self):
        sum_loss = 0
        for i in range(self.n_record):
            differenct = 1 - self.lables[i] * (np.inner(self.weight, self.features[i]))
            
            # check max[0, differenct]
            if differenct > 0:
                sum_loss += differenct
        return sum_loss
    
    def score(self, features_test, labels_test):
        size = features_test.shape[0]
        full_one = np.full((size, 1), 1)
        new_features = np.append(features_test, full_one, axis=1)
        
        point = 0
        for i in range(size):
            y_predict = self.predict(new_features[i])
            if y_predict == labels_test[i]:
                point += 1
        
        return np.round(point / size, 4)
            

In [231]:
model = SVM()
model.fit(features_train, labels_train)
print(model.weight)

[ 1.24572415e+297 -3.19963915e+286  1.04561471e+287  8.34523290e+296
 -1.12696082e+297  2.99504538e+281  1.57009628e+286  6.38977740e+296
 -2.42384441e+286  1.10291931e+296 -4.15584596e+286 -2.40933415e+286
 -1.84256983e+286  6.08721813e+285]


In [232]:
model.score(features_test, labels_test)

0.537

In [233]:
# Model Selection
class ModelSelection:
    def __init__(self, features, labels , model):
        self.features = features
        self.labels = labels
        self.model = model
        pass