In [1]:
import numpy as np
import random
class SVM:
    def __init__(self, data_len, lbd):
        self.W = np.zeros(data_len)
        self.b = 0
        self.lbd = lbd
    
    def train(self, X, Y, eta, epsilon, batch_size):
        batch_size = min(batch_size, len(Y))
        
        cnt = 0
        sqr_sum_W = np.ones(len(self.W))
        sqr_sum_b = 1
        e = 1
        
        stop = 0
        while stop<100:
            batch = random.sample(range(len(Y)), batch_size)
            X_batch, Y_batch = X[batch], Y[batch]
            dev_W, dev_b = self.derivative(X_batch, Y_batch)
            sqr_sum_W = 0.9*sqr_sum_W + 0.1*dev_W*dev_W
            sqr_sum_b = 0.9*sqr_sum_b + 0.1*dev_b*dev_b
            
            self.W = self.W - eta*dev_W/np.sqrt(sqr_sum_W + e*np.ones(len(self.W)))
            self.b = self.b - eta*dev_b/np.sqrt(sqr_sum_b + e)
            if np.dot(dev_W, dev_W) + np.dot(dev_b, dev_b) < epsilon:
                break
            if cnt==20:
                print(np.dot(dev_W, dev_W) + np.dot(dev_b, dev_b), self.loss(X,Y))
                cnt = 0
                stop = stop + 1
            cnt = cnt + 1
        return
    
    def derivative(self, X, Y):
        dev_W = np.zeros(len(self.W))
        dev_b = 0
        for x, y in zip(X, Y):
            y = 2*(y-1/2)
            if 1 > y*(np.dot(self.W, x) + self.b):
                dev_W = dev_W - y*x
                dev_b = dev_b - y
        dev_W = dev_W / len(Y)
        dev_b = dev_b / len(Y)
        dev_W = dev_W + self.lbd * 2 * self.W
        #print(dev_W, dev_b)
        return dev_W, dev_b
        
    def loss(self, X, Y):
        l = 0
        for x, y in zip(X, Y):
            y = 2*(y-1/2)
            l += max(0, 1 - y*(np.dot(self.W, x) + self.b))
        l /= len(Y)
        l += self.lbd * np.dot(self.W, self.W)
        return l
    
    def output(self, x):
        if np.dot(self.W, x) + self.b > 0:
            return 1
        else:
            return -1
    
    def test(self, X, Y):
        error = 0
        for x, y in zip(X, Y):
            y = 2*(y-1/2)
            if y!=self.output(x):
                error = error + 1
        return error/len(Y)

In [2]:
import pandas as pd
def read_data(s, delete_name):
    '''
    s, <string>, which dataset to access
    delete_name, <set>, which features do not take into account
    '''
    if s=='test':
        df = pd.read_csv('pro_data_test.csv', delimiter=',')
    elif s=='train':
        df = pd.read_csv('pro_data_train.csv', delimiter=',')
    else:
        return
    columns = df.columns
    for column in columns:
        if column in delete_name:
            del df[column]
    #print(df.head())
    feature_len = len(df.columns)-1
    feature = df[df.columns[0:feature_len-1]]
    label = df[df.columns[feature_len:feature_len+1]]
    X = feature.values
    Y = label.values
    return X, Y

In [3]:
delete_name = {'fnlwgt', 'capital-gain', 'capital-loss'}
X_train, Y_train = read_data('train', delete_name)
X_test, Y_test = read_data('test', delete_name)
#print(len(X_train[0]), len(X_test[0]))

In [4]:
lbd = 1
svm = SVM(len(X_train[0]), lbd)

In [5]:
epsilon = 0.5
eta = 0.0001
batch_size = 200
svm.train(X_train, Y_train, eta, epsilon, batch_size)

664.3567229743333 [0.8639851]
1012.5969858959824 [0.76742507]
1125.1538542903168 [0.67229343]
179.17435123221972 [0.60549806]
13.531071935715039 [0.58969033]
10.60511184170198 [0.5861775]
8.96576321082867 [0.58340679]
1.8211177509244754 [0.58101817]
23.11923187063066 [0.57911613]
15.923972550519785 [0.57728062]
0.7568453037182211 [0.57576936]
3.267687345700525 [0.57447846]
2.40647533359558 [0.57336351]


In [6]:
svm.test(X_test,Y_test)
#print(len(svm.W), len(X_test[0]))

0.2456839309428951