# ２値GA
- SVMでクラス分類を行う
    - データセットは wine（train が 80%、test が 20%）
    - error を最小にする
    - かつ、使用する特徴の数を減らす


In [1]:
import random
import numpy as np
import copy
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler 
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_wine

# Load the wine dataset bundled with scikit-learn and pull out the feature
# matrix and the class labels in one step.
wine = load_wine()
data, label = wine.data, wine.target
# Display the dimensions of the feature matrix.
data.shape

(178, 13)

# 初期化

In [242]:

def init(num_features = 10, num_instances = 100):
    """Create a random boolean population for the genetic algorithm.

    Each row is one individual and each column is one feature flag. Every
    flag is drawn independently from a uniform distribution and is True when
    the draw is at least 0.5.

    Args:
        num_features: Number of flags (columns) per individual.
        num_instances: Number of individuals (rows) in the population.

    Returns:
        Boolean np.ndarray of shape (num_instances, num_features).
    """
    proba = np.random.uniform(size=(num_instances, num_features))
    # Thresholding the uniform draws at 0.5 yields the True/False flags.
    return proba >= 0.5

# 評価

In [243]:

def eval(X, X_train,X_valid,y_train,y_valid) -> np.ndarray:
    """Score every individual by the validation accuracy of an SVM.

    For each feature mask in ``X`` an ``SVC`` with default settings is fitted
    on the selected columns of ``X_train`` and its predictions on the same
    columns of ``X_valid`` are compared with ``y_valid``.

    The returned value is an accuracy, so larger is better.

    Args:
        X: Population of feature masks, one row per individual.
        X_train: Training feature matrix.
        X_valid: Validation feature matrix.
        y_train: Training labels.
        y_valid: Validation labels.

    Returns:
        np.ndarray of shape (X.shape[0], 1) holding one accuracy per
        individual. Individuals that select no feature get 0.0.
    """
    scores = np.zeros((X.shape[0], 1))
    for i, mask in enumerate(X):
        train_cols = X_train[:, mask]
        if train_cols.shape[1] == 0:
            # A classifier cannot be fitted on zero columns; keep the
            # pre-filled score of 0.0 instead of letting the fit fail.
            continue
        model = SVC()
        model.fit(train_cols, y_train)
        predicted = model.predict(X_valid[:, mask])
        scores[i] = accuracy_score(y_valid, predicted)
    return scores

# 選択

In [234]:

def election(X, accu_array):
    """Pick two distinct individuals by roulette-wheel selection.

    Each index is drawn with probability proportional to its score. When the
    scores cannot form a usable distribution for two distinct draws (they sum
    to zero, or fewer than two of them are non-zero) the draw falls back to
    uniform sampling instead of failing.

    Args:
        X: Population array. Not used by the selection itself; kept so the
            call signature stays the same.
        accu_array: Scores of the individuals, any shape; it is flattened.

    Returns:
        Tuple ``(choice1, choice2)`` of two different indices into the
        flattened score array.
    """
    weights = np.asarray(accu_array, dtype=float).ravel()
    total = weights.sum()
    if total == 0 or np.count_nonzero(weights) < 2:
        # p=None makes np.random.choice sample uniformly.
        probabilities = None
    else:
        probabilities = weights / total
    choice1, choice2 = np.random.choice(weights.size, 2, replace=False, p=probabilities)
    return choice1, choice2

# 交叉

In [235]:

def closs_over(x,y):
    """Single-point crossover of two individuals.

    A cut position is drawn uniformly from ``0 .. x.size - 1``; the two
    children are copies of the parents with everything from the cut position
    onwards exchanged. The parents themselves are left untouched.

    Args:
        x: First parent array.
        y: Second parent array.

    Returns:
        Tuple ``(child_x, child_y)``: ``child_x`` starts like ``x`` and ends
        like ``y``; ``child_y`` starts like ``y`` and ends like ``x``.
    """
    cut = np.random.choice(x.size)

    # Work on copies so the parents are not modified.
    child_x, child_y = x.copy(), y.copy()
    # Exchange the tails; both right-hand sides read from the parents.
    child_x[cut:], child_y[cut:] = y[cut:], x[cut:]

    return child_x, child_y

# 突然変異
mutation rate にしたがって個体群から選び、反転させる

In [236]:

def mutation_(X, mutation_rate = 0.01):
    """Flip one random flag in randomly chosen individuals, in place.

    One uniform number is drawn per row of ``X``; rows whose draw is at most
    ``mutation_rate`` are mutated. Each mutated row gets exactly one randomly
    chosen column inverted.

    Args:
        X: 2-D population array; it is modified in place.
        mutation_rate: Threshold compared against the per-row draw.

    Returns:
        The same array object ``X`` after mutation.
    """
    num_instances, num_features = X.shape
    draws = np.random.random(size=num_instances)
    # Indices of the rows selected for mutation, in ascending order.
    for row in np.flatnonzero(draws <= mutation_rate):
        col = np.random.choice(num_features)
        X[row, col] = not X[row, col]
    return X

In [244]:

def svm_ga(data ,label ,init_num_features = 10 , init_num_instances = 100):
    """Search for a feature subset for an SVM with a binary genetic algorithm.

    The data is split into train / validation / test parts and standardised
    with statistics of the training part. A random population of feature
    masks is then evolved for at most 100 generations: every generation is
    scored with ``eval``, two individuals chosen by ``election`` are recombined
    with ``closs_over`` and the population is mutated with ``mutation_``.
    The search stops early once any individual reaches a validation accuracy
    of 0.95 or more.

    Args:
        data: Feature matrix, one row per sample.
        label: Class labels, one per sample.
        init_num_features: Number of flags per individual.
            NOTE(review): the masks are used to index the columns of ``data``,
            so this presumably has to equal ``data.shape[1]`` — confirm.
        init_num_instances: Number of individuals in the population.

    Returns:
        Tuple ``(X, error, selected, X_test, y_test, pena_list, X_train,
        y_train)`` where ``X`` is the final population, ``error`` holds the
        validation accuracy of each individual in ``X`` (despite its name),
        ``pena_list`` is the accuracy reduced by a feature-count penalty,
        ``selected`` is the index of the largest ``pena_list`` entry, and the
        remaining items are the scaled test / training data with their labels.
    """
    # Hold out the test part first, then take the validation part from the rest.
    # NOTE(review): test_size=0.6 keeps 60% of the samples for testing, which
    # differs from the 80/20 split described in the notebook header — confirm.
    X_train, X_test, y_train, y_test = train_test_split(data, label, test_size=0.6, random_state=None)
    X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=0.2, random_state=None)

    # The scaler is fitted on the training part only and reused for the others.
    sc = StandardScaler()
    sc.fit(X_train)
    X_train = sc.transform(X_train)
    X_valid = sc.transform(X_valid)
    X_test = sc.transform(X_test)

    X = init(init_num_features, init_num_instances)
    for _ in range(100):
        error = eval(X, X_train, X_valid, y_train, y_valid)
        # Stop before touching the population so that the returned scores
        # describe exactly the individuals that are returned.
        if np.any(error >= 0.95):
            break
        ele1, ele2 = election(X, error)
        X[[ele1, ele2]] = closs_over(X[ele1], X[ele2])
        mutation_(X)
    else:
        # The last generation was modified after it was scored; score it again.
        error = eval(X, X_train, X_valid, y_train, y_valid)

    # Penalised score: accuracy minus (1 - accuracy) times the fraction of
    # features the individual uses.
    accuracy = error.ravel()
    used_fraction = X.sum(axis=1) / X.shape[1]
    pena_list = accuracy - (1 - accuracy) * used_fraction
    selected = pena_list.argmax()
    return X,error,selected,X_test,y_test,pena_list,X_train,y_train


In [245]:
# Run the GA on the wine data with 13 flags per individual and a population of 50.
X,error,selected,X_test,y_test,pena_list,X_train,y_train = svm_ga(data, label,13,50)

In [247]:
# Index of the individual with the largest penalised score (argmax of pena_list).
selected

1

In [255]:
# For every individual whose penalised score equals 1, print how many
# features its mask selects.
for x in X[pena_list==1]:
    print(x.sum())

7
7
8
6
9
10
10
8
8
8
8
6
9
8
6
7
9
9
6
10
6
6
5
9
7
7
7
8
7
6
7


In [249]:
# Feature mask of the individual at index 1, which matches the value
# displayed for `selected` earlier in this notebook.
X[1]

array([ True, False, False,  True, False, False, False, False,  True,
        True,  True,  True,  True])