# SAUTE-feature-selection guide

In [1]:
#load libraries
import sys
sys.path.append("../codes")
from pl_algorithms import Model
import numpy as np
from sklearn.model_selection import train_test_split
import math

In [2]:
# Load the "lost" dataset: the feature matrix plus the two label matrices.
X = np.loadtxt("../data/lost/X.csv", delimiter=",")

# Both label files are transposed after reading so that their first axis
# lines up with the rows of X (see the shapes printed below).
y_pl_raw = np.loadtxt("../data/lost/y_pl.csv", delimiter=",")
y_raw = np.loadtxt("../data/lost/y.csv", delimiter=",")
y_pl = np.transpose(y_pl_raw)
y = np.transpose(y_raw)

# Sanity check: all three arrays should report the same number of rows.
print(f"X shape: {X.shape}\ny_pl shape: {y_pl.shape}\ny shape: {y.shape}\n")

X shape: (1122, 108)
y_pl shape: (1122, 16)
y shape: (1122, 16)



In [3]:
# Split features and both label matrices into training (70%) and test parts.
# Passing all three arrays to one call keeps their rows aligned, and the
# fixed random_state makes the split repeatable.
(
    X_train, X_test,
    y_pl_train, y_pl_test,
    y_train, y_test,
) = train_test_split(
    X, y_pl, y,
    train_size=0.7,
    random_state=25,
)

In [6]:
# Build the model from the training features and the training y_pl matrix.
model = Model(X_train, y_pl_train)

# Baseline: classify the test set using all variables (no feature selection).
y_knn_without_saute = model.knn_predict(X_test, knn=8)
y_ipal_without_saute = model.ipal_predict(X_test, knn=8)

# Accuracy = fraction of test rows whose prediction equals the reference label.
# NOTE(review): the reference is argmax over y_pl_test, and y_test (created by
# the split above) is never used — confirm this is the intended ground truth.
reference_labels = np.argmax(y_pl_test, axis=1)
n_test = y_pl_test.shape[0]
acc_knn_without_saute = np.sum(y_knn_without_saute == reference_labels) / n_test
acc_ipal_without_saute = np.sum(y_ipal_without_saute == reference_labels) / n_test
print(f"Accuracy without saute using PL-KNN: {acc_knn_without_saute}\nAccuracy without saute using IPAL: {acc_ipal_without_saute}.")

Accuracy without saute using PL-KNN: 0.4807121661721068
Accuracy without saute using IPAL: 0.4658753709198813.


In [8]:
# Select variables using SAUTE and its modifications, then compare accuracies.

vars_no = math.ceil(X.shape[1] / 2)  # number of variables to select (half, rounded up)

# Reference labels and test-set size, shared by every accuracy computed below.
# NOTE(review): the reference is argmax over y_pl_test, and y_test is never
# used in this notebook — confirm this is the intended ground truth.
reference_labels = np.argmax(y_pl_test, axis=1)
n_test = y_pl_test.shape[0]


def _evaluate_saute(**select_kwargs):
    """Run SAUTE selection on `model`, then classify X_test with the selected variables.

    Parameters
    ----------
    **select_kwargs
        Extra keyword arguments forwarded unchanged to ``model.select_saute``
        (for example ``learning_type='IPALall'``). With none given, the call is
        ``model.select_saute(vars_no, criterium='original')``.

    Returns
    -------
    tuple
        ``(y_knn, y_ipal, acc_knn, acc_ipal)``: the outputs of
        ``model.knn_predict`` and ``model.ipal_predict`` (both called with
        ``use_selected_vars=True``), followed by the fraction of test rows on
        which each output equals ``reference_labels``.
    """
    # Selection has to run first: the predictions below are requested with
    # use_selected_vars=True.
    model.select_saute(vars_no, criterium='original', **select_kwargs)

    # 8 is the same neighbourhood size the baseline cell passes as knn=8.
    y_knn = model.knn_predict(X_test, 8, use_selected_vars=True)
    y_ipal = model.ipal_predict(X_test, 8, use_selected_vars=True)

    acc_knn = np.sum(y_knn == reference_labels) / n_test
    acc_ipal = np.sum(y_ipal == reference_labels) / n_test
    return y_knn, y_ipal, acc_knn, acc_ipal


# SAUTE - original method
(y_knn_with_saute, y_ipal_with_saute,
 acc_knn_with_saute, acc_ipal_with_saute) = _evaluate_saute()

# SAUTE-IPALALL - modification
(y_knn_with_saute_ipalall, y_ipal_with_saute_ipalall,
 acc_knn_with_saute_ipalall, acc_ipal_with_saute_ipalall) = _evaluate_saute(learning_type='IPALall')

# SAUTE-IPALFRAC - modification
(y_knn_with_saute_ipalfrac, y_ipal_with_saute_ipalfrac,
 acc_knn_with_saute_ipalfrac, acc_ipal_with_saute_ipalfrac) = _evaluate_saute(learning_type='IPALfrac')


# Summary. The IPAL labels on the last three lines previously read "without"
# although they report the accuracies obtained *with* variable selection.
print(f"accuracy knn without saute: {acc_knn_without_saute}; accuracy ipal without saute: {acc_ipal_without_saute}")
print(f"accuracy knn with saute: {acc_knn_with_saute}; accuracy ipal with saute: {acc_ipal_with_saute}")
print(f"accuracy knn with saute_ipalall: {acc_knn_with_saute_ipalall}; accuracy ipal with saute_ipalall: {acc_ipal_with_saute_ipalall}")
print(f"accuracy knn with saute_ipalfrac: {acc_knn_with_saute_ipalfrac}; accuracy ipal with saute_ipalfrac: {acc_ipal_with_saute_ipalfrac}")

accuracy knn without saute: 0.4807121661721068; accuracy ipal without saute: 0.4658753709198813
accuracy knn with saute: 0.49851632047477745; accuracy ipal without saute: 0.456973293768546
accuracy knn with saute_ipalall: 0.4629080118694362; accuracy ipal without saute_ipalall: 0.4540059347181009
accuracy knn with saute_ipalfrac: 0.4836795252225519; accuracy ipal without saute_ipalfrac: 0.4421364985163205
