# SFFS

## Imports

In [None]:
from pathlib import Path

import numpy as np
import pandas as pd

from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import LeaveOneOut
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

## Classifiers
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

## Read and prepare data

In [None]:
# Task to process. The name is interpolated into the input CSV path and
# into every result file path. Available tasks:
#   ZL_trace, ZL_predict, PL_trace, PL_predict
task_name = "ZL_trace"

In [None]:
# Load the full dataset of the selected task into a DataFrame.
f = pd.read_csv(
    f'Datasets/Full_DS/{task_name}.csv',
)

# NumPy array version of the table; the bare expression on the last line
# displays its shape as the cell output.
df = f.to_numpy()
df.shape

In [None]:
# Features are every column except 'Class'; the label is the 'Class'
# column cast to integers.
X = f.drop(columns='Class').values
y = f['Class'].values.astype('int')

# 70/30 hold-out split with a fixed seed.
# NOTE(review): the visible code after this cell fits on the full X / y;
# these four arrays are not referenced again there.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=1,
)

In [None]:
# Leave-one-out cross-validation splitter. The same object is passed as `cv`
# to both the feature selector and the grid search of every classifier section.
cv = LeaveOneOut()

## SVM

In [None]:
# Grid-search space for the SVM: RBF kernel only, with C and gamma each
# swept over powers of ten.
tuned_parameters = dict(
    C=[0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000],
    kernel=['rbf'],
    gamma=[0.00001, 0.0001, 0.001, 0.01, 0.1, 1],
)

In [None]:
# Fresh state for the SVM run.
# Standardize all features once; `stdsc` keeps the fitted scaler.
stdsc = StandardScaler()
X_std = stdsc.fit_transform(X)

# Result table (one row is appended per subset size) and the best
# grid-search accuracy seen so far.
re = pd.DataFrame()
best_acc = 0

In [None]:
# SFFS + grid search for the SVM.
# For every subset size i (1 .. number of features):
#   1. sequential floating forward selection picks i features, scored by
#      cross-validated accuracy of a default SVC;
#   2. the SVC hyper-parameters in `tuned_parameters` are grid-searched on
#      that subset with the same `cv` splitter;
#   3. the selected feature indices, best score and best parameters are
#      appended to `re`.
# The accumulated table is written to SFFS/SVM/<task_name>.csv at the end.

# Create the output directory before the (long) search so that a missing
# folder cannot make the final `to_csv` fail after all the work is done.
out_path = Path(f'SFFS/SVM/{task_name}.csv')
out_path.parent.mkdir(parents=True, exist_ok=True)

for i in range(1, X.shape[1] + 1):
    sffs = SFS(
        SVC(),
        k_features=i,
        forward=True,
        floating=True,
        verbose=2,
        scoring='accuracy',
        cv=cv,
        n_jobs=-1
    )
    sffs.fit(X_std, y)

    # Keep only the columns chosen by the selector.
    X_train_sffs = X_std[:, list(sffs.k_feature_idx_)]

    # Standardize with a throwaway scaler. Refitting the shared `stdsc` here
    # would leave it fitted on already-standardized columns, so it could no
    # longer be used to standardize raw `X` afterwards.
    X_train_sffs_std = StandardScaler().fit_transform(X_train_sffs)

    gs = GridSearchCV(
        SVC(),
        tuned_parameters,
        verbose=1,
        cv=cv,
        n_jobs=-1
    )
    gs.fit(X_train_sffs_std, y)

    print(gs.best_score_, gs.best_params_)

    # Track the best tuned accuracy over all subset sizes.
    best_acc = max(best_acc, gs.best_score_)

    re_i = pd.DataFrame(
        [[sffs.k_feature_idx_, gs.best_score_, gs.best_params_]],
        columns=['Feature', 'accuracy', 'param']
    )
    # Appended every iteration so partial results stay available in `re`
    # if the run is interrupted.
    re = pd.concat([re, re_i])

re.to_csv(out_path)

## KNN

In [None]:
# Grid-search space for k-nearest neighbours: k from 1 to 19, both
# weighting schemes, and Minkowski power p of 1 or 2.
tuned_parameters = dict(
    n_neighbors=np.arange(1, 20),
    weights=['uniform', 'distance'],
    p=[1, 2],
)

In [None]:
# Fresh state for the KNN run.
# Standardize all features once; `stdsc` keeps the fitted scaler.
stdsc = StandardScaler()
X_std = stdsc.fit_transform(X)

# Result table (one row is appended per subset size) and the best
# grid-search accuracy seen so far.
re = pd.DataFrame()
best_acc = 0

In [None]:
# SFFS + grid search for k-nearest neighbours.
# For every subset size i (1 .. number of features):
#   1. sequential floating forward selection picks i features, scored by
#      cross-validated accuracy of a default KNeighborsClassifier;
#   2. the hyper-parameters in `tuned_parameters` are grid-searched on that
#      subset with the same `cv` splitter;
#   3. the selected feature indices, best score and best parameters are
#      appended to `re`.
# The accumulated table is written to SFFS/KNN/<task_name>.csv at the end.

# Create the output directory before the (long) search so that a missing
# folder cannot make the final `to_csv` fail after all the work is done.
out_path = Path(f'SFFS/KNN/{task_name}.csv')
out_path.parent.mkdir(parents=True, exist_ok=True)

for i in range(1, X.shape[1] + 1):
    sffs = SFS(
        KNeighborsClassifier(),
        k_features=i,
        forward=True,
        floating=True,
        verbose=2,
        scoring='accuracy',
        cv=cv,
        n_jobs=-1
    )
    sffs.fit(X_std, y)

    # Keep only the columns chosen by the selector.
    X_train_sffs = X_std[:, list(sffs.k_feature_idx_)]

    # Standardize with a throwaway scaler. Refitting the shared `stdsc` here
    # would leave it fitted on already-standardized columns, so it could no
    # longer be used to standardize raw `X` afterwards.
    X_train_sffs_std = StandardScaler().fit_transform(X_train_sffs)

    gs = GridSearchCV(
        KNeighborsClassifier(),
        tuned_parameters,
        verbose=1,
        cv=cv,
        n_jobs=-1
    )
    gs.fit(X_train_sffs_std, y)

    print(gs.best_score_, gs.best_params_)

    # Track the best tuned accuracy over all subset sizes.
    best_acc = max(best_acc, gs.best_score_)

    re_i = pd.DataFrame(
        [[sffs.k_feature_idx_, gs.best_score_, gs.best_params_]],
        columns=['Feature', 'accuracy', 'param']
    )
    # Appended every iteration so partial results stay available in `re`
    # if the run is interrupted.
    re = pd.concat([re, re_i])

re.to_csv(out_path)

## DT

In [None]:
# Grid-search space for the decision tree: depth 1-5, minimum leaf size
# 1-10 and minimum split size 2-5.
tuned_parameters = {
    'max_depth': list(range(1, 6)),
    'min_samples_leaf': list(range(1, 11)),
    'min_samples_split': list(range(2, 6)),
}

In [None]:
# Fresh state for the decision-tree run.
# Standardize all features once; `stdsc` keeps the fitted scaler.
stdsc = StandardScaler()
X_std = stdsc.fit_transform(X)

# Result table (one row is appended per subset size) and the best
# grid-search accuracy seen so far.
re = pd.DataFrame()
best_acc = 0

In [None]:
# SFFS + grid search for the decision tree.
# For every subset size i (1 .. number of features):
#   1. sequential floating forward selection picks i features, scored by
#      cross-validated accuracy of a DecisionTreeClassifier(random_state=0);
#   2. the hyper-parameters in `tuned_parameters` are grid-searched on that
#      subset with the same `cv` splitter;
#   3. the selected feature indices, best score and best parameters are
#      appended to `re`.
# The accumulated table is written to SFFS/DT/<task_name>.csv at the end.

# Create the output directory before the (long) search so that a missing
# folder cannot make the final `to_csv` fail after all the work is done.
out_path = Path(f'SFFS/DT/{task_name}.csv')
out_path.parent.mkdir(parents=True, exist_ok=True)

for i in range(1, X.shape[1] + 1):
    sffs = SFS(
        DecisionTreeClassifier(random_state=0),
        k_features=i,
        forward=True,
        floating=True,
        verbose=2,
        scoring='accuracy',
        cv=cv,
        n_jobs=-1
    )
    sffs.fit(X_std, y)

    # Keep only the columns chosen by the selector.
    X_train_sffs = X_std[:, list(sffs.k_feature_idx_)]

    # Standardize with a throwaway scaler. Refitting the shared `stdsc` here
    # would leave it fitted on already-standardized columns, so it could no
    # longer be used to standardize raw `X` afterwards.
    X_train_sffs_std = StandardScaler().fit_transform(X_train_sffs)

    gs = GridSearchCV(
        DecisionTreeClassifier(random_state=0),
        tuned_parameters,
        verbose=1,
        cv=cv,
        n_jobs=-1
    )
    gs.fit(X_train_sffs_std, y)

    print(gs.best_score_, gs.best_params_)

    # Track the best tuned accuracy over all subset sizes.
    best_acc = max(best_acc, gs.best_score_)

    re_i = pd.DataFrame(
        [[sffs.k_feature_idx_, gs.best_score_, gs.best_params_]],
        columns=['Feature', 'accuracy', 'param']
    )
    # Appended every iteration so partial results stay available in `re`
    # if the run is interrupted.
    re = pd.concat([re, re_i])

re.to_csv(out_path)

## RF

# Grid-search space for the random forest. `bootstrap` and `criterion`
# are single-valued, so only depth, forest size and the two
# minimum-sample settings are actually varied.
tuned_parameters = dict(
    max_depth=[2, 3, 5, 10, None],
    n_estimators=[50, 100, 200, 300, 400],
    min_samples_split=[2, 3, 10],
    min_samples_leaf=[1, 3, 10],
    bootstrap=[True],
    criterion=["gini"],
)

In [None]:
# Fresh state for the random-forest run.
# Standardize all features once; `stdsc` keeps the fitted scaler.
stdsc = StandardScaler()
X_std = stdsc.fit_transform(X)

# Result table (one row is appended per subset size) and the best
# grid-search accuracy seen so far.
re = pd.DataFrame()
best_acc = 0

In [None]:
# SFFS + grid search for the random forest.
# For every subset size i (1 .. number of features):
#   1. sequential floating forward selection picks i features, scored by
#      cross-validated accuracy of a RandomForestClassifier(random_state=0);
#   2. the hyper-parameters in `tuned_parameters` are grid-searched on that
#      subset with the same `cv` splitter;
#   3. the selected feature indices, best score and best parameters are
#      appended to `re`.
# The accumulated table is written to SFFS/RF/<task_name>.csv at the end.

# Create the output directory before the (long) search so that a missing
# folder cannot make the final `to_csv` fail after all the work is done.
out_path = Path(f'SFFS/RF/{task_name}.csv')
out_path.parent.mkdir(parents=True, exist_ok=True)

for i in range(1, X.shape[1] + 1):
    sffs = SFS(
        RandomForestClassifier(random_state=0),
        k_features=i,
        forward=True,
        floating=True,
        verbose=2,
        scoring='accuracy',
        cv=cv,
        n_jobs=-1
    )
    sffs.fit(X_std, y)

    # Keep only the columns chosen by the selector.
    X_train_sffs = X_std[:, list(sffs.k_feature_idx_)]

    # Standardize with a throwaway scaler. Refitting the shared `stdsc` here
    # would leave it fitted on already-standardized columns, so it could no
    # longer be used to standardize raw `X` afterwards.
    X_train_sffs_std = StandardScaler().fit_transform(X_train_sffs)

    gs = GridSearchCV(
        RandomForestClassifier(random_state=0),
        tuned_parameters,
        verbose=1,
        cv=cv,
        n_jobs=-1
    )
    gs.fit(X_train_sffs_std, y)

    print(gs.best_score_, gs.best_params_)

    # Track the best tuned accuracy over all subset sizes.
    best_acc = max(best_acc, gs.best_score_)

    re_i = pd.DataFrame(
        [[sffs.k_feature_idx_, gs.best_score_, gs.best_params_]],
        columns=['Feature', 'accuracy', 'param']
    )
    # Appended every iteration so partial results stay available in `re`
    # if the run is interrupted.
    re = pd.concat([re, re_i])

re.to_csv(out_path)