In [8]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
import os
import sys

from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC

import warnings
import sklearn.exceptions
# Suppress sklearn's UndefinedMetricWarning for the rest of the session.
# NOTE(review): presumably raised by f1_score when some class receives no
# predictions in a fold -- confirm that hiding it is intended.
warnings.filterwarnings("ignore", category=sklearn.exceptions.UndefinedMetricWarning)

In [9]:
# --- Experiment setup -------------------------------------------------------
# Collects the run parameters interactively, builds the requested classifier
# and resolves the dataset / feature-ranking directories used by the loop below.

# One scaler instance shared by the cross-validation loop further down.
scaler = StandardScaler()

d_num = list(map(int, input('Enter space separated dataset numbers: ').split()))

# Normalise the answer so that e.g. "svm" or " MLP " is accepted, mirroring the
# case-insensitive handling of the feature-selection method below.
classifier = input('Enter classification method (NB/DT/SVM/MLP): ').strip().upper()
if classifier == 'NB':
    clf = GaussianNB()
elif classifier == 'DT':
    clf = DecisionTreeClassifier()
elif classifier == 'SVM':
    clf = SVC(kernel='rbf', gamma=0.5, C=10000)
elif classifier == 'MLP':
    clf = MLPClassifier(hidden_layer_sizes=(10, 10, 10), max_iter=1000)
else:
    print('[ERROR] Choose one of the given methods!')
    sys.exit()
print('Classifier initialized')

fs_method = input('Enter Feature Selection Method (UDFS/LLCFS/CFS): ').strip().lower()
# Reject unknown methods right away: otherwise no feature subset is ever built
# and the run fails later with a NameError (or silently reuses a stale
# X_selected left over in the kernel from a previous run).
if fs_method not in ('udfs', 'llcfs', 'cfs'):
    print('[ERROR] Choose one of the given feature selection methods!')
    sys.exit()

# CFS uses every index stored in its result file, so no count is asked for.
if fs_method != 'cfs':
    n_features = int(input('Enter the number of features to be selected: '))
    # A non-positive count would select *all* features for UDFS (sel[-0:])
    # and *none* for LLCFS (sel[:0]); neither is a meaningful request.
    if n_features <= 0:
        print('[ERROR] The number of features must be a positive integer!')
        sys.exit()

# The project root defaults to the original location but can be overridden
# through the AML_PROJECT_DIR environment variable on another machine.
project_dir = os.environ.get('AML_PROJECT_DIR', 'E:/UWindsor/Term 3/AML/Project')
dataset_path = project_dir + '/Datasets'
fs_rank_path = project_dir + '/' + fs_method + '_results'

# --- Evaluation -------------------------------------------------------------
# For every requested dataset: load it, keep the columns chosen by the selected
# feature-selection method, then report accuracy and weighted F1 for each fold
# of a 10-fold stratified cross-validation.

# The LLCFS results for all datasets are read from a single workbook, so load
# it once up front instead of on every pass through the dataset loop.
llcfs = pd.read_excel('llcfs_results/llcfs.xlsx') if fs_method == 'llcfs' else None

for num in d_num:
    print('Dataset D'+str(num))
    filename = os.path.join(dataset_path, 'D'+str(num)+'.csv')
    feat_rank_file = os.path.join(fs_rank_path, 'D'+str(num)+'.txt')

    df = pd.read_csv(filename, header=None)

    # Last column is the class label; every other column is a feature.
    X = df[df.columns[:-1]]
    Y = df[df.columns[-1]]

    if fs_method == 'udfs':
        with open(feat_rank_file) as file:
            data = file.read().strip()
        # The check must use the current dataset number `num`; comparing the
        # whole list `d_num` with 13 is always False and made this branch dead.
        if num == 13:
            # D13's file is parsed as a bracketed, comma-separated list.
            data = data.replace(' ', '').replace('\n', '')
            sel = list(map(int, data[1:-1].split(',')))
        else:
            # Other files are parsed as a bracketed, space-separated list
            # that may wrap over several lines.
            tokens = data.replace('\n', '')[1:-1].split(' ')
            sel = [int(tok) for tok in tokens if tok != '']
        # Keeps the LAST n_features entries of the list.
        # NOTE(review): presumably the file is ordered worst-to-best -- confirm.
        X_selected = X.iloc[:, sel[-n_features:]]
    elif fs_method == 'cfs':
        with open(feat_rank_file) as file:
            # strip() tolerates a trailing newline after the closing bracket.
            data = file.read().strip()
        sel = list(map(int, data[1:-1].split(',')))
        X_selected = X.iloc[:, sel]
    elif fs_method == 'llcfs':
        # Row (num - 1) of the sheet, skipping its first cell and stopping
        # before position df.shape[1] - 1.
        sel = list(llcfs.iloc[num-1])[1:df.shape[1]-1]
        # NOTE(review): drops index 354, or else 219, when present -- presumably
        # an out-of-range entry for a specific dataset; confirm the intent and
        # whether `elif` (rather than a second `if`) is deliberate.
        if 354 in sel:
            sel.remove(354)
        elif 219 in sel:
            sel.remove(219)
        X_selected = X.iloc[:, sel[:n_features]]
    else:
        # Fail loudly instead of continuing with an undefined or stale subset.
        raise ValueError('Unknown feature selection method: ' + repr(fs_method))

    skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=1)
    for fold, (train_idx, test_idx) in enumerate(skf.split(X_selected, Y), start=1):
        print('\tfold', fold)
        x_train = np.array(X_selected.iloc[train_idx])
        x_test = np.array(X_selected.iloc[test_idx])
        y_train = np.array(Y.iloc[train_idx])
        y_test = np.array(Y.iloc[test_idx])

        # Standardise inputs for the MLP. The comparison previously used
        # ' MLP' (leading space), which can never match, so scaling was
        # silently skipped. The scaler is fitted on the training fold only
        # and then applied to both splits.
        if classifier == 'MLP':
            scaler.fit(x_train)
            x_train = scaler.transform(x_train)
            x_test = scaler.transform(x_test)

        clf.fit(x_train, y_train)
        y_pred = clf.predict(x_test)
        # Reuse y_pred rather than calling clf.score, which would run
        # prediction on the test fold a second time.
        acc = accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred, average='weighted')
        print('\tacc :', acc)
        print('\tf1 :', f1)
        print()

Enter space separated dataset numbers: 5
Enter classification method (NB/DT/SVM/MLP): DT
Classifier initialized
Enter Feature Selection Method (UDFS/LLCFS/CFS): UDFS
Enter the number of features to be selected: 215
Dataset D5
	fold 1
	acc : 0.9765714285714285
	f1 : 0.9765523883761833

	fold 2
	acc : 0.9828571428571429
	f1 : 0.9827370626120738

	fold 3
	acc : 0.9771428571428571
	f1 : 0.9770662832190433

	fold 4
	acc : 0.9825714285714285
	f1 : 0.9826679703176916

	fold 5
	acc : 0.986
	f1 : 0.9860015599357685

	fold 6
	acc : 0.9808571428571429
	f1 : 0.9808362586913306

	fold 7
	acc : 0.9811428571428571
	f1 : 0.9811512015544203

	fold 8
	acc : 0.9854285714285714
	f1 : 0.9853855269866629

	fold 9
	acc : 0.9814285714285714
	f1 : 0.98140640723855

	fold 10
	acc : 0.978
	f1 : 0.9778214207963809

