## Gene Disease Association

Select the dataset: run the next two cells and enter the number of the cancer type when prompted.

In [None]:
from utils.datasets import BrainCancer, BreastCancer, LiverCancer, ProstateCancer

# Dataset classes offered to the user, in the same order as the numbered menu below.
cancer_types = [
    BrainCancer,
    BreastCancer,
    ProstateCancer,
]

# The trailing empty entry reproduces the blank line printed after the menu.
menu_lines = ("1) Brain Cancer", "2) Breast Cancer", "3) Prostate Cancer", "")
print("Select the cancer type to train for")
print("\n".join(menu_lines))

In [None]:
# Read the menu choice; options are 1-based positions in `cancer_types`.
# A non-numeric entry still raises ValueError from int().
i = int(input("Option: "))

# Reject out-of-range choices explicitly: without this check, 0 or a negative
# number would wrap around and silently pick a dataset from the end of the list.
if not 1 <= i <= len(cancer_types):
    raise ValueError(
        f"Invalid option {i}: enter a number from 1 to {len(cancer_types)}"
    )

# Instantiate the chosen dataset class.
dataset = cancer_types[i - 1]()
print("Training for", dataset.name)

Dataset details

In [None]:
from sklearn.model_selection import train_test_split

from utils.classifiers import classify

# Banner with the dataset name, then load the feature matrix and its labels.
rule = '-' * 20
print(rule, dataset.name, rule)
samples, sample_labels = dataset.get_samples_and_labels()

# Summary: row/column counts and the class balance (labels equal to 1 count as positive).
total_samples = samples.shape[0]
print('Total Samples:', total_samples)
print('Total Features:', samples.shape[1])

pos_samples = sample_labels.count(1)
print('Positive Samples:', pos_samples)
print('Negative Samples:', total_samples - pos_samples)

# A fixed random_state keeps the 70/30 partition identical on every run.
print('Splitting dataset into 70% training and 30% test..')
X_train, X_test, y_train, y_test = train_test_split(
    samples,
    sample_labels,
    random_state=109,
    test_size=0.3,
)

# Shared accumulator passed to every classify() call in the cells below.
results = []

1. No feature selection

In [None]:
%%time
# Baseline run: classify() receives the untouched train/test split (all features)
# and the shared `results` list. No `fs` label is passed here, unlike the
# feature-selection cells — presumably classify() falls back to a default label;
# confirm in utils.classifiers.
classify(X_train, X_test, y_train, y_test, results=results)

2. Binary BAT Algorithm (BBA)

In [None]:
%%time
from Py_FS.wrapper.nature_inspired import BBA

# Run the feature search on the training split only. Searching on the full
# `samples` matrix would let the held-out test rows influence which features are
# kept, making the test scores reported below optimistically biased.
# NOTE(review): 150 and 10 are presumably the agent count and iteration count,
# and val_size the size of the optimiser's internal validation split — confirm in Py_FS.
sol = BBA(150, 10, X_train, y_train, weight_acc=0.7, val_size=14)

# Column indices whose entry in the best agent equals 1 (i.e. selected features).
idx = [col for col, bit in enumerate(sol.best_agent) if bit == 1]
_X_train = X_train[:, idx]
_X_test = X_test[:, idx]

print('=> Features Selected:', len(idx))
classify(_X_train, _X_test, y_train, y_test, results=results, fs='Binary BAT Algorithm')

3. Mayfly Algorithm (MA)

In [None]:
%%time
from Py_FS.wrapper.nature_inspired import MA

# Run the feature search on the training split only. Searching on the full
# `samples` matrix would let the held-out test rows influence which features are
# kept, making the test scores reported below optimistically biased.
# NOTE(review): 150 and 10 are presumably the agent count and iteration count,
# and val_size the size of the optimiser's internal validation split — confirm in Py_FS.
sol = MA(150, 10, X_train, y_train, weight_acc=0.7, val_size=14)

# Column indices whose entry in the best agent equals 1 (i.e. selected features).
idx = [col for col, bit in enumerate(sol.best_agent) if bit == 1]
_X_train = X_train[:, idx]
_X_test = X_test[:, idx]

print('=> Features Selected:', len(idx))
# Label corrected from the misspelled 'Mafly Algorithm' so the results table reads properly.
classify(_X_train, _X_test, y_train, y_test, results=results, fs='Mayfly Algorithm')

4. Particle Swarm Optimization (PSO)

In [None]:
%%time
from Py_FS.wrapper.nature_inspired import PSO

# Run the feature search on the training split only. Searching on the full
# `samples` matrix would let the held-out test rows influence which features are
# kept, making the test scores reported below optimistically biased.
# NOTE(review): 150 and 10 are presumably the agent count and iteration count,
# and val_size the size of the optimiser's internal validation split — confirm in Py_FS.
sol = PSO(150, 10, X_train, y_train, weight_acc=0.7, val_size=14)

# Column indices whose entry in the best agent equals 1 (i.e. selected features).
idx = [col for col, bit in enumerate(sol.best_agent) if bit == 1]
_X_train = X_train[:, idx]
_X_test = X_test[:, idx]

print('=> Features Selected:', len(idx))
classify(_X_train, _X_test, y_train, y_test, results=results, fs='Particle Swarm Optimization')

5. Grey Wolf Optimizer (GWO)

In [None]:
%%time
from Py_FS.wrapper.nature_inspired import GWO

# Run the feature search on the training split only. Searching on the full
# `samples` matrix would let the held-out test rows influence which features are
# kept, making the test scores reported below optimistically biased.
# NOTE(review): 150 and 10 are presumably the agent count and iteration count,
# and val_size the size of the optimiser's internal validation split — confirm in Py_FS.
sol = GWO(150, 10, X_train, y_train, weight_acc=0.7, val_size=14)

# Column indices whose entry in the best agent equals 1 (i.e. selected features).
idx = [col for col, bit in enumerate(sol.best_agent) if bit == 1]
_X_train = X_train[:, idx]
_X_test = X_test[:, idx]

print('=> Features Selected:', len(idx))
classify(_X_train, _X_test, y_train, y_test, results=results, fs='Grey Wolf Optimiser')

6. Harmony Search (HS)

In [None]:
%%time
from joblib import dump

from Py_FS.wrapper.nature_inspired import HS

import os

# Run the feature search on the training split only. Searching on the full
# `samples` matrix would let the held-out test rows influence which features are
# kept, making the test scores reported below optimistically biased.
# NOTE(review): 150 and 10 are presumably the agent count and iteration count,
# and val_size the size of the optimiser's internal validation split — confirm in Py_FS.
sol = HS(150, 10, X_train, y_train, weight_acc=0.7, val_size=14)

# Column indices whose entry in the best agent equals 1 (i.e. selected features).
idx = [col for col, bit in enumerate(sol.best_agent) if bit == 1]
_X_train = X_train[:, idx]
_X_test = X_test[:, idx]

# Save indexes of selected features. Create the output directory first so the
# dump does not fail on a fresh checkout where 'joblibs/' does not exist yet.
os.makedirs('joblibs', exist_ok=True)
dump(idx, f'joblibs/{dataset.name}-idx.joblib')

print('=> Features Selected:', len(idx))
# dump_name points into the same 'joblibs/' directory — presumably classify()
# persists a fitted model there; confirm in utils.classifiers.
classify(_X_train, _X_test, y_train, y_test, results=results, fs='Harmony Search', dump_name=f'joblibs/{dataset.name}.joblib')

### Final Result

In [None]:
from tabulate import tabulate

# Column titles: the feature-selection label first, then one column per classifier.
# NOTE(review): assumes each row in `results` lists its values in this order —
# confirm against classify() in utils.classifiers.
result_headers = [
    'Feature Selection',
    'SVM',
    'Decision Tree',
    'Extra Trees',
    'Linear DA',
    'kNN',
    'GaussianNB',
    'MLP',
    'Random Forest',
]

# Render every accumulated row as one boxed table.
summary_table = tabulate(results, headers=result_headers, tablefmt='fancy_grid')
print(summary_table)