In [None]:
from numpy import mean
from numpy import std
from sklearn.datasets import make_classification
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.feature_selection import RFE
from sklearn.tree import DecisionTreeClassifier
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from matplotlib import pyplot
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
import seaborn as sns

In [None]:
# Load the expression table: fields are ';'-separated and the "gene"
# column is used as the row index.
df = pd.read_csv(
    "Differentially_expressed_genes.csv",
    sep=";",
    index_col="gene",
)

In [None]:
# Positional split of the table: columns 0..230 are the model inputs (X),
# the column at position 231 is the class label (y).
X, y = df.iloc[:, :231], df.iloc[:, 231]

In [None]:
def get_models(svm_c, n_features):
    """Build one RFE + linear-SVM pipeline per candidate feature count.

    Parameters
    ----------
    svm_c : float
        Regularisation parameter ``C`` given to every ``SVC`` created here.
    n_features : int
        Exclusive upper bound on the number of features to keep; pipelines
        are built for 2, 3, ..., ``n_features - 1``.

    Returns
    -------
    dict
        Maps ``str(k)`` to a ``Pipeline`` whose step ``'s'`` is an ``RFE``
        keeping ``k`` features and whose step ``'m'`` is a linear ``SVC``.
        Empty when ``n_features <= 2``.
    """
    models = {}
    for k in range(2, n_features):
        # No gamma here: the linear kernel does not use it, and leaving it
        # out keeps this estimator identical to the one used for the final
        # feature selection in selected_features().
        selector = RFE(
            estimator=SVC(kernel='linear', C=svm_c),
            n_features_to_select=k,
        )
        classifier = SVC(kernel='linear', C=svm_c)
        models[str(k)] = Pipeline(steps=[('s', selector), ('m', classifier)])
    return models

In [None]:
def evaluate_model(model, X, y):
    """Cross-validate ``model`` on ``X``/``y`` and return the accuracy scores.

    The splitter is ``RepeatedStratifiedKFold`` with 10 splits, 3 repeats and
    a fixed ``random_state`` of 123. Scoring is delegated to
    ``cross_val_score`` using all available cores (``n_jobs=-1``); any error
    while fitting or scoring a split is re-raised (``error_score='raise'``).

    Returns
    -------
    The value returned by ``cross_val_score`` (one accuracy per split).
    """
    splitter = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=123)
    return cross_val_score(
        model, X, y,
        scoring='accuracy',
        cv=splitter,
        n_jobs=-1,
        error_score='raise',
    )

In [None]:
def run_rfe(models, X, y):
    """Evaluate every pipeline in ``models`` and report its accuracy.

    Parameters
    ----------
    models : dict
        Maps a label to an estimator; iterated in its own order.
    X, y
        Data passed unchanged to ``evaluate_model`` for each estimator.

    Returns
    -------
    (results, names) : tuple of lists
        ``results[i]`` holds the scores returned by ``evaluate_model`` for
        the estimator labelled ``names[i]``.

    One line per model is printed: label, mean score and standard deviation.
    """
    names = []
    results = []
    for label, pipeline in models.items():
        fold_scores = evaluate_model(pipeline, X, y)
        results.append(fold_scores)
        names.append(label)
        summary = (label, mean(fold_scores), std(fold_scores))
        print('>%s %.3f (%.3f)' % summary)
    return results, names

In [None]:
def selected_features(svm_c, n, X, y, xcols):
    """Fit RFE with a linear SVM and return the labels of the kept columns.

    Parameters
    ----------
    svm_c : float
        Regularisation parameter ``C`` of the linear ``SVC`` used by RFE.
    n : int
        Number of features RFE should keep.
    X, y
        Training data passed to ``RFE.fit``.
    xcols : sequence
        Column labels, indexed by the column position in ``X``.

    Returns
    -------
    list
        ``xcols[i]`` for every position ``i`` flagged in ``rfe.support_``,
        in column order.
    """
    selector = RFE(estimator=SVC(kernel='linear', C=svm_c), n_features_to_select=n)
    selector.fit(X, y)
    # support_ is the per-column keep/drop mask produced by the fit.
    return [xcols[pos] for pos, keep in enumerate(selector.support_) if keep]

In [None]:
# Evaluate RFE + linear SVM for every candidate number of kept features and
# plot the cross-validated accuracy distribution of each candidate.
svm_c = 5
xcols = X.columns  # capture the column labels before X is replaced below
n_features = len(xcols)  # lower this (e.g. 20) for a quick trial run
models = get_models(svm_c, n_features)

# Rescale every feature to the [0, 1] range. X is rebound to the scaler's
# output, so this cell must run exactly once after the cell that defines X.
# NOTE(review): the scaler is fitted on all rows before cross-validation;
# consider moving it into the pipeline if leakage between folds matters.
scaler = MinMaxScaler(feature_range=(0, 1))
X = scaler.fit_transform(X)

results, names = run_rfe(models, X, y)
pyplot.boxplot(results, labels=names, showmeans=True)
# Save BEFORE show(): once the figure has been shown it is released, and a
# later savefig() would write an empty canvas to RFE.png.
pyplot.savefig("RFE.png")
pyplot.show()

In [None]:
# Fit RFE on X and y and keep the labels of the 46 columns it retains.
gene_remained_FRE = selected_features(svm_c, n=46, X=X, y=y, xcols=xcols)