# Imports

In [17]:
from MESA import *
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve
from sklearn.datasets import make_classification
# Names this notebook uses that were previously reachable only through the star import above.
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedKFold
from sklearn.svm import SVC

In [18]:
def plot_ROC(y_true, y_pred):
    """Draw a ROC curve on a fresh figure and report its AUC in the legend.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels; forwarded unchanged to ``roc_curve`` and
        ``roc_auc_score``.
    y_pred : array-like
        Prediction scores; forwarded unchanged to the same two functions.

    Returns
    -------
    None
        The figure is displayed via ``plt.show()``.
    """
    fpr, tpr, _ = roc_curve(y_true, y_pred)
    auc = roc_auc_score(y_true, y_pred)

    # A dedicated figure/axes pair keeps the curve from being drawn onto
    # whatever figure happens to be current in the pyplot state machine.
    _fig, ax = plt.subplots()
    # Three decimals keep the legend readable instead of a full float repr.
    ax.plot(fpr, tpr, label="AUC=%.3f" % auc)
    ax.set_ylabel('True Positive Rate')
    ax.set_xlabel('False Positive Rate')
    ax.legend(loc="lower right")  # named form of legend location code 4
    plt.show()

# Create random datasets for testing

In [62]:
# Two synthetic data sets generated with the same size, seed and ordering;
# only the informative / redundant feature counts differ between them.
shared_kwargs = dict(n_samples=20, n_features=40, random_state=0, shuffle=False)

dataset1, label = make_classification(n_informative=30, n_redundant=1, **shared_kwargs)
dataset2, label_ = make_classification(n_informative=20, n_redundant=8, **shared_kwargs)

In [64]:
# Wrap each array in a DataFrame and transpose it, so that rows are features
# and columns are samples named by class.
feature_names = ['feature_' + str(idx) for idx in range(40)]
# Sample names for BOTH frames are derived from `label`; `label_` is not used here.
sample_names = ['Cancer' if flag == 1 else 'Normal' for flag in label]

dataset1 = pd.DataFrame(dataset1, columns=feature_names, index=sample_names).T
dataset2 = pd.DataFrame(dataset2, columns=feature_names, index=sample_names).T

In [67]:
# Preview the first five rows (features) of the first data set.
dataset1.head(n=5)

Unnamed: 0,Normal,Normal.1,Normal.2,Normal.3,Normal.4,Cancer,Cancer.1,Cancer.2,Cancer.3,Cancer.4,Normal.5,Normal.6,Normal.7,Normal.8,Normal.9,Cancer.5,Cancer.6,Cancer.7,Cancer.8,Cancer.9
feature_0,-2.657438,0.250462,-6.316733,-0.183008,-3.157091,-0.373797,-1.271039,-1.655216,2.316599,-2.70839,-3.514962,-0.63665,-2.301711,-0.555753,3.657662,0.797684,-1.345706,-4.046547,0.323794,5.727181
feature_1,-4.047969,0.059135,12.176013,2.180394,-1.959148,7.983654,4.252196,-4.331032,3.810289,0.977776,-0.197668,7.882415,4.29904,-2.233456,1.555099,-1.260301,5.317385,0.997258,4.693458,1.638596
feature_2,0.336807,2.574391,1.643657,2.056295,-2.111729,-3.422055,-1.489284,-0.688571,-0.44913,-1.06702,-0.043363,-0.856425,-1.341088,1.084733,6.465461,1.609761,2.851012,6.886433,-5.484811,0.894754
feature_3,2.617077,1.375779,2.223775,1.451959,1.754885,4.09396,3.859569,0.435443,4.673766,1.010637,2.421923,5.816843,-3.427341,-5.362654,0.817183,-0.067298,-6.467959,-6.892887,-1.956304,0.193378
feature_4,-8.929103,5.587176,-1.891227,7.925671,0.886867,3.427853,8.519712,-3.558961,1.644851,-0.283583,1.569537,0.003211,-1.846819,1.642075,-5.320329,-0.843546,-5.943937,-3.330491,-0.213274,4.349994


In [68]:
# Preview the first five rows (features) of the second data set.
dataset2.head(n=5)

Unnamed: 0,Normal,Normal.1,Normal.2,Normal.3,Normal.4,Cancer,Cancer.1,Cancer.2,Cancer.3,Cancer.4,Normal.5,Normal.6,Normal.7,Normal.8,Normal.9,Cancer.5,Cancer.6,Cancer.7,Cancer.8,Cancer.9
feature_0,-0.602556,3.643032,3.037892,-1.109481,2.385997,-5.984252,-2.916217,1.923783,-0.342886,-0.208612,1.133495,0.177237,0.658569,0.143992,-2.691958,-2.962459,1.106371,-0.519772,-2.7376,2.896222
feature_1,5.912015,4.584149,0.131728,-2.882158,5.968676,2.285868,-0.926496,1.840909,-0.130376,-1.32026,3.914576,-1.048612,5.839853,2.979434,2.506142,-0.443105,1.252658,-1.382861,-4.003773,-3.995266
feature_2,-3.712827,2.34971,-1.67497,6.494007,-0.958747,-4.429147,-2.819524,-1.71712,-0.549012,-0.380938,-2.246815,2.602718,-1.78721,2.916498,3.777609,-0.087982,0.793247,-4.773273,-0.976853,-3.822507
feature_3,-0.249867,-2.310414,0.478615,0.945218,-2.274545,1.600593,-2.064119,1.653003,1.510509,0.423255,-1.516784,-1.658048,-1.392895,-1.889464,1.077493,3.039729,-7.109191,-1.954977,4.625182,-3.502716
feature_4,-2.576757,3.888342,2.090072,-3.132203,-2.847686,-1.642812,2.669854,-2.681256,1.341412,5.071602,-1.507957,3.669974,-2.416891,4.494471,4.878578,0.287927,-0.834164,2.106695,-0.603902,0.547497


# Run MESA

## Single modality

In [69]:
# One seed shared by the splitter and both classifiers.
random_state = 0
seed_kwargs = {"random_state": random_state}

# Train-test split inside SBS: stratified, shuffled 3-fold.
cv_sbs = StratifiedKFold(n_splits=3, shuffle=True, **seed_kwargs)

# Classifiers used for final evaluation on test sets.
svm = SVC(kernel="linear", probability=True, **seed_kwargs)
rf = RandomForestClassifier(n_jobs=-1, **seed_kwargs)

In [None]:
# Single-modality MESA run on the first data set.
# The settings are gathered in one mapping so they can be read at a glance.
mesa_settings_1 = {
    "estimator": svm,
    "cv": cv_sbs,
    "classifiers": [svm, rf],
    "min_feature": 25,
    "boruta_top_n_feature": 30,
}
mesa_result_1 = MESA_single(X=dataset1, y=label, **mesa_settings_1)

Best combination: (27, 9, 36, 29, 6, 19, 37, 39, 14, 31, 35, 8, 28, 22, 33, 23, 1, 3, 17, 38, 2, 13, 32, 21, 10)
Best score: 1.0
Best dimension: 25
Best combination: (39, 27, 36, 9, 7, 37, 22, 20, 15, 19, 13, 8, 6, 31, 35, 23, 29, 3, 38, 17, 34, 21, 2, 0, 4)
Best score: 1.0
Best dimension: 25


In [None]:
# AUC (last element of the MESA_single result).
# Wrapping the value in a one-element tuple keeps %-formatting valid even if
# the stored value is itself a tuple; for any other type the output is unchanged.
print('AUC of dataset 1: %s' % (mesa_result_1[-1],))

In [None]:
# NOTE(review): presumably element 1 holds the true labels and element 2 the
# per-classifier predictions (index 0 = first classifier passed in) — confirm
# against MESA_single's return value.
y_true_1 = mesa_result_1[1]
y_pred_1 = mesa_result_1[2][0]
plot_ROC(y_true_1, y_pred_1)

In [None]:
# Single-modality MESA run on the second data set, with the same settings
# and the same `label` vector as the first run.
mesa_settings_2 = {
    "estimator": svm,
    "cv": cv_sbs,
    "classifiers": [svm, rf],
    "min_feature": 25,
    "boruta_top_n_feature": 30,
}
mesa_result_2 = MESA_single(X=dataset2, y=label, **mesa_settings_2)

In [None]:
# AUC (last element of the MESA_single result).
# Wrapping the value in a one-element tuple keeps %-formatting valid even if
# the stored value is itself a tuple; for any other type the output is unchanged.
print('AUC of dataset 2: %s' % (mesa_result_2[-1],))

In [None]:
# NOTE(review): presumably element 1 holds the true labels and element 2 the
# per-classifier predictions (index 0 = first classifier passed in) — confirm
# against MESA_single's return value.
y_true_2 = mesa_result_2[1]
y_pred_2 = mesa_result_2[2][0]
plot_ROC(y_true_2, y_pred_2)

## Multimodality

In [None]:
# Combine both modalities. Element 0 of each single-modality result is passed
# as that modality's `feature_selected` entry, in the same order as X_list.
modalities = [dataset1, dataset2]
selected_per_modality = [mesa_result_1[0], mesa_result_2[0]]

mesa_multimodal = MESA_integration(
    X_list=modalities,
    y=label,
    feature_selected=selected_per_modality,
    classifiers=[svm, rf],
)

In [None]:
# Multimodal AUC (last element of the MESA_integration result).
# Wrapping the value in a one-element tuple keeps %-formatting valid even if
# the stored value is itself a tuple; for any other type the output is unchanged.
print('AUC of multimodal integration: %s' % (mesa_multimodal[-1],))

In [None]:
# NOTE(review): presumably element 0 holds the true labels and element 1 the
# per-classifier predictions (index 0 = first classifier passed in) — confirm
# against MESA_integration's return value.
y_true_multi = mesa_multimodal[0]
y_pred_multi = mesa_multimodal[1][0]
plot_ROC(y_true_multi, y_pred_multi)