In [12]:
import numpy as np
from numpy import mean
from numpy import std

import pandas as pd
from matplotlib import pyplot as plt
import math

from skopt import BayesSearchCV
from skopt.space import Real, Categorical, Integer

from sklearn.svm import SVC
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import LeaveOneOut, train_test_split, cross_val_score, GridSearchCV, cross_validate, KFold, StratifiedKFold, RandomizedSearchCV
from sklearn.metrics import roc_curve, auc, precision_recall_curve, classification_report, average_precision_score, f1_score, accuracy_score, roc_auc_score, balanced_accuracy_score, matthews_corrcoef

In [2]:
# Load the two pickled inputs: the feature table and the metadata table.
# NOTE(review): unpickling can execute arbitrary code — only load files that
# come from a trusted source.
data_hvg_95, metadata_final = (
    pd.read_pickle(pickle_name)
    for pickle_name in (
        "data_normalized_highly_variable_genes_95.pkl",
        "metadata_final.pkl",
    )
)

In [3]:
# Feature matrix as a plain NumPy array.
X = data_hvg_95.values

# Binary target: 'Tumor' -> 1, 'Periphery' -> 0.
# An explicit mapping is used instead of chained `replace` calls so that any
# unexpected or missing class becomes NaN and is rejected here, rather than
# leaving a string mixed in with the integers (which only fails later, inside
# scikit-learn, with a much less helpful message).
class_to_label = {'Tumor': 1, 'Periphery': 0}
labels = metadata_final["Cell class"].map(class_to_label)

unmapped = labels.isna()
if unmapped.any():
    unexpected = sorted(
        metadata_final.loc[unmapped, "Cell class"].astype(str).unique()
    )
    raise ValueError(f"Unexpected 'Cell class' values: {unexpected}")

# Force an integer dtype explicitly instead of relying on implicit downcasting.
labels = labels.astype(int)
y = np.array(labels)

# NOTE(review): this assumes the rows of `data_hvg_95` and `metadata_final`
# are in the same order — only the row count can be verified here.
if len(y) != X.shape[0]:
    raise ValueError(
        f"X has {X.shape[0]} rows but y has {len(y)} labels"
    )

In [17]:
# Scorer names: `inner_metric` is meant for the hyperparameter search,
# `outer_metrics` are the scores reported on the outer folds.
inner_metric = 'matthews_corrcoef'
outer_metrics = ['roc_auc', 'balanced_accuracy', 'matthews_corrcoef']

# Fixed seed: with shuffle=True and no random_state the folds (and therefore
# every reported score) change on each run, so results could not be reproduced
# or compared between models.
SEED = 42

# configure the cross-validation procedure
cv_inner = StratifiedKFold(n_splits=3, shuffle=True, random_state=SEED)
cv_outer = StratifiedKFold(n_splits=10, shuffle=True, random_state=SEED)

In [None]:
# Nested cross-validation of an SVC: BayesSearchCV tunes the hyperparameters on
# the inner folds, and cross_validate scores that whole search procedure on the
# outer folds.
rf = SVC()  # named `rf`, but the estimator is a support vector classifier

# Search space. Per the scikit-learn SVC documentation, `degree` only affects
# the 'poly' kernel and `gamma` is not used by the 'linear' kernel.
bayes_params = {
             'C': Real(1e-6, 1e+6, prior='log-uniform'),
             'gamma': Real(0.01, 1e+1, prior='log-uniform'),
             'degree': Integer(1,8),
             'kernel': Categorical(['linear', 'poly', 'rbf']),
         }

# `scoring=inner_metric` makes the search optimise the metric chosen for the
# inner loop; without it BayesSearchCV falls back to the estimator's default
# score (accuracy for SVC) and `inner_metric` is silently ignored.
rf_model = BayesSearchCV(
    estimator=rf,
    search_spaces=bayes_params,
    scoring=inner_metric,
    cv=cv_inner,
    verbose=3,
    n_jobs=-1,
    n_iter=30,
)

# execute the nested cross-validation
# (cross_validate fits clones, so `rf_model` itself is still unfitted afterwards)
scores = cross_validate(rf_model, X, y, scoring=outer_metrics, cv=cv_outer, n_jobs=-1)

# perform the search on the full data set to obtain the final hyperparameters
rf_model.fit(X, y)

# report the best result
params = rf_model.best_params_
print("\tBest parameters:", params)

# Mean inner-CV value of `inner_metric` for the best parameter setting.
score = rf_model.best_score_
print("\tBest score:", score)

# Full search trace (one row per evaluated parameter setting).
result = pd.DataFrame(rf_model.cv_results_)

In [None]:
# Tabulate the per-fold outer-CV results and show the mean of every column.
# (`pd.Dataframe` does not exist — the class is `pd.DataFrame`.)
scores = pd.DataFrame(scores)
scores.mean(axis=0)

In [None]:
# Cross-validate an SVC on the outer folds and show the mean of every
# timing/score column.
# NOTE(review): `final_svc` is built with scikit-learn's default
# hyperparameters, not the tuned `best_params_` — confirm this is intended
# as a baseline.
final_svc = SVC()
scores = pd.DataFrame(
    cross_validate(final_svc, X, y, scoring=outer_metrics, cv=cv_outer, n_jobs=-1)
)
scores.mean(axis=0)

In [None]:
# define the model
# (named `rf`, but the estimator is a multinomial naive Bayes classifier)
# NOTE(review): MultinomialNB rejects negative feature values — confirm that X
# is non-negative after normalization.
rf = MultinomialNB()

# define parameters
grid_params = {'alpha': Real(1e-6, 1e+6, prior='log-uniform'),
               'fit_prior': Categorical([True, False])}

# Search over `grid_params`. The SVC search space `bayes_params` must not be
# used here: it contains C/gamma/degree/kernel, which MultinomialNB does not
# accept. `scoring=inner_metric` makes the search optimise the metric chosen
# for the inner loop instead of the estimator's default accuracy.
rf_grid = BayesSearchCV(
    estimator=rf,
    search_spaces=grid_params,
    scoring=inner_metric,
    cv=cv_inner,
    verbose=3,
    n_jobs=-1,
    n_iter=30,
)

# execute the nested cross-validation
scores = cross_validate(rf_grid, X, y, scoring=outer_metrics, verbose=3, cv=cv_outer, n_jobs=-1)

#Best
# Refit the search on the full data set to report the selected hyperparameters.
rf_grid.fit(X, y)
print("Best parameters:  ", rf_grid.best_params_)
print("Best score: ", rf_grid.best_score_)