# Imports

In [None]:
import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import (cross_val_score, cross_val_predict,
                                     StratifiedKFold, permutation_test_score)
from sklearn.feature_selection import SelectKBest
from sklearn.metrics import roc_auc_score

# Shared random seed: passed as random_state to the data generator and the
# CV splitter so that repeated runs produce the same data and folds.
seed = 243452

# Set parameters

In [None]:
# Total number of rows, and how many of the features are informative.
n_samples = 1000
n_inform = 100

# Rows that keep the original (informative) features vs. the remainder.
n_samples_signal = 400
n_samples_no_signal = n_samples - n_samples_signal

In [None]:
# Synthetic classification data: n_samples rows and 1000 features, of which
# n_inform are informative and none are redundant. Shuffling is enabled and
# the generator is seeded so the data set is the same on every run.
X_1, y = make_classification(
    n_samples=n_samples,
    n_features=1000,
    n_informative=n_inform,
    n_redundant=0,
    shuffle=True,
    random_state=seed,
)

In [None]:
# Five stratified folds, shuffled with the shared seed; this splitter is
# reused by every cross_val_score call in the notebook.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed)

# Logistic regression with library-default settings.
clf = LogisticRegression()

In [None]:
# Baseline: cross-validated ROC AUC on the untouched data set.
scores = cross_val_score(
    estimator=clf,
    X=X_1,
    y=y,
    scoring="roc_auc",
    cv=cv,
)

In [None]:
# Per-fold ROC AUC, followed by its mean and standard deviation across folds.
print(scores)
print(np.mean(scores), np.std(scores))


In [None]:
# Score every feature against y with SelectKBest's default scoring function
# and mark the n_inform highest-scoring columns as selected.
kbest = SelectKBest(k=n_inform)
kbest.fit(X=X_1, y=y)

In [None]:
# Build X_2: a version of X_1 with the selected (informative) columns removed
# and the width padded back to the original number of features.

# 1. Keep only the columns SelectKBest did NOT select. Boolean-mask indexing
#    returns a new array, so X_1 is left untouched without an explicit copy.
X_2 = X_1[:, ~kbest.get_support()]

# 2. Prepend a row-shuffled copy of the last n_inform remaining columns so
#    that X_2 has as many columns as X_1. The shuffle draws from a generator
#    seeded with the shared seed; the global np.random state is unseeded and
#    would make X_2 (and every score derived from it) differ between runs.
rng = np.random.RandomState(seed)
X_2 = np.concatenate((rng.permutation(X_2[:, -n_inform:]), X_2), axis=1)

In [None]:
# Cell output: number of rows outside the signal-carrying subset
# (n_samples - n_samples_signal).
n_samples_no_signal

In [None]:
# Build the combined matrix: the first n_samples_signal rows come from X_1,
# the remaining rows come from X_2 (informative columns removed).
X_mixed = np.vstack((X_1[:n_samples_signal], X_2[n_samples_signal:]))

In [None]:
# Cross-validated ROC AUC on the combined matrix, using the same folds.
scores_mixed = cross_val_score(
    estimator=clf,
    X=X_mixed,
    y=y,
    scoring="roc_auc",
    cv=cv,
)

In [None]:
# Per-fold ROC AUC for the combined matrix, then mean and standard deviation.
print(scores_mixed)
print(np.mean(scores_mixed), np.std(scores_mixed))

In [None]:
# Cross-validated ROC AUC on X_2 (informative columns removed), same folds.
scores_2 = cross_val_score(
    estimator=clf,
    X=X_2,
    y=y,
    scoring="roc_auc",
    cv=cv,
)

In [None]:
# Per-fold ROC AUC for X_2, then mean and standard deviation across folds.
print(scores_2)
print(np.mean(scores_2), np.std(scores_2))

In [None]:
# Fit a fresh classifier on the full data set X_1, then score X_2 with it.
clf = LogisticRegression()
clf.fit(X_1, y)

# ROC AUC ranks samples by a continuous score. Passing the hard 0/1 labels
# from predict() reduces the curve to a single threshold and misreports the
# AUC, so feed the signed distance to the decision boundary instead.
roc_auc_score(y, clf.decision_function(X_2))