In [1]:
from sklearn.datasets import make_classification

In [37]:
x,y = make_classification(n_samples=1000,n_features=20,n_informative=15,n_redundant=5,
                         random_state=7)

In [3]:
x.shape , y.shape

((1000, 20), (1000,))

In [7]:
x[0] , y[0]

(array([ 0.2929949 , -4.21223056, -1.288332  , -2.17849815, -0.64527665,
         2.58097719,  0.28422388, -7.1827928 , -1.91211104,  2.73729512,
         0.81395695,  3.96973717, -2.66939799,  3.34692332,  4.19791821,
         0.99990998, -0.30201875, -4.43170633, -2.82646737,  0.44916808]),
 1)

Train-Test Evaluation With Naive Data Preparation

In [8]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [9]:
scaler = MinMaxScaler()

In [10]:
x = scaler.fit_transform(x)

In [11]:
x[0]

array([0.47831936, 0.18693602, 0.42403057, 0.42931963, 0.58533255,
       0.67424929, 0.529972  , 0.31480621, 0.39374285, 0.72749419,
       0.51740542, 0.53017703, 0.2799069 , 0.63190289, 0.74417484,
       0.5808874 , 0.5110039 , 0.2731369 , 0.34014356, 0.54389622])

In [12]:
xtrain, xtest, ytrain, ytest = train_test_split(x,y,test_size = 0.33,random_state=1)

In [13]:
model = LogisticRegression()

In [14]:
model.fit(xtrain,ytrain)

In [15]:
ypred = model.predict(xtest)

In [16]:
accuracy = accuracy_score(ytest,ypred)
print(accuracy*100)

84.84848484848484


Train-Test Evaluation With Correct Data Preparation

In [18]:
xtrain, xtest, ytrain, ytest = train_test_split(x,y,test_size = 0.33,random_state=1)

In [19]:
scaler = MinMaxScaler()

In [20]:
scaler.fit(xtrain)

In [21]:
xtrain = scaler.transform(xtrain)
xtest = scaler.transform(xtest)

In [22]:
model = LogisticRegression()

In [23]:
model.fit(xtrain,ytrain)

In [24]:
ypred = model.predict(xtest)

In [25]:
accuracy = accuracy_score(ytest,ypred)

In [26]:
print(accuracy*100)

85.15151515151516


k-fold Cross-Validation Evaluation With Naive Data Preparation

In [28]:
scaler = MinMaxScaler()

In [29]:
x = scaler.fit_transform(x)

In [30]:
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold

In [31]:
model = LogisticRegression()

In [32]:
cv = RepeatedStratifiedKFold(n_splits=10 , n_repeats=20,random_state=1)

In [33]:
scores = cross_val_score(model , x,y,scoring = 'accuracy',cv = cv,n_jobs = -1)

In [35]:
from numpy import mean
from numpy import std
print(mean(scores)*100 , std(scores)*100)

85.35499999999999 3.468281274637339


 Cross-Validation Evaluation With Correct Data Preparation

In [36]:
from sklearn.pipeline import Pipeline

In [38]:
steps = list()

In [39]:
steps.append(('Scaler',MinMaxScaler()))
steps.append(('model',LogisticRegression()))
pipeline = Pipeline(steps = steps)

In [40]:
cv = RepeatedStratifiedKFold(n_splits=10,n_repeats=3,random_state=1)

In [41]:
scores = cross_val_score(pipeline,x,y,scoring='accuracy',cv=cv,n_jobs=-1)

In [42]:
mean(scores)*100 , std(scores)*100

(85.39999999999999, 3.48903042883454)