In [64]:
# Reference : https://elitedatascience.com/python-machine-learning-tutorial-scikit-learn
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split # Sampling helper
from sklearn import preprocessing # preprocessing module
from sklearn.feature_selection import SelectKBest  # Feature selection
from sklearn.ensemble import RandomForestRegressor # Random forest family
from sklearn.pipeline import make_pipeline # CV tools
from sklearn.pipeline import Pipeline 
from sklearn.model_selection import GridSearchCV # CV tools
from sklearn.metrics import mean_squared_error, r2_score # for evaluating model performance
from sklearn.externals import joblib # for saving the models 
import warnings
warnings.filterwarnings('ignore')

from sklearn.linear_model import LogisticRegression
from sklearn import svm
from sklearn import tree
import xgboost as xgb
from sklearn.metrics import mean_absolute_error
import os


In [16]:
# Load data from Web
dataset_url = 'http://mlr.cs.umass.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv'
data = pd.read_csv(dataset_url)
# We missed using the seperator while loading data
data=pd.read_csv(dataset_url, sep=';')
data.head() 

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


In [17]:
# Seperate target from training features
y = data.quality
X = data.drop('quality', axis=1)

In [18]:
# Split train test data
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=123,stratify=y) 
# random_state : To reproduce the results as always same
# stratify = y : Target variable. Eg. Data has 25% 0 and 75 % 1 in target variable -y. split will also have records with same
#                proportional target variable


In [32]:
# Construct some pipelines
pipe_lr = Pipeline([('scl', preprocessing.StandardScaler()),
                    ('select_best', SelectKBest(k=6)),
                    ('clf', LogisticRegression(random_state=42))])

pipe_svm = Pipeline([('scl', preprocessing.StandardScaler()),
                     ('select_best', SelectKBest(k=6)),
                     ('clf', svm.SVC(random_state=42))])

pipe_dt = Pipeline([('scl', preprocessing.StandardScaler()),
                    ('select_best', SelectKBest(k=6)),
                    ('clf', tree.DecisionTreeClassifier(random_state=42))])

pipe_xgb = Pipeline([('scl', preprocessing.StandardScaler()),
                    ('select_best', SelectKBest(k=6)),
                    ('clf', xgb.XGBClassifier(random_state=42))])

In [33]:
# List of pipelines for ease of iteration
pipelines = [pipe_lr, pipe_svm, pipe_dt,pipe_xgb]

# Dictionary of pipelines and classifier types for ease of reference
pipe_dict = {0: 'Logistic Regression', 1: 'Support Vector Machine', 2: 'Decision Tree', 3: 'XGB'}

In [34]:
# Compare accuracies in test data
for idx, val in enumerate(pipelines):
    val.fit(X_train, y_train)
    print('%s pipeline test accuracy: %.3f' % (pipe_dict[idx], val.score(X_test, y_test)))

Logistic Regression pipeline test accuracy: 0.572
Support Vector Machine pipeline test accuracy: 0.591
Decision Tree pipeline test accuracy: 0.622
XGB pipeline test accuracy: 0.603


In [35]:
# Identify the most accurate model on test data
best_acc = 0.0
best_clf = 0
best_pipe = ''
for idx, val in enumerate(pipelines):
    if val.score(X_test, y_test) > best_acc:
        best_acc = val.score(X_test, y_test)
        best_pipe = val
        best_clf = idx
print('Classifier with best accuracy: %s' % pipe_dict[best_clf])

Classifier with best accuracy: Decision Tree


In [37]:
# To understand which features are used in the best pipeline
# Reference : https://github.com/scikit-learn/scikit-learn/issues/7536
select_indices = best_pipe.named_steps['select_best'].transform(
    np.arange(len(X_train.columns)).reshape(1, -1)
)
feature_names = X_train.columns[select_indices]
feature_names

Index([['volatile acidity', 'citric acid', 'total sulfur dioxide', 'density', 'sulphates', 'alcohol']], dtype='object')

In [38]:
# We are selecting XGBoost and trying to tune it more
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

In [39]:
params = {
    # Parameters that we are going to tune.
    'max_depth':6,
    'min_child_weight': 1,
    'eta':.3,
    'subsample': 1,
    'colsample_bytree': 1,
    # Other parameters
    'objective':'reg:linear',
}
params['eval_metric'] = "mae"
num_boost_round = 999

In [40]:
model = xgb.train(
    params,
    dtrain,
    num_boost_round=num_boost_round,
    evals=[(dtest, "Test")],
    early_stopping_rounds=10
)

[0]	Test-mae:3.60132
Will train until Test-mae hasn't improved in 10 rounds.
[1]	Test-mae:2.52454
[2]	Test-mae:1.78974
[3]	Test-mae:1.29322
[4]	Test-mae:0.960265
[5]	Test-mae:0.745897
[6]	Test-mae:0.618605
[7]	Test-mae:0.556925
[8]	Test-mae:0.521014
[9]	Test-mae:0.503697
[10]	Test-mae:0.491839
[11]	Test-mae:0.487743
[12]	Test-mae:0.48485
[13]	Test-mae:0.479677
[14]	Test-mae:0.47905
[15]	Test-mae:0.477097
[16]	Test-mae:0.476059
[17]	Test-mae:0.467647
[18]	Test-mae:0.467863
[19]	Test-mae:0.464524
[20]	Test-mae:0.458683
[21]	Test-mae:0.454049
[22]	Test-mae:0.452066
[23]	Test-mae:0.45233
[24]	Test-mae:0.451817
[25]	Test-mae:0.448922
[26]	Test-mae:0.45043
[27]	Test-mae:0.449446
[28]	Test-mae:0.449242
[29]	Test-mae:0.449779
[30]	Test-mae:0.449267
[31]	Test-mae:0.44877
[32]	Test-mae:0.448487
[33]	Test-mae:0.447805
[34]	Test-mae:0.446794
[35]	Test-mae:0.443683
[36]	Test-mae:0.443083
[37]	Test-mae:0.442846
[38]	Test-mae:0.442079
[39]	Test-mae:0.440409
[40]	Test-mae:0.437447
[41]	Test-mae:0.4379

In [41]:
print("Best MAE: {:.2f} with {} rounds".format(
                 model.best_score,
                 model.best_iteration+1))

Best MAE: 0.43 with 63 rounds


In [42]:
cv_results = xgb.cv(
    params,
    dtrain,
    num_boost_round=num_boost_round,
    seed=42,
    nfold=5,
    metrics={'mae'},
    early_stopping_rounds=10
)

cv_results

Unnamed: 0,test-mae-mean,test-mae-std,train-mae-mean,train-mae-std
0,3.603634,0.037727,3.602256,0.006436
1,2.526601,0.035555,2.527314,0.005073
2,1.780472,0.034488,1.777668,0.003358
3,1.274145,0.033301,1.257809,0.002446
4,0.943049,0.029007,0.902930,0.002072
5,0.725343,0.022921,0.664967,0.002062
6,0.600883,0.019795,0.512613,0.004750
7,0.536407,0.018764,0.425674,0.004549
8,0.505434,0.015999,0.373991,0.005113
9,0.491461,0.014334,0.343761,0.007701


In [43]:
cv_results['test-mae-mean'].min()

0.4420398

In [45]:
# ####  Parameters max_depth and min_child_weight   #####

# You can try wider intervals with a larger step between
# each value and then narrow it down. Here after several
# iteration I found that the optimal value was in the
# following ranges.
gridsearch_params = [
    (max_depth, min_child_weight)
    for max_depth in range(9,12)
    for min_child_weight in range(5,8)
]

# Define initial best params and MAE
min_mae = float("Inf")
best_params = None
for max_depth, min_child_weight in gridsearch_params:
    print("CV with max_depth={}, min_child_weight={}".format(
                             max_depth,
                             min_child_weight))

    # Update our parameters
    params['max_depth'] = max_depth
    params['min_child_weight'] = min_child_weight

    # Run CV
    cv_results = xgb.cv(
        params,
        dtrain,
        num_boost_round=num_boost_round,
        seed=42,
        nfold=5,
        metrics={'mae'},
        early_stopping_rounds=10
    )

    # Update best MAE
    mean_mae = cv_results['test-mae-mean'].min()
    boost_rounds = cv_results['test-mae-mean'].argmin()
    print("\tMAE {} for {} rounds".format(mean_mae, boost_rounds))
    if mean_mae < min_mae:
        min_mae = mean_mae
        best_params = (max_depth,min_child_weight)

print("Best params: {}, {}, MAE: {}".format(best_params[0], best_params[1], min_mae))

CV with max_depth=9, min_child_weight=5
	MAE 0.444947 for 145 rounds
CV with max_depth=9, min_child_weight=6
	MAE 0.4401874 for 165 rounds
CV with max_depth=9, min_child_weight=7
	MAE 0.45239060000000003 for 180 rounds
CV with max_depth=10, min_child_weight=5
	MAE 0.4440518 for 129 rounds
CV with max_depth=10, min_child_weight=6
	MAE 0.4409674 for 158 rounds
CV with max_depth=10, min_child_weight=7
	MAE 0.44899940000000005 for 216 rounds
CV with max_depth=11, min_child_weight=5
	MAE 0.4473432 for 118 rounds
CV with max_depth=11, min_child_weight=6
	MAE 0.4369554 for 163 rounds
CV with max_depth=11, min_child_weight=7
	MAE 0.4467796 for 156 rounds
Best params: 11, 6, MAE: 0.4369554


In [46]:
params['max_depth'] = 11
params['min_child_weight'] = 6

In [47]:
# ####  Parameters subsample and colsample_bytree   #####

gridsearch_params = [
    (subsample, colsample)
    for subsample in [i/10. for i in range(7,11)]
    for colsample in [i/10. for i in range(7,11)]
]
min_mae = float("Inf")
best_params = None

# We start by the largest values and go down to the smallest
for subsample, colsample in reversed(gridsearch_params):
    print("CV with subsample={}, colsample={}".format(
                             subsample,
                             colsample))

    # We update our parameters
    params['subsample'] = subsample
    params['colsample_bytree'] = colsample

    # Run CV
    cv_results = xgb.cv(
        params,
        dtrain,
        num_boost_round=num_boost_round,
        seed=42,
        nfold=5,
        metrics={'mae'},
        early_stopping_rounds=10
    )

    # Update best score
    mean_mae = cv_results['test-mae-mean'].min()
    boost_rounds = cv_results['test-mae-mean'].argmin()
    print("\tMAE {} for {} rounds".format(mean_mae, boost_rounds))
    if mean_mae < min_mae:
        min_mae = mean_mae
        best_params = (subsample,colsample)

print("Best params: {}, {}, MAE: {}".format(best_params[0], best_params[1], min_mae))

CV with subsample=1.0, colsample=1.0
	MAE 0.4369554 for 163 rounds
CV with subsample=1.0, colsample=0.9
	MAE 0.44720079999999995 for 175 rounds
CV with subsample=1.0, colsample=0.8
	MAE 0.44263059999999993 for 140 rounds
CV with subsample=1.0, colsample=0.7
	MAE 0.4411468 for 144 rounds
CV with subsample=0.9, colsample=1.0
	MAE 0.4411554000000001 for 155 rounds
CV with subsample=0.9, colsample=0.9
	MAE 0.4441414 for 173 rounds
CV with subsample=0.9, colsample=0.8
	MAE 0.43517339999999993 for 174 rounds
CV with subsample=0.9, colsample=0.7
	MAE 0.44569200000000003 for 159 rounds
CV with subsample=0.8, colsample=1.0
	MAE 0.4593526 for 193 rounds
CV with subsample=0.8, colsample=0.9
	MAE 0.45411660000000004 for 203 rounds
CV with subsample=0.8, colsample=0.8
	MAE 0.45859440000000007 for 186 rounds
CV with subsample=0.8, colsample=0.7
	MAE 0.4529866 for 189 rounds
CV with subsample=0.7, colsample=1.0
	MAE 0.45007459999999994 for 230 rounds
CV with subsample=0.7, colsample=0.9
	MAE 0.459487

In [48]:
params['subsample'] = .9
params['colsample_bytree'] = 0.8

In [56]:
# ####  Parameters ETA   #####
# This can take some time…
min_mae = float("Inf")
best_params = None
for eta in [.3, .2, .1, .05, .01, .005]:
    print("CV with eta={}".format(eta))
    # We update our parameters
    params['eta'] = eta
    # Run and time CV
    cv_results = xgb.cv(params,dtrain,num_boost_round=num_boost_round,seed=42,nfold=5,metrics=['mae'],early_stopping_rounds=10
                              )
    # Update best score
    mean_mae = cv_results['test-mae-mean'].min()
    boost_rounds = cv_results['test-mae-mean'].argmin()
    print("\tMAE {} for {} rounds\n".format(mean_mae, boost_rounds))
    if mean_mae < min_mae:
        min_mae = mean_mae
        best_params = eta
print("Best params: {}, MAE: {}".format(best_params, min_mae))

CV with eta=0.3
	MAE 0.43517339999999993 for 174 rounds

CV with eta=0.2
	MAE 0.43249960000000004 for 245 rounds

CV with eta=0.1
	MAE 0.4244778 for 481 rounds

CV with eta=0.05
	MAE 0.42598579999999997 for 912 rounds

CV with eta=0.01
	MAE 0.4300216 for 996 rounds

CV with eta=0.005
	MAE 0.44242780000000004 for 998 rounds

Best params: 0.1, MAE: 0.4244778


In [57]:
params['eta'] = .01

In [58]:
params

{'colsample_bytree': 0.8,
 'eta': 0.01,
 'eval_metric': 'mae',
 'max_depth': 11,
 'min_child_weight': 6,
 'objective': 'reg:linear',
 'subsample': 0.9}

In [59]:
model = xgb.train(
    params,
    dtrain,
    num_boost_round=num_boost_round,
    evals=[(dtest, "Test")],
    early_stopping_rounds=10
)
print("Best MAE: {:.2f} in {} rounds".format(model.best_score, model.best_iteration+1))

[0]	Test-mae:5.07983
Will train until Test-mae hasn't improved in 10 rounds.
[1]	Test-mae:5.02927
[2]	Test-mae:4.9796
[3]	Test-mae:4.93004
[4]	Test-mae:4.88088
[5]	Test-mae:4.83246
[6]	Test-mae:4.78458
[7]	Test-mae:4.73696
[8]	Test-mae:4.68962
[9]	Test-mae:4.64286
[10]	Test-mae:4.5965
[11]	Test-mae:4.55097
[12]	Test-mae:4.50567
[13]	Test-mae:4.46131
[14]	Test-mae:4.41675
[15]	Test-mae:4.37285
[16]	Test-mae:4.3294
[17]	Test-mae:4.28629
[18]	Test-mae:4.24366
[19]	Test-mae:4.20155
[20]	Test-mae:4.15973
[21]	Test-mae:4.11863
[22]	Test-mae:4.07766
[23]	Test-mae:4.03746
[24]	Test-mae:3.99728
[25]	Test-mae:3.95751
[26]	Test-mae:3.91846
[27]	Test-mae:3.87951
[28]	Test-mae:3.84103
[29]	Test-mae:3.80279
[30]	Test-mae:3.76532
[31]	Test-mae:3.72782
[32]	Test-mae:3.69092
[33]	Test-mae:3.65428
[34]	Test-mae:3.61804
[35]	Test-mae:3.58212
[36]	Test-mae:3.54651
[37]	Test-mae:3.5111
[38]	Test-mae:3.47625
[39]	Test-mae:3.44164
[40]	Test-mae:3.40784
[41]	Test-mae:3.37388
[42]	Test-mae:3.34043
[43]	Test-ma

[353]	Test-mae:0.452503
[354]	Test-mae:0.452074
[355]	Test-mae:0.45149
[356]	Test-mae:0.451042
[357]	Test-mae:0.450541
[358]	Test-mae:0.45022
[359]	Test-mae:0.449902
[360]	Test-mae:0.449369
[361]	Test-mae:0.448814
[362]	Test-mae:0.448453
[363]	Test-mae:0.447886
[364]	Test-mae:0.447347
[365]	Test-mae:0.446837
[366]	Test-mae:0.446342
[367]	Test-mae:0.445861
[368]	Test-mae:0.445502
[369]	Test-mae:0.445126
[370]	Test-mae:0.444594
[371]	Test-mae:0.44401
[372]	Test-mae:0.44381
[373]	Test-mae:0.443505
[374]	Test-mae:0.443081
[375]	Test-mae:0.442677
[376]	Test-mae:0.442432
[377]	Test-mae:0.442189
[378]	Test-mae:0.441795
[379]	Test-mae:0.441252
[380]	Test-mae:0.440955
[381]	Test-mae:0.440533
[382]	Test-mae:0.440243
[383]	Test-mae:0.439996
[384]	Test-mae:0.439634
[385]	Test-mae:0.439113
[386]	Test-mae:0.438851
[387]	Test-mae:0.438659
[388]	Test-mae:0.438353
[389]	Test-mae:0.437955
[390]	Test-mae:0.437594
[391]	Test-mae:0.437183
[392]	Test-mae:0.436874
[393]	Test-mae:0.436506
[394]	Test-mae:0.436

[696]	Test-mae:0.405255
[697]	Test-mae:0.405208
[698]	Test-mae:0.405188
[699]	Test-mae:0.40513
[700]	Test-mae:0.405117
[701]	Test-mae:0.40504
[702]	Test-mae:0.404974
[703]	Test-mae:0.404977
[704]	Test-mae:0.404883
[705]	Test-mae:0.404814
[706]	Test-mae:0.404828
[707]	Test-mae:0.404829
[708]	Test-mae:0.404761
[709]	Test-mae:0.40479
[710]	Test-mae:0.404797
[711]	Test-mae:0.404788
[712]	Test-mae:0.40471
[713]	Test-mae:0.404706
[714]	Test-mae:0.40455
[715]	Test-mae:0.404496
[716]	Test-mae:0.404453
[717]	Test-mae:0.404386
[718]	Test-mae:0.404342
[719]	Test-mae:0.404296
[720]	Test-mae:0.404163
[721]	Test-mae:0.404143
[722]	Test-mae:0.404136
[723]	Test-mae:0.404071
[724]	Test-mae:0.404125
[725]	Test-mae:0.404146
[726]	Test-mae:0.40417
[727]	Test-mae:0.40402
[728]	Test-mae:0.403981
[729]	Test-mae:0.403923
[730]	Test-mae:0.403867
[731]	Test-mae:0.403724
[732]	Test-mae:0.403703
[733]	Test-mae:0.403708
[734]	Test-mae:0.403674
[735]	Test-mae:0.403578
[736]	Test-mae:0.403593
[737]	Test-mae:0.403613

In [62]:
num_boost_round = model.best_iteration + 1
best_model = xgb.train(
    params,
    dtrain,
    num_boost_round=num_boost_round,
    evals=[(dtest, "Test")]
)
mean_absolute_error(best_model.predict(dtest), y_test)

[0]	Test-mae:5.07983
[1]	Test-mae:5.02927
[2]	Test-mae:4.9796
[3]	Test-mae:4.93004
[4]	Test-mae:4.88088
[5]	Test-mae:4.83246
[6]	Test-mae:4.78458
[7]	Test-mae:4.73696
[8]	Test-mae:4.68962
[9]	Test-mae:4.64286
[10]	Test-mae:4.5965
[11]	Test-mae:4.55097
[12]	Test-mae:4.50567
[13]	Test-mae:4.46131
[14]	Test-mae:4.41675
[15]	Test-mae:4.37285
[16]	Test-mae:4.3294
[17]	Test-mae:4.28629
[18]	Test-mae:4.24366
[19]	Test-mae:4.20155
[20]	Test-mae:4.15973
[21]	Test-mae:4.11863
[22]	Test-mae:4.07766
[23]	Test-mae:4.03746
[24]	Test-mae:3.99728
[25]	Test-mae:3.95751
[26]	Test-mae:3.91846
[27]	Test-mae:3.87951
[28]	Test-mae:3.84103
[29]	Test-mae:3.80279
[30]	Test-mae:3.76532
[31]	Test-mae:3.72782
[32]	Test-mae:3.69092
[33]	Test-mae:3.65428
[34]	Test-mae:3.61804
[35]	Test-mae:3.58212
[36]	Test-mae:3.54651
[37]	Test-mae:3.5111
[38]	Test-mae:3.47625
[39]	Test-mae:3.44164
[40]	Test-mae:3.40784
[41]	Test-mae:3.37388
[42]	Test-mae:3.34043
[43]	Test-mae:3.30711
[44]	Test-mae:3.27406
[45]	Test-mae:3.24172
[4

[355]	Test-mae:0.45149
[356]	Test-mae:0.451042
[357]	Test-mae:0.450541
[358]	Test-mae:0.45022
[359]	Test-mae:0.449902
[360]	Test-mae:0.449369
[361]	Test-mae:0.448814
[362]	Test-mae:0.448453
[363]	Test-mae:0.447886
[364]	Test-mae:0.447347
[365]	Test-mae:0.446837
[366]	Test-mae:0.446342
[367]	Test-mae:0.445861
[368]	Test-mae:0.445502
[369]	Test-mae:0.445126
[370]	Test-mae:0.444594
[371]	Test-mae:0.44401
[372]	Test-mae:0.44381
[373]	Test-mae:0.443505
[374]	Test-mae:0.443081
[375]	Test-mae:0.442677
[376]	Test-mae:0.442432
[377]	Test-mae:0.442189
[378]	Test-mae:0.441795
[379]	Test-mae:0.441252
[380]	Test-mae:0.440955
[381]	Test-mae:0.440533
[382]	Test-mae:0.440243
[383]	Test-mae:0.439996
[384]	Test-mae:0.439634
[385]	Test-mae:0.439113
[386]	Test-mae:0.438851
[387]	Test-mae:0.438659
[388]	Test-mae:0.438353
[389]	Test-mae:0.437955
[390]	Test-mae:0.437594
[391]	Test-mae:0.437183
[392]	Test-mae:0.436874
[393]	Test-mae:0.436506
[394]	Test-mae:0.436182
[395]	Test-mae:0.435789
[396]	Test-mae:0.435

[698]	Test-mae:0.405188
[699]	Test-mae:0.40513
[700]	Test-mae:0.405117
[701]	Test-mae:0.40504
[702]	Test-mae:0.404974
[703]	Test-mae:0.404977
[704]	Test-mae:0.404883
[705]	Test-mae:0.404814
[706]	Test-mae:0.404828
[707]	Test-mae:0.404829
[708]	Test-mae:0.404761
[709]	Test-mae:0.40479
[710]	Test-mae:0.404797
[711]	Test-mae:0.404788
[712]	Test-mae:0.40471
[713]	Test-mae:0.404706
[714]	Test-mae:0.40455
[715]	Test-mae:0.404496
[716]	Test-mae:0.404453
[717]	Test-mae:0.404386
[718]	Test-mae:0.404342
[719]	Test-mae:0.404296
[720]	Test-mae:0.404163
[721]	Test-mae:0.404143
[722]	Test-mae:0.404136
[723]	Test-mae:0.404071
[724]	Test-mae:0.404125
[725]	Test-mae:0.404146
[726]	Test-mae:0.40417
[727]	Test-mae:0.40402
[728]	Test-mae:0.403981
[729]	Test-mae:0.403923
[730]	Test-mae:0.403867
[731]	Test-mae:0.403724
[732]	Test-mae:0.403703
[733]	Test-mae:0.403708
[734]	Test-mae:0.403674
[735]	Test-mae:0.403578
[736]	Test-mae:0.403593
[737]	Test-mae:0.403613
[738]	Test-mae:0.403646
[739]	Test-mae:0.403581

0.3952957034111023

In [66]:
best_model.save_model("my_model.model")
print(os.getcwd())
loaded_model = xgb.Booster()
loaded_model.load_model("my_model.model")

# And use it for predictions.
loaded_model.predict(dtest)

C:\Users\S719789


array([6.3422294, 5.53761  , 4.824486 , 5.5407825, 6.5453014, 5.6014166,
       4.9063163, 4.5156164, 5.007569 , 5.9649997, 5.4268074, 5.8138485,
       5.8935633, 5.0658545, 5.8576674, 5.8261523, 6.6784153, 5.9021482,
       5.862014 , 6.990755 , 5.290447 , 5.804095 , 5.033767 , 6.225123 ,
       6.02978  , 5.04914  , 5.668996 , 5.1588306, 6.2753477, 5.960626 ,
       5.997584 , 6.6348147, 6.021369 , 4.967975 , 4.7627974, 6.020675 ,
       4.935376 , 6.3050947, 4.7564244, 6.0034094, 4.880407 , 6.1122694,
       6.700346 , 5.123817 , 6.0976863, 5.431679 , 5.546999 , 5.6332965,
       5.142326 , 6.7484183, 5.773475 , 4.9480267, 5.6759753, 5.27335  ,
       5.4737463, 5.4351625, 5.1378493, 5.4285407, 4.970747 , 5.481803 ,
       5.409589 , 5.096921 , 4.935816 , 5.964625 , 6.23167  , 4.974471 ,
       6.3100457, 5.011791 , 4.9881473, 6.8983784, 5.8802752, 5.7458377,
       5.0196958, 4.9791074, 5.361423 , 5.98528  , 5.286178 , 5.120259 ,
       5.043299 , 5.3218675, 6.577366 , 5.753221 , 