### Outline:

0. Load libraries and custom functions.
1. Load data.
2. Preliminary data analysis: explore features and a target, delete unneeded features, create new features.
3. Train-test split.
4. Missing values. In some cases it may be useful to explore skew and perform log-transform before imputing missing values.
5. Feature engineering. Transform skewed variables, apply one-hot encoding (OHE) and scaling.
6. Fit models.
7. Evaluate models.
8. Feature importance, error analysis. Based on the results, go to 2. and iterate.
9. Make predictions.

To do:
- Try using the ROC curve directly.
- Use feature importance to eliminate most features (tried: does not work).
- Try PCA (tried: does not work).
- [If everything else fails] try a neural network.


In [63]:
import numpy as np
import pandas as pd
import os, time, warnings, random
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer, make_column_transformer
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import LabelBinarizer, LabelEncoder, OrdinalEncoder, OneHotEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score, cross_val_predict, GridSearchCV, train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier, VotingClassifier
from sklearn.metrics import accuracy_score, f1_score, roc_curve
from sklearn.inspection import permutation_importance
from sklearn.decomposition import PCA
from xgboost import XGBClassifier

pd.set_option('display.max_columns', 20)
pd.set_option('mode.chained_assignment', None)
pd.set_option('display.expand_frame_repr', False)
warnings.filterwarnings('ignore')

def draw_histograms(df, variables, n_rows, n_cols):
    """Plot a 10-bin histogram of each listed column on an n_rows x n_cols grid.

    Adapted from
    https://stackoverflow.com/questions/29530355/plotting-multiple-histograms-in-grid
    """
    figure = plt.figure()
    # Subplot positions are 1-based, hence start=1.
    for position, column in enumerate(variables, start=1):
        axis = figure.add_subplot(n_rows, n_cols, position)
        df[column].hist(bins=10, ax=axis)
        axis.set_title(column + " Distribution")
    figure.tight_layout()
    plt.show()


def fillna_mp_i1(df_train, df_test, df_pred, num_features, cat_features, num_fill='median', cat_fill='mode'):
    """Impute missing values in the train/test/prediction frames, in place.

    Numeric features are filled with the training-set median and categorical
    features with the training-set mode, so no statistic is learned from the
    test or prediction data.

    Args:
        df_train: training frame; the only source of the fill statistics.
        df_test: hold-out frame.
        df_pred: prediction frame, or None if it does not exist.
        num_features: names of the numeric columns to impute.
        cat_features: names of the categorical columns to impute.
        num_fill: only 'median' is implemented.
        cat_fill: only 'mode' is implemented.

    Returns:
        None. Prints whether any missing values remain in the listed columns
        (or that the requested imputation method is not implemented).

    Example: fillna_mp_i1(X_train, X_test, X_pred, num_cols, cat_cols)
    """
    if not ((cat_fill == 'mode') and (num_fill == 'median')):
        print ('Imputation method not Implemented yet!')
        return None

    num_features = list(num_features)
    cat_features = list(cat_features)

    # df_pred is optional: every step below only touches frames that exist.
    frames = [df_train, df_test]
    if df_pred is not None:
        frames.append(df_pred)

    if num_features:
        # Computed once, before any filling, from the training data only.
        medians = df_train[num_features].median()
        for frame in frames:
            frame[num_features] = frame[num_features].fillna(value=medians)

    if cat_features:
        modes = df_train[cat_features].mode()
        # mode() is empty when every categorical value in train is missing;
        # in that case nothing can be imputed and the check below reports it.
        if len(modes) > 0:
            most_frequent = modes.iloc[0]
            for frame in frames:
                frame[cat_features] = frame[cat_features].fillna(value=most_frequent)

    # A frame is complete when its non-null count equals its number of cells.
    checked = num_features + cat_features
    all_good = all(
        np.prod(frame[checked].shape) == frame[checked].count().sum()
        for frame in frames
    )
    if all_good:
        print('Missing values imputed successfully')
    else:
        print('There are still some missing values...')
    
def add_misDummy_mp_i1(df_train, df_test, df_pred, features):
    """Add a 'mis<feature>' indicator column (1 = value missing, 0 = present)
    to each dataset, in place, for every feature listed.
    Pass df_pred=None when there is no prediction set.
    Example: add_misDummy_mp_i1(X_train, X_test, X_pred, ['Age'])"""
    datasets = [df_train, df_test]
    if df_pred is not None:
        datasets.append(df_pred)

    for name in features:
        flag_col = 'mis' + name
        for data in datasets:
            missing = data[name].isnull()
            data.loc[missing, flag_col] = 1
            data.loc[~missing, flag_col] = 0
   

def discretize_mp_i1(df_train, df_test, df_pred, feature, ntiles, delete_feature=False):
    """Bin a continuous feature into quantile groups, in place.

    Bin edges are the quantiles of the training data only (duplicate edges are
    dropped, so fewer than `ntiles` bins may result). The same edges are then
    applied to every frame, and the integer bin index is stored in a new
    '<feature>Ntile' column. Values outside the training range fall outside
    every bin and therefore become NaN.

    Args:
        df_train: training frame; source of the bin edges.
        df_test: hold-out frame.
        df_pred: prediction frame, or None if it does not exist.
        feature: name of the column to discretize.
        ntiles: requested number of quantile bins.
        delete_feature: if true, drop the original column from every frame.

    Returns:
        None. Prints the number of bins actually created.

    Example: discretize_mp_i1(X_train, X_test, X_pred, 'Age', 15)
    """
    _, edges = pd.qcut(df_train[feature], ntiles, retbins=True, labels=False, duplicates='drop')

    # df_pred is optional; only operate on frames that exist (the original
    # crashed on df_pred=None when delete_feature was set).
    frames = [df_train, df_test]
    if df_pred is not None:
        frames.append(df_pred)

    ntile_col = feature + 'Ntile'
    for frame in frames:
        frame[ntile_col] = pd.cut(frame[feature], bins=edges, labels=False,
                                  duplicates='drop', include_lowest=True)

    if delete_feature:
        for frame in frames:
            frame.drop(columns=[feature], inplace=True)

    print('Discretized ',feature, ' into ', len(edges)-1, ' bins')


def log_transformer_mp_i1(df_train, df_test, df_pred, feature_subset=False, min_skew=3):
    """Apply log1p, in place, to the strongly skewed numeric features.

    A feature is transformed when the absolute skewness of its training
    values exceeds `min_skew`. The decision is made on the training data only
    and the same columns are then transformed in every frame.

    Args:
        df_train: training frame; source of the skewness estimate.
        df_test: hold-out frame.
        df_pred: prediction frame, or None if it does not exist.
        feature_subset: iterable of column names eligible for transformation;
            False (the default) or None means every column of df_train.
        min_skew: absolute-skewness threshold.

    Returns:
        None. Prints the list of transformed columns (in df_train column order).

    Note: np.log1p yields NaN for values below -1 and -inf at exactly -1, so
    the candidate columns should be greater than -1.

    Example: log_transformer_mp_i1(X_train, X_test, X_pred, feature_subset=num_cols)
    """
    # Identity checks instead of `== False`, which is ambiguous for arrays/Index.
    if feature_subset is False or feature_subset is None:
        candidates = set(df_train.columns)
    else:
        candidates = set(feature_subset)

    # Skewness is computed once; numeric_only avoids errors on non-numeric columns.
    skewness = df_train.skew(numeric_only=True)
    skewed_vars = skewness[skewness.abs() > min_skew].index
    # A list (not a set) keeps the processing and reporting order deterministic.
    to_transform = [col for col in skewed_vars if col in candidates]

    frames = [df_train, df_test]
    if df_pred is not None:
        frames.append(df_pred)

    for col in to_transform:
        for frame in frames:
            frame[col] = np.log1p(frame[col])
    print('Skewed columns log-transformed: ', to_transform)
    
    
def add_dummyfeatures(df_train, df_test, df_pred, feature_dict):
    """Add indicator columns flagging where a feature equals a given value.

    For every (feature, value) pair in `feature_dict`, a column named
    str(feature)+str(value) is added in place to each frame, holding 1.0 where
    the feature equals the value and 0.0 elsewhere (missing values compare
    unequal and therefore get 0.0).

    Args:
        df_train: training frame.
        df_test: hold-out frame.
        df_pred: prediction frame, or None if it does not exist (accepted for
            consistency with the other *_mp_i1 helpers).
        feature_dict: mapping of column name -> value to flag.

    Returns:
        None. Prints how many columns were added to each frame.

    Example: add_dummyfeatures(X_train, X_test, X_pred, {'RoomService':0, 'Spa':0, 'VRDeck':0, 'ShoppingMall':0})
    """
    frames = [df_train, df_test]
    if df_pred is not None:
        frames.append(df_pred)

    input_dimensions = np.array([frame.shape[1] for frame in frames])
    for feature, value in feature_dict.items():
        dummy_name = str(feature) + str(value)
        for frame in frames:
            frame[dummy_name] = (frame[feature] == value).astype(float)
    output_dimensions = np.array([frame.shape[1] for frame in frames])
    print(output_dimensions-input_dimensions, ' variables created')
    

In [64]:
# 1. Import data #

time0 = time.time()  # start of the whole-notebook timer

path = '../input/santander-customer-transaction-prediction/train.csv'
df = pd.read_csv(path)
df0 = df.copy()  # untouched copy of the full training set
# Work on a 50,000-row subsample to keep model fitting fast. The seed is fixed so
# that the subsample, and every score computed from it, is reproducible across runs.
df = df.sample(50000, random_state=1)

pred = pd.read_csv('../input/santander-customer-transaction-prediction/test.csv')
pred0 = pred.copy()  # untouched copy; keeps ID_code available for the submission file

print(df.shape, pred.shape)
print(df.target.mean())
# unbalanced response variable: the mean of `target` is the share of positive rows
df.head()

(50000, 202) (200000, 201)
0.09932


Unnamed: 0,ID_code,target,var_0,var_1,var_2,var_3,var_4,var_5,var_6,var_7,...,var_190,var_191,var_192,var_193,var_194,var_195,var_196,var_197,var_198,var_199
29766,train_29766,0,4.0575,3.1282,10.9422,5.7491,10.9238,5.1884,4.218,17.1228,...,5.9104,6.0949,0.2068,0.9172,20.5987,0.9002,-3.0356,10.4767,19.3722,12.5272
160350,train_160350,0,11.0842,0.7578,12.29,6.2904,9.2915,5.6645,7.3467,16.6274,...,-0.9506,8.1208,1.2807,0.9828,15.3905,0.5163,-2.4259,9.5435,13.7225,-6.3445
77021,train_77021,0,9.742,-3.0096,9.9384,8.7416,9.4978,-16.4983,6.4157,11.6054,...,0.2453,7.178,2.4672,5.6003,11.1643,-1.0242,8.4328,9.4066,17.3258,-1.8064
170658,train_170658,0,12.7545,-7.1376,12.4236,6.5482,12.4901,-10.8539,5.7909,14.215,...,-1.8734,4.0006,-0.6499,0.9699,17.606,-0.976,10.5717,9.6052,14.4371,-24.0395
57252,train_57252,0,12.7732,-1.2835,17.899,6.6622,11.8737,5.205,6.0504,17.6241,...,-1.4776,3.3151,3.401,1.8828,16.4264,1.6129,-3.6881,9.3074,16.2287,-15.9875


In [65]:
# 2. EDA #

# All features are anonymous (var_0 ... var_199) and, per the author, roughly normally
# distributed, so there is not much EDA or feature engineering to do.
# ID_code is a row identifier, not a feature.
df.drop(columns = ['ID_code'], inplace= True)

# 3. Train-test split #

# Double brackets keep the target as a one-column DataFrame (shape (n, 1)), not a Series.
train_y = df[['target']]
train_x = df.drop(columns = ['target'])
# Features of the competition test file; predictions for the submission are made on these.
X_pred = pred.copy()
X_pred.drop(columns = ['ID_code'], inplace= True)
# Optional restriction to the features selected in section 8 (disabled):
#train_x = train_x[features_i1]
#X_pred = X_pred[features_i1]

# Hold out 2% of the sampled rows for out-of-sample evaluation; the split is seeded.
# NOTE(review): the split is not stratified and the target is unbalanced, so the share of
# positives in the small hold-out set can drift — consider stratify=train_y.
X_train, X_test, y_train, y_test = train_test_split(train_x, train_y, test_size = 0.02, random_state=1)
print(X_train.shape, X_test.shape, y_train.shape, X_pred.shape)

# True when the number of non-null cells equals the total number of cells.
X_train.count().sum() == np.prod(X_train.shape)
# no missing values, good.
# no missing values, good.

(49000, 200) (1000, 200) (49000, 1) (200000, 200)


True

In [66]:
# 5. feature engineering #

# Standardise every feature with the mean/std of the training split only, then apply
# the same fitted scaler to the hold-out and prediction sets. StandardScaler works
# column-wise, so one fit over all columns gives the same numbers as fitting each
# column separately, while leaving `ss` fitted on (and reusable for) all features.
ss = StandardScaler()
scaled_cols = list(X_train.columns)
X_train[scaled_cols] = ss.fit_transform(X_train[scaled_cols])
X_test[scaled_cols] = ss.transform(X_test[scaled_cols])
X_pred[scaled_cols] = ss.transform(X_pred[scaled_cols])

#X_test.iloc[:,:30].describe()
# Seeded pick of five feature names; used by the KNN cell to keep it tractable.
random.seed(1)
fewfeatures = random.sample(list(X_train.columns),5)

In [69]:
# 5.1 try PCA 

#pca = PCA(n_components=100)
#X_train = pca.fit_transform(X_train)
#X_test = pca.transform(X_test)


In [70]:
# Display the current training features.
# NOTE(review): the captured output is a NumPy array rather than a DataFrame, which suggests
# it came from a run with the PCA lines enabled — confirm before relying on it.
X_train

array([[-1.47896839,  1.49130082,  0.00890269, ...,  0.11907016,
        -1.45367916, -0.02672313],
       [-0.11243186, -0.80810058,  0.66568296, ...,  0.1414197 ,
        -1.1915048 , -0.37776644],
       [-0.86432551, -0.13644105, -1.57463357, ...,  1.47395466,
         1.38866493,  0.04686168],
       ...,
       [-0.43895756,  1.14252199,  1.41333481, ..., -0.74365259,
         0.09456825,  0.93623754],
       [-0.80432707,  0.16645594, -0.21297424, ..., -0.57880697,
        -0.94506783, -1.2441951 ],
       [-0.5152883 ,  0.71151737,  0.40286025, ...,  0.4692386 ,
         0.00521073,  0.60564372]])

In [71]:
# 6. Fit models #

# Logistic regression: grid-search the inverse regularisation strength C with
# 2-fold cross-validation, selecting by F1.
time1 = time.time()
lr = LogisticRegression()
param_grid = {'C':[0.003, 0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30]}
lrm = GridSearchCV(lr, param_grid, cv=2, scoring='f1')
lrm.fit(X_train, y_train)
# Printed: best params, cross-validated F1, in-sample F1 of the refitted model, elapsed seconds.
print('Logistic ', lrm.best_params_, lrm.best_score_, f1_score(y_train, lrm.predict(X_train)), time.time()-time1)

Logistic  {'C': 3} 0.31554391409898885 0.3167604752970607 1.896143913269043


In [None]:
# K-nearest neighbours, fitted on the five features in `fewfeatures` only
# (presumably to keep the neighbour search affordable).
time1 = time.time()
knn = KNeighborsClassifier(n_jobs=-1)
param_grid = dict(n_neighbors=range(10, 41, 10))
# Select by F1 like every other grid search in this notebook; the default scorer
# (accuracy) is misleading for a target with only ~10% positives.
knnm = GridSearchCV(knn, param_grid, cv=2, scoring='f1')
knnm.fit(X_train[fewfeatures], y_train)
# Printed: best params, cross-validated F1, in-sample F1, elapsed seconds.
print('KNN ', knnm.best_params_, knnm.best_score_, f1_score(y_train, knnm.predict(X_train[fewfeatures])), time.time()-time1)

In [None]:
# Random forest: 100 trees, 10 candidate features per split; only max_depth is searched
# (2-fold CV, selected by F1).
time1 = time.time()
rf = RandomForestClassifier(n_jobs=-1)
param_grid = {'n_estimators':[100], 'max_depth':[4,5,6], 'max_features':[10]}
rfm = GridSearchCV(rf, param_grid, cv=2, scoring = 'f1')
rfm.fit(X_train, y_train)
# Printed: best params, cross-validated F1, in-sample F1, elapsed seconds.
print('RF ', rfm.best_params_, rfm.best_score_, f1_score(y_train, rfm.predict(X_train)), time.time()-time1)

In [72]:
# Gaussian naive Bayes: search var_smoothing over 17 log-spaced values from 1e-3 down
# to 1e-7 (4-fold CV, selected by F1).
time1 = time.time()
param_grid_nb = {
    'var_smoothing': np.logspace(-3,-7, num=17)
}
nb = GaussianNB()
nbm = GridSearchCV(nb, param_grid_nb, cv=4, scoring='f1')
nbm.fit(X_train, y_train)
# Printed: best params, cross-validated F1, in-sample F1, elapsed seconds.
print('NB ', nbm.best_params_, nbm.best_score_, f1_score(y_train, nbm.predict(X_train)), time.time()-time1)

NB  {'var_smoothing': 3.1622776601683795e-05} 0.334513549052591 0.3462897526501767 5.09296441078186


In [73]:
# Gradient-boosted trees on the GPU; only max_depth is actually searched, the other
# grid entries are single fixed values (2-fold CV, selected by F1).
# NOTE(review): tree_method='gpu_hist' and gpu_id are deprecated in xgboost >= 2.0 in
# favour of device='cuda' with tree_method='hist' — confirm against the installed version.
time1 = time.time()
xgb = XGBClassifier(tree_method='gpu_hist', gpu_id=0, min_child_weight=3, n_jobs=-1)
param_grid = {'n_estimators':[200], 'max_depth':[2,3], 'eta':[0.35],
'subsample':[0.3],'colsample_bytree':[0.6]}
xgbm = GridSearchCV(xgb, param_grid, cv=2, scoring='f1')
xgbm.fit(X_train, y_train)
# Printed: best params, cross-validated F1, in-sample F1, elapsed seconds.
print('XGB ', xgbm.best_params_, xgbm.best_score_, f1_score(y_train, xgbm.predict(X_train)), time.time()-time1)


XGB  {'colsample_bytree': 0.6, 'eta': 0.35, 'max_depth': 2, 'n_estimators': 200, 'subsample': 0.3} 0.34585549578355335 0.4165251839275608 1.9083058834075928


In [74]:
# 7. Evaluate models: out-of-sample F1 on the hold-out split #

print('Out of Sample:')
print('Logistic ', f1_score(y_test, lrm.predict(X_test)))
# Disabled lines below: svmm is not defined in the visible cells, and the KNN / RF cells
# were not executed (their cell markers read "In [None]").
#print('SVM ', accuracy_score(y_test, svmm.predict(X_test)))
#print('KNN ', accuracy_score(y_test, knnm.predict(X_test[fewfeatures])))
print('Bayes ', f1_score(y_test, nbm.predict(X_test)))
#print('RF ', f1_score(y_test, rfm.predict(X_test)))
print('XGB ', f1_score(y_test, xgbm.predict(X_test)))
# Seconds elapsed since time0 was set in the data-import cell.
print('Total time ', time.time()-time0)

Out of Sample:
Logistic  0.25
Bayes  0.25925925925925924
XGB  0.21238938053097342
Total time  167.97339057922363


In [75]:
# VotingClassifier:
# Ensemble of naive Bayes and XGBoost with hand-set hyper-parameters; the logistic
# regression and random forest members are disabled.

estimator = []
#estimator.append(('LR', LogisticRegression(C=10)))
estimator.append(('NB', GaussianNB(var_smoothing = 1e-5)))
#estimator.append(('RF', RandomForestClassifier(max_depth=4, max_features=10, n_estimators=100)))
estimator.append(('XGB', XGBClassifier(tree_method='gpu_hist', gpu_id=0, min_child_weight=3, n_jobs=-1,
                                       eta=0.35, max_depth=3, n_estimators=200, 
                                       subsample=0.3, colsample_bytree=0.6)))
# 'soft' averages the members' predicted probabilities; 'hard' takes a majority vote of
# their predicted labels.
# NOTE(review): with only two members a hard vote ties whenever they disagree; scikit-learn
# documents that ties go to the class that sorts first (here presumably 0) — confirm.
vot_soft = VotingClassifier(estimators = estimator, voting ='soft')
vot_soft.fit(X_train, y_train)
vot_hard = VotingClassifier(estimators = estimator, voting ='hard')
vot_hard.fit(X_train, y_train)
# Each line prints F1 for the soft ensemble first, then the hard ensemble.
# NOTE(review): the label says 'VotingClassifiers3' but only two estimators are active.
print('VotingClassifiers3 in sample', f1_score(y_train, vot_soft.predict(X_train)), f1_score(y_train, vot_hard.predict(X_train)))
print('VotingClassifiers3 out of sample', f1_score(y_test, vot_soft.predict(X_test)), f1_score(y_test, vot_hard.predict(X_test)))

VotingClassifiers3 in sample 0.4316588785046729 0.3222748815165876
VotingClassifiers3 out of sample 0.27027027027027023 0.1473684210526316


In [8]:
# 8. feature importance #

# Permutation importance of the tuned XGBoost search object on the hold-out split,
# measured as the drop in F1 when a feature's values are shuffled.
# NOTE(review): the hold-out split is only 2% of the sample, so these estimates are
# probably noisy — confirm before using them for feature selection.
results = permutation_importance(xgbm, X_test, y_test, scoring='f1', n_jobs=-1)
# Despite the "_lr" suffix, this table holds the importances computed for xgbm above.
fi_lr = pd.DataFrame({'col':X_test.columns, 'FI':results.importances_mean})
# Show the 20 features with the highest mean importance.
fi_lr.sort_values('FI', ascending = False)[:20]

Unnamed: 0,col,FI
145,var_145,0.044367
1,var_1,0.042451
26,var_26,0.040462
146,var_146,0.039477
127,var_127,0.032903
169,var_169,0.031797
81,var_81,0.028117
109,var_109,0.026571
179,var_179,0.026385
91,var_91,0.025633


In [10]:
# Importance table sorted from most to least important feature.
temp = fi_lr.sort_values('FI', ascending = False)

In [26]:
# Keep only the features with strictly positive mean permutation importance, in
# descending order of importance; this list feeds the optional feature restriction
# that is commented out in the train-test split cell.
features_i1 = list(temp.loc[temp.FI>0]['col'])
features_i1

['var_145',
 'var_1',
 'var_26',
 'var_146',
 'var_127',
 'var_169',
 'var_81',
 'var_109',
 'var_179',
 'var_91',
 'var_174',
 'var_9',
 'var_90',
 'var_164',
 'var_40',
 'var_104',
 'var_110',
 'var_94',
 'var_133',
 'var_115',
 'var_119',
 'var_105',
 'var_22',
 'var_186',
 'var_86',
 'var_8',
 'var_106',
 'var_195',
 'var_197',
 'var_168',
 'var_67',
 'var_137',
 'var_198',
 'var_2',
 'var_0',
 'var_36',
 'var_177',
 'var_132',
 'var_191',
 'var_33',
 'var_149',
 'var_154',
 'var_58',
 'var_93',
 'var_157',
 'var_160',
 'var_114',
 'var_87',
 'var_28',
 'var_118',
 'var_76',
 'var_129',
 'var_75',
 'var_193',
 'var_5',
 'var_131',
 'var_43',
 'var_88',
 'var_151',
 'var_62',
 'var_116',
 'var_190',
 'var_165',
 'var_97',
 'var_184',
 'var_16',
 'var_6',
 'var_60',
 'var_156',
 'var_32',
 'var_125',
 'var_172',
 'var_141',
 'var_135',
 'var_70',
 'var_196',
 'var_13',
 'var_138',
 'var_74',
 'var_182',
 'var_53',
 'var_178',
 'var_52',
 'var_143',
 'var_99',
 'var_83',
 'var_10',
 '

In [20]:
# Display the full sorted importance table (includes features with negative importance).
temp

Unnamed: 0,col,FI
145,var_145,0.044367
1,var_1,0.042451
26,var_26,0.040462
146,var_146,0.039477
127,var_127,0.032903
...,...,...
89,var_89,-0.012431
122,var_122,-0.012645
25,var_25,-0.014718
78,var_78,-0.015582


In [18]:
# 9. predictions #

# Build the submission from the soft-voting ensemble: the IDs come from the untouched
# copy of the test file (pred0), the labels from predict() on the scaled X_pred.
# NOTE(review): predict() returns hard class labels; if the competition is scored on a
# ranking metric such as AUC, predict_proba(...)[:, 1] may be the better output — confirm.
submission_df_vc = pd.DataFrame({'ID_code': pred0.ID_code, 'target': vot_soft.predict(X_pred)}, columns=['ID_code', 'target'])
# The disabled lines below use column names (PassengerId / Transported) that do not exist
# in this dataset — presumably left over from another notebook.
#submission_df_svm = pd.DataFrame({'PassengerId': pred0.PassengerId, 'Transported': svmm.predict(X_pred)}, columns=['PassengerId', 'Transported'])
#submission_df_rf = pd.DataFrame({'PassengerId': pred0.PassengerId, 'Transported': rfm.predict(X_pred)}, columns=['PassengerId', 'Transported'])
#submission_df_bt = pd.DataFrame({'PassengerId': pred0.PassengerId, 'Transported': xgbm.predict(X_pred)}, columns=['PassengerId', 'Transported'])

#submission_df_bt.Transported = np.array([bool(x) for x in submission_df_bt.Transported])

# Written relative to the current working directory.
submission_df_vc.to_csv('KP12_vc.csv',index=False)
#submission_df_svm.to_csv('KP11_svm.csv',index=False)
#submission_df_rf.to_csv('KP11_rf.csv',index=False)
#submission_df_bt.to_csv('KP11_bt.csv',index=False)

# NOTE(review): the directory change happens after the CSV is written, so the link below
# only resolves if the working directory was already /kaggle/working — confirm.
os.chdir(r'/kaggle/working')

# Render a download link for the submission file in the notebook output.
from IPython.display import FileLink
FileLink(r'KP12_vc.csv')

In [30]:
# Display the prediction features (the captured values are on the standardised scale).
X_pred

Unnamed: 0,var_0,var_1,var_2,var_3,var_4,var_5,var_6,var_7,var_8,var_9,...,var_190,var_191,var_192,var_193,var_194,var_195,var_196,var_197,var_198,var_199
0,0.125259,2.331709,0.840949,1.289605,0.214591,0.343497,0.500328,0.498248,0.554992,1.001973,...,-1.178210,1.464626,-2.277710,-0.225311,-1.354710,1.835909,0.378425,1.965291,-0.132127,-0.520262
1,-0.704888,0.713600,0.217328,-0.787407,-1.167470,0.136490,0.696696,0.604629,-1.411729,-1.293410,...,1.622107,0.464314,-0.673566,1.697615,-0.759920,0.438390,-0.692806,1.045309,1.077906,-1.693878
2,-1.702851,-2.165893,-0.222903,0.124033,-0.508745,1.889922,-0.600053,1.078570,0.371622,0.624980,...,-0.869678,1.180586,0.030273,-0.293137,-1.587454,1.597932,-1.722437,-2.000625,1.331453,-1.904867
3,-0.702595,0.074712,0.488614,-0.107486,-1.384861,1.048441,-0.548510,1.169817,0.928041,-0.092423,...,1.392703,0.544515,-0.187850,0.057841,-0.883992,2.326684,0.303593,0.349415,-0.944551,-0.088507
4,0.334891,0.369669,1.285680,0.467983,-1.225527,-0.443859,1.665165,-1.740619,0.811932,-0.346638,...,0.220951,0.576270,-0.441299,0.006871,0.508454,-0.097761,-1.365872,-1.755181,-0.643722,-0.564779
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
199995,0.813620,0.653914,-0.112240,0.002547,-1.538443,0.117149,-0.642832,-0.525834,-0.904277,-0.468040,...,-0.255156,0.746363,1.852877,-1.181289,-1.622275,0.946936,-1.266689,0.270600,0.773801,0.782003
199996,-0.316305,-1.865382,-1.280514,1.149078,1.117849,1.028733,0.315259,0.679018,1.443543,-0.985373,...,0.392231,-0.259093,-0.078416,-0.235430,0.301028,-0.552713,0.128332,0.221164,0.761824,-1.674047
199997,0.312035,0.967171,0.180528,0.474982,0.985535,2.082463,-0.031241,0.530942,0.412778,-1.487491,...,0.424352,-1.588042,0.377725,-0.855108,0.670026,-0.721643,-1.181164,0.310170,-1.350300,-1.818380
199998,0.946793,0.275268,1.107508,0.338224,0.085616,-0.788901,-1.406168,0.077658,1.512427,1.077404,...,0.045549,0.353584,0.227212,0.604056,-0.304221,0.929376,-0.855927,-0.866531,-2.348542,0.294384


In [29]:
# Display the working training frame.
# NOTE(review): the captured output lists columns in feature-importance order, so df was
# presumably re-ordered or subset by features_i1 in that session — confirm.
df

Unnamed: 0,var_145,var_1,var_26,var_146,var_127,var_169,var_81,var_109,var_179,var_91,...,var_56,var_34,var_199,var_29,var_51,var_30,var_57,var_175,var_66,var_192
63485,3.7302,-6.5343,3.5244,12.5729,-2.5786,5.5541,17.0787,13.4587,-0.8599,6.9761,...,10.3291,12.1366,6.9620,1.8351,25.5535,-15.5938,6.8119,10.9163,6.4143,1.7578
62112,5.7610,6.5115,9.1616,7.9293,6.5498,5.8648,10.2828,24.5511,7.1604,6.7877,...,16.2924,11.3314,-19.1191,8.5291,15.3414,-16.7469,6.2807,13.4937,6.1077,1.2087
21978,8.4847,-2.3043,-5.0289,12.5389,5.4715,5.4766,12.7119,20.4244,1.9686,6.7359,...,20.7685,11.0577,-9.0884,7.9931,15.3078,-13.2923,5.5607,12.6695,7.4567,2.8253
31851,5.9911,-5.4637,-6.9243,12.4996,0.2053,4.8540,15.9185,19.0704,7.1132,7.1453,...,14.9180,11.2198,-10.8850,5.9737,6.1923,-10.7076,5.7365,8.0737,7.1607,2.1305
56894,1.8250,-0.9907,5.0259,13.0393,3.8482,5.4160,15.9166,18.7577,0.4515,6.8716,...,10.8098,11.0897,-10.5049,1.8259,20.2231,-8.5295,6.9632,9.2688,6.2233,4.7463
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
85546,-0.8488,-5.3114,-0.9775,11.7203,1.2664,6.0708,16.4149,22.3338,2.2093,7.0156,...,12.3681,11.1337,-4.8848,11.7524,20.5557,4.0395,6.5628,12.9680,6.6213,2.7904
66401,5.2884,0.6840,-1.6091,17.3113,-1.0054,6.1161,12.0078,13.1328,-1.9426,7.1055,...,19.7999,11.0900,1.1993,1.9996,4.1981,-7.6903,5.8222,9.2910,8.2988,0.2941
180711,6.7302,1.2606,-6.1734,10.8160,2.0804,5.9161,13.7476,18.3676,-1.5794,6.9773,...,22.3933,11.9135,-17.3833,6.8985,4.6935,-21.6111,5.7956,8.5723,6.3251,2.6507
158339,7.6564,-4.9032,3.1733,16.0088,1.7351,5.8368,16.2128,22.0024,0.9193,6.6638,...,15.3887,11.4414,0.0608,4.0958,14.2601,-14.4895,6.4237,6.6656,5.0913,1.8980


In [51]:
# The 17 log-spaced var_smoothing candidates (1e-3 down to 1e-7, four steps per decade)
# used in the GaussianNB grid search.
np.logspace(-3,-7, num=17)

array([1.00000000e-03, 5.62341325e-04, 3.16227766e-04, 1.77827941e-04,
       1.00000000e-04, 5.62341325e-05, 3.16227766e-05, 1.77827941e-05,
       1.00000000e-05, 5.62341325e-06, 3.16227766e-06, 1.77827941e-06,
       1.00000000e-06, 5.62341325e-07, 3.16227766e-07, 1.77827941e-07,
       1.00000000e-07])