In [1]:
import pandas as pd
import pandas_profiling
import numpy as np 

In [2]:
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [3]:
train = pd.read_csv("../data/modulbank/train.csv",delimiter="\t")
test = pd.read_csv("../data/modulbank/test.csv",delimiter="\t")

In [4]:
test.drop(["Unnamed: 0"],axis=1,inplace=True)
train.drop(["Unnamed: 0"],axis=1,inplace=True)

In [5]:
train["0"].value_counts(normalize=True)

0    0.822197
1    0.177803
Name: 0, dtype: float64

In [6]:
all_dataframe = pd.concat([train,test])

In [10]:
report = pandas_profiling.ProfileReport(all_dataframe)

In [333]:
report.to_file("report_all.html")

In [11]:
rejected = report.get_rejected_variables()

According to the report shows that some features are constant

In [12]:
all_dataframe.drop(rejected,axis=1,inplace=True)

In [13]:
train,test = all_dataframe[:len(train)],all_dataframe[len(train):]

In [14]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC,SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score,GridSearchCV,RandomizedSearchCV

In [15]:
from sklearn.model_selection import StratifiedKFold
skf = StratifiedKFold(n_splits=8,random_state=42,shuffle=True)


In [16]:
X_train_new,Y_train_new = train.drop(["0"],axis=1),train["0"]
myIterator=skf.split(X_train_new,Y_train_new)

In [397]:
log_reg  = LogisticRegression()
params = {"C":[0.01,0.1,1,5,10]}
grid_search = GridSearchCV(log_reg,param_grid=params,scoring="roc_auc",cv=myIterator,verbose=3,n_jobs=1)

In [398]:
grid_search.fit(X_train_new,Y_train_new)

Fitting 8 folds for each of 5 candidates, totalling 40 fits
[CV] C=0.01 ..........................................................
[CV] ................. C=0.01, score=0.7119386694142167, total=   1.2s
[CV] C=0.01 ..........................................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    1.4s remaining:    0.0s


[CV] ................. C=0.01, score=0.7212224715717962, total=   1.2s
[CV] C=0.01 ..........................................................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    2.6s remaining:    0.0s


[CV] ................. C=0.01, score=0.7089897578486306, total=   1.2s
[CV] C=0.01 ..........................................................
[CV] ................. C=0.01, score=0.7196111087587566, total=   1.0s
[CV] C=0.01 ..........................................................
[CV] ................. C=0.01, score=0.7300517988454643, total=   1.1s
[CV] C=0.01 ..........................................................
[CV] ................. C=0.01, score=0.7184194475662306, total=   1.1s
[CV] C=0.01 ..........................................................
[CV] ................. C=0.01, score=0.7248283645166818, total=   1.0s
[CV] C=0.01 ..........................................................
[CV] ................. C=0.01, score=0.7230079586448341, total=   1.0s
[CV] C=0.1 ...........................................................
[CV] .................... C=0.1, score=0.72474206433219, total=   1.7s
[CV] C=0.1 ...........................................................
[CV] .

[Parallel(n_jobs=1)]: Done  40 out of  40 | elapsed:  1.8min finished


GridSearchCV(cv=<generator object _BaseKFold.split at 0x1142a4e60>,
       error_score='raise',
       estimator=LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False),
       fit_params=None, iid=True, n_jobs=1,
       param_grid={'C': [0.01, 0.1, 1, 5, 10]}, pre_dispatch='2*n_jobs',
       refit=True, return_train_score='warn', scoring='roc_auc', verbose=3)

In [366]:
log_reg = LogisticRegression(**grid_search.best_params_)

In [368]:
myIterator=skf.split(X_train_new,Y_train_new)
scores1 = cross_val_score(log_reg,X_train_new,Y_train_new,cv=myIterator,scoring="roc_auc",n_jobs=1,verbose=3)


[CV]  ................................................................
[CV] ......................... , score=0.72474206433219, total=   2.1s
[CV]  ................................................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    2.1s remaining:    0.0s


[CV] ....................... , score=0.7321016405320085, total=   1.7s
[CV]  ................................................................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    3.8s remaining:    0.0s


[CV] ....................... , score=0.7180881944738489, total=   2.0s
[CV]  ................................................................
[CV] ....................... , score=0.7286582640564941, total=   2.0s
[CV]  ................................................................
[CV] ....................... , score=0.7380662705301737, total=   1.5s
[CV]  ................................................................
[CV] ....................... , score=0.7292409071314144, total=   2.0s
[CV]  ................................................................
[CV] ....................... , score=0.7324387769124626, total=   2.2s
[CV]  ................................................................
[CV] ....................... , score=0.7221350810993733, total=   1.7s


[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:   15.4s finished


In [369]:
print("Log regression val {:.4f}".format(scores1.mean()))

Log regression val 0.7282


In [401]:

cls = RandomForestClassifier()

In [402]:
param_grid = {"max_depth": list(range(5,40,8)),
              "max_features": [0.3,0.7,0.9],
              "min_samples_split": [ 10,20,30],
              "min_samples_leaf": [10,20,30],
              "criterion": ["gini", "entropy"]}

# run grid search
grid_search = RandomizedSearchCV(cls, param_distributions=param_grid,n_iter=20,cv=3,verbose=3,scoring="roc_auc",n_jobs=8)

In [403]:
grid_search.fit(X_train_new,Y_train_new)

Fitting 3 folds for each of 20 candidates, totalling 60 fits
[CV] max_depth=29, min_samples_leaf=20, criterion=entropy, min_samples_split=10, max_features=0.3 
[CV] max_depth=29, min_samples_leaf=20, criterion=entropy, min_samples_split=10, max_features=0.3 
[CV] max_depth=29, min_samples_leaf=20, criterion=entropy, min_samples_split=10, max_features=0.3 
[CV] max_depth=13, min_samples_leaf=20, criterion=entropy, min_samples_split=10, max_features=0.9 
[CV] max_depth=13, min_samples_leaf=20, criterion=entropy, min_samples_split=10, max_features=0.9 
[CV] max_depth=13, min_samples_leaf=20, criterion=entropy, min_samples_split=10, max_features=0.9 
[CV] max_depth=29, min_samples_leaf=20, criterion=gini, min_samples_split=20, max_features=0.9 
[CV] max_depth=29, min_samples_leaf=20, criterion=gini, min_samples_split=20, max_features=0.9 
[CV]  max_depth=29, min_samples_leaf=20, criterion=entropy, min_samples_split=10, max_features=0.3, score=0.7100055832460799, total=  15.8s
[CV] max_dept

[Parallel(n_jobs=8)]: Done  16 tasks      | elapsed:  1.7min


[CV]  max_depth=29, min_samples_leaf=10, criterion=entropy, min_samples_split=10, max_features=0.9, score=0.6975466338015197, total=  54.0s
[CV] max_depth=29, min_samples_leaf=10, criterion=gini, min_samples_split=10, max_features=0.9 
[CV]  max_depth=5, min_samples_leaf=30, criterion=entropy, min_samples_split=30, max_features=0.7, score=0.7047703140698331, total=  17.0s
[CV] max_depth=29, min_samples_leaf=10, criterion=gini, min_samples_split=10, max_features=0.9 
[CV]  max_depth=29, min_samples_leaf=10, criterion=entropy, min_samples_split=10, max_features=0.9, score=0.6991734301737478, total=  53.7s
[CV] max_depth=29, min_samples_leaf=10, criterion=gini, min_samples_split=10, max_features=0.9 
[CV]  max_depth=29, min_samples_leaf=10, criterion=entropy, min_samples_split=10, max_features=0.9, score=0.7030199419416515, total=  54.7s
[CV]  max_depth=5, min_samples_leaf=30, criterion=entropy, min_samples_split=30, max_features=0.7, score=0.7012266599404806, total=  16.7s
[CV] max_depth

[CV]  max_depth=37, min_samples_leaf=30, criterion=entropy, min_samples_split=20, max_features=0.9, score=0.7053866017445031, total=  37.3s
[CV] max_depth=21, min_samples_leaf=10, criterion=gini, min_samples_split=20, max_features=0.7 
[CV]  max_depth=37, min_samples_leaf=30, criterion=entropy, min_samples_split=20, max_features=0.9, score=0.7152528816668016, total=  35.0s
[CV]  max_depth=37, min_samples_leaf=30, criterion=entropy, min_samples_split=20, max_features=0.9, score=0.7166220408398769, total=  37.4s
[CV]  max_depth=21, min_samples_leaf=30, criterion=entropy, min_samples_split=10, max_features=0.9, score=0.7055920199414123, total=  37.2s
[CV]  max_depth=21, min_samples_leaf=30, criterion=entropy, min_samples_split=10, max_features=0.9, score=0.7065368311617916, total=  37.0s
[CV]  max_depth=21, min_samples_leaf=10, criterion=gini, min_samples_split=20, max_features=0.7, score=0.7010828109599425, total=  23.9s
[CV]  max_depth=21, min_samples_leaf=10, criterion=gini, min_sample

[Parallel(n_jobs=8)]: Done  60 out of  60 | elapsed:  4.0min finished


RandomizedSearchCV(cv=3, error_score='raise',
          estimator=RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False),
          fit_params=None, iid=True, n_iter=20, n_jobs=8,
          param_distributions={'max_depth': [5, 13, 21, 29, 37], 'min_samples_split': [10, 20, 30], 'criterion': ['gini', 'entropy'], 'min_samples_leaf': [10, 20, 30], 'max_features': [0.3, 0.7, 0.9]},
          pre_dispatch='2*n_jobs', random_state=None, refit=True,
          return_train_score='warn', scoring='roc_auc', verbose=3)

In [406]:
grid_search.best_score_

0.7167775225371534

In [407]:
best_rf = RandomForestClassifier(**grid_search.best_params_)

In [409]:
myIterator=skf.split(X_train_new,Y_train_new)
scores2 = cross_val_score(best_rf,X_train_new,Y_train_new ,cv=myIterator,scoring="roc_auc",verbose=3,n_jobs=8)

[CV]  ................................................................
[CV]  ................................................................
[CV]  ................................................................
[CV]  ................................................................
[CV]  ................................................................
[CV]  ................................................................
[CV]  ................................................................
[CV]  ................................................................
[CV] ........................ , score=0.722588719048896, total=  21.2s
[CV] ....................... , score=0.7056816417552328, total=  21.2s


[Parallel(n_jobs=8)]: Done   2 out of   8 | elapsed:   22.6s remaining:  1.1min


[CV] ....................... , score=0.7047112955357017, total=  21.0s
[CV] ....................... , score=0.7056654105093788, total=  21.3s
[CV] ....................... , score=0.7318409996565562, total=  20.6s


[Parallel(n_jobs=8)]: Done   5 out of   8 | elapsed:   23.3s remaining:   14.0s


[CV] ....................... , score=0.7171162979821655, total=  19.8s
[CV] ....................... , score=0.7237718203432522, total=  20.5s
[CV] ....................... , score=0.7265171431830244, total=  19.5s


[Parallel(n_jobs=8)]: Done   8 out of   8 | elapsed:   24.0s remaining:    0.0s
[Parallel(n_jobs=8)]: Done   8 out of   8 | elapsed:   24.0s finished


In [410]:
print("RF  val {:.4f}".format(scores2.mean()))

RF  val 0.7172


In [411]:
best_rf.fit(X_train_new,Y_train_new)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=29, max_features=0.3, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=30, min_samples_split=20,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [49]:
X_test = test.drop(["0"],axis=1)

In [46]:
def submit(vals,ID):
    test["id"] = test.index
    submission = pd.DataFrame.from_dict({'_ID_': test['id']})
    submission["_VAL_"]=vals[:,1]
    name = "submission"+str(ID)+".csv"
    submission.to_csv(name,index=False)
    return  pd.read_csv(name)

In [417]:
from sklearn.ensemble import BaggingClassifier

In [428]:
clas = BaggingClassifier(log_reg,n_estimators=50,oob_score=True,bootstrap_features=True,n_jobs=8,verbose=3)
clas.fit(X_train_new,Y_train_new)

Building estimator 1 of 7 for this parallel run (total 50)...
Building estimator 1 of 7 for this parallel run (total 50)...
Building estimator 1 of 6 for this parallel run (total 50)...
Building estimator 1 of 6 for this parallel run (total 50)...
Building estimator 1 of 6 for this parallel run (total 50)...
Building estimator 1 of 6 for this parallel run (total 50)...
Building estimator 1 of 6 for this parallel run (total 50)...
Building estimator 1 of 6 for this parallel run (total 50)...
Building estimator 2 of 6 for this parallel run (total 50)...
Building estimator 2 of 7 for this parallel run (total 50)...
Building estimator 2 of 6 for this parallel run (total 50)...
Building estimator 2 of 7 for this parallel run (total 50)...
Building estimator 2 of 6 for this parallel run (total 50)...
Building estimator 2 of 6 for this parallel run (total 50)...
Building estimator 2 of 6 for this parallel run (total 50)...
Building estimator 2 of 6 for this parallel run (total 50)...
Building

[Parallel(n_jobs=8)]: Done   2 out of   8 | elapsed:  1.3min remaining:  4.0min


Building estimator 7 of 7 for this parallel run (total 50)...


[Parallel(n_jobs=8)]: Done   5 out of   8 | elapsed:  1.4min remaining:   49.1s


Building estimator 7 of 7 for this parallel run (total 50)...


[Parallel(n_jobs=8)]: Done   8 out of   8 | elapsed:  1.4min remaining:    0.0s
[Parallel(n_jobs=8)]: Done   8 out of   8 | elapsed:  1.4min finished


BaggingClassifier(base_estimator=LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False),
         bootstrap=True, bootstrap_features=True, max_features=1.0,
         max_samples=1.0, n_estimators=50, n_jobs=8, oob_score=True,
         random_state=None, verbose=3, warm_start=False)

In [424]:
vals= np.c_[clas.oob_decision_function_,Y_train_new]

In [425]:
vals = vals[~np.isnan(vals[:,1])]

In [426]:
preds,true = vals[:,1],vals[:,2]

In [429]:
from sklearn.metrics import roc_auc_score
roc_auc_score(Y_train_new,clas.oob_decision_function_[:,1])

0.7250284208539598

Ensembles of models

In [430]:
from sklearn.ensemble import VotingClassifier
from sklearn.svm import SVC
from sklearn.ensemble import AdaBoostClassifier,ExtraTreesClassifier

In [434]:
extree = ExtraTreesClassifier()
rand_search = RandomizedSearchCV(extree,param_distributions=param_grid,scoring="roc_auc",cv=myIterator,n_jobs=8,verbose=3)

In [435]:
myIterator=skf.split(X_train_new,Y_train_new)
rand_search.fit(X_train_new,Y_train_new)

Fitting 8 folds for each of 10 candidates, totalling 80 fits
[CV] max_depth=37, min_samples_leaf=10, criterion=entropy, min_samples_split=20, max_features=0.3 
[CV] max_depth=37, min_samples_leaf=10, criterion=entropy, min_samples_split=20, max_features=0.3 
[CV] max_depth=37, min_samples_leaf=10, criterion=entropy, min_samples_split=20, max_features=0.3 
[CV] max_depth=37, min_samples_leaf=10, criterion=entropy, min_samples_split=20, max_features=0.3 
[CV] max_depth=37, min_samples_leaf=10, criterion=entropy, min_samples_split=20, max_features=0.3 
[CV] max_depth=37, min_samples_leaf=10, criterion=entropy, min_samples_split=20, max_features=0.3 
[CV] max_depth=37, min_samples_leaf=10, criterion=entropy, min_samples_split=20, max_features=0.3 
[CV] max_depth=37, min_samples_leaf=10, criterion=entropy, min_samples_split=20, max_features=0.3 
[CV]  max_depth=37, min_samples_leaf=10, criterion=entropy, min_samples_split=20, max_features=0.3, score=0.7173293249213136, total=  19.7s
[CV] ma

[Parallel(n_jobs=8)]: Done  16 tasks      | elapsed:  1.6min


[CV]  max_depth=5, min_samples_leaf=30, criterion=entropy, min_samples_split=10, max_features=0.3, score=0.6891269471614139, total=   7.2s
[CV] max_depth=5, min_samples_leaf=10, criterion=entropy, min_samples_split=10, max_features=0.9 
[CV]  max_depth=5, min_samples_leaf=30, criterion=entropy, min_samples_split=10, max_features=0.3, score=0.7073772188583553, total=   7.1s
[CV] max_depth=5, min_samples_leaf=10, criterion=entropy, min_samples_split=10, max_features=0.9 
[CV]  max_depth=5, min_samples_leaf=30, criterion=entropy, min_samples_split=10, max_features=0.3, score=0.6937020413732105, total=   6.9s
[CV] max_depth=5, min_samples_leaf=10, criterion=entropy, min_samples_split=10, max_features=0.9 
[CV]  max_depth=5, min_samples_leaf=30, criterion=entropy, min_samples_split=10, max_features=0.3, score=0.702977139819245, total=   6.8s
[CV] max_depth=5, min_samples_leaf=10, criterion=entropy, min_samples_split=10, max_features=0.9 
[CV]  max_depth=5, min_samples_leaf=30, criterion=ent

[CV]  max_depth=5, min_samples_leaf=30, criterion=entropy, min_samples_split=20, max_features=0.9, score=0.7015050363909235, total=  17.3s
[CV] max_depth=29, min_samples_leaf=20, criterion=entropy, min_samples_split=10, max_features=0.9 
[CV]  max_depth=5, min_samples_leaf=30, criterion=entropy, min_samples_split=20, max_features=0.9, score=0.7011112522523795, total=  17.2s
[CV] max_depth=29, min_samples_leaf=20, criterion=entropy, min_samples_split=10, max_features=0.9 
[CV]  max_depth=5, min_samples_leaf=30, criterion=entropy, min_samples_split=20, max_features=0.9, score=0.6849801774429466, total=  16.6s
[CV] max_depth=29, min_samples_leaf=20, criterion=entropy, min_samples_split=10, max_features=0.9 
[CV]  max_depth=5, min_samples_leaf=30, criterion=entropy, min_samples_split=20, max_features=0.9, score=0.6999471021981767, total=  16.3s
[CV] max_depth=29, min_samples_leaf=20, criterion=entropy, min_samples_split=10, max_features=0.9 
[CV]  max_depth=5, min_samples_leaf=30, criterio

[Parallel(n_jobs=8)]: Done  80 out of  80 | elapsed:  5.1min finished


RandomizedSearchCV(cv=<generator object _BaseKFold.split at 0x129980150>,
          error_score='raise',
          estimator=ExtraTreesClassifier(bootstrap=False, class_weight=None, criterion='gini',
           max_depth=None, max_features='auto', max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
           oob_score=False, random_state=None, verbose=0, warm_start=False),
          fit_params=None, iid=True, n_iter=10, n_jobs=8,
          param_distributions={'max_depth': [5, 13, 21, 29, 37], 'min_samples_split': [10, 20, 30], 'criterion': ['gini', 'entropy'], 'min_samples_leaf': [10, 20, 30], 'max_features': [0.3, 0.7, 0.9]},
          pre_dispatch='2*n_jobs', random_state=None, refit=True,
          return_train_score='warn', scoring='roc_auc', verbose=3)

In [436]:
rand_search.best_score_

0.7249621736009902

In [437]:
extr_tree = ExtraTreesClassifier(**rand_search.best_params_)

In [438]:
preds = []
for clf in (best_rf,log_reg,extr_tree):
    print("Classifier is being trained {}".format(clf))
    clf.fit(X_train_new,Y_train_new)
    preds.append(clf.predict_proba(X_test)[:,1])
    

Classifier is being trained RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=29, max_features=0.3, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=30, min_samples_split=20,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)
Classifier is being trained LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)
Classifier is being trained ExtraTreesClassifier(bootstrap=False, class_weight=None, criterion='entropy',
           max_depth=37, max_features=0.3, max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=

In [439]:
np.corrcoef(preds[0],preds[1])

array([[1.        , 0.80300428],
       [0.80300428, 1.        ]])

In [440]:
np.corrcoef(preds[1],preds[2])

array([[1.        , 0.78441624],
       [0.78441624, 1.        ]])

In [441]:
np.corrcoef(preds[0],preds[2])

array([[1.       , 0.8438106],
       [0.8438106, 1.       ]])

Seems like our predictions are not completely correlated which makes it a good choice for the ensemble

In [442]:
voting_clf = VotingClassifier(estimators=[("lr",log_reg),("rf",best_rf),("extrees",extr_tree)],voting="soft")
scores3 = cross_val_score(voting_clf,X_train_new,Y_train_new,cv=myIterator,n_jobs=1,scoring="roc_auc",verbose=3)

[CV]  ................................................................
[CV] ....................... , score=0.7369305537912897, total=   9.3s
[CV]  ................................................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    9.3s remaining:    0.0s


[CV] ....................... , score=0.7377463503220373, total=  11.6s
[CV]  ................................................................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:   20.9s remaining:    0.0s


[CV] ........................ , score=0.729756813594727, total=  11.4s
[CV]  ................................................................
[CV] ........................ , score=0.735573245261182, total=  10.6s
[CV]  ................................................................
[CV] ....................... , score=0.7536910794013729, total=   9.6s
[CV]  ................................................................
[CV] ....................... , score=0.7334586126469044, total=  10.3s
[CV]  ................................................................
[CV] ....................... , score=0.7384208406044279, total=   9.7s
[CV]  ................................................................
[CV] ......................... , score=0.73752261139322, total=   9.6s


[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:  1.4min finished


In [444]:
print("Ensembles val {:.4f} ".format(scores3.mean()))

Ensembles val 0.7379 


In [445]:
voting_clf.fit(X_train_new,Y_train_new)

VotingClassifier(estimators=[('lr', LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)), ('rf', RandomF...timators=10, n_jobs=1,
           oob_score=False, random_state=None, verbose=0, warm_start=False))],
         flatten_transform=None, n_jobs=1, voting='soft', weights=None)

## Stacking

In [447]:
rf_predictions = []
et_predictions = []
log_predictions=[]
for train, val in skf.split(X_train_new,Y_train_new):
    best_rf.fit(X_train_new.iloc[train], Y_train_new[train])
    extr_tree.fit(X_train_new.iloc[train], Y_train_new[train])
    log_reg.fit(X_train_new.iloc[train],Y_train_new[train])
    rf_predictions.append([Y_train_new[val], best_rf.predict_proba(X_train_new.iloc[val])[:,1]])
    et_predictions.append([Y_train_new[val], extr_tree.predict_proba(X_train_new.iloc[val])[:,1]])
    log_predictions.append([Y_train_new[val], log_reg.predict_proba(X_train_new.iloc[val])[:,1]])
    

In [448]:
X_train_aug = X_train_new.copy()

In [450]:
X_train_aug["rf_preds"] = 0
X_train_aug["et_preds"] = 0
X_train_aug["log_preds"]=0

In [451]:
for i, fold in enumerate(skf.split(X_train_new,Y_train_new)):
    train, val = fold[0], fold[1]
    X_train_aug.iloc[val, -1] = rf_predictions[i][1]
    X_train_aug.iloc[val, -2] = et_predictions[i][1]
    X_train_aug.iloc[val, -3] = log_predictions[i][1]

In [456]:
import xgboost 

In [457]:
xgbClas = xgboost.XGBClassifier()

In [458]:
myIterator=skf.split(X_train_aug,Y_train_new)
scores_xgb = cross_val_score(xgbClas,X_train_aug,Y_train_new,cv=myIterator,n_jobs=8,scoring="roc_auc",verbose=3)

[CV]  ................................................................
[CV]  ................................................................
[CV]  ................................................................
[CV]  ................................................................
[CV]  ................................................................
[CV]  ................................................................
[CV]  ................................................................
[CV]  ................................................................
[CV] ....................... , score=0.7472564489797839, total= 1.3min
[CV] ....................... , score=0.7397832540589877, total= 1.3min


[Parallel(n_jobs=8)]: Done   2 out of   8 | elapsed:  1.4min remaining:  4.1min


[CV] ......................... , score=0.73902955968629, total= 1.3min
[CV] ....................... , score=0.7300376847186348, total= 1.3min
[CV] ........................ , score=0.749017892008111, total= 1.3min


[Parallel(n_jobs=8)]: Done   5 out of   8 | elapsed:  1.4min remaining:   49.3s


[CV] ....................... , score=0.7387542755919001, total= 1.3min
[CV] ....................... , score=0.7386147364616453, total= 1.3min
[CV] ....................... , score=0.7333677237031501, total= 1.3min


[Parallel(n_jobs=8)]: Done   8 out of   8 | elapsed:  1.4min remaining:    0.0s
[Parallel(n_jobs=8)]: Done   8 out of   8 | elapsed:  1.4min finished


In [459]:
print("Using stacking {:.5f}".format(scores_xgb.mean()))

Using stacking 0.73948


In [468]:
xgbClas.fit(X_train_aug,Y_train_new)

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
       max_depth=3, min_child_weight=1, missing=None, n_estimators=100,
       n_jobs=1, nthread=None, objective='binary:logistic', random_state=0,
       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
       silent=True, subsample=1)

In [466]:
def predict_test_proba():
    X_test["rf_preds"] = 0
    X_test["et_preds"] = 0
    X_test["log_preds"]=0
    for index,clas in enumerate([best_rf,extr_tree,log_reg]):
        clas.fit(X_train_new,Y_train_new)
        res = clas.predict_proba(X_test.iloc[:,:-3])[:,1]
        X_test.iloc[:,-(index+1)] = res

In [467]:
predict_test_proba()

In [470]:
vals = xgbClas.predict_proba(X_test)

In [474]:
submit(vals)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


Unnamed: 0,_ID_,_VAL_
0,0,0.153728
1,1,0.412388
2,2,0.275629
3,3,0.324806
4,4,0.653551
5,5,0.263956
6,6,0.076646
7,7,0.138943
8,8,0.096029
9,9,0.274639


## Boosting tuning

In [187]:
!pip install bayesian-optimization

Collecting bayesian-optimization
  Downloading bayesian-optimization-0.6.0.tar.gz
Building wheels for collected packages: bayesian-optimization
  Running setup.py bdist_wheel for bayesian-optimization ... [?25ldone
[?25h  Stored in directory: /Users/mac/Library/Caches/pip/wheels/c5/92/8b/a2c219cb16b9a6271dd0e72b5c9e930c32dee10908870e5512
Successfully built bayesian-optimization
Installing collected packages: bayesian-optimization
Successfully installed bayesian-optimization-0.6.0


Inspired by  __[this course material ](https://www.coursera.org/learn/bayesian-methods-in-machine-learning/lecture/iRLaF/bayesian-optimization)__ I decided to use bayesian optimization for my paramaters tuning

In [499]:
def xgb_evaluate(min_child_weight, colsample_bytree, max_depth,
                 subsample,
                 gamma,
                 alpha,
                 n_estimators,
                 learning_rate):
    
 
    '''According to the idea, this is our surrogate function which we will optimize '''

    params['min_child_weight'] = int(min_child_weight)
    params['cosample_bytree'] = max(min(colsample_bytree, 1), 0)
    params['max_depth'] = int(max_depth)
    params['subsample'] = max(min(subsample, 1), 0)
    params['gamma'] = max(gamma, 0)
    params['alpha'] = max(alpha, 0)
    params["n_estimators"] = int(n_estimators)
    params["learning_rate"] = max(learning_rate,0)


    cv_result = xgb.cv(params, xgtrain, num_boost_round=num_rounds, nfold=5,
             seed=random_state,
             callbacks=[xgb.callback.early_stop(50)])

    return cv_result['test-auc-mean'].values[-1]

In [500]:
from bayes_opt import BayesianOptimization

In [501]:
xgtrain = xgboost.DMatrix(X_train_aug,Y_train_new)

In [502]:
num_rounds = 3000
random_state = 2016
num_iter = 25
init_points = 10
params = {
    'eta': 0.1,
    'silent': 1,
    'eval_metric': 'auc',
    'verbose_eval': True,
    'seed': random_state
}

xgbBO = BayesianOptimization(xgb_evaluate, {'min_child_weight': (1, 20),
                                            'colsample_bytree': (0.1, 1),
                                            'max_depth': (3, 15),
                                            'subsample': (0.5, 1),
                                            'gamma': (0, 10),
                                            'alpha': (0, 10),
                                            "n_estimators":(50,1000),
                                            "learning_rate":(0.01,0.1),
                                            })

In [503]:
xgbBO.maximize(init_points=init_points, n_iter=num_iter)

[31mInitialization[0m
[94m--------------------------------------------------------------------------------------------------------------------------------------------------------------[0m
 Step |   Time |      Value |     alpha |   colsample_bytree |     gamma |   learning_rate |   max_depth |   min_child_weight |   n_estimators |   subsample | 
Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 50 rounds.
Stopping. Best iteration:
[268]	train-auc:0.751394+0.00255534	test-auc:0.740383+0.00800751

    1 | 15m18s | [35m   0.74038[0m | [32m   6.9163[0m | [32m            0.1734[0m | [32m   2.0084[0m | [32m         0.0626[0m | [32m     6.8430[0m | [32m            4.4632[0m | [32m      757.6568[0m | [32m     0.6534[0m | 
Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 50 rounds.
Stopping. Best iteration:
[212]	t

  " state: %s" % convergence_dict)


[31mBayesian Optimization[0m
[94m--------------------------------------------------------------------------------------------------------------------------------------------------------------[0m
 Step |   Time |      Value |     alpha |   colsample_bytree |     gamma |   learning_rate |   max_depth |   min_child_weight |   n_estimators |   subsample | 
Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 50 rounds.
Stopping. Best iteration:
[32]	train-auc:0.901771+0.00155279	test-auc:0.733541+0.00701052

   11 | 68m47s |    0.73354 |    1.1497 |             0.2581 |    0.6100 |          0.0521 |     12.2419 |             3.3777 |        50.5792 |      0.8343 | 
Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 50 rounds.
Stopping. Best iteration:
[68]	train-auc:0.944801+0.00345983	test-auc:0.735019+0.00657698

   12 | 12m44s | 

  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 50 rounds.
Stopping. Best iteration:
[21]	train-auc:0.826319+0.003326	test-auc:0.737174+0.00645488

   13 | 05m45s |    0.73717 |    0.0330 |             0.7143 |    0.0530 |          0.0979 |      7.4087 |            19.6819 |       775.1989 |      0.8997 | 
Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 50 rounds.
Stopping. Best iteration:
[37]	train-auc:0.772549+0.00125129	test-auc:0.738277+0.00673704

   14 | 09m28s |    0.73828 |    9.5055 |             0.7337 |    0.9096 |          0.0998 |     14.9314 |             2.1799 |       865.1332 |      0.6109 | 
Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 50 rounds.
Stopping. Best iteration:
[121]	train-auc:0.747955+0.00192407	test-auc:0.7402

  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 50 rounds.
Stopping. Best iteration:
[69]	train-auc:0.739343+0.00288806	test-auc:0.735726+0.00759502

   18 | 06m31s |    0.73573 |    0.5061 |             0.4319 |    9.8621 |          0.0840 |      7.0254 |            19.5997 |        53.3090 |      0.8441 | 


  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 50 rounds.
Stopping. Best iteration:
[88]	train-auc:0.749549+0.00208844	test-auc:0.740392+0.00805895

   19 | 05m36s | [35m   0.74039[0m | [32m   6.2402[0m | [32m            0.1666[0m | [32m   1.2251[0m | [32m         0.0434[0m | [32m     3.1262[0m | [32m            1.9284[0m | [32m      924.8154[0m | [32m     0.8917[0m | 


  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 50 rounds.
Stopping. Best iteration:
[145]	train-auc:0.738003+0.00270229	test-auc:0.735077+0.00769037

   20 | 05m27s |    0.73508 |    9.8730 |             0.7281 |    7.2938 |          0.0775 |      3.1994 |             3.8235 |       996.7281 |      0.5002 | 


  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 50 rounds.
Stopping. Best iteration:
[36]	train-auc:0.753232+0.00182126	test-auc:0.740671+0.00779251

   21 | 01m46s | [35m   0.74067[0m | [32m   0.3795[0m | [32m            0.6503[0m | [32m   0.2059[0m | [32m         0.0942[0m | [32m     3.1356[0m | [32m           13.9817[0m | [32m      301.0260[0m | [32m     0.9739[0m | 
Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 50 rounds.
Stopping. Best iteration:
[114]	train-auc:0.74037+0.00231658	test-auc:0.737399+0.00772071

   22 | 19m30s |    0.73740 |    0.2486 |             0.1309 |    7.0136 |          0.0744 |     14.8200 |            17.9987 |       305.7656 |      0.5534 | 


  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 50 rounds.
Stopping. Best iteration:
[32]	train-auc:0.764212+0.00151011	test-auc:0.740507+0.00695284

   23 | 03m15s |    0.74051 |    0.3360 |             0.4798 |    0.5477 |          0.0855 |      4.1908 |             1.2782 |       700.8349 |      0.9376 | 


  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 50 rounds.
Stopping. Best iteration:
[62]	train-auc:0.755525+0.00163258	test-auc:0.740213+0.00782751

   24 | 02m51s |    0.74021 |    7.8244 |             0.3326 |    0.1979 |          0.0713 |      3.0913 |            19.1130 |        91.4993 |      0.7613 | 
Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 50 rounds.
Stopping. Best iteration:
[29]	train-auc:0.790761+0.00136794	test-auc:0.7386+0.00891448

   25 | 05m19s |    0.73860 |    0.3989 |             0.1006 |    0.7812 |          0.0506 |      6.4887 |             1.4042 |       526.3990 |      0.5198 | 


  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 50 rounds.
Stopping. Best iteration:
[35]	train-auc:0.770213+0.0014051	test-auc:0.736916+0.00677568

   26 | 07m35s |    0.73692 |    9.6457 |             0.3826 |    0.5351 |          0.0618 |     14.4826 |            19.2776 |       949.1730 |      0.6705 | 


  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 50 rounds.
Stopping. Best iteration:
[36]	train-auc:0.76214+0.0014864	test-auc:0.740203+0.00682429

   27 | 02m53s |    0.74020 |    0.6806 |             0.5421 |    0.2158 |          0.0675 |      4.0408 |            19.6053 |        54.6258 |      0.9928 | 
Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 50 rounds.
Stopping. Best iteration:
[48]	train-auc:0.756523+0.00164656	test-auc:0.740211+0.00715366

   28 | 03m32s |    0.74021 |    0.6076 |             0.5664 |    0.1205 |          0.0868 |      3.0533 |            19.8399 |       143.9707 |      0.7230 | 


  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 50 rounds.
Stopping. Best iteration:
[150]	train-auc:0.756223+0.00178005	test-auc:0.740753+0.0070435

   29 | 04m49s | [35m   0.74075[0m | [32m   0.5371[0m | [32m            0.9712[0m | [32m   0.3437[0m | [32m         0.0280[0m | [32m     3.8254[0m | [32m           19.6945[0m | [32m      361.9041[0m | [32m     0.9247[0m | 


  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 50 rounds.
Stopping. Best iteration:
[225]	train-auc:0.747741+0.002193	test-auc:0.73949+0.00735738

   30 | 07m47s |    0.73949 |    2.5593 |             0.1458 |    2.1574 |          0.0582 |      3.2488 |             1.4223 |       819.0518 |      0.5267 | 


  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 50 rounds.
Stopping. Best iteration:
[24]	train-auc:0.872762+0.00237663	test-auc:0.727627+0.00622389

   31 | 09m39s |    0.72763 |    0.8491 |             0.2201 |    1.1279 |          0.0779 |     14.1954 |             1.2049 |       734.7474 |      0.5861 | 
Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 50 rounds.
Stopping. Best iteration:
[149]	train-auc:0.748439+0.00192036	test-auc:0.739891+0.00701191

   32 | 06m37s |    0.73989 |    8.9561 |             0.6328 |    1.4185 |          0.0780 |      3.9936 |            19.3899 |       678.7082 |      0.5601 | 


  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 50 rounds.
Stopping. Best iteration:
[133]	train-auc:0.755848+0.00174829	test-auc:0.740024+0.00705732

   33 | 06m41s |    0.74002 |    9.7390 |             0.6761 |    0.2984 |          0.0359 |      3.4775 |            18.1556 |       868.9848 |      0.6296 | 
Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 50 rounds.
Stopping. Best iteration:
[84]	train-auc:0.780223+0.000926588	test-auc:0.73906+0.00699998

   34 | 14m16s |    0.73906 |    7.8818 |             0.9382 |    0.8428 |          0.0405 |     13.9725 |            19.9323 |        55.0471 |      0.6676 | 
Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 50 rounds.
Stopping. Best iteration:
[188]	train-auc:0.75667+0.0017583	test-auc:0.739

In [507]:
best_params = xgbBO.res['max']['max_params']
best_params

{'alpha': 0.5370730489004005,
 'colsample_bytree': 0.9711819342305409,
 'gamma': 0.343690951680623,
 'learning_rate': 0.02800171027744057,
 'max_depth': 3.825439512548069,
 'min_child_weight': 19.6945067158265,
 'n_estimators': 361.9041398081913,
 'subsample': 0.9247161516471999}

In [505]:
xgbBO.res["max"]["max_val"]

0.7407528000000001

In [514]:
best_params["max_depth"]=int(best_params["max_depth"])
best_params["n_estimators"]=int(best_params["n_estimators"])

In [518]:
xgbClasTuned = xgboost.XGBClassifier(**best_params)

In [519]:
xgbClasTuned.fit(X_train_aug,Y_train_new)

XGBClassifier(alpha=0.5370730489004005, base_score=0.5, booster='gbtree',
       colsample_bylevel=1, colsample_bytree=0.9711819342305409,
       gamma=0.343690951680623, learning_rate=0.02800171027744057,
       max_delta_step=0, max_depth=3, min_child_weight=19.6945067158265,
       missing=None, n_estimators=361, n_jobs=1, nthread=None,
       objective='binary:logistic', random_state=0, reg_alpha=0,
       reg_lambda=1, scale_pos_weight=1, seed=None, silent=True,
       subsample=0.9247161516471999)

In [521]:
vals = xgbClasTuned.predict_proba(X_test)

Ensembles stacking

In [17]:
from xgboost import XGBClassifier

In [81]:
xgb_clas = XGBClassifier(
    n_estimators =500,
    learning_rate=0.05,
    objective="binary:logistic",
    n_jobs=-1,
    nthread=4,
    subsample=0.85,
    colsample_bytree=0.9,
    colsample_bylevel=0.9,
    tree_method="hist",
    grow_policy="lossguide"
)

In [29]:
scores = cross_val_score(xgb_clas,X_train_new,Y_train_new,cv=8,n_jobs=8,scoring="roc_auc",verbose=4)

[CV]  ................................................................
[CV]  ................................................................
[CV]  ................................................................
[CV]  ................................................................
[CV]  ................................................................
[CV]  ................................................................
[CV]  ................................................................
[CV]  ................................................................
[CV] ....................... , score=0.7353812931363002, total= 1.5min
[CV] ....................... , score=0.7331733732292651, total= 1.5min


[Parallel(n_jobs=8)]: Done   2 out of   8 | elapsed:  1.5min remaining:  4.5min


[CV] ........................ , score=0.736607105051446, total= 1.5min
[CV] ....................... , score=0.7575155373012848, total= 1.5min
[CV] ....................... , score=0.7359938462407023, total= 1.5min


[Parallel(n_jobs=8)]: Done   5 out of   8 | elapsed:  1.6min remaining:   56.3s


[CV] ....................... , score=0.7364847057583306, total= 1.5min
[CV] ....................... , score=0.7405319651339151, total= 1.5min
[CV] ....................... , score=0.7446805706938648, total= 1.5min


[Parallel(n_jobs=8)]: Done   8 out of   8 | elapsed:  1.6min remaining:    0.0s
[Parallel(n_jobs=8)]: Done   8 out of   8 | elapsed:  1.6min finished


In [32]:
scores.mean()

0.7400460495681387

Now we add an index feature

In [51]:
#X_train_new["id"] = X_train_new.index
#X_test["id"] = X_test.index

In [58]:
X_train_new.drop(["id"],axis=1,inplace=True)
X_test.drop(["id"],axis=1,inplace=True)

In [42]:
scores

array([0.73538129, 0.73317337, 0.73660711, 0.75751554, 0.73599385,
       0.73648471, 0.74468057, 0.74053197])

In [84]:
scores = cross_val_score(xgb_clas,X_train_new,Y_train_new,cv=8,n_jobs=8,scoring="roc_auc",verbose=4)

[CV]  ................................................................
[CV]  ................................................................
[CV]  ................................................................
[CV]  ................................................................
[CV]  ................................................................
[CV]  ................................................................
[CV]  ................................................................
[CV]  ................................................................


KeyboardInterrupt: 

In [65]:
val_ids= scores_id
lb_result_ids = 0.75627114

In [66]:
val_without_ids=scores
lb_result_without_ids =0.75814019

In [69]:
val_ids.mean()

0.7374555893693024

In [70]:
val_without_ids.mean()

0.7400460495681387

In [71]:
lb_result_without_ids-lb_result_ids

0.0018690500000000387

In [73]:
np.abs(val_ids.mean()-val_without_ids.mean())

0.002590460198836242

Lookds like we have quite good validation,additionaly I want to check my score on training set

In [None]:
X_train_new,Y_train_new = train.drop(["0"],axis=1),train["0"]
myIterator=skf.split(X_train_new,Y_train_new)
for train,valid in myIterator:
    

In [59]:
xgb_clas.fit(X_train_new,Y_train_new)

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=0.9,
       colsample_bytree=0.9, gamma=0, grow_policy='lossguide',
       learning_rate=0.05, max_delta_step=0, max_depth=3,
       min_child_weight=1, missing=None, n_estimators=500, n_jobs=-1,
       nthread=4, objective='binary:logistic', random_state=0, reg_alpha=0,
       reg_lambda=1, scale_pos_weight=1, seed=None, silent=True,
       subsample=0.85, tree_method='hist')

In [60]:
vals = xgb_clas.predict_proba(X_test)

In [61]:
submit(vals,"_xgb_without_ids")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


Unnamed: 0,_ID_,_VAL_
0,0,0.176158
1,1,0.496563
2,2,0.185789
3,3,0.351222
4,4,0.583308
5,5,0.371492
6,6,0.094779
7,7,0.131359
8,8,0.120234
9,9,0.267031
