In [18]:
#compare to a base model
import pandas as pd
import numpy as np

In [19]:
#data
dataset = pd.read_csv('/home/kate/data/ClaimPrediction/fdata_v1_encd.csv', index_col=None)
target_column = 'hasclaim'

In [20]:
#models and analyzing results directory
ModelsDir='/home/kate/data/ClaimPrediction/s11_BestModelsNewParams/'

In [21]:
#comparing model metrics with t-test
#and save results
import scipy.stats as stats
def AnalyzeAndSaveModelsResults(result_df,result_lst,ModelName,filename):
    df=pd.DataFrame([result_lst])
    TestSizeColumns=['S0.45','S0.4','S0.35','S0.3','S0.25','S0.2','S0.15','S0.1']
    #TestSizeColumns=['S0.2','S0.15','S0.1']
    df.columns=['Model']+TestSizeColumns
    #mean
    df['Mean'] = df.drop('Model', axis=1).mean(axis=1)
    df['t-pvalue'] = 1
    df['t-statistic'] = 0
    df['Group'] = 1
    #t-test with base model
    if ModelName!='BaseModel':
        base_model=result_df[result_df['Model'] == 'BaseModel'].iloc[0]
        current_model=df.iloc[0]
        t=stats.ttest_ind(base_model[TestSizeColumns].tolist(),current_model[TestSizeColumns].tolist())
        line_to_save=[current_model['Model']]
        line_to_save.extend(current_model[TestSizeColumns].tolist())
        line_to_save.append(current_model[TestSizeColumns].mean())
        line_to_save.append(t.pvalue)
        line_to_save.append(t.statistic)
        if ((t.pvalue<=0.05) and (base_model['Mean']<current_model['Mean'])):
            line_to_save.append(2)
        elif ((t.pvalue<=0.05) and (base_model['Mean']>current_model['Mean'])):
            line_to_save.append(3)    
        else:
            line_to_save.append(1)                  
        df_to_save=pd.DataFrame([line_to_save])
        df_to_save.columns=['Model']+TestSizeColumns+['Mean','t-pvalue','t-statistic','Group']
        result_df=result_df.append(df_to_save, ignore_index=True)
    else:
        result_df=result_df.append(df, ignore_index=True)
    result_df.to_csv(ModelsDir+filename, index = False)
    return result_df

In [22]:
#xgb library and parameters to tune later
import xgboost as xgb
xgb_params = {'eta': 0.02, 'max_depth': 4, 'subsample': 0.9, 'colsample_bytree': 0.9, 
          'objective': 'binary:logistic', 'eval_metric': 'auc', 'silent': True}

In [23]:
#Evaluation metric to be used in tuning
from sklearn.metrics import roc_auc_score
def gini(y, pred):
    g = np.asarray(np.c_[y, pred, np.arange(len(y)) ], dtype=np.float)
    g = g[np.lexsort((g[:,2], -1*g[:,1]))]
    gs = g[:,0].cumsum().sum() / g[:,0].sum()
    gs -= (len(y) + 1) / 2.
    return gs / len(y)
def gini_xgb(pred, y):
    y = y.get_label()
    return 'gini', gini(y, pred) / gini(y, y)

In [24]:
#Random Undersampler to balance the dataset
from imblearn.under_sampling import RandomUnderSampler
rus = RandomUnderSampler(random_state=42)
#ratio=0.918 to tune later

In [25]:
#StratifiedKFold
from sklearn.model_selection import StratifiedKFold
nrounds=5000 # need to change to 2000
kfold = 10  # need to change to 10
skf = StratifiedKFold(n_splits=kfold, random_state=42)

In [26]:
#splitting to train/test in the loop below
from sklearn.model_selection import train_test_split

In [27]:
#each line in the file contains the model name and set of features to analize
models = pd.read_csv(ModelsDir+'Models.csv', index_col=None)

In [28]:
#summary for test and train metrics for each model to test overfitting
models_test_gini_df=pd.DataFrame()
models_test_roc_auc_df=pd.DataFrame()
#
models_train_gini_df=pd.DataFrame()
models_train_roc_auc_df=pd.DataFrame()
#
base_model_df=pd.DataFrame()

In [29]:
for index, row in models.iterrows():
    #for test and train metrics for each model to test overfitting
    gini_test_lst=[]
    roc_auc_test_lst=[]
    gini_train_lst=[]
    roc_auc_train_lst=[]
    #Starting analyzing metric
    print (index, ': Analyzing model %s'%row['Model'])
    gini_test_lst.append(row['Model'])
    roc_auc_test_lst.append(row['Model'])
    gini_train_lst.append(row['Model'])
    roc_auc_train_lst.append(row['Model'])
    featureset=row[1:51].tolist()
    featureset=[x for x in featureset if str(x) != 'nan']
    #calculating metrics for the current featureset and 
    #several data sizes
    for s in (0.45,0.4,0.35,0.3,0.25,0.2,0.15,0.1):
    #for s in (0.2,0.15,0.1):
        print ('Test size %s'%s)
        X, X_test, y, y_test = train_test_split(dataset.loc[:,featureset], dataset[target_column], test_size=s, random_state=42)
        print('--------------------------------------------------------------------')
        print(row['Model'])
        print(len(X.columns))
        print(X.columns)
        print('--------------------------------------------------------------------')
        #prediction dataframes
        y_pred_test=pd.DataFrame(index=y_test.index)
        y_pred_test[target_column]=0
        y_pred_train=pd.DataFrame(index=y.index)
        y_pred_train[target_column]=0
        #
        X = X.values
        y = y.values
        #Stratified Fold
        for i, (train_index, test_index) in enumerate(skf.split(X, y)):
            print(' xgb kfold: {}  of  {} : '.format(i+1, kfold))
            #getting fold data
            X_train, X_valid = X[train_index], X[test_index]
            y_train, y_valid = y[train_index], y[test_index]
            #balancing dataset
            X_res, y_res = rus.fit_sample(X_train, y_train)
            #applying XGB
            d_train = xgb.DMatrix(X_res, y_res) 
            d_valid = xgb.DMatrix(X_valid, y_valid) 
            watchlist = [(d_train, 'train'), (d_valid, 'valid')]
            xgb_model = xgb.train(xgb_params, d_train, nrounds, watchlist, early_stopping_rounds=100, 
                          feval=gini_xgb, maximize=True, verbose_eval=1000)
            y_pred_test[target_column] +=  xgb_model.predict(xgb.DMatrix(X_test.values), ntree_limit=xgb_model.best_ntree_limit+50) / (kfold)
            y_pred_train[target_column] += xgb_model.predict(xgb.DMatrix(X), ntree_limit=xgb_model.best_ntree_limit+50) / (kfold)
        #Prediction results
        #test
        print('--------------------------------------------------------------------')
        print(row['Model'])
        print('Test Size %f'%s)
        print(len(X_test.columns))
        print(X_test.columns)
        print('--------------------------------------------------------------------')
        g=gini(y_test,y_pred_test)/gini(y_test,y_test)
        print('Test Gini - %f'%g)
        gini_test_lst.append(g)
        ROC_AUC=roc_auc_score(y_test, y_pred_test)
        print('Test ROC_AUC - %f'%ROC_AUC)
        roc_auc_test_lst.append(ROC_AUC)
        #train
        g=gini(y,y_pred_train)/gini(y,y)
        print('Train Gini - %f'%g)
        gini_train_lst.append(g)
        ROC_AUC=roc_auc_score(y, y_pred_train)
        print('Train ROC_AUC - %f'%ROC_AUC)
        roc_auc_train_lst.append(ROC_AUC)
    #save model analysis results
    models_test_gini_df=AnalyzeAndSaveModelsResults(models_test_gini_df,gini_test_lst,row['Model'],'models_test_gini.csv')
    models_test_roc_auc_df=AnalyzeAndSaveModelsResults(models_test_roc_auc_df,roc_auc_test_lst,row['Model'],'models_test_roc_auc.csv')
    models_train_gini_df=AnalyzeAndSaveModelsResults(models_train_gini_df,gini_train_lst,row['Model'],'models_train_gini.csv')
    models_train_roc_auc_df=AnalyzeAndSaveModelsResults(models_train_roc_auc_df,roc_auc_train_lst,row['Model'],'models_train_roc_auc.csv')

0 : Analyzing model BaseModel
Test size 0.45
--------------------------------------------------------------------
BaseModel
50
Index(['driverage', 'classcd_encd', 'vehicleage', 'mvrstatusage',
       'mvrstatus_encd', 'drivernumber', 'carpoolind_encd',
       'relationshiptoinsuredcd_encd', 'gooddriverind_encd',
       'accidentpreventioncourseind_encd', 'drivertrainingind_encd',
       'scholasticdiscountind_encd', 'vehbodytypecd_encd', 'ratingvalue',
       'vehnumber', 'maturedriverind_encd', 'driverstatuscd_encd',
       'estimatedannualdistance', 'daysperweekdriven',
       'acci_pointschargedterm', 'maritalstatuscd_encd', 'odometerreading',
       'licensedstateprovcd_encd', 'enginecylinders_encd', 'model_encd',
       'acci_last_infractionage', 'acci_infractioncdcountterm',
       'gendercd_encd', 'occupationclasscd_encd',
       'acci_driverpointsnumbercountterm', 'garageterritory',
       'manufacturer_encd', 'acci_last_convictionage', 'performancecd_encd',
       'registratio

Multiple eval metrics have been passed: 'valid-gini' will be used for early stopping.

Will train until valid-gini hasn't improved in 100 rounds.
Stopping. Best iteration:
[269]	train-auc:0.769088	valid-auc:0.68665	train-gini:0.538176	valid-gini:0.373299

 xgb kfold: 2  of  10 : 
[0]	train-auc:0.651855	valid-auc:0.63335	train-gini:0.187148	valid-gini:0.25175
Multiple eval metrics have been passed: 'valid-gini' will be used for early stopping.

Will train until valid-gini hasn't improved in 100 rounds.
Stopping. Best iteration:
[394]	train-auc:0.788749	valid-auc:0.711018	train-gini:0.577498	valid-gini:0.422036

 xgb kfold: 3  of  10 : 
[0]	train-auc:0.649603	valid-auc:0.627144	train-gini:0.159025	valid-gini:0.231092
Multiple eval metrics have been passed: 'valid-gini' will be used for early stopping.

Will train until valid-gini hasn't improved in 100 rounds.
Stopping. Best iteration:
[391]	train-auc:0.792263	valid-auc:0.715368	train-gini:0.584526	valid-gini:0.430735

 xgb kfold: 4  of 

Will train until valid-gini hasn't improved in 100 rounds.
Stopping. Best iteration:
[387]	train-auc:0.789777	valid-auc:0.713353	train-gini:0.579553	valid-gini:0.426706

 xgb kfold: 6  of  10 : 
[0]	train-auc:0.65402	valid-auc:0.624353	train-gini:0.185891	valid-gini:0.239683
Multiple eval metrics have been passed: 'valid-gini' will be used for early stopping.

Will train until valid-gini hasn't improved in 100 rounds.
Stopping. Best iteration:
[632]	train-auc:0.816199	valid-auc:0.710896	train-gini:0.632398	valid-gini:0.421792

 xgb kfold: 7  of  10 : 
[0]	train-auc:0.650838	valid-auc:0.631225	train-gini:0.191215	valid-gini:0.269038
Multiple eval metrics have been passed: 'valid-gini' will be used for early stopping.

Will train until valid-gini hasn't improved in 100 rounds.
Stopping. Best iteration:
[346]	train-auc:0.785918	valid-auc:0.705569	train-gini:0.571836	valid-gini:0.411139

 xgb kfold: 8  of  10 : 
[0]	train-auc:0.651305	valid-auc:0.60008	train-gini:0.201633	valid-gini:0.2056

Stopping. Best iteration:
[473]	train-auc:0.796708	valid-auc:0.69487	train-gini:0.593415	valid-gini:0.389738

 xgb kfold: 10  of  10 : 
[0]	train-auc:0.642748	valid-auc:0.623569	train-gini:0.175797	valid-gini:0.25638
Multiple eval metrics have been passed: 'valid-gini' will be used for early stopping.

Will train until valid-gini hasn't improved in 100 rounds.
Stopping. Best iteration:
[339]	train-auc:0.780357	valid-auc:0.693362	train-gini:0.560714	valid-gini:0.386724

--------------------------------------------------------------------
BaseModel
Test Size 0.300000
50
Index(['driverage', 'classcd_encd', 'vehicleage', 'mvrstatusage',
       'mvrstatus_encd', 'drivernumber', 'carpoolind_encd',
       'relationshiptoinsuredcd_encd', 'gooddriverind_encd',
       'accidentpreventioncourseind_encd', 'drivertrainingind_encd',
       'scholasticdiscountind_encd', 'vehbodytypecd_encd', 'ratingvalue',
       'vehnumber', 'maturedriverind_encd', 'driverstatuscd_encd',
       'estimatedannualdista

Multiple eval metrics have been passed: 'valid-gini' will be used for early stopping.

Will train until valid-gini hasn't improved in 100 rounds.
Stopping. Best iteration:
[694]	train-auc:0.809176	valid-auc:0.700455	train-gini:0.61835	valid-gini:0.400911

 xgb kfold: 2  of  10 : 
[0]	train-auc:0.64531	valid-auc:0.630569	train-gini:0.169057	valid-gini:0.280354
Multiple eval metrics have been passed: 'valid-gini' will be used for early stopping.

Will train until valid-gini hasn't improved in 100 rounds.
Stopping. Best iteration:
[545]	train-auc:0.788126	valid-auc:0.711688	train-gini:0.576251	valid-gini:0.423377

 xgb kfold: 3  of  10 : 
[0]	train-auc:0.645894	valid-auc:0.627121	train-gini:0.181463	valid-gini:0.266858
Multiple eval metrics have been passed: 'valid-gini' will be used for early stopping.

Will train until valid-gini hasn't improved in 100 rounds.
Stopping. Best iteration:
[414]	train-auc:0.775728	valid-auc:0.700724	train-gini:0.551455	valid-gini:0.401449

 xgb kfold: 4  of

Will train until valid-gini hasn't improved in 100 rounds.
Stopping. Best iteration:
[349]	train-auc:0.77044	valid-auc:0.707792	train-gini:0.540878	valid-gini:0.415585

 xgb kfold: 6  of  10 : 
[0]	train-auc:0.641124	valid-auc:0.638375	train-gini:0.172366	valid-gini:0.269152
Multiple eval metrics have been passed: 'valid-gini' will be used for early stopping.

Will train until valid-gini hasn't improved in 100 rounds.
Stopping. Best iteration:
[691]	train-auc:0.80562	valid-auc:0.727438	train-gini:0.611239	valid-gini:0.454877

 xgb kfold: 7  of  10 : 
[0]	train-auc:0.656109	valid-auc:0.65812	train-gini:0.218358	valid-gini:0.315326
Multiple eval metrics have been passed: 'valid-gini' will be used for early stopping.

Will train until valid-gini hasn't improved in 100 rounds.
Stopping. Best iteration:
[301]	train-auc:0.76921	valid-auc:0.713698	train-gini:0.538419	valid-gini:0.427395

 xgb kfold: 8  of  10 : 
[0]	train-auc:0.649162	valid-auc:0.632126	train-gini:0.201171	valid-gini:0.268752

Stopping. Best iteration:
[355]	train-auc:0.769919	valid-auc:0.698412	train-gini:0.539837	valid-gini:0.396823

 xgb kfold: 10  of  10 : 
[0]	train-auc:0.641006	valid-auc:0.630026	train-gini:0.145708	valid-gini:0.267186
Multiple eval metrics have been passed: 'valid-gini' will be used for early stopping.

Will train until valid-gini hasn't improved in 100 rounds.
Stopping. Best iteration:
[478]	train-auc:0.777238	valid-auc:0.70062	train-gini:0.554476	valid-gini:0.401239

--------------------------------------------------------------------
BaseModel
Test Size 0.100000
50
Index(['driverage', 'classcd_encd', 'vehicleage', 'mvrstatusage',
       'mvrstatus_encd', 'drivernumber', 'carpoolind_encd',
       'relationshiptoinsuredcd_encd', 'gooddriverind_encd',
       'accidentpreventioncourseind_encd', 'drivertrainingind_encd',
       'scholasticdiscountind_encd', 'vehbodytypecd_encd', 'ratingvalue',
       'vehnumber', 'maturedriverind_encd', 'driverstatuscd_encd',
       'estimatedannualdist

Multiple eval metrics have been passed: 'valid-gini' will be used for early stopping.

Will train until valid-gini hasn't improved in 100 rounds.
Stopping. Best iteration:
[263]	train-auc:0.749212	valid-auc:0.702416	train-gini:0.498419	valid-gini:0.404832

 xgb kfold: 3  of  10 : 
[0]	train-auc:0.647094	valid-auc:0.633782	train-gini:0.134628	valid-gini:0.240088
Multiple eval metrics have been passed: 'valid-gini' will be used for early stopping.

Will train until valid-gini hasn't improved in 100 rounds.
Stopping. Best iteration:
[440]	train-auc:0.770409	valid-auc:0.713727	train-gini:0.540817	valid-gini:0.427454

 xgb kfold: 4  of  10 : 
[0]	train-auc:0.640984	valid-auc:0.620849	train-gini:0.120299	valid-gini:0.212882
Multiple eval metrics have been passed: 'valid-gini' will be used for early stopping.

Will train until valid-gini hasn't improved in 100 rounds.
Stopping. Best iteration:
[452]	train-auc:0.771867	valid-auc:0.692438	train-gini:0.543732	valid-gini:0.384877

 xgb kfold: 5  

Multiple eval metrics have been passed: 'valid-gini' will be used for early stopping.

Will train until valid-gini hasn't improved in 100 rounds.
Stopping. Best iteration:
[447]	train-auc:0.776098	valid-auc:0.703999	train-gini:0.552195	valid-gini:0.407997

 xgb kfold: 2  of  10 : 
[0]	train-auc:0.641779	valid-auc:0.624542	train-gini:0.124008	valid-gini:0.244309
Multiple eval metrics have been passed: 'valid-gini' will be used for early stopping.

Will train until valid-gini hasn't improved in 100 rounds.
Stopping. Best iteration:
[351]	train-auc:0.766925	valid-auc:0.69224	train-gini:0.533848	valid-gini:0.384482

 xgb kfold: 3  of  10 : 
[0]	train-auc:0.637824	valid-auc:0.605592	train-gini:0.086968	valid-gini:0.188692
Multiple eval metrics have been passed: 'valid-gini' will be used for early stopping.

Will train until valid-gini hasn't improved in 100 rounds.
Stopping. Best iteration:
[286]	train-auc:0.762259	valid-auc:0.700279	train-gini:0.524514	valid-gini:0.400557

 xgb kfold: 4  o

Multiple eval metrics have been passed: 'valid-gini' will be used for early stopping.

Will train until valid-gini hasn't improved in 100 rounds.
Stopping. Best iteration:
[430]	train-auc:0.764205	valid-auc:0.689596	train-gini:0.528408	valid-gini:0.379191

--------------------------------------------------------------------
BestModel
Test Size 0.250000
18
Index(['accidentpreventioncourseind_encd', 'carpoolind_encd', 'classcd_encd',
       'driverage', 'drivernumber', 'driverstatuscd_encd',
       'drivertrainingind_encd', 'estimatedannualdistance',
       'gooddriverind_encd', 'maturedriverind_encd', 'mvrstatus_encd',
       'mvrstatusage', 'ratingvalue', 'relationshiptoinsuredcd_encd',
       'scholasticdiscountind_encd', 'vehbodytypecd_encd', 'vehicleage',
       'vehnumber'],
      dtype='object')
--------------------------------------------------------------------
Test Gini - 0.407910
Test ROC_AUC - 0.703955
Train Gini - 0.477277
Train ROC_AUC - 0.738638
Test size 0.2
-------------

Multiple eval metrics have been passed: 'valid-gini' will be used for early stopping.

Will train until valid-gini hasn't improved in 100 rounds.
Stopping. Best iteration:
[396]	train-auc:0.753943	valid-auc:0.705505	train-gini:0.507883	valid-gini:0.41101

 xgb kfold: 6  of  10 : 
[0]	train-auc:0.646354	valid-auc:0.650349	train-gini:0.195056	valid-gini:0.292561
Multiple eval metrics have been passed: 'valid-gini' will be used for early stopping.

Will train until valid-gini hasn't improved in 100 rounds.
Stopping. Best iteration:
[538]	train-auc:0.76735	valid-auc:0.715312	train-gini:0.534699	valid-gini:0.430624

 xgb kfold: 7  of  10 : 
[0]	train-auc:0.652167	valid-auc:0.640276	train-gini:0.198492	valid-gini:0.280555
Multiple eval metrics have been passed: 'valid-gini' will be used for early stopping.

Will train until valid-gini hasn't improved in 100 rounds.
Stopping. Best iteration:
[358]	train-auc:0.757142	valid-auc:0.705719	train-gini:0.514282	valid-gini:0.411436

 xgb kfold: 8  of

Multiple eval metrics have been passed: 'valid-gini' will be used for early stopping.

Will train until valid-gini hasn't improved in 100 rounds.
Stopping. Best iteration:
[333]	train-auc:0.763573	valid-auc:0.695987	train-gini:0.527143	valid-gini:0.391972

 xgb kfold: 2  of  10 : 
[0]	train-auc:0.657757	valid-auc:0.641617	train-gini:0.229783	valid-gini:0.277402
Multiple eval metrics have been passed: 'valid-gini' will be used for early stopping.

Will train until valid-gini hasn't improved in 100 rounds.
Stopping. Best iteration:
[650]	train-auc:0.793217	valid-auc:0.705685	train-gini:0.586432	valid-gini:0.41137

 xgb kfold: 3  of  10 : 
[0]	train-auc:0.659278	valid-auc:0.631776	train-gini:0.218023	valid-gini:0.253806
Multiple eval metrics have been passed: 'valid-gini' will be used for early stopping.

Will train until valid-gini hasn't improved in 100 rounds.
Stopping. Best iteration:
[706]	train-auc:0.799391	valid-auc:0.698774	train-gini:0.59878	valid-gini:0.397547

 xgb kfold: 4  of

--------------------------------------------------------------------
ShortBestModel1
Test Size 0.400000
14
Index(['acci_last_infractionage', 'accidentpreventioncourseind_encd',
       'carpoolind_encd', 'classcd_encd', 'driverage', 'drivernumber',
       'drivertrainingind_encd', 'gooddriverind_encd', 'mvrstatus_encd',
       'mvrstatusage', 'ratingvalue', 'scholasticdiscountind_encd',
       'vehbodytypecd_encd', 'vehicleage'],
      dtype='object')
--------------------------------------------------------------------
Test Gini - 0.403184
Test ROC_AUC - 0.701592
Train Gini - 0.464391
Train ROC_AUC - 0.732195
Test size 0.35
--------------------------------------------------------------------
ShortBestModel1
14
Index(['acci_last_infractionage', 'accidentpreventioncourseind_encd',
       'carpoolind_encd', 'classcd_encd', 'driverage', 'drivernumber',
       'drivertrainingind_encd', 'gooddriverind_encd', 'mvrstatus_encd',
       'mvrstatusage', 'ratingvalue', 'scholasticdiscountind_encd',

Will train until valid-gini hasn't improved in 100 rounds.
Stopping. Best iteration:
[426]	train-auc:0.76209	valid-auc:0.692449	train-gini:0.524177	valid-gini:0.384899

 xgb kfold: 8  of  10 : 
[0]	train-auc:0.642796	valid-auc:0.644938	train-gini:0.162814	valid-gini:0.290144
Multiple eval metrics have been passed: 'valid-gini' will be used for early stopping.

Will train until valid-gini hasn't improved in 100 rounds.
Stopping. Best iteration:
[385]	train-auc:0.756242	valid-auc:0.697712	train-gini:0.512483	valid-gini:0.395424

 xgb kfold: 9  of  10 : 
[0]	train-auc:0.643492	valid-auc:0.61071	train-gini:0.15164	valid-gini:0.233242
Multiple eval metrics have been passed: 'valid-gini' will be used for early stopping.

Will train until valid-gini hasn't improved in 100 rounds.
Stopping. Best iteration:
[359]	train-auc:0.754815	valid-auc:0.680369	train-gini:0.509626	valid-gini:0.360737

 xgb kfold: 10  of  10 : 
[0]	train-auc:0.642261	valid-auc:0.621772	train-gini:0.180878	valid-gini:0.2532

 xgb kfold: 4  of  10 : 
[0]	train-auc:0.647767	valid-auc:0.629074	train-gini:0.213887	valid-gini:0.259898
Multiple eval metrics have been passed: 'valid-gini' will be used for early stopping.

Will train until valid-gini hasn't improved in 100 rounds.
Stopping. Best iteration:
[837]	train-auc:0.775349	valid-auc:0.696766	train-gini:0.550697	valid-gini:0.393531

 xgb kfold: 5  of  10 : 
[0]	train-auc:0.645337	valid-auc:0.626698	train-gini:0.205768	valid-gini:0.247234
Multiple eval metrics have been passed: 'valid-gini' will be used for early stopping.

Will train until valid-gini hasn't improved in 100 rounds.
Stopping. Best iteration:
[446]	train-auc:0.750235	valid-auc:0.703912	train-gini:0.500466	valid-gini:0.407823

 xgb kfold: 6  of  10 : 
[0]	train-auc:0.642476	valid-auc:0.640707	train-gini:0.174026	valid-gini:0.278991
Multiple eval metrics have been passed: 'valid-gini' will be used for early stopping.

Will train until valid-gini hasn't improved in 100 rounds.
Stopping. Best iter

Multiple eval metrics have been passed: 'valid-gini' will be used for early stopping.

Will train until valid-gini hasn't improved in 100 rounds.
Stopping. Best iteration:
[418]	train-auc:0.750128	valid-auc:0.706523	train-gini:0.500254	valid-gini:0.413045

 xgb kfold: 2  of  10 : 
[0]	train-auc:0.645138	valid-auc:0.638848	train-gini:0.191726	valid-gini:0.267764
Multiple eval metrics have been passed: 'valid-gini' will be used for early stopping.

Will train until valid-gini hasn't improved in 100 rounds.
Stopping. Best iteration:
[331]	train-auc:0.743059	valid-auc:0.697262	train-gini:0.486115	valid-gini:0.394524

 xgb kfold: 3  of  10 : 
[0]	train-auc:0.640373	valid-auc:0.626956	train-gini:0.161156	valid-gini:0.260595
Multiple eval metrics have been passed: 'valid-gini' will be used for early stopping.

Will train until valid-gini hasn't improved in 100 rounds.
Stopping. Best iteration:
[526]	train-auc:0.7549	valid-auc:0.702381	train-gini:0.509798	valid-gini:0.404761

 xgb kfold: 4  of

ValueError: could not convert string to float: 'Male'

Red and Green bars below indicates models where mean of gini is likely different from a previous model

In [None]:
models_test_gini_df

In [None]:
models_test_gini_df.set_index('Model', inplace=True)

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
colors = {1: 'blue', 2: 'green',3: 'red'}

In [None]:
customcmap = tuple(models_test_gini_df['Group'].map(colors))
models_test_gini_df['Mean'].plot(kind='barh',  color=[customcmap], figsize=(10, 12))