In [1]:
import pandas as pd
import sys
from catboost import CatBoostClassifier 
import numpy as np
from catboost import Pool, cv

In [2]:
prices = {'bk': 2450, 'GZ1': 2050, 'GZ2': 2050, 'GZ3': 2050,
          'GZ4': 2050, 'GZ5': 2050, 'GZ7': 2050,
          'DGK': 1300, 'NKTD': 2050, 'NKTM': 2050, 'NKTR': 2050,
          'ALPS': 1150}

train = pd.read_csv('fixed_train_1.csv').drop('Unnamed: 0', axis=1)
test = pd.read_csv('fixed_test_1.csv').drop('Unnamed: 0', axis=1)
result = pd.read_excel('result.xlsx')
train.head()

Unnamed: 0,well id,"depth, m",bk,GZ1,GZ2,GZ3,GZ4,GZ5,GZ7,DGK,...,GZ4_mean_50,GZ4_std_50,GZ1_mean_50,GZ1_std_50,GZ5_mean_50,GZ5_std_50,GZ3_mean_50,GZ3_std_50,NKTR_mean_50,NKTR_std_50
0,12.0,3042.463,0.455779,0.774,0.774,0.332846,0.476545,0.774,0.244898,0.123651,...,0.289396,0.083638,0.549723,0.093782,0.534064,0.126169,0.228647,0.042243,0.313424,0.071802
1,33.0,2412.8305,0.0,0.157153,0.755472,0.453216,0.715147,0.0,0.523432,0.388729,...,0.679849,0.035879,0.146223,0.014859,0.0,0.0,0.425147,0.026529,0.56626,0.014143
2,33.0,2522.5575,0.0,0.172235,0.941742,0.525711,0.6686,0.0,0.588924,0.515214,...,0.651627,0.023859,0.155702,0.029602,0.0,0.0,0.440186,0.041092,0.570614,0.010985
3,49.0,2396.3625,0.381248,0.246838,0.116913,0.732654,0.151299,0.165219,0.656955,0.445917,...,0.411672,0.214019,0.309506,0.079262,0.184682,0.076317,0.66817,0.248292,0.423467,0.027518
4,49.0,2399.0475,0.356867,0.226241,0.155473,0.6133,0.173949,0.297194,0.773218,0.392743,...,0.197349,0.170724,0.238382,0.022363,0.173726,0.030309,0.717243,0.079578,0.436785,0.011339


In [3]:
test.head()

Unnamed: 0,id,well id,"depth, m",bk,GZ1,GZ2,GZ3,GZ4,GZ5,GZ7,...,GZ1_mean_50,GZ1_std_50,NKTR_mean_50,NKTR_std_50,GZ7_mean_50,GZ7_std_50,GZ5_mean_50,GZ5_std_50,NKTD_mean_50,NKTD_std_50
0,1,47,2465.546,0.665198,0.259688,0.865866,0.526683,0.116218,0.46487,0.124486,...,0.41176,0.092512,0.399214,0.035714,0.426495,0.132352,0.685899,0.339916,0.407485,0.038407
1,2,47,2465.6355,0.115153,0.258287,0.161988,0.583114,0.122256,0.477795,0.118895,...,0.41176,0.092512,0.399214,0.035714,0.426495,0.132352,0.685899,0.339916,0.407485,0.038407
2,3,47,2465.725,0.155254,0.318849,0.125817,0.639544,0.128194,0.497212,0.111693,...,0.41176,0.092512,0.399214,0.035714,0.426495,0.132352,0.685899,0.339916,0.407485,0.038407
3,4,47,2465.8145,0.125315,0.474215,0.149127,0.638636,0.128239,0.478265,0.987464,...,0.41176,0.092512,0.399214,0.035714,0.426495,0.132352,0.685899,0.339916,0.407485,0.038407
4,5,47,2465.904,0.953756,0.638341,0.172437,0.637663,0.113455,0.465892,0.858188,...,0.41176,0.092512,0.399214,0.035714,0.426495,0.132352,0.685899,0.339916,0.407485,0.038407


In [4]:
train.fillna(0, inplace=True)
test.fillna(0, inplace=True)

for df in [train, test]:
    df['GZ2-1'] = df['GZ2'] - df['GZ1']
    df['GZ3-2'] = df['GZ3'] - df['GZ2']
    df['GZ4-3'] = df['GZ4'] - df['GZ3']
    df['GZ5-4'] = df['GZ5'] - df['GZ4']
    df['GZ7-5'] = df['GZ7'] - df['GZ5']

In [5]:
from sklearn import preprocessing
import random

rand_wells = random.sample(list(train['well id'].unique()), 40)

tr_train = train[~train['well id'].isin(rand_wells)]
val_train = train[train['well id'].isin(rand_wells)]

print(tr_train.shape, val_train.shape)

(409339, 93) (54558, 93)


In [6]:
first_features = []
for elem in tr_train.columns:
    if elem[:2] == 'GZ':
        first_features += [elem]

In [11]:
from sklearn.metrics import accuracy_score, f1_score


def get_feature_predictions(tr_train, val_train, test, features, idx=0):
    print('Preprocessing...')

    dense_train = tr_train[tr_train.columns[1:]].drop('goal', axis=1).drop('lith', axis=1).filter(features)
    dense_val = val_train[val_train.columns[1:]].drop('goal', axis=1).drop('lith', axis=1).filter(features)
    dense_test = test.filter(features)

    min_max_scaler = preprocessing.MinMaxScaler()
    min_max_scaler = min_max_scaler.fit(dense_train)

    dense_train_scaled = min_max_scaler.transform(dense_train)
    dense_val_scaled = min_max_scaler.transform(dense_val)
    dense_test_scaled = min_max_scaler.transform(dense_test)

    print(dense_train_scaled.shape, dense_val_scaled.shape, dense_test_scaled.shape)
    y_train, y_val = tr_train['goal'], val_train['goal']

    model = CatBoostClassifier(iterations=1000, 
                           class_weights=[np.sum(y_train == 1)/len(y_train), np.sum(y_train == 0)/len(y_train)], 
                           eval_metric="F1")
    print('Learning model...')
    model.fit(
        X=dense_train_scaled, y=y_train,
        eval_set=(dense_val_scaled, y_val),
        plot=True
    )

    print('Making predictions...')
    tr_train['first_predictions'] = model.predict(dense_train_scaled)
    val_train['first_predictions'] = model.predict(dense_val_scaled)
    test['first_predictions'] = model.predict(dense_test_scaled)

    print('Storing model...')
    model.save_model('catboost_models/features_'+str(idx)+'.pth')
    print('Val accuracy %.3f' % accuracy_score(val_train['goal'], val_train['first_predictions']))
    print('Val F1 score %.3f' % f1_score(val_train['goal'], val_train['first_predictions']))
    return tr_train, val_train, test

In [12]:
tr_train, val_train, test = get_feature_predictions(tr_train, val_train, test, first_features)

Preprocessing...
(409339, 47) (54558, 47) (37604, 47)
Learning model...


MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

Learning rate set to 0.14673
0:	learn: 0.7376620	test: 0.7889069	best: 0.7889069 (0)	total: 124ms	remaining: 2m 4s
1:	learn: 0.7322320	test: 0.7768632	best: 0.7889069 (0)	total: 189ms	remaining: 1m 34s
2:	learn: 0.7237841	test: 0.7985843	best: 0.7985843 (2)	total: 241ms	remaining: 1m 20s
3:	learn: 0.7377943	test: 0.8009807	best: 0.8009807 (3)	total: 295ms	remaining: 1m 13s
4:	learn: 0.7355492	test: 0.8045852	best: 0.8045852 (4)	total: 347ms	remaining: 1m 9s
5:	learn: 0.7480467	test: 0.8051872	best: 0.8051872 (5)	total: 409ms	remaining: 1m 7s
6:	learn: 0.7737047	test: 0.8217509	best: 0.8217509 (6)	total: 468ms	remaining: 1m 6s
7:	learn: 0.7765560	test: 0.8226562	best: 0.8226562 (7)	total: 526ms	remaining: 1m 5s
8:	learn: 0.7786999	test: 0.8237452	best: 0.8237452 (8)	total: 580ms	remaining: 1m 3s
9:	learn: 0.7589857	test: 0.8108336	best: 0.8237452 (8)	total: 633ms	remaining: 1m 2s
10:	learn: 0.7611194	test: 0.8096916	best: 0.8237452 (8)	total: 685ms	remaining: 1m 1s
11:	learn: 0.7635186	

94:	learn: 0.8029947	test: 0.8285898	best: 0.8292429 (71)	total: 7.44s	remaining: 1m 10s
95:	learn: 0.8031546	test: 0.8289992	best: 0.8292429 (71)	total: 7.58s	remaining: 1m 11s
96:	learn: 0.8033245	test: 0.8288547	best: 0.8292429 (71)	total: 7.73s	remaining: 1m 11s
97:	learn: 0.8037690	test: 0.8288176	best: 0.8292429 (71)	total: 7.82s	remaining: 1m 11s
98:	learn: 0.8040123	test: 0.8292073	best: 0.8292429 (71)	total: 7.9s	remaining: 1m 11s
99:	learn: 0.8042827	test: 0.8291367	best: 0.8292429 (71)	total: 7.99s	remaining: 1m 11s
100:	learn: 0.8046096	test: 0.8294392	best: 0.8294392 (100)	total: 8.07s	remaining: 1m 11s
101:	learn: 0.8046117	test: 0.8294694	best: 0.8294694 (101)	total: 8.18s	remaining: 1m 11s
102:	learn: 0.8048918	test: 0.8295804	best: 0.8295804 (102)	total: 8.29s	remaining: 1m 12s
103:	learn: 0.8051342	test: 0.8298914	best: 0.8298914 (103)	total: 8.46s	remaining: 1m 12s
104:	learn: 0.8053881	test: 0.8296663	best: 0.8298914 (103)	total: 8.64s	remaining: 1m 13s
105:	learn: 

188:	learn: 0.8207939	test: 0.8284247	best: 0.8313779 (126)	total: 14.3s	remaining: 1m 1s
189:	learn: 0.8209007	test: 0.8283275	best: 0.8313779 (126)	total: 14.3s	remaining: 1m 1s
190:	learn: 0.8209730	test: 0.8283158	best: 0.8313779 (126)	total: 14.4s	remaining: 1m
191:	learn: 0.8212288	test: 0.8284686	best: 0.8313779 (126)	total: 14.4s	remaining: 1m
192:	learn: 0.8213037	test: 0.8285720	best: 0.8313779 (126)	total: 14.5s	remaining: 1m
193:	learn: 0.8215034	test: 0.8286418	best: 0.8313779 (126)	total: 14.5s	remaining: 1m
194:	learn: 0.8215696	test: 0.8283510	best: 0.8313779 (126)	total: 14.6s	remaining: 1m
195:	learn: 0.8218803	test: 0.8283000	best: 0.8313779 (126)	total: 14.6s	remaining: 1m
196:	learn: 0.8218094	test: 0.8286214	best: 0.8313779 (126)	total: 14.7s	remaining: 59.9s
197:	learn: 0.8219432	test: 0.8284442	best: 0.8313779 (126)	total: 14.7s	remaining: 59.7s
198:	learn: 0.8221147	test: 0.8284544	best: 0.8313779 (126)	total: 14.8s	remaining: 59.5s
199:	learn: 0.8221906	test: 

282:	learn: 0.8328745	test: 0.8276060	best: 0.8313779 (126)	total: 19.7s	remaining: 50s
283:	learn: 0.8331430	test: 0.8275459	best: 0.8313779 (126)	total: 19.8s	remaining: 49.9s
284:	learn: 0.8332444	test: 0.8275767	best: 0.8313779 (126)	total: 19.9s	remaining: 49.9s
285:	learn: 0.8333262	test: 0.8273362	best: 0.8313779 (126)	total: 20s	remaining: 49.8s
286:	learn: 0.8335683	test: 0.8272042	best: 0.8313779 (126)	total: 20s	remaining: 49.8s
287:	learn: 0.8337837	test: 0.8272658	best: 0.8313779 (126)	total: 20.1s	remaining: 49.8s
288:	learn: 0.8338470	test: 0.8272364	best: 0.8313779 (126)	total: 20.2s	remaining: 49.7s
289:	learn: 0.8339702	test: 0.8274108	best: 0.8313779 (126)	total: 20.3s	remaining: 49.6s
290:	learn: 0.8341563	test: 0.8274285	best: 0.8313779 (126)	total: 20.3s	remaining: 49.5s
291:	learn: 0.8343836	test: 0.8272685	best: 0.8313779 (126)	total: 20.4s	remaining: 49.4s
292:	learn: 0.8344188	test: 0.8270558	best: 0.8313779 (126)	total: 20.4s	remaining: 49.2s
293:	learn: 0.83

377:	learn: 0.8428701	test: 0.8267161	best: 0.8313779 (126)	total: 25.7s	remaining: 42.3s
378:	learn: 0.8429866	test: 0.8264026	best: 0.8313779 (126)	total: 25.8s	remaining: 42.2s
379:	learn: 0.8431013	test: 0.8264142	best: 0.8313779 (126)	total: 25.8s	remaining: 42.1s
380:	learn: 0.8431554	test: 0.8263741	best: 0.8313779 (126)	total: 25.9s	remaining: 42s
381:	learn: 0.8432813	test: 0.8264360	best: 0.8313779 (126)	total: 25.9s	remaining: 41.9s
382:	learn: 0.8433466	test: 0.8265405	best: 0.8313779 (126)	total: 26s	remaining: 41.8s
383:	learn: 0.8434323	test: 0.8263985	best: 0.8313779 (126)	total: 26s	remaining: 41.7s
384:	learn: 0.8435031	test: 0.8264088	best: 0.8313779 (126)	total: 26.1s	remaining: 41.6s
385:	learn: 0.8436620	test: 0.8262163	best: 0.8313779 (126)	total: 26.1s	remaining: 41.6s
386:	learn: 0.8437863	test: 0.8261257	best: 0.8313779 (126)	total: 26.2s	remaining: 41.5s
387:	learn: 0.8437615	test: 0.8260673	best: 0.8313779 (126)	total: 26.2s	remaining: 41.4s
388:	learn: 0.84

469:	learn: 0.8506597	test: 0.8238766	best: 0.8313779 (126)	total: 31s	remaining: 34.9s
470:	learn: 0.8507181	test: 0.8238058	best: 0.8313779 (126)	total: 31s	remaining: 34.8s
471:	learn: 0.8507670	test: 0.8236823	best: 0.8313779 (126)	total: 31.1s	remaining: 34.7s
472:	learn: 0.8508184	test: 0.8232617	best: 0.8313779 (126)	total: 31.1s	remaining: 34.7s
473:	learn: 0.8508743	test: 0.8231900	best: 0.8313779 (126)	total: 31.2s	remaining: 34.6s
474:	learn: 0.8509276	test: 0.8234266	best: 0.8313779 (126)	total: 31.2s	remaining: 34.5s
475:	learn: 0.8510596	test: 0.8235294	best: 0.8313779 (126)	total: 31.2s	remaining: 34.4s
476:	learn: 0.8511225	test: 0.8235804	best: 0.8313779 (126)	total: 31.3s	remaining: 34.3s
477:	learn: 0.8512006	test: 0.8234680	best: 0.8313779 (126)	total: 31.4s	remaining: 34.3s
478:	learn: 0.8513104	test: 0.8232742	best: 0.8313779 (126)	total: 31.4s	remaining: 34.2s
479:	learn: 0.8513745	test: 0.8230789	best: 0.8313779 (126)	total: 31.5s	remaining: 34.1s
480:	learn: 0.

564:	learn: 0.8573285	test: 0.8213327	best: 0.8313779 (126)	total: 36.2s	remaining: 27.9s
565:	learn: 0.8573763	test: 0.8213952	best: 0.8313779 (126)	total: 36.2s	remaining: 27.8s
566:	learn: 0.8574014	test: 0.8215509	best: 0.8313779 (126)	total: 36.3s	remaining: 27.7s
567:	learn: 0.8574832	test: 0.8216029	best: 0.8313779 (126)	total: 36.3s	remaining: 27.6s
568:	learn: 0.8576287	test: 0.8218003	best: 0.8313779 (126)	total: 36.4s	remaining: 27.6s
569:	learn: 0.8576024	test: 0.8219144	best: 0.8313779 (126)	total: 36.5s	remaining: 27.5s
570:	learn: 0.8576484	test: 0.8217274	best: 0.8313779 (126)	total: 36.5s	remaining: 27.4s
571:	learn: 0.8578388	test: 0.8209511	best: 0.8313779 (126)	total: 36.6s	remaining: 27.4s
572:	learn: 0.8578378	test: 0.8209722	best: 0.8313779 (126)	total: 36.6s	remaining: 27.3s
573:	learn: 0.8579417	test: 0.8209101	best: 0.8313779 (126)	total: 36.7s	remaining: 27.2s
574:	learn: 0.8579949	test: 0.8208684	best: 0.8313779 (126)	total: 36.7s	remaining: 27.1s
575:	learn

657:	learn: 0.8624213	test: 0.8194460	best: 0.8313779 (126)	total: 41.1s	remaining: 21.4s
658:	learn: 0.8624651	test: 0.8194877	best: 0.8313779 (126)	total: 41.2s	remaining: 21.3s
659:	learn: 0.8625632	test: 0.8193313	best: 0.8313779 (126)	total: 41.2s	remaining: 21.2s
660:	learn: 0.8626929	test: 0.8194564	best: 0.8313779 (126)	total: 41.3s	remaining: 21.2s
661:	learn: 0.8627478	test: 0.8194251	best: 0.8313779 (126)	total: 41.3s	remaining: 21.1s
662:	learn: 0.8627590	test: 0.8196650	best: 0.8313779 (126)	total: 41.4s	remaining: 21s
663:	learn: 0.8628802	test: 0.8196442	best: 0.8313779 (126)	total: 41.4s	remaining: 21s
664:	learn: 0.8627534	test: 0.8197277	best: 0.8313779 (126)	total: 41.5s	remaining: 20.9s
665:	learn: 0.8628065	test: 0.8200401	best: 0.8313779 (126)	total: 41.6s	remaining: 20.9s
666:	learn: 0.8628593	test: 0.8198526	best: 0.8313779 (126)	total: 41.7s	remaining: 20.8s
667:	learn: 0.8629338	test: 0.8198839	best: 0.8313779 (126)	total: 41.7s	remaining: 20.7s
668:	learn: 0.

751:	learn: 0.8671418	test: 0.8200010	best: 0.8313779 (126)	total: 46.4s	remaining: 15.3s
752:	learn: 0.8671586	test: 0.8200323	best: 0.8313779 (126)	total: 46.5s	remaining: 15.2s
753:	learn: 0.8672598	test: 0.8195933	best: 0.8313779 (126)	total: 46.5s	remaining: 15.2s
754:	learn: 0.8673429	test: 0.8192796	best: 0.8313779 (126)	total: 46.6s	remaining: 15.1s
755:	learn: 0.8672737	test: 0.8193319	best: 0.8313779 (126)	total: 46.6s	remaining: 15s
756:	learn: 0.8673044	test: 0.8192378	best: 0.8313779 (126)	total: 46.6s	remaining: 15s
757:	learn: 0.8673102	test: 0.8191122	best: 0.8313779 (126)	total: 46.7s	remaining: 14.9s
758:	learn: 0.8674489	test: 0.8191332	best: 0.8313779 (126)	total: 46.7s	remaining: 14.8s
759:	learn: 0.8674780	test: 0.8194365	best: 0.8313779 (126)	total: 46.8s	remaining: 14.8s
760:	learn: 0.8676190	test: 0.8196143	best: 0.8313779 (126)	total: 46.8s	remaining: 14.7s
761:	learn: 0.8676342	test: 0.8197397	best: 0.8313779 (126)	total: 46.9s	remaining: 14.6s
762:	learn: 0.

844:	learn: 0.8709911	test: 0.8182199	best: 0.8313779 (126)	total: 51.2s	remaining: 9.39s
845:	learn: 0.8710617	test: 0.8183670	best: 0.8313779 (126)	total: 51.2s	remaining: 9.33s
846:	learn: 0.8712006	test: 0.8182304	best: 0.8313779 (126)	total: 51.3s	remaining: 9.26s
847:	learn: 0.8712362	test: 0.8182095	best: 0.8313779 (126)	total: 51.3s	remaining: 9.2s
848:	learn: 0.8713074	test: 0.8179259	best: 0.8313779 (126)	total: 51.4s	remaining: 9.14s
849:	learn: 0.8714022	test: 0.8179050	best: 0.8313779 (126)	total: 51.5s	remaining: 9.09s
850:	learn: 0.8714456	test: 0.8181150	best: 0.8313779 (126)	total: 51.6s	remaining: 9.03s
851:	learn: 0.8714905	test: 0.8182200	best: 0.8313779 (126)	total: 51.6s	remaining: 8.97s
852:	learn: 0.8715007	test: 0.8181466	best: 0.8313779 (126)	total: 51.7s	remaining: 8.9s
853:	learn: 0.8716051	test: 0.8182621	best: 0.8313779 (126)	total: 51.7s	remaining: 8.84s
854:	learn: 0.8715924	test: 0.8181360	best: 0.8313779 (126)	total: 51.8s	remaining: 8.78s
855:	learn: 

936:	learn: 0.8747863	test: 0.8170257	best: 0.8313779 (126)	total: 56.3s	remaining: 3.78s
937:	learn: 0.8747757	test: 0.8170891	best: 0.8313779 (126)	total: 56.3s	remaining: 3.72s
938:	learn: 0.8748191	test: 0.8171207	best: 0.8313779 (126)	total: 56.4s	remaining: 3.66s
939:	learn: 0.8748814	test: 0.8173529	best: 0.8313779 (126)	total: 56.4s	remaining: 3.6s
940:	learn: 0.8749291	test: 0.8171949	best: 0.8313779 (126)	total: 56.5s	remaining: 3.54s
941:	learn: 0.8749939	test: 0.8171844	best: 0.8313779 (126)	total: 56.6s	remaining: 3.48s
942:	learn: 0.8749822	test: 0.8171424	best: 0.8313779 (126)	total: 56.6s	remaining: 3.42s
943:	learn: 0.8750057	test: 0.8171000	best: 0.8313779 (126)	total: 56.7s	remaining: 3.37s
944:	learn: 0.8750609	test: 0.8165398	best: 0.8313779 (126)	total: 56.8s	remaining: 3.31s
945:	learn: 0.8751562	test: 0.8163597	best: 0.8313779 (126)	total: 56.9s	remaining: 3.25s
946:	learn: 0.8752142	test: 0.8164237	best: 0.8313779 (126)	total: 56.9s	remaining: 3.18s
947:	learn:

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Storing model...
Val accuracy 0.848
Val F1 score 0.597


In [16]:
second_features = first_features
for elem in tr_train.columns:
    if elem[:4] == 'ALPS':
        second_features += [elem]

In [None]:
tr_train, val_train, test = get_feature_predictions(tr_train, val_train, test, second_features)

In [15]:
ln = len(y_train)

model = CatBoostClassifier(iterations=1000, 
                           class_weights=[np.sum(y_train == 1)/ln, np.sum(y_train == 0)/ln], 
                           eval_metric="F1")
model.fit(
    X=dense_train_scaled, y=y_train,
    eval_set=(dense_val_scaled, y_val),
    plot=True
)

MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

Learning rate set to 0.146333
0:	learn: 0.7249110	test: 0.7177359	best: 0.7177359 (0)	total: 134ms	remaining: 2m 14s
1:	learn: 0.7296968	test: 0.7271774	best: 0.7271774 (1)	total: 186ms	remaining: 1m 32s
2:	learn: 0.7301426	test: 0.7288234	best: 0.7288234 (2)	total: 246ms	remaining: 1m 21s
3:	learn: 0.7273366	test: 0.7229414	best: 0.7288234 (2)	total: 295ms	remaining: 1m 13s
4:	learn: 0.7647213	test: 0.7499346	best: 0.7499346 (4)	total: 345ms	remaining: 1m 8s
5:	learn: 0.7414540	test: 0.7292910	best: 0.7499346 (4)	total: 398ms	remaining: 1m 5s
6:	learn: 0.7690456	test: 0.7415487	best: 0.7499346 (4)	total: 452ms	remaining: 1m 4s
7:	learn: 0.7737804	test: 0.7499083	best: 0.7499346 (4)	total: 500ms	remaining: 1m 1s
8:	learn: 0.7753841	test: 0.7624342	best: 0.7624342 (8)	total: 547ms	remaining: 1m
9:	learn: 0.7765974	test: 0.7603347	best: 0.7624342 (8)	total: 593ms	remaining: 58.7s
10:	learn: 0.7830730	test: 0.7756015	best: 0.7756015 (10)	total: 644ms	remaining: 57.9s
11:	learn: 0.7831170	

97:	learn: 0.8057137	test: 0.7982507	best: 0.7984141 (92)	total: 5.14s	remaining: 47.3s
98:	learn: 0.8059011	test: 0.7983373	best: 0.7984141 (92)	total: 5.19s	remaining: 47.2s
99:	learn: 0.8060625	test: 0.7988497	best: 0.7988497 (99)	total: 5.25s	remaining: 47.2s
100:	learn: 0.8061779	test: 0.7988418	best: 0.7988497 (99)	total: 5.31s	remaining: 47.3s
101:	learn: 0.8064393	test: 0.7997669	best: 0.7997669 (101)	total: 5.37s	remaining: 47.3s
102:	learn: 0.8065214	test: 0.7992468	best: 0.7997669 (101)	total: 5.42s	remaining: 47.2s
103:	learn: 0.8069721	test: 0.7993677	best: 0.7997669 (101)	total: 5.46s	remaining: 47.1s
104:	learn: 0.8070994	test: 0.7993677	best: 0.7997669 (101)	total: 5.51s	remaining: 47s
105:	learn: 0.8074017	test: 0.7992755	best: 0.7997669 (101)	total: 5.56s	remaining: 46.9s
106:	learn: 0.8075340	test: 0.7987965	best: 0.7997669 (101)	total: 5.61s	remaining: 46.8s
107:	learn: 0.8077326	test: 0.7986278	best: 0.7997669 (101)	total: 5.65s	remaining: 46.7s
108:	learn: 0.80773

189:	learn: 0.8230587	test: 0.8012359	best: 0.8018510 (185)	total: 12.6s	remaining: 53.8s
190:	learn: 0.8233306	test: 0.8011750	best: 0.8018510 (185)	total: 12.7s	remaining: 53.7s
191:	learn: 0.8235253	test: 0.8014592	best: 0.8018510 (185)	total: 12.7s	remaining: 53.5s
192:	learn: 0.8236870	test: 0.8014856	best: 0.8018510 (185)	total: 12.8s	remaining: 53.4s
193:	learn: 0.8238150	test: 0.8016529	best: 0.8018510 (185)	total: 12.8s	remaining: 53.2s
194:	learn: 0.8238469	test: 0.8017146	best: 0.8018510 (185)	total: 12.9s	remaining: 53.1s
195:	learn: 0.8239702	test: 0.8017306	best: 0.8018510 (185)	total: 12.9s	remaining: 53.1s
196:	learn: 0.8242250	test: 0.8021839	best: 0.8021839 (196)	total: 13s	remaining: 53s
197:	learn: 0.8243358	test: 0.8020854	best: 0.8021839 (196)	total: 13.1s	remaining: 52.9s
198:	learn: 0.8245684	test: 0.8016259	best: 0.8021839 (196)	total: 13.1s	remaining: 52.8s
199:	learn: 0.8247790	test: 0.8016717	best: 0.8021839 (196)	total: 13.2s	remaining: 52.7s
200:	learn: 0.

284:	learn: 0.8359809	test: 0.8027305	best: 0.8033637 (252)	total: 17.6s	remaining: 44.1s
285:	learn: 0.8360152	test: 0.8029130	best: 0.8033637 (252)	total: 17.6s	remaining: 44s
286:	learn: 0.8360818	test: 0.8028600	best: 0.8033637 (252)	total: 17.7s	remaining: 43.9s
287:	learn: 0.8363394	test: 0.8022325	best: 0.8033637 (252)	total: 17.7s	remaining: 43.8s
288:	learn: 0.8363606	test: 0.8025492	best: 0.8033637 (252)	total: 17.8s	remaining: 43.7s
289:	learn: 0.8364674	test: 0.8026244	best: 0.8033637 (252)	total: 17.8s	remaining: 43.7s
290:	learn: 0.8365808	test: 0.8030831	best: 0.8033637 (252)	total: 17.9s	remaining: 43.6s
291:	learn: 0.8367044	test: 0.8029977	best: 0.8033637 (252)	total: 18s	remaining: 43.5s
292:	learn: 0.8369695	test: 0.8032429	best: 0.8033637 (252)	total: 18s	remaining: 43.5s
293:	learn: 0.8370490	test: 0.8035514	best: 0.8035514 (293)	total: 18.1s	remaining: 43.4s
294:	learn: 0.8371910	test: 0.8032775	best: 0.8035514 (293)	total: 18.1s	remaining: 43.3s
295:	learn: 0.83

378:	learn: 0.8452500	test: 0.8019635	best: 0.8043387 (341)	total: 22.5s	remaining: 36.9s
379:	learn: 0.8453533	test: 0.8020825	best: 0.8043387 (341)	total: 22.6s	remaining: 36.9s
380:	learn: 0.8454844	test: 0.8016252	best: 0.8043387 (341)	total: 22.7s	remaining: 36.9s
381:	learn: 0.8454672	test: 0.8012107	best: 0.8043387 (341)	total: 22.7s	remaining: 36.8s
382:	learn: 0.8455563	test: 0.8011153	best: 0.8043387 (341)	total: 22.8s	remaining: 36.7s
383:	learn: 0.8456655	test: 0.8009905	best: 0.8043387 (341)	total: 22.8s	remaining: 36.6s
384:	learn: 0.8457735	test: 0.8013405	best: 0.8043387 (341)	total: 22.9s	remaining: 36.6s
385:	learn: 0.8458646	test: 0.8016357	best: 0.8043387 (341)	total: 22.9s	remaining: 36.5s
386:	learn: 0.8458948	test: 0.8016684	best: 0.8043387 (341)	total: 23s	remaining: 36.4s
387:	learn: 0.8458633	test: 0.8016170	best: 0.8043387 (341)	total: 23.1s	remaining: 36.4s
388:	learn: 0.8460058	test: 0.8015167	best: 0.8043387 (341)	total: 23.1s	remaining: 36.3s
389:	learn: 

470:	learn: 0.8529585	test: 0.7983531	best: 0.8043387 (341)	total: 27.8s	remaining: 31.2s
471:	learn: 0.8530286	test: 0.7988784	best: 0.8043387 (341)	total: 27.8s	remaining: 31.1s
472:	learn: 0.8530020	test: 0.7986404	best: 0.8043387 (341)	total: 27.9s	remaining: 31.1s
473:	learn: 0.8530847	test: 0.7988292	best: 0.8043387 (341)	total: 27.9s	remaining: 31s
474:	learn: 0.8532352	test: 0.7987772	best: 0.8043387 (341)	total: 28s	remaining: 30.9s
475:	learn: 0.8532484	test: 0.7988292	best: 0.8043387 (341)	total: 28s	remaining: 30.9s
476:	learn: 0.8533623	test: 0.7989713	best: 0.8043387 (341)	total: 28.1s	remaining: 30.8s
477:	learn: 0.8534339	test: 0.7986839	best: 0.8043387 (341)	total: 28.1s	remaining: 30.7s
478:	learn: 0.8534985	test: 0.7987441	best: 0.8043387 (341)	total: 28.2s	remaining: 30.6s
479:	learn: 0.8535925	test: 0.7986019	best: 0.8043387 (341)	total: 28.2s	remaining: 30.6s
480:	learn: 0.8536906	test: 0.7983392	best: 0.8043387 (341)	total: 28.3s	remaining: 30.5s
481:	learn: 0.85

566:	learn: 0.8588474	test: 0.7976915	best: 0.8043387 (341)	total: 32.8s	remaining: 25s
567:	learn: 0.8590288	test: 0.7973033	best: 0.8043387 (341)	total: 32.8s	remaining: 25s
568:	learn: 0.8590094	test: 0.7972015	best: 0.8043387 (341)	total: 32.9s	remaining: 24.9s
569:	learn: 0.8590441	test: 0.7971409	best: 0.8043387 (341)	total: 33s	remaining: 24.9s
570:	learn: 0.8590770	test: 0.7970114	best: 0.8043387 (341)	total: 33s	remaining: 24.8s
571:	learn: 0.8591968	test: 0.7970114	best: 0.8043387 (341)	total: 33.1s	remaining: 24.7s
572:	learn: 0.8592738	test: 0.7970885	best: 0.8043387 (341)	total: 33.1s	remaining: 24.7s
573:	learn: 0.8594469	test: 0.7972509	best: 0.8043387 (341)	total: 33.2s	remaining: 24.6s
574:	learn: 0.8594781	test: 0.7972262	best: 0.8043387 (341)	total: 33.2s	remaining: 24.6s
575:	learn: 0.8595411	test: 0.7970001	best: 0.8043387 (341)	total: 33.3s	remaining: 24.5s
576:	learn: 0.8596288	test: 0.7967687	best: 0.8043387 (341)	total: 33.3s	remaining: 24.4s
577:	learn: 0.8598

658:	learn: 0.8646599	test: 0.7942689	best: 0.8043387 (341)	total: 38.3s	remaining: 19.8s
659:	learn: 0.8647422	test: 0.7941100	best: 0.8043387 (341)	total: 38.4s	remaining: 19.8s
660:	learn: 0.8648383	test: 0.7944242	best: 0.8043387 (341)	total: 38.4s	remaining: 19.7s
661:	learn: 0.8649001	test: 0.7943265	best: 0.8043387 (341)	total: 38.5s	remaining: 19.6s
662:	learn: 0.8649372	test: 0.7942818	best: 0.8043387 (341)	total: 38.5s	remaining: 19.6s
663:	learn: 0.8650698	test: 0.7943513	best: 0.8043387 (341)	total: 38.6s	remaining: 19.5s
664:	learn: 0.8650963	test: 0.7942124	best: 0.8043387 (341)	total: 38.7s	remaining: 19.5s
665:	learn: 0.8651451	test: 0.7944619	best: 0.8043387 (341)	total: 38.7s	remaining: 19.4s
666:	learn: 0.8651360	test: 0.7940418	best: 0.8043387 (341)	total: 38.8s	remaining: 19.4s
667:	learn: 0.8650787	test: 0.7941854	best: 0.8043387 (341)	total: 38.8s	remaining: 19.3s
668:	learn: 0.8652861	test: 0.7941077	best: 0.8043387 (341)	total: 38.9s	remaining: 19.2s
669:	learn

753:	learn: 0.8698998	test: 0.7928587	best: 0.8043387 (341)	total: 44s	remaining: 14.3s
754:	learn: 0.8698918	test: 0.7926821	best: 0.8043387 (341)	total: 44s	remaining: 14.3s
755:	learn: 0.8699644	test: 0.7929286	best: 0.8043387 (341)	total: 44.1s	remaining: 14.2s
756:	learn: 0.8700425	test: 0.7930518	best: 0.8043387 (341)	total: 44.2s	remaining: 14.2s
757:	learn: 0.8700859	test: 0.7925295	best: 0.8043387 (341)	total: 44.2s	remaining: 14.1s
758:	learn: 0.8701134	test: 0.7924062	best: 0.8043387 (341)	total: 44.3s	remaining: 14.1s
759:	learn: 0.8701328	test: 0.7925055	best: 0.8043387 (341)	total: 44.3s	remaining: 14s
760:	learn: 0.8701515	test: 0.7923821	best: 0.8043387 (341)	total: 44.4s	remaining: 13.9s
761:	learn: 0.8702016	test: 0.7923949	best: 0.8043387 (341)	total: 44.4s	remaining: 13.9s
762:	learn: 0.8703506	test: 0.7927409	best: 0.8043387 (341)	total: 44.5s	remaining: 13.8s
763:	learn: 0.8703847	test: 0.7924731	best: 0.8043387 (341)	total: 44.5s	remaining: 13.8s
764:	learn: 0.87

846:	learn: 0.8736405	test: 0.7924828	best: 0.8043387 (341)	total: 49.1s	remaining: 8.87s
847:	learn: 0.8736194	test: 0.7925612	best: 0.8043387 (341)	total: 49.2s	remaining: 8.81s
848:	learn: 0.8737204	test: 0.7924625	best: 0.8043387 (341)	total: 49.2s	remaining: 8.75s
849:	learn: 0.8738031	test: 0.7922687	best: 0.8043387 (341)	total: 49.2s	remaining: 8.69s
850:	learn: 0.8738419	test: 0.7919676	best: 0.8043387 (341)	total: 49.3s	remaining: 8.63s
851:	learn: 0.8738823	test: 0.7923350	best: 0.8043387 (341)	total: 49.4s	remaining: 8.57s
852:	learn: 0.8739324	test: 0.7919721	best: 0.8043387 (341)	total: 49.4s	remaining: 8.51s
853:	learn: 0.8739867	test: 0.7920920	best: 0.8043387 (341)	total: 49.5s	remaining: 8.45s
854:	learn: 0.8740160	test: 0.7919765	best: 0.8043387 (341)	total: 49.5s	remaining: 8.39s
855:	learn: 0.8740417	test: 0.7920014	best: 0.8043387 (341)	total: 49.6s	remaining: 8.34s
856:	learn: 0.8740493	test: 0.7920882	best: 0.8043387 (341)	total: 49.6s	remaining: 8.28s
857:	learn

939:	learn: 0.8775483	test: 0.7903355	best: 0.8043387 (341)	total: 54.1s	remaining: 3.45s
940:	learn: 0.8775715	test: 0.7901861	best: 0.8043387 (341)	total: 54.1s	remaining: 3.4s
941:	learn: 0.8776488	test: 0.7902650	best: 0.8043387 (341)	total: 54.2s	remaining: 3.34s
942:	learn: 0.8776272	test: 0.7900202	best: 0.8043387 (341)	total: 54.3s	remaining: 3.28s
943:	learn: 0.8776709	test: 0.7898957	best: 0.8043387 (341)	total: 54.4s	remaining: 3.23s
944:	learn: 0.8777237	test: 0.7900119	best: 0.8043387 (341)	total: 54.5s	remaining: 3.17s
945:	learn: 0.8777342	test: 0.7898251	best: 0.8043387 (341)	total: 54.5s	remaining: 3.11s
946:	learn: 0.8777462	test: 0.7900949	best: 0.8043387 (341)	total: 54.6s	remaining: 3.05s
947:	learn: 0.8777487	test: 0.7899621	best: 0.8043387 (341)	total: 54.6s	remaining: 3s
948:	learn: 0.8777557	test: 0.7900410	best: 0.8043387 (341)	total: 54.7s	remaining: 2.94s
949:	learn: 0.8777920	test: 0.7898292	best: 0.8043387 (341)	total: 54.7s	remaining: 2.88s
950:	learn: 0.

<catboost.core.CatBoostClassifier at 0x7f6ebf0b8128>

In [16]:
from sklearn.metrics import accuracy_score, f1_score

tr_train['first_predictions'] = model.predict(dense_train_scaled)
val_train['first_predictions'] = model.predict(dense_val_scaled)
test['first_predictions'] = model.predict(dense_test_scaled)

model.save_model('catboost_models/first_features.pth')

accuracy_score(val_train['goal'], val_train['first_predictions']),\
    f1_score(val_train['goal'], val_train['first_predictions'])

(0.8290862647339426, 0.5629746212866418)

In [None]:
tr_train['first_predictions'] = model.predict(dense_train_scaled)

In [20]:
val_train['first_predictions'] = val_predictions
test['first_predictions'] = test_predictions

test.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,id,well id,"depth, m",bk,GZ1,GZ2,GZ3,GZ4,GZ5,GZ7,...,GZ5_mean_50,GZ5_std_50,NKTD_mean_50,NKTD_std_50,GZ2-1,GZ3-2,GZ4-3,GZ5-4,GZ7-5,first_predictions
0,1,47,2465.546,0.665198,0.259688,0.865866,0.526683,0.116218,0.46487,0.124486,...,0.685899,0.339916,0.407485,0.038407,0.606177,-0.339182,-0.410465,0.348652,-0.340383,1.0
1,2,47,2465.6355,0.115153,0.258287,0.161988,0.583114,0.122256,0.477795,0.118895,...,0.685899,0.339916,0.407485,0.038407,-0.096298,0.421125,-0.460858,0.355539,-0.3589,1.0
2,3,47,2465.725,0.155254,0.318849,0.125817,0.639544,0.128194,0.497212,0.111693,...,0.685899,0.339916,0.407485,0.038407,-0.193032,0.513727,-0.51135,0.369019,-0.38552,1.0
3,4,47,2465.8145,0.125315,0.474215,0.149127,0.638636,0.128239,0.478265,0.987464,...,0.685899,0.339916,0.407485,0.038407,-0.325088,0.489509,-0.510397,0.350026,0.509198,1.0
4,5,47,2465.904,0.953756,0.638341,0.172437,0.637663,0.113455,0.465892,0.858188,...,0.685899,0.339916,0.407485,0.038407,-0.465904,0.465226,-0.524208,0.352437,0.392296,0.0


In [11]:
second_features = first_features
for elem in tr_train.columns:
    if elem[:4] == 'ALPS':
        second_features += [elem]

dense_train = tr_train[tr_train.columns[1:]].drop('goal', axis=1).drop('lith', axis=1).filter(second_features)
dense_val = val_train[val_train.columns[1:]].drop('goal', axis=1).drop('lith', axis=1).filter(second_features)

min_max_scaler = preprocessing.MinMaxScaler()
min_max_scaler = min_max_scaler.fit(dense_train)

dense_train_scaled = min_max_scaler.transform(dense_train)
dense_val_scaled = min_max_scaler.transform(dense_val)

y_train = tr_train['goal']
y_val = val_train['goal']

dense_train_scaled.shape, dense_val_scaled.shape

((405234, 54), (58663, 54))

In [12]:
ln = len(y_train)

model = CatBoostClassifier(iterations=1000, 
                           class_weights=[np.sum(y_train == 1)/ln, np.sum(y_train == 0)/ln], 
                           eval_metric="F1")
model.fit(
    X=dense_train_scaled, y=y_train,
    eval_set=(dense_val_scaled, y_val),
    plot=True
)

MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

Learning rate set to 0.146575
0:	learn: 0.7707766	test: 0.8080744	best: 0.8080744 (0)	total: 120ms	remaining: 2m
1:	learn: 0.7698759	test: 0.8273868	best: 0.8273868 (1)	total: 217ms	remaining: 1m 48s
2:	learn: 0.7820310	test: 0.8311164	best: 0.8311164 (2)	total: 323ms	remaining: 1m 47s
3:	learn: 0.7990130	test: 0.8326889	best: 0.8326889 (3)	total: 426ms	remaining: 1m 46s
4:	learn: 0.7985074	test: 0.8303417	best: 0.8326889 (3)	total: 526ms	remaining: 1m 44s
5:	learn: 0.8036617	test: 0.8342548	best: 0.8342548 (5)	total: 629ms	remaining: 1m 44s
6:	learn: 0.8108756	test: 0.8331964	best: 0.8342548 (5)	total: 719ms	remaining: 1m 41s
7:	learn: 0.8146421	test: 0.8383584	best: 0.8383584 (7)	total: 827ms	remaining: 1m 42s
8:	learn: 0.8185405	test: 0.8373225	best: 0.8383584 (7)	total: 934ms	remaining: 1m 42s
9:	learn: 0.8206427	test: 0.8400089	best: 0.8400089 (9)	total: 1.02s	remaining: 1m 40s
10:	learn: 0.8218598	test: 0.8420424	best: 0.8420424 (10)	total: 1.12s	remaining: 1m 40s
11:	learn: 0.82

93:	learn: 0.8727304	test: 0.8720603	best: 0.8722242 (89)	total: 9.77s	remaining: 1m 34s
94:	learn: 0.8728258	test: 0.8721423	best: 0.8722242 (89)	total: 9.85s	remaining: 1m 33s
95:	learn: 0.8730294	test: 0.8723085	best: 0.8723085 (95)	total: 9.94s	remaining: 1m 33s
96:	learn: 0.8733018	test: 0.8732814	best: 0.8732814 (96)	total: 10s	remaining: 1m 33s
97:	learn: 0.8733484	test: 0.8726718	best: 0.8732814 (96)	total: 10.2s	remaining: 1m 33s
98:	learn: 0.8736075	test: 0.8728785	best: 0.8732814 (96)	total: 10.3s	remaining: 1m 33s
99:	learn: 0.8739386	test: 0.8732084	best: 0.8732814 (96)	total: 10.4s	remaining: 1m 33s
100:	learn: 0.8742152	test: 0.8729200	best: 0.8732814 (96)	total: 10.5s	remaining: 1m 33s
101:	learn: 0.8744973	test: 0.8726605	best: 0.8732814 (96)	total: 10.5s	remaining: 1m 32s
102:	learn: 0.8749796	test: 0.8727388	best: 0.8732814 (96)	total: 10.6s	remaining: 1m 32s
103:	learn: 0.8749565	test: 0.8731286	best: 0.8732814 (96)	total: 10.7s	remaining: 1m 32s
104:	learn: 0.87507

186:	learn: 0.8891677	test: 0.8706179	best: 0.8732814 (96)	total: 18.4s	remaining: 1m 20s
187:	learn: 0.8893745	test: 0.8711240	best: 0.8732814 (96)	total: 18.5s	remaining: 1m 19s
188:	learn: 0.8894029	test: 0.8707892	best: 0.8732814 (96)	total: 18.6s	remaining: 1m 19s
189:	learn: 0.8897427	test: 0.8700052	best: 0.8732814 (96)	total: 18.7s	remaining: 1m 19s
190:	learn: 0.8898727	test: 0.8701301	best: 0.8732814 (96)	total: 18.8s	remaining: 1m 19s
191:	learn: 0.8900248	test: 0.8698547	best: 0.8732814 (96)	total: 18.9s	remaining: 1m 19s
192:	learn: 0.8901208	test: 0.8697719	best: 0.8732814 (96)	total: 19s	remaining: 1m 19s
193:	learn: 0.8902470	test: 0.8698141	best: 0.8732814 (96)	total: 19.1s	remaining: 1m 19s
194:	learn: 0.8904084	test: 0.8700725	best: 0.8732814 (96)	total: 19.2s	remaining: 1m 19s
195:	learn: 0.8906227	test: 0.8702668	best: 0.8732814 (96)	total: 19.3s	remaining: 1m 19s
196:	learn: 0.8907851	test: 0.8702332	best: 0.8732814 (96)	total: 19.3s	remaining: 1m 18s
197:	learn: 

279:	learn: 0.8991872	test: 0.8694402	best: 0.8732814 (96)	total: 27s	remaining: 1m 9s
280:	learn: 0.8993383	test: 0.8697431	best: 0.8732814 (96)	total: 27.1s	remaining: 1m 9s
281:	learn: 0.8994053	test: 0.8699813	best: 0.8732814 (96)	total: 27.2s	remaining: 1m 9s
282:	learn: 0.8995610	test: 0.8699727	best: 0.8732814 (96)	total: 27.3s	remaining: 1m 9s
283:	learn: 0.8997209	test: 0.8696785	best: 0.8732814 (96)	total: 27.4s	remaining: 1m 9s
284:	learn: 0.8998217	test: 0.8700850	best: 0.8732814 (96)	total: 27.5s	remaining: 1m 8s
285:	learn: 0.8999068	test: 0.8699506	best: 0.8732814 (96)	total: 27.6s	remaining: 1m 8s
286:	learn: 0.8999653	test: 0.8693264	best: 0.8732814 (96)	total: 27.7s	remaining: 1m 8s
287:	learn: 0.9000523	test: 0.8691903	best: 0.8732814 (96)	total: 27.8s	remaining: 1m 8s
288:	learn: 0.9002274	test: 0.8691562	best: 0.8732814 (96)	total: 27.9s	remaining: 1m 8s
289:	learn: 0.9003066	test: 0.8692941	best: 0.8732814 (96)	total: 28s	remaining: 1m 8s
290:	learn: 0.9003709	tes

373:	learn: 0.9068824	test: 0.8672057	best: 0.8732814 (96)	total: 36s	remaining: 1m
374:	learn: 0.9068648	test: 0.8672877	best: 0.8732814 (96)	total: 36.1s	remaining: 1m
375:	learn: 0.9069457	test: 0.8673615	best: 0.8732814 (96)	total: 36.3s	remaining: 1m
376:	learn: 0.9070099	test: 0.8673082	best: 0.8732814 (96)	total: 36.4s	remaining: 1m
377:	learn: 0.9071065	test: 0.8673267	best: 0.8732814 (96)	total: 36.5s	remaining: 1m
378:	learn: 0.9072489	test: 0.8671258	best: 0.8732814 (96)	total: 36.6s	remaining: 59.9s
379:	learn: 0.9072646	test: 0.8673574	best: 0.8732814 (96)	total: 36.6s	remaining: 59.8s
380:	learn: 0.9073724	test: 0.8673267	best: 0.8732814 (96)	total: 36.8s	remaining: 59.8s
381:	learn: 0.9074789	test: 0.8675479	best: 0.8732814 (96)	total: 37s	remaining: 59.8s
382:	learn: 0.9075113	test: 0.8674107	best: 0.8732814 (96)	total: 37.1s	remaining: 59.8s
383:	learn: 0.9075893	test: 0.8671770	best: 0.8732814 (96)	total: 37.3s	remaining: 59.9s
384:	learn: 0.9076628	test: 0.8672693	be

467:	learn: 0.9129692	test: 0.8671211	best: 0.8732814 (96)	total: 45.2s	remaining: 51.4s
468:	learn: 0.9130705	test: 0.8673333	best: 0.8732814 (96)	total: 45.3s	remaining: 51.2s
469:	learn: 0.9131851	test: 0.8671211	best: 0.8732814 (96)	total: 45.3s	remaining: 51.1s
470:	learn: 0.9132133	test: 0.8669295	best: 0.8732814 (96)	total: 45.4s	remaining: 51s
471:	learn: 0.9132942	test: 0.8668450	best: 0.8732814 (96)	total: 45.5s	remaining: 50.9s
472:	learn: 0.9133436	test: 0.8668017	best: 0.8732814 (96)	total: 45.6s	remaining: 50.8s
473:	learn: 0.9133846	test: 0.8671190	best: 0.8732814 (96)	total: 45.7s	remaining: 50.7s
474:	learn: 0.9133905	test: 0.8673086	best: 0.8732814 (96)	total: 45.8s	remaining: 50.6s
475:	learn: 0.9134458	test: 0.8673065	best: 0.8732814 (96)	total: 45.9s	remaining: 50.6s
476:	learn: 0.9134381	test: 0.8674157	best: 0.8732814 (96)	total: 46s	remaining: 50.5s
477:	learn: 0.9134587	test: 0.8671087	best: 0.8732814 (96)	total: 46.1s	remaining: 50.3s
478:	learn: 0.9135216	tes

561:	learn: 0.9178631	test: 0.8646764	best: 0.8732814 (96)	total: 53.8s	remaining: 41.9s
562:	learn: 0.9178817	test: 0.8644006	best: 0.8732814 (96)	total: 53.9s	remaining: 41.8s
563:	learn: 0.9179131	test: 0.8648720	best: 0.8732814 (96)	total: 53.9s	remaining: 41.7s
564:	learn: 0.9180053	test: 0.8648386	best: 0.8732814 (96)	total: 54s	remaining: 41.6s
565:	learn: 0.9180142	test: 0.8649856	best: 0.8732814 (96)	total: 54.1s	remaining: 41.5s
566:	learn: 0.9181442	test: 0.8650142	best: 0.8732814 (96)	total: 54.2s	remaining: 41.4s
567:	learn: 0.9182383	test: 0.8644266	best: 0.8732814 (96)	total: 54.3s	remaining: 41.3s
568:	learn: 0.9182882	test: 0.8641918	best: 0.8732814 (96)	total: 54.4s	remaining: 41.2s
569:	learn: 0.9183144	test: 0.8645117	best: 0.8732814 (96)	total: 54.5s	remaining: 41.1s
570:	learn: 0.9183727	test: 0.8649037	best: 0.8732814 (96)	total: 54.6s	remaining: 41s
571:	learn: 0.9184150	test: 0.8644732	best: 0.8732814 (96)	total: 54.7s	remaining: 40.9s
572:	learn: 0.9185071	tes

655:	learn: 0.9221093	test: 0.8625595	best: 0.8732814 (96)	total: 1m 2s	remaining: 32.8s
656:	learn: 0.9221471	test: 0.8626762	best: 0.8732814 (96)	total: 1m 2s	remaining: 32.7s
657:	learn: 0.9221995	test: 0.8627488	best: 0.8732814 (96)	total: 1m 2s	remaining: 32.6s
658:	learn: 0.9222246	test: 0.8622346	best: 0.8732814 (96)	total: 1m 2s	remaining: 32.5s
659:	learn: 0.9222411	test: 0.8624268	best: 0.8732814 (96)	total: 1m 2s	remaining: 32.4s
660:	learn: 0.9222783	test: 0.8622761	best: 0.8732814 (96)	total: 1m 3s	remaining: 32.3s
661:	learn: 0.9223289	test: 0.8623665	best: 0.8732814 (96)	total: 1m 3s	remaining: 32.2s
662:	learn: 0.9224235	test: 0.8625172	best: 0.8732814 (96)	total: 1m 3s	remaining: 32.1s
663:	learn: 0.9226214	test: 0.8625822	best: 0.8732814 (96)	total: 1m 3s	remaining: 32s
664:	learn: 0.9226056	test: 0.8627094	best: 0.8732814 (96)	total: 1m 3s	remaining: 32s
665:	learn: 0.9227031	test: 0.8626679	best: 0.8732814 (96)	total: 1m 3s	remaining: 31.9s
666:	learn: 0.9227306	tes

749:	learn: 0.9259176	test: 0.8612980	best: 0.8732814 (96)	total: 1m 11s	remaining: 23.8s
750:	learn: 0.9259504	test: 0.8611433	best: 0.8732814 (96)	total: 1m 11s	remaining: 23.7s
751:	learn: 0.9260345	test: 0.8610467	best: 0.8732814 (96)	total: 1m 11s	remaining: 23.6s
752:	learn: 0.9260278	test: 0.8612638	best: 0.8732814 (96)	total: 1m 11s	remaining: 23.5s
753:	learn: 0.9260330	test: 0.8613054	best: 0.8732814 (96)	total: 1m 11s	remaining: 23.4s
754:	learn: 0.9260485	test: 0.8611849	best: 0.8732814 (96)	total: 1m 11s	remaining: 23.3s
755:	learn: 0.9260828	test: 0.8612608	best: 0.8732814 (96)	total: 1m 11s	remaining: 23.2s
756:	learn: 0.9260950	test: 0.8612712	best: 0.8732814 (96)	total: 1m 11s	remaining: 23.1s
757:	learn: 0.9261048	test: 0.8614927	best: 0.8732814 (96)	total: 1m 11s	remaining: 23s
758:	learn: 0.9260891	test: 0.8613440	best: 0.8732814 (96)	total: 1m 12s	remaining: 22.9s
759:	learn: 0.9260364	test: 0.8613618	best: 0.8732814 (96)	total: 1m 12s	remaining: 22.8s
760:	learn: 

841:	learn: 0.9290717	test: 0.8585950	best: 0.8732814 (96)	total: 1m 20s	remaining: 15s
842:	learn: 0.9290942	test: 0.8584177	best: 0.8732814 (96)	total: 1m 20s	remaining: 14.9s
843:	learn: 0.9291292	test: 0.8580769	best: 0.8732814 (96)	total: 1m 20s	remaining: 14.8s
844:	learn: 0.9291500	test: 0.8581534	best: 0.8732814 (96)	total: 1m 20s	remaining: 14.7s
845:	learn: 0.9291858	test: 0.8581360	best: 0.8732814 (96)	total: 1m 20s	remaining: 14.6s
846:	learn: 0.9291789	test: 0.8581986	best: 0.8732814 (96)	total: 1m 20s	remaining: 14.5s
847:	learn: 0.9292429	test: 0.8581152	best: 0.8732814 (96)	total: 1m 20s	remaining: 14.4s
848:	learn: 0.9292204	test: 0.8579308	best: 0.8732814 (96)	total: 1m 20s	remaining: 14.4s
849:	learn: 0.9292446	test: 0.8578751	best: 0.8732814 (96)	total: 1m 20s	remaining: 14.3s
850:	learn: 0.9292719	test: 0.8578090	best: 0.8732814 (96)	total: 1m 20s	remaining: 14.2s
851:	learn: 0.9293039	test: 0.8580143	best: 0.8732814 (96)	total: 1m 20s	remaining: 14.1s
852:	learn: 

933:	learn: 0.9317654	test: 0.8568126	best: 0.8732814 (96)	total: 1m 28s	remaining: 6.26s
934:	learn: 0.9318039	test: 0.8564736	best: 0.8732814 (96)	total: 1m 28s	remaining: 6.17s
935:	learn: 0.9318030	test: 0.8562123	best: 0.8732814 (96)	total: 1m 28s	remaining: 6.07s
936:	learn: 0.9318130	test: 0.8559815	best: 0.8732814 (96)	total: 1m 28s	remaining: 5.98s
937:	learn: 0.9317923	test: 0.8559710	best: 0.8732814 (96)	total: 1m 29s	remaining: 5.88s
938:	learn: 0.9318104	test: 0.8562019	best: 0.8732814 (96)	total: 1m 29s	remaining: 5.79s
939:	learn: 0.9318774	test: 0.8563766	best: 0.8732814 (96)	total: 1m 29s	remaining: 5.69s
940:	learn: 0.9319574	test: 0.8561667	best: 0.8732814 (96)	total: 1m 29s	remaining: 5.6s
941:	learn: 0.9319670	test: 0.8558693	best: 0.8732814 (96)	total: 1m 29s	remaining: 5.5s
942:	learn: 0.9320085	test: 0.8556840	best: 0.8732814 (96)	total: 1m 29s	remaining: 5.41s
943:	learn: 0.9320038	test: 0.8555652	best: 0.8732814 (96)	total: 1m 29s	remaining: 5.32s
944:	learn: 

<catboost.core.CatBoostClassifier at 0x7fc320d089e8>

In [13]:
predictions = model.predict(dense_val_scaled)
model.save_model('catboost_models/second_features.pth')

accuracy_score(y_val, predictions), f1_score(y_val, predictions)

(0.8737534732284404, 0.6492706952074256)

In [14]:
third_features = second_features
for elem in tr_train.columns:
    if elem[:2] == 'bk':
        third_features += [elem]

dense_train = tr_train[tr_train.columns[1:]].drop('goal', axis=1).drop('lith', axis=1).filter(third_features)
dense_val = val_train[val_train.columns[1:]].drop('goal', axis=1).drop('lith', axis=1).filter(third_features)

min_max_scaler = preprocessing.MinMaxScaler()
min_max_scaler = min_max_scaler.fit(dense_train)

dense_train_scaled = min_max_scaler.transform(dense_train)
dense_val_scaled = min_max_scaler.transform(dense_val)

y_train = tr_train['goal']
y_val = val_train['goal']

dense_train_scaled.shape, dense_val_scaled.shape

((405234, 61), (58663, 61))

In [15]:
ln = len(y_train)

model = CatBoostClassifier(iterations=1000, 
                           class_weights=[np.sum(y_train == 1)/ln, np.sum(y_train == 0)/ln], 
                           eval_metric="F1")
model.fit(
    X=dense_train_scaled, y=y_train,
    eval_set=(dense_val_scaled, y_val),
    plot=True
)

MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

Learning rate set to 0.146575
0:	learn: 0.7752477	test: 0.8118148	best: 0.8118148 (0)	total: 297ms	remaining: 4m 56s
1:	learn: 0.7830653	test: 0.8267629	best: 0.8267629 (1)	total: 397ms	remaining: 3m 18s
2:	learn: 0.7884314	test: 0.8294661	best: 0.8294661 (2)	total: 500ms	remaining: 2m 46s
3:	learn: 0.7914587	test: 0.8320040	best: 0.8320040 (3)	total: 646ms	remaining: 2m 40s
4:	learn: 0.8057784	test: 0.8360119	best: 0.8360119 (4)	total: 817ms	remaining: 2m 42s
5:	learn: 0.8130187	test: 0.8365845	best: 0.8365845 (5)	total: 1.01s	remaining: 2m 48s
6:	learn: 0.8168745	test: 0.8432890	best: 0.8432890 (6)	total: 1.15s	remaining: 2m 42s
7:	learn: 0.8206200	test: 0.8417635	best: 0.8432890 (6)	total: 1.24s	remaining: 2m 34s
8:	learn: 0.8228964	test: 0.8413816	best: 0.8432890 (6)	total: 1.35s	remaining: 2m 28s
9:	learn: 0.8262057	test: 0.8466956	best: 0.8466956 (9)	total: 1.5s	remaining: 2m 29s
10:	learn: 0.8283879	test: 0.8476568	best: 0.8476568 (10)	total: 1.63s	remaining: 2m 26s
11:	learn: 0

94:	learn: 0.8823661	test: 0.8811816	best: 0.8811816 (94)	total: 10.2s	remaining: 1m 37s
95:	learn: 0.8824649	test: 0.8812019	best: 0.8812019 (95)	total: 10.3s	remaining: 1m 37s
96:	learn: 0.8825748	test: 0.8807264	best: 0.8812019 (95)	total: 10.4s	remaining: 1m 36s
97:	learn: 0.8828985	test: 0.8811412	best: 0.8812019 (95)	total: 10.5s	remaining: 1m 36s
98:	learn: 0.8832896	test: 0.8816164	best: 0.8816164 (98)	total: 10.6s	remaining: 1m 36s
99:	learn: 0.8835373	test: 0.8815354	best: 0.8816164 (98)	total: 10.7s	remaining: 1m 36s
100:	learn: 0.8837892	test: 0.8820004	best: 0.8820004 (100)	total: 10.8s	remaining: 1m 36s
101:	learn: 0.8838031	test: 0.8817782	best: 0.8820004 (100)	total: 10.9s	remaining: 1m 35s
102:	learn: 0.8838223	test: 0.8818390	best: 0.8820004 (100)	total: 11s	remaining: 1m 35s
103:	learn: 0.8839715	test: 0.8816570	best: 0.8820004 (100)	total: 11.1s	remaining: 1m 35s
104:	learn: 0.8840831	test: 0.8818595	best: 0.8820004 (100)	total: 11.2s	remaining: 1m 35s
105:	learn: 0

186:	learn: 0.8975773	test: 0.8836726	best: 0.8841388 (175)	total: 19.1s	remaining: 1m 22s
187:	learn: 0.8975703	test: 0.8837340	best: 0.8841388 (175)	total: 19.2s	remaining: 1m 22s
188:	learn: 0.8977347	test: 0.8832454	best: 0.8841388 (175)	total: 19.3s	remaining: 1m 22s
189:	learn: 0.8978653	test: 0.8832041	best: 0.8841388 (175)	total: 19.4s	remaining: 1m 22s
190:	learn: 0.8979116	test: 0.8832962	best: 0.8841388 (175)	total: 19.5s	remaining: 1m 22s
191:	learn: 0.8980285	test: 0.8832351	best: 0.8841388 (175)	total: 19.6s	remaining: 1m 22s
192:	learn: 0.8980775	test: 0.8832969	best: 0.8841388 (175)	total: 19.7s	remaining: 1m 22s
193:	learn: 0.8984957	test: 0.8832873	best: 0.8841388 (175)	total: 19.8s	remaining: 1m 22s
194:	learn: 0.8986045	test: 0.8830611	best: 0.8841388 (175)	total: 19.9s	remaining: 1m 22s
195:	learn: 0.8987618	test: 0.8828366	best: 0.8841388 (175)	total: 20s	remaining: 1m 21s
196:	learn: 0.8988838	test: 0.8826220	best: 0.8841388 (175)	total: 20.1s	remaining: 1m 21s
1

277:	learn: 0.9064224	test: 0.8839995	best: 0.8844889 (267)	total: 28.3s	remaining: 1m 13s
278:	learn: 0.9065886	test: 0.8839479	best: 0.8844889 (267)	total: 28.4s	remaining: 1m 13s
279:	learn: 0.9067367	test: 0.8837838	best: 0.8844889 (267)	total: 28.5s	remaining: 1m 13s
280:	learn: 0.9068244	test: 0.8837838	best: 0.8844889 (267)	total: 28.6s	remaining: 1m 13s
281:	learn: 0.9068503	test: 0.8835271	best: 0.8844889 (267)	total: 28.7s	remaining: 1m 12s
282:	learn: 0.9069830	test: 0.8834447	best: 0.8844889 (267)	total: 28.8s	remaining: 1m 12s
283:	learn: 0.9069733	test: 0.8834244	best: 0.8844889 (267)	total: 28.8s	remaining: 1m 12s
284:	learn: 0.9069784	test: 0.8839178	best: 0.8844889 (267)	total: 29s	remaining: 1m 12s
285:	learn: 0.9070587	test: 0.8840614	best: 0.8844889 (267)	total: 29.1s	remaining: 1m 12s
286:	learn: 0.9073958	test: 0.8838976	best: 0.8844889 (267)	total: 29.1s	remaining: 1m 12s
287:	learn: 0.9074676	test: 0.8839182	best: 0.8844889 (267)	total: 29.2s	remaining: 1m 12s
2

369:	learn: 0.9133232	test: 0.8855526	best: 0.8864935 (342)	total: 37.4s	remaining: 1m 3s
370:	learn: 0.9133542	test: 0.8856268	best: 0.8864935 (342)	total: 37.6s	remaining: 1m 3s
371:	learn: 0.9133475	test: 0.8856469	best: 0.8864935 (342)	total: 37.7s	remaining: 1m 3s
372:	learn: 0.9134453	test: 0.8854114	best: 0.8864935 (342)	total: 37.8s	remaining: 1m 3s
373:	learn: 0.9135297	test: 0.8852356	best: 0.8864935 (342)	total: 37.8s	remaining: 1m 3s
374:	learn: 0.9135971	test: 0.8852356	best: 0.8864935 (342)	total: 37.9s	remaining: 1m 3s
375:	learn: 0.9136413	test: 0.8855350	best: 0.8864935 (342)	total: 38s	remaining: 1m 3s
376:	learn: 0.9137074	test: 0.8855045	best: 0.8864935 (342)	total: 38.2s	remaining: 1m 3s
377:	learn: 0.9138357	test: 0.8857121	best: 0.8864935 (342)	total: 38.3s	remaining: 1m 2s
378:	learn: 0.9138873	test: 0.8856704	best: 0.8864935 (342)	total: 38.3s	remaining: 1m 2s
379:	learn: 0.9139381	test: 0.8854335	best: 0.8864935 (342)	total: 38.5s	remaining: 1m 2s
380:	learn: 

462:	learn: 0.9188392	test: 0.8852767	best: 0.8864935 (342)	total: 46.9s	remaining: 54.4s
463:	learn: 0.9188515	test: 0.8852047	best: 0.8864935 (342)	total: 47s	remaining: 54.3s
464:	learn: 0.9188656	test: 0.8850797	best: 0.8864935 (342)	total: 47.1s	remaining: 54.2s
465:	learn: 0.9188797	test: 0.8851111	best: 0.8864935 (342)	total: 47.2s	remaining: 54.1s
466:	learn: 0.9190004	test: 0.8850797	best: 0.8864935 (342)	total: 47.3s	remaining: 54s
467:	learn: 0.9190281	test: 0.8851425	best: 0.8864935 (342)	total: 47.4s	remaining: 53.9s
468:	learn: 0.9191308	test: 0.8852035	best: 0.8864935 (342)	total: 47.5s	remaining: 53.8s
469:	learn: 0.9191744	test: 0.8848513	best: 0.8864935 (342)	total: 47.6s	remaining: 53.6s
470:	learn: 0.9191635	test: 0.8847896	best: 0.8864935 (342)	total: 47.7s	remaining: 53.5s
471:	learn: 0.9192494	test: 0.8849036	best: 0.8864935 (342)	total: 47.8s	remaining: 53.4s
472:	learn: 0.9192810	test: 0.8845191	best: 0.8864935 (342)	total: 47.9s	remaining: 53.4s
473:	learn: 0.

556:	learn: 0.9235892	test: 0.8823071	best: 0.8864935 (342)	total: 56.1s	remaining: 44.6s
557:	learn: 0.9236176	test: 0.8823384	best: 0.8864935 (342)	total: 56.2s	remaining: 44.5s
558:	learn: 0.9236625	test: 0.8825271	best: 0.8864935 (342)	total: 56.3s	remaining: 44.4s
559:	learn: 0.9236845	test: 0.8825483	best: 0.8864935 (342)	total: 56.4s	remaining: 44.3s
560:	learn: 0.9236883	test: 0.8825483	best: 0.8864935 (342)	total: 56.5s	remaining: 44.2s
561:	learn: 0.9236963	test: 0.8824540	best: 0.8864935 (342)	total: 56.6s	remaining: 44.1s
562:	learn: 0.9237474	test: 0.8823596	best: 0.8864935 (342)	total: 56.7s	remaining: 44s
563:	learn: 0.9238321	test: 0.8821919	best: 0.8864935 (342)	total: 56.8s	remaining: 43.9s
564:	learn: 0.9238130	test: 0.8821710	best: 0.8864935 (342)	total: 56.9s	remaining: 43.8s
565:	learn: 0.9239097	test: 0.8821608	best: 0.8864935 (342)	total: 57s	remaining: 43.7s
566:	learn: 0.9239265	test: 0.8822448	best: 0.8864935 (342)	total: 57.1s	remaining: 43.6s
567:	learn: 0.

648:	learn: 0.9274554	test: 0.8816379	best: 0.8864935 (342)	total: 1m 5s	remaining: 35.3s
649:	learn: 0.9275201	test: 0.8813536	best: 0.8864935 (342)	total: 1m 5s	remaining: 35.2s
650:	learn: 0.9275779	test: 0.8814905	best: 0.8864935 (342)	total: 1m 5s	remaining: 35.1s
651:	learn: 0.9276050	test: 0.8813114	best: 0.8864935 (342)	total: 1m 5s	remaining: 35s
652:	learn: 0.9276583	test: 0.8815853	best: 0.8864935 (342)	total: 1m 5s	remaining: 34.9s
653:	learn: 0.9276892	test: 0.8816485	best: 0.8864935 (342)	total: 1m 5s	remaining: 34.8s
654:	learn: 0.9277017	test: 0.8819117	best: 0.8864935 (342)	total: 1m 5s	remaining: 34.6s
655:	learn: 0.9277090	test: 0.8820172	best: 0.8864935 (342)	total: 1m 5s	remaining: 34.6s
656:	learn: 0.9277794	test: 0.8819646	best: 0.8864935 (342)	total: 1m 5s	remaining: 34.4s
657:	learn: 0.9278052	test: 0.8817750	best: 0.8864935 (342)	total: 1m 6s	remaining: 34.3s
658:	learn: 0.9278469	test: 0.8817014	best: 0.8864935 (342)	total: 1m 6s	remaining: 34.2s
659:	learn: 

740:	learn: 0.9308195	test: 0.8800319	best: 0.8864935 (342)	total: 1m 13s	remaining: 25.8s
741:	learn: 0.9308043	test: 0.8796927	best: 0.8864935 (342)	total: 1m 14s	remaining: 25.7s
742:	learn: 0.9308330	test: 0.8795231	best: 0.8864935 (342)	total: 1m 14s	remaining: 25.6s
743:	learn: 0.9309089	test: 0.8796610	best: 0.8864935 (342)	total: 1m 14s	remaining: 25.5s
744:	learn: 0.9308798	test: 0.8798199	best: 0.8864935 (342)	total: 1m 14s	remaining: 25.4s
745:	learn: 0.9309606	test: 0.8801377	best: 0.8864935 (342)	total: 1m 14s	remaining: 25.3s
746:	learn: 0.9310049	test: 0.8801060	best: 0.8864935 (342)	total: 1m 14s	remaining: 25.2s
747:	learn: 0.9310582	test: 0.8804977	best: 0.8864935 (342)	total: 1m 14s	remaining: 25.1s
748:	learn: 0.9310335	test: 0.8805824	best: 0.8864935 (342)	total: 1m 14s	remaining: 25s
749:	learn: 0.9311407	test: 0.8806035	best: 0.8864935 (342)	total: 1m 14s	remaining: 24.9s
750:	learn: 0.9311676	test: 0.8804659	best: 0.8864935 (342)	total: 1m 14s	remaining: 24.8s
7

833:	learn: 0.9338112	test: 0.8780519	best: 0.8864935 (342)	total: 1m 23s	remaining: 16.5s
834:	learn: 0.9338082	test: 0.8781799	best: 0.8864935 (342)	total: 1m 23s	remaining: 16.4s
835:	learn: 0.9338616	test: 0.8778816	best: 0.8864935 (342)	total: 1m 23s	remaining: 16.3s
836:	learn: 0.9338369	test: 0.8779774	best: 0.8864935 (342)	total: 1m 23s	remaining: 16.2s
837:	learn: 0.9338551	test: 0.8780091	best: 0.8864935 (342)	total: 1m 23s	remaining: 16.1s
838:	learn: 0.9338890	test: 0.8777218	best: 0.8864935 (342)	total: 1m 23s	remaining: 16s
839:	learn: 0.9339524	test: 0.8778710	best: 0.8864935 (342)	total: 1m 23s	remaining: 15.9s
840:	learn: 0.9339593	test: 0.8780202	best: 0.8864935 (342)	total: 1m 23s	remaining: 15.8s
841:	learn: 0.9339554	test: 0.8778705	best: 0.8864935 (342)	total: 1m 23s	remaining: 15.7s
842:	learn: 0.9340348	test: 0.8779769	best: 0.8864935 (342)	total: 1m 23s	remaining: 15.6s
843:	learn: 0.9340150	test: 0.8779235	best: 0.8864935 (342)	total: 1m 23s	remaining: 15.5s
8

925:	learn: 0.9363964	test: 0.8768156	best: 0.8864935 (342)	total: 1m 32s	remaining: 7.36s
926:	learn: 0.9364363	test: 0.8767513	best: 0.8864935 (342)	total: 1m 32s	remaining: 7.26s
927:	learn: 0.9364603	test: 0.8767088	best: 0.8864935 (342)	total: 1m 32s	remaining: 7.16s
928:	learn: 0.9364189	test: 0.8769653	best: 0.8864935 (342)	total: 1m 32s	remaining: 7.06s
929:	learn: 0.9364254	test: 0.8766339	best: 0.8864935 (342)	total: 1m 32s	remaining: 6.96s
930:	learn: 0.9364409	test: 0.8766445	best: 0.8864935 (342)	total: 1m 32s	remaining: 6.86s
931:	learn: 0.9364320	test: 0.8762910	best: 0.8864935 (342)	total: 1m 32s	remaining: 6.75s
932:	learn: 0.9364829	test: 0.8761191	best: 0.8864935 (342)	total: 1m 32s	remaining: 6.65s
933:	learn: 0.9364799	test: 0.8760334	best: 0.8864935 (342)	total: 1m 32s	remaining: 6.56s
934:	learn: 0.9364930	test: 0.8761516	best: 0.8864935 (342)	total: 1m 32s	remaining: 6.46s
935:	learn: 0.9365409	test: 0.8759690	best: 0.8864935 (342)	total: 1m 32s	remaining: 6.36s

<catboost.core.CatBoostClassifier at 0x7fc3206bdb38>

In [16]:
predictions = model.predict(dense_val_scaled)
model.save_model('catboost_models/third_features.pth')

accuracy_score(y_val, predictions), f1_score(y_val, predictions)

(0.8954366466085949, 0.6909823677581864)

In [17]:
fourth_features = third_features
for elem in tr_train.columns:
    if elem[:3] == 'NKT':
        fourth_features += [elem]

dense_train = tr_train[tr_train.columns[1:]].drop('goal', axis=1).drop('lith', axis=1).filter(fourth_features)
dense_val = val_train[val_train.columns[1:]].drop('goal', axis=1).drop('lith', axis=1).filter(fourth_features)

min_max_scaler = preprocessing.MinMaxScaler()
min_max_scaler = min_max_scaler.fit(dense_train)

dense_train_scaled = min_max_scaler.transform(dense_train)
dense_val_scaled = min_max_scaler.transform(dense_val)

y_train = tr_train['goal']
y_val = val_train['goal']

dense_train_scaled.shape, dense_val_scaled.shape

((405234, 82), (58663, 82))

In [18]:
ln = len(y_train)

model = CatBoostClassifier(iterations=1000, 
                           class_weights=[np.sum(y_train == 1)/ln, np.sum(y_train == 0)/ln], 
                           eval_metric="F1")
model.fit(
    X=dense_train_scaled, y=y_train,
    eval_set=(dense_val_scaled, y_val),
    plot=True
)

MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

Learning rate set to 0.146575
0:	learn: 0.7605939	test: 0.7906701	best: 0.7906701 (0)	total: 132ms	remaining: 2m 11s
1:	learn: 0.7708943	test: 0.8036729	best: 0.8036729 (1)	total: 237ms	remaining: 1m 58s
2:	learn: 0.7838432	test: 0.8197817	best: 0.8197817 (2)	total: 335ms	remaining: 1m 51s
3:	learn: 0.7926943	test: 0.8302479	best: 0.8302479 (3)	total: 472ms	remaining: 1m 57s
4:	learn: 0.7996191	test: 0.8388362	best: 0.8388362 (4)	total: 724ms	remaining: 2m 24s
5:	learn: 0.8087703	test: 0.8421235	best: 0.8421235 (5)	total: 943ms	remaining: 2m 36s
6:	learn: 0.8158541	test: 0.8411739	best: 0.8421235 (5)	total: 1.09s	remaining: 2m 34s
7:	learn: 0.8222791	test: 0.8426455	best: 0.8426455 (7)	total: 1.19s	remaining: 2m 27s
8:	learn: 0.8256796	test: 0.8437550	best: 0.8437550 (8)	total: 1.31s	remaining: 2m 24s
9:	learn: 0.8300582	test: 0.8457560	best: 0.8457560 (9)	total: 1.45s	remaining: 2m 23s
10:	learn: 0.8307108	test: 0.8479053	best: 0.8479053 (10)	total: 1.55s	remaining: 2m 19s
11:	learn: 

95:	learn: 0.8927886	test: 0.8780839	best: 0.8780839 (95)	total: 10.9s	remaining: 1m 42s
96:	learn: 0.8929389	test: 0.8779296	best: 0.8780839 (95)	total: 11s	remaining: 1m 42s
97:	learn: 0.8932478	test: 0.8780941	best: 0.8780941 (97)	total: 11.1s	remaining: 1m 42s
98:	learn: 0.8935229	test: 0.8775472	best: 0.8780941 (97)	total: 11.2s	remaining: 1m 41s
99:	learn: 0.8935684	test: 0.8779584	best: 0.8780941 (97)	total: 11.3s	remaining: 1m 41s
100:	learn: 0.8940783	test: 0.8779045	best: 0.8780941 (97)	total: 11.5s	remaining: 1m 42s
101:	learn: 0.8945989	test: 0.8772520	best: 0.8780941 (97)	total: 11.6s	remaining: 1m 41s
102:	learn: 0.8947789	test: 0.8776932	best: 0.8780941 (97)	total: 11.6s	remaining: 1m 41s
103:	learn: 0.8950278	test: 0.8777055	best: 0.8780941 (97)	total: 11.7s	remaining: 1m 40s
104:	learn: 0.8952130	test: 0.8777367	best: 0.8780941 (97)	total: 11.8s	remaining: 1m 40s
105:	learn: 0.8953294	test: 0.8779226	best: 0.8780941 (97)	total: 11.9s	remaining: 1m 40s
106:	learn: 0.895

186:	learn: 0.9110917	test: 0.8840114	best: 0.8842167 (185)	total: 19.7s	remaining: 1m 25s
187:	learn: 0.9112024	test: 0.8837538	best: 0.8842167 (185)	total: 19.8s	remaining: 1m 25s
188:	learn: 0.9113039	test: 0.8837231	best: 0.8842167 (185)	total: 19.9s	remaining: 1m 25s
189:	learn: 0.9114529	test: 0.8838876	best: 0.8842167 (185)	total: 20s	remaining: 1m 25s
190:	learn: 0.9115194	test: 0.8839389	best: 0.8842167 (185)	total: 20.1s	remaining: 1m 25s
191:	learn: 0.9115957	test: 0.8837549	best: 0.8842167 (185)	total: 20.2s	remaining: 1m 25s
192:	learn: 0.9117813	test: 0.8837035	best: 0.8842167 (185)	total: 20.3s	remaining: 1m 25s
193:	learn: 0.9120843	test: 0.8836107	best: 0.8842167 (185)	total: 20.4s	remaining: 1m 24s
194:	learn: 0.9123012	test: 0.8834360	best: 0.8842167 (185)	total: 20.5s	remaining: 1m 24s
195:	learn: 0.9126186	test: 0.8841591	best: 0.8842167 (185)	total: 20.6s	remaining: 1m 24s
196:	learn: 0.9128635	test: 0.8842944	best: 0.8842944 (196)	total: 20.7s	remaining: 1m 24s
1

278:	learn: 0.9219377	test: 0.8840333	best: 0.8853978 (259)	total: 28.9s	remaining: 1m 14s
279:	learn: 0.9221141	test: 0.8836270	best: 0.8853978 (259)	total: 29s	remaining: 1m 14s
280:	learn: 0.9221936	test: 0.8836371	best: 0.8853978 (259)	total: 29.1s	remaining: 1m 14s
281:	learn: 0.9224234	test: 0.8836161	best: 0.8853978 (259)	total: 29.2s	remaining: 1m 14s
282:	learn: 0.9226575	test: 0.8835122	best: 0.8853978 (259)	total: 29.3s	remaining: 1m 14s
283:	learn: 0.9227368	test: 0.8833248	best: 0.8853978 (259)	total: 29.4s	remaining: 1m 14s
284:	learn: 0.9228050	test: 0.8835642	best: 0.8853978 (259)	total: 29.5s	remaining: 1m 14s
285:	learn: 0.9228668	test: 0.8837829	best: 0.8853978 (259)	total: 29.6s	remaining: 1m 13s
286:	learn: 0.9229377	test: 0.8837208	best: 0.8853978 (259)	total: 29.7s	remaining: 1m 13s
287:	learn: 0.9231648	test: 0.8836793	best: 0.8853978 (259)	total: 29.8s	remaining: 1m 13s
288:	learn: 0.9231945	test: 0.8838356	best: 0.8853978 (259)	total: 29.9s	remaining: 1m 13s
2

370:	learn: 0.9302161	test: 0.8832554	best: 0.8853978 (259)	total: 38.2s	remaining: 1m 4s
371:	learn: 0.9302636	test: 0.8833088	best: 0.8853978 (259)	total: 38.3s	remaining: 1m 4s
372:	learn: 0.9302891	test: 0.8831932	best: 0.8853978 (259)	total: 38.4s	remaining: 1m 4s
373:	learn: 0.9303701	test: 0.8830775	best: 0.8853978 (259)	total: 38.5s	remaining: 1m 4s
374:	learn: 0.9304568	test: 0.8832983	best: 0.8853978 (259)	total: 38.6s	remaining: 1m 4s
375:	learn: 0.9304674	test: 0.8837287	best: 0.8853978 (259)	total: 38.7s	remaining: 1m 4s
376:	learn: 0.9306351	test: 0.8834142	best: 0.8853978 (259)	total: 38.8s	remaining: 1m 4s
377:	learn: 0.9306998	test: 0.8835618	best: 0.8853978 (259)	total: 38.9s	remaining: 1m 3s
378:	learn: 0.9307896	test: 0.8834775	best: 0.8853978 (259)	total: 39s	remaining: 1m 3s
379:	learn: 0.9308913	test: 0.8833203	best: 0.8853978 (259)	total: 39.1s	remaining: 1m 3s
380:	learn: 0.9309076	test: 0.8833622	best: 0.8853978 (259)	total: 39.2s	remaining: 1m 3s
381:	learn: 

464:	learn: 0.9362948	test: 0.8831366	best: 0.8853978 (259)	total: 47.5s	remaining: 54.7s
465:	learn: 0.9363545	test: 0.8829369	best: 0.8853978 (259)	total: 47.6s	remaining: 54.6s
466:	learn: 0.9363818	test: 0.8829477	best: 0.8853978 (259)	total: 47.7s	remaining: 54.5s
467:	learn: 0.9365824	test: 0.8827581	best: 0.8853978 (259)	total: 47.8s	remaining: 54.4s
468:	learn: 0.9366482	test: 0.8828213	best: 0.8853978 (259)	total: 48s	remaining: 54.3s
469:	learn: 0.9366904	test: 0.8825790	best: 0.8853978 (259)	total: 48.1s	remaining: 54.2s
470:	learn: 0.9367336	test: 0.8823470	best: 0.8853978 (259)	total: 48.2s	remaining: 54.1s
471:	learn: 0.9368722	test: 0.8823258	best: 0.8853978 (259)	total: 48.3s	remaining: 54s
472:	learn: 0.9368372	test: 0.8825579	best: 0.8853978 (259)	total: 48.4s	remaining: 53.9s
473:	learn: 0.9369932	test: 0.8822098	best: 0.8853978 (259)	total: 48.5s	remaining: 53.8s
474:	learn: 0.9370394	test: 0.8825790	best: 0.8853978 (259)	total: 48.5s	remaining: 53.6s
475:	learn: 0.

558:	learn: 0.9416569	test: 0.8806023	best: 0.8853978 (259)	total: 56.5s	remaining: 44.6s
559:	learn: 0.9416754	test: 0.8806447	best: 0.8853978 (259)	total: 56.6s	remaining: 44.5s
560:	learn: 0.9416984	test: 0.8808993	best: 0.8853978 (259)	total: 56.7s	remaining: 44.4s
561:	learn: 0.9417780	test: 0.8806660	best: 0.8853978 (259)	total: 56.8s	remaining: 44.3s
562:	learn: 0.9418945	test: 0.8804857	best: 0.8853978 (259)	total: 56.9s	remaining: 44.2s
563:	learn: 0.9420307	test: 0.8806343	best: 0.8853978 (259)	total: 57s	remaining: 44.1s
564:	learn: 0.9420767	test: 0.8805493	best: 0.8853978 (259)	total: 57.1s	remaining: 44s
565:	learn: 0.9420909	test: 0.8802310	best: 0.8853978 (259)	total: 57.2s	remaining: 43.9s
566:	learn: 0.9422797	test: 0.8802203	best: 0.8853978 (259)	total: 57.3s	remaining: 43.8s
567:	learn: 0.9422594	test: 0.8801884	best: 0.8853978 (259)	total: 57.5s	remaining: 43.7s
568:	learn: 0.9423227	test: 0.8805598	best: 0.8853978 (259)	total: 57.6s	remaining: 43.6s
569:	learn: 0.

650:	learn: 0.9459947	test: 0.8793323	best: 0.8853978 (259)	total: 1m 5s	remaining: 35s
651:	learn: 0.9459818	test: 0.8791936	best: 0.8853978 (259)	total: 1m 5s	remaining: 34.9s
652:	learn: 0.9460004	test: 0.8792143	best: 0.8853978 (259)	total: 1m 5s	remaining: 34.8s
653:	learn: 0.9460509	test: 0.8790757	best: 0.8853978 (259)	total: 1m 5s	remaining: 34.7s
654:	learn: 0.9461135	test: 0.8792145	best: 0.8853978 (259)	total: 1m 5s	remaining: 34.6s
655:	learn: 0.9461651	test: 0.8789369	best: 0.8853978 (259)	total: 1m 5s	remaining: 34.5s
656:	learn: 0.9462128	test: 0.8790545	best: 0.8853978 (259)	total: 1m 5s	remaining: 34.4s
657:	learn: 0.9462690	test: 0.8790970	best: 0.8853978 (259)	total: 1m 6s	remaining: 34.3s
658:	learn: 0.9463198	test: 0.8793741	best: 0.8853978 (259)	total: 1m 6s	remaining: 34.2s
659:	learn: 0.9462934	test: 0.8792996	best: 0.8853978 (259)	total: 1m 6s	remaining: 34.1s
660:	learn: 0.9462972	test: 0.8792355	best: 0.8853978 (259)	total: 1m 6s	remaining: 34s
661:	learn: 0.

741:	learn: 0.9496024	test: 0.8771313	best: 0.8853978 (259)	total: 1m 14s	remaining: 25.9s
742:	learn: 0.9496887	test: 0.8770135	best: 0.8853978 (259)	total: 1m 14s	remaining: 25.8s
743:	learn: 0.9496597	test: 0.8771633	best: 0.8853978 (259)	total: 1m 14s	remaining: 25.7s
744:	learn: 0.9497144	test: 0.8771307	best: 0.8853978 (259)	total: 1m 14s	remaining: 25.6s
745:	learn: 0.9497635	test: 0.8771094	best: 0.8853978 (259)	total: 1m 14s	remaining: 25.5s
746:	learn: 0.9497170	test: 0.8769697	best: 0.8853978 (259)	total: 1m 14s	remaining: 25.4s
747:	learn: 0.9497406	test: 0.8770875	best: 0.8853978 (259)	total: 1m 14s	remaining: 25.3s
748:	learn: 0.9497499	test: 0.8773023	best: 0.8853978 (259)	total: 1m 15s	remaining: 25.2s
749:	learn: 0.9498549	test: 0.8771301	best: 0.8853978 (259)	total: 1m 15s	remaining: 25.1s
750:	learn: 0.9499175	test: 0.8772159	best: 0.8853978 (259)	total: 1m 15s	remaining: 25s
751:	learn: 0.9499052	test: 0.8770656	best: 0.8853978 (259)	total: 1m 15s	remaining: 24.9s
7

832:	learn: 0.9521077	test: 0.8773583	best: 0.8853978 (259)	total: 1m 23s	remaining: 16.7s
833:	learn: 0.9521472	test: 0.8774122	best: 0.8853978 (259)	total: 1m 23s	remaining: 16.6s
834:	learn: 0.9522194	test: 0.8774010	best: 0.8853978 (259)	total: 1m 23s	remaining: 16.4s
835:	learn: 0.9522316	test: 0.8774117	best: 0.8853978 (259)	total: 1m 23s	remaining: 16.3s
836:	learn: 0.9522821	test: 0.8774016	best: 0.8853978 (259)	total: 1m 23s	remaining: 16.2s
837:	learn: 0.9522670	test: 0.8773043	best: 0.8853978 (259)	total: 1m 23s	remaining: 16.1s
838:	learn: 0.9523402	test: 0.8772723	best: 0.8853978 (259)	total: 1m 23s	remaining: 16s
839:	learn: 0.9523497	test: 0.8772076	best: 0.8853978 (259)	total: 1m 23s	remaining: 15.9s
840:	learn: 0.9523184	test: 0.8773263	best: 0.8853978 (259)	total: 1m 23s	remaining: 15.8s
841:	learn: 0.9523894	test: 0.8770463	best: 0.8853978 (259)	total: 1m 23s	remaining: 15.7s
842:	learn: 0.9524332	test: 0.8770249	best: 0.8853978 (259)	total: 1m 23s	remaining: 15.6s
8

924:	learn: 0.9545447	test: 0.8750503	best: 0.8853978 (259)	total: 1m 31s	remaining: 7.44s
925:	learn: 0.9545829	test: 0.8749531	best: 0.8853978 (259)	total: 1m 31s	remaining: 7.34s
926:	learn: 0.9545490	test: 0.8747578	best: 0.8853978 (259)	total: 1m 31s	remaining: 7.24s
927:	learn: 0.9545596	test: 0.8748006	best: 0.8853978 (259)	total: 1m 32s	remaining: 7.14s
928:	learn: 0.9546059	test: 0.8746703	best: 0.8853978 (259)	total: 1m 32s	remaining: 7.04s
929:	learn: 0.9545950	test: 0.8747569	best: 0.8853978 (259)	total: 1m 32s	remaining: 6.94s
930:	learn: 0.9547031	test: 0.8748220	best: 0.8853978 (259)	total: 1m 32s	remaining: 6.84s
931:	learn: 0.9547290	test: 0.8747462	best: 0.8853978 (259)	total: 1m 32s	remaining: 6.75s
932:	learn: 0.9547822	test: 0.8746490	best: 0.8853978 (259)	total: 1m 32s	remaining: 6.64s
933:	learn: 0.9548286	test: 0.8747141	best: 0.8853978 (259)	total: 1m 32s	remaining: 6.55s
934:	learn: 0.9548926	test: 0.8745614	best: 0.8853978 (259)	total: 1m 32s	remaining: 6.45s

<catboost.core.CatBoostClassifier at 0x7fc3206bdcc0>

In [19]:
predictions = model.predict(dense_val_scaled)
model.save_model('catboost_models/fourth_features.pth')

accuracy_score(y_val, predictions), f1_score(y_val, predictions)

(0.8994084857576325, 0.6974311644362406)

In [20]:
fifth_features = fourth_features
for elem in tr_train.columns:
    if elem[:3] == 'DGK':
        fifth_features += [elem]

dense_train = tr_train[tr_train.columns[1:]].drop('goal', axis=1).drop('lith', axis=1).filter(fifth_features)
dense_val = val_train[val_train.columns[1:]].drop('goal', axis=1).drop('lith', axis=1).filter(fifth_features)

min_max_scaler = preprocessing.MinMaxScaler()
min_max_scaler = min_max_scaler.fit(dense_train)

dense_train_scaled = min_max_scaler.transform(dense_train)
dense_val_scaled = min_max_scaler.transform(dense_val)

y_train = tr_train['goal']
y_val = val_train['goal']

dense_train_scaled.shape, dense_val_scaled.shape

((405234, 89), (58663, 89))

In [21]:
ln = len(y_train)

model = CatBoostClassifier(iterations=1000, 
                           class_weights=[np.sum(y_train == 1)/ln, np.sum(y_train == 0)/ln], 
                           eval_metric="F1")
model.fit(
    X=dense_train_scaled, y=y_train,
    eval_set=(dense_val_scaled, y_val),
    plot=True
)

MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

Learning rate set to 0.146575
0:	learn: 0.7831939	test: 0.8118135	best: 0.8118135 (0)	total: 246ms	remaining: 4m 6s
1:	learn: 0.7755721	test: 0.8159772	best: 0.8159772 (1)	total: 384ms	remaining: 3m 11s
2:	learn: 0.7842294	test: 0.8266549	best: 0.8266549 (2)	total: 508ms	remaining: 2m 48s
3:	learn: 0.7937237	test: 0.8307465	best: 0.8307465 (3)	total: 632ms	remaining: 2m 37s
4:	learn: 0.8003112	test: 0.8372639	best: 0.8372639 (4)	total: 744ms	remaining: 2m 28s
5:	learn: 0.8114986	test: 0.8415338	best: 0.8415338 (5)	total: 872ms	remaining: 2m 24s
6:	learn: 0.8156658	test: 0.8378862	best: 0.8415338 (5)	total: 961ms	remaining: 2m 16s
7:	learn: 0.8181773	test: 0.8390994	best: 0.8415338 (5)	total: 1.06s	remaining: 2m 11s
8:	learn: 0.8237746	test: 0.8429956	best: 0.8429956 (8)	total: 1.15s	remaining: 2m 6s
9:	learn: 0.8263837	test: 0.8451642	best: 0.8451642 (9)	total: 1.27s	remaining: 2m 5s
10:	learn: 0.8301568	test: 0.8476672	best: 0.8476672 (10)	total: 1.41s	remaining: 2m 6s
11:	learn: 0.83

93:	learn: 0.8973907	test: 0.8824752	best: 0.8824752 (93)	total: 9.42s	remaining: 1m 30s
94:	learn: 0.8976220	test: 0.8826797	best: 0.8826797 (94)	total: 9.57s	remaining: 1m 31s
95:	learn: 0.8979228	test: 0.8828522	best: 0.8828522 (95)	total: 9.69s	remaining: 1m 31s
96:	learn: 0.8980467	test: 0.8829642	best: 0.8829642 (96)	total: 9.81s	remaining: 1m 31s
97:	learn: 0.8983998	test: 0.8831062	best: 0.8831062 (97)	total: 9.98s	remaining: 1m 31s
98:	learn: 0.8986771	test: 0.8827703	best: 0.8831062 (97)	total: 10.1s	remaining: 1m 32s
99:	learn: 0.8989832	test: 0.8827606	best: 0.8831062 (97)	total: 10.3s	remaining: 1m 32s
100:	learn: 0.8991404	test: 0.8830764	best: 0.8831062 (97)	total: 10.5s	remaining: 1m 33s
101:	learn: 0.8993374	test: 0.8832587	best: 0.8832587 (101)	total: 10.6s	remaining: 1m 33s
102:	learn: 0.8995530	test: 0.8833202	best: 0.8833202 (102)	total: 10.7s	remaining: 1m 32s
103:	learn: 0.8997783	test: 0.8832902	best: 0.8833202 (102)	total: 10.8s	remaining: 1m 32s
104:	learn: 0.

186:	learn: 0.9142139	test: 0.8875634	best: 0.8878356 (184)	total: 18.8s	remaining: 1m 21s
187:	learn: 0.9143652	test: 0.8874610	best: 0.8878356 (184)	total: 18.9s	remaining: 1m 21s
188:	learn: 0.9144635	test: 0.8877568	best: 0.8878356 (184)	total: 18.9s	remaining: 1m 21s
189:	learn: 0.9145177	test: 0.8878789	best: 0.8878789 (189)	total: 19s	remaining: 1m 21s
190:	learn: 0.9147637	test: 0.8878602	best: 0.8878789 (189)	total: 19.2s	remaining: 1m 21s
191:	learn: 0.9148556	test: 0.8877794	best: 0.8878789 (189)	total: 19.3s	remaining: 1m 21s
192:	learn: 0.9149133	test: 0.8877081	best: 0.8878789 (189)	total: 19.3s	remaining: 1m 20s
193:	learn: 0.9151156	test: 0.8873424	best: 0.8878789 (189)	total: 19.4s	remaining: 1m 20s
194:	learn: 0.9152288	test: 0.8873914	best: 0.8878789 (189)	total: 19.5s	remaining: 1m 20s
195:	learn: 0.9152430	test: 0.8874647	best: 0.8878789 (189)	total: 19.7s	remaining: 1m 20s
196:	learn: 0.9153418	test: 0.8871960	best: 0.8878789 (189)	total: 19.8s	remaining: 1m 20s
1

278:	learn: 0.9242554	test: 0.8878992	best: 0.8891500 (274)	total: 27.8s	remaining: 1m 11s
279:	learn: 0.9243890	test: 0.8878785	best: 0.8891500 (274)	total: 27.9s	remaining: 1m 11s
280:	learn: 0.9244310	test: 0.8878775	best: 0.8891500 (274)	total: 28s	remaining: 1m 11s
281:	learn: 0.9246549	test: 0.8880322	best: 0.8891500 (274)	total: 28.1s	remaining: 1m 11s
282:	learn: 0.9247865	test: 0.8883677	best: 0.8891500 (274)	total: 28.1s	remaining: 1m 11s
283:	learn: 0.9249174	test: 0.8883770	best: 0.8891500 (274)	total: 28.2s	remaining: 1m 11s
284:	learn: 0.9249392	test: 0.8884798	best: 0.8891500 (274)	total: 28.4s	remaining: 1m 11s
285:	learn: 0.9251112	test: 0.8886402	best: 0.8891500 (274)	total: 28.5s	remaining: 1m 11s
286:	learn: 0.9251846	test: 0.8884759	best: 0.8891500 (274)	total: 28.6s	remaining: 1m 10s
287:	learn: 0.9252992	test: 0.8884759	best: 0.8891500 (274)	total: 28.6s	remaining: 1m 10s
288:	learn: 0.9253938	test: 0.8888055	best: 0.8891500 (274)	total: 28.7s	remaining: 1m 10s
2

370:	learn: 0.9318812	test: 0.8889032	best: 0.8902469 (347)	total: 36.8s	remaining: 1m 2s
371:	learn: 0.9319691	test: 0.8887055	best: 0.8902469 (347)	total: 36.9s	remaining: 1m 2s
372:	learn: 0.9320434	test: 0.8882662	best: 0.8902469 (347)	total: 37s	remaining: 1m 2s
373:	learn: 0.9321493	test: 0.8882976	best: 0.8902469 (347)	total: 37.1s	remaining: 1m 2s
374:	learn: 0.9322125	test: 0.8884000	best: 0.8902469 (347)	total: 37.2s	remaining: 1m 2s
375:	learn: 0.9324023	test: 0.8884523	best: 0.8902469 (347)	total: 37.3s	remaining: 1m 1s
376:	learn: 0.9324497	test: 0.8884314	best: 0.8902469 (347)	total: 37.4s	remaining: 1m 1s
377:	learn: 0.9324980	test: 0.8884815	best: 0.8902469 (347)	total: 37.5s	remaining: 1m 1s
378:	learn: 0.9326475	test: 0.8887197	best: 0.8902469 (347)	total: 37.6s	remaining: 1m 1s
379:	learn: 0.9326256	test: 0.8884721	best: 0.8902469 (347)	total: 37.7s	remaining: 1m 1s
380:	learn: 0.9327505	test: 0.8884314	best: 0.8902469 (347)	total: 37.8s	remaining: 1m 1s
381:	learn: 

463:	learn: 0.9373991	test: 0.8876436	best: 0.8902469 (347)	total: 45.8s	remaining: 52.9s
464:	learn: 0.9374124	test: 0.8878407	best: 0.8902469 (347)	total: 45.9s	remaining: 52.8s
465:	learn: 0.9374562	test: 0.8881920	best: 0.8902469 (347)	total: 46s	remaining: 52.7s
466:	learn: 0.9375310	test: 0.8881113	best: 0.8902469 (347)	total: 46.1s	remaining: 52.6s
467:	learn: 0.9376174	test: 0.8884939	best: 0.8902469 (347)	total: 46.2s	remaining: 52.5s
468:	learn: 0.9377094	test: 0.8882465	best: 0.8902469 (347)	total: 46.3s	remaining: 52.4s
469:	learn: 0.9378258	test: 0.8883923	best: 0.8902469 (347)	total: 46.4s	remaining: 52.3s
470:	learn: 0.9378238	test: 0.8881448	best: 0.8902469 (347)	total: 46.5s	remaining: 52.2s
471:	learn: 0.9379302	test: 0.8879990	best: 0.8902469 (347)	total: 46.6s	remaining: 52.1s
472:	learn: 0.9379392	test: 0.8879476	best: 0.8902469 (347)	total: 46.7s	remaining: 52.1s
473:	learn: 0.9379604	test: 0.8880021	best: 0.8902469 (347)	total: 46.8s	remaining: 52s
474:	learn: 0.

557:	learn: 0.9421096	test: 0.8874122	best: 0.8902469 (347)	total: 55s	remaining: 43.6s
558:	learn: 0.9421415	test: 0.8872454	best: 0.8902469 (347)	total: 55.1s	remaining: 43.5s
559:	learn: 0.9422088	test: 0.8870373	best: 0.8902469 (347)	total: 55.2s	remaining: 43.4s
560:	learn: 0.9423325	test: 0.8869530	best: 0.8902469 (347)	total: 55.3s	remaining: 43.3s
561:	learn: 0.9423323	test: 0.8870786	best: 0.8902469 (347)	total: 55.4s	remaining: 43.2s
562:	learn: 0.9423897	test: 0.8871840	best: 0.8902469 (347)	total: 55.5s	remaining: 43.1s
563:	learn: 0.9424022	test: 0.8872885	best: 0.8902469 (347)	total: 55.6s	remaining: 43s
564:	learn: 0.9425298	test: 0.8874879	best: 0.8902469 (347)	total: 55.7s	remaining: 42.9s
565:	learn: 0.9425399	test: 0.8872481	best: 0.8902469 (347)	total: 55.8s	remaining: 42.8s
566:	learn: 0.9425760	test: 0.8877592	best: 0.8902469 (347)	total: 55.9s	remaining: 42.7s
567:	learn: 0.9426059	test: 0.8877381	best: 0.8902469 (347)	total: 56s	remaining: 42.6s
568:	learn: 0.94

650:	learn: 0.9465999	test: 0.8854277	best: 0.8902469 (347)	total: 1m 4s	remaining: 34.5s
651:	learn: 0.9467395	test: 0.8855952	best: 0.8902469 (347)	total: 1m 4s	remaining: 34.4s
652:	learn: 0.9467159	test: 0.8856487	best: 0.8902469 (347)	total: 1m 4s	remaining: 34.3s
653:	learn: 0.9467418	test: 0.8853874	best: 0.8902469 (347)	total: 1m 4s	remaining: 34.2s
654:	learn: 0.9467771	test: 0.8856395	best: 0.8902469 (347)	total: 1m 4s	remaining: 34.1s
655:	learn: 0.9467921	test: 0.8857127	best: 0.8902469 (347)	total: 1m 4s	remaining: 34s
656:	learn: 0.9468876	test: 0.8855042	best: 0.8902469 (347)	total: 1m 4s	remaining: 33.9s
657:	learn: 0.9469054	test: 0.8855986	best: 0.8902469 (347)	total: 1m 4s	remaining: 33.8s
658:	learn: 0.9469269	test: 0.8855048	best: 0.8902469 (347)	total: 1m 5s	remaining: 33.7s
659:	learn: 0.9470187	test: 0.8856732	best: 0.8902469 (347)	total: 1m 5s	remaining: 33.6s
660:	learn: 0.9470247	test: 0.8857458	best: 0.8902469 (347)	total: 1m 5s	remaining: 33.5s
661:	learn: 

741:	learn: 0.9502915	test: 0.8831481	best: 0.8902469 (347)	total: 1m 13s	remaining: 25.6s
742:	learn: 0.9502628	test: 0.8830421	best: 0.8902469 (347)	total: 1m 13s	remaining: 25.5s
743:	learn: 0.9503116	test: 0.8829683	best: 0.8902469 (347)	total: 1m 13s	remaining: 25.4s
744:	learn: 0.9503208	test: 0.8828103	best: 0.8902469 (347)	total: 1m 13s	remaining: 25.3s
745:	learn: 0.9503441	test: 0.8829472	best: 0.8902469 (347)	total: 1m 13s	remaining: 25.2s
746:	learn: 0.9503752	test: 0.8830421	best: 0.8902469 (347)	total: 1m 14s	remaining: 25.1s
747:	learn: 0.9503915	test: 0.8830632	best: 0.8902469 (347)	total: 1m 14s	remaining: 25s
748:	learn: 0.9504159	test: 0.8824625	best: 0.8902469 (347)	total: 1m 14s	remaining: 24.9s
749:	learn: 0.9504303	test: 0.8825048	best: 0.8902469 (347)	total: 1m 14s	remaining: 24.8s
750:	learn: 0.9504481	test: 0.8822306	best: 0.8902469 (347)	total: 1m 14s	remaining: 24.7s
751:	learn: 0.9505025	test: 0.8822520	best: 0.8902469 (347)	total: 1m 14s	remaining: 24.6s
7

832:	learn: 0.9530018	test: 0.8795499	best: 0.8902469 (347)	total: 1m 22s	remaining: 16.6s
833:	learn: 0.9530330	test: 0.8799433	best: 0.8902469 (347)	total: 1m 22s	remaining: 16.5s
834:	learn: 0.9531037	test: 0.8798159	best: 0.8902469 (347)	total: 1m 22s	remaining: 16.4s
835:	learn: 0.9531117	test: 0.8797306	best: 0.8902469 (347)	total: 1m 23s	remaining: 16.3s
836:	learn: 0.9531716	test: 0.8798051	best: 0.8902469 (347)	total: 1m 23s	remaining: 16.2s
837:	learn: 0.9531430	test: 0.8799751	best: 0.8902469 (347)	total: 1m 23s	remaining: 16.1s
838:	learn: 0.9531961	test: 0.8801453	best: 0.8902469 (347)	total: 1m 23s	remaining: 16s
839:	learn: 0.9532233	test: 0.8799965	best: 0.8902469 (347)	total: 1m 23s	remaining: 15.9s
840:	learn: 0.9532494	test: 0.8802089	best: 0.8902469 (347)	total: 1m 23s	remaining: 15.8s
841:	learn: 0.9532371	test: 0.8799323	best: 0.8902469 (347)	total: 1m 23s	remaining: 15.7s
842:	learn: 0.9532628	test: 0.8799110	best: 0.8902469 (347)	total: 1m 23s	remaining: 15.6s
8

923:	learn: 0.9558955	test: 0.8778647	best: 0.8902469 (347)	total: 1m 29s	remaining: 7.34s
924:	learn: 0.9559934	test: 0.8777475	best: 0.8902469 (347)	total: 1m 29s	remaining: 7.25s
925:	learn: 0.9560220	test: 0.8778223	best: 0.8902469 (347)	total: 1m 29s	remaining: 7.15s
926:	learn: 0.9560669	test: 0.8778541	best: 0.8902469 (347)	total: 1m 29s	remaining: 7.05s
927:	learn: 0.9560832	test: 0.8779500	best: 0.8902469 (347)	total: 1m 29s	remaining: 6.95s
928:	learn: 0.9560968	test: 0.8779389	best: 0.8902469 (347)	total: 1m 29s	remaining: 6.85s
929:	learn: 0.9560994	test: 0.8779915	best: 0.8902469 (347)	total: 1m 29s	remaining: 6.75s
930:	learn: 0.9561103	test: 0.8778844	best: 0.8902469 (347)	total: 1m 29s	remaining: 6.65s
931:	learn: 0.9561402	test: 0.8779486	best: 0.8902469 (347)	total: 1m 29s	remaining: 6.55s
932:	learn: 0.9562002	test: 0.8782900	best: 0.8902469 (347)	total: 1m 29s	remaining: 6.45s
933:	learn: 0.9562097	test: 0.8784394	best: 0.8902469 (347)	total: 1m 29s	remaining: 6.36s

<catboost.core.CatBoostClassifier at 0x7fc3206b6f28>

In [22]:
predictions = model.predict(dense_val_scaled)
model.save_model('catboost_models/fifth_features.pth')

accuracy_score(y_val, predictions), f1_score(y_val, predictions)

(0.9004653699947156, 0.7017418399141849)

In [23]:
!mkdir well_jsons

In [26]:
test = pd.read_excel('test.xlsx')
test.head()

Unnamed: 0,id,well id,"depth, m",bk,GZ1,GZ2,GZ3,GZ4,GZ5,GZ7,DGK,NKTD,NKTM,NKTR,ALPS
0,1,47,2465.546,0.665198,0.259688,0.865866,0.526683,0.116218,0.46487,0.124486,0.196296,0.483192,0.643774,0.376472,0.261831
1,2,47,2465.6355,0.115153,0.258287,0.161988,0.583114,0.122256,0.477795,0.118895,0.146578,0.491132,0.655735,0.359872,0.23647
2,3,47,2465.725,0.155254,0.318849,0.125817,0.639544,0.128194,0.497212,0.111693,0.968615,0.499726,0.667695,0.343272,0.266756
3,4,47,2465.8145,0.125315,0.474215,0.149127,0.638636,0.128239,0.478265,0.987464,0.546126,0.492469,0.659642,0.341159,0.332568
4,5,47,2465.904,0.953756,0.638341,0.172437,0.637663,0.113455,0.465892,0.858188,0.123651,0.485865,0.651513,0.339456,0.398383


In [30]:
def extract_well(df, well_id):
    return df[df['well id'] == well_id].sort_values(by='depth, m')

extract_well(test, 47).head()

Unnamed: 0,id,well id,"depth, m",bk,GZ1,GZ2,GZ3,GZ4,GZ5,GZ7,DGK,NKTD,NKTM,NKTR,ALPS
0,1,47,2465.546,0.665198,0.259688,0.865866,0.526683,0.116218,0.46487,0.124486,0.196296,0.483192,0.643774,0.376472,0.261831
1,2,47,2465.6355,0.115153,0.258287,0.161988,0.583114,0.122256,0.477795,0.118895,0.146578,0.491132,0.655735,0.359872,0.23647
2,3,47,2465.725,0.155254,0.318849,0.125817,0.639544,0.128194,0.497212,0.111693,0.968615,0.499726,0.667695,0.343272,0.266756
3,4,47,2465.8145,0.125315,0.474215,0.149127,0.638636,0.128239,0.478265,0.987464,0.546126,0.492469,0.659642,0.341159,0.332568
4,5,47,2465.904,0.953756,0.638341,0.172437,0.637663,0.113455,0.465892,0.858188,0.123651,0.485865,0.651513,0.339456,0.398383


In [27]:
test.shape

(37604, 15)

In [None]:

well_df = train[]