# Machine Learning

## 1. Import Libraries

In [11]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from IPython.display import HTML, display
from tabulate import tabulate
import math
import scipy.stats

from sklearn.preprocessing import MinMaxScaler, StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report, log_loss, roc_auc_score, roc_curve, auc
import catboost
from catboost import CatBoostClassifier, Pool

pd.set_option('display.max_rows', 1000)
pd.set_option('display.max_columns', 1000)

### 1.1 load data

In [2]:
# catboost (CB)
train_cb = pd.read_csv('train_ck.csv').drop('ID', axis = 1)
test_cb = pd.read_csv('test_ck.csv')

train_cb.head(2)

Unnamed: 0,VAR_0888,VAR_1193,VAR_0683,VAR_1322,VAR_0881,VAR_1873,VAR_0733,VAR_0648,VAR_1399,VAR_0482,VAR_1529,VAR_0245,VAR_1900,VAR_1209,VAR_0486,VAR_1882,VAR_0291,VAR_1153,VAR_1313,VAR_1564,VAR_1321,VAR_1550,VAR_0767,VAR_1902,VAR_0689,VAR_1561,VAR_1489,VAR_0865,VAR_0944,VAR_0516,VAR_0286,VAR_1190,VAR_0487,VAR_0198,VAR_0593,VAR_0504,VAR_1496,VAR_0555,VAR_0958,VAR_1319,VAR_1877,VAR_1315,VAR_1318,VAR_0886,VAR_1187,VAR_1497,VAR_0242,VAR_0735,VAR_1820,VAR_0095,VAR_0343,VAR_1139,VAR_1398,VAR_0589,VAR_0773,VAR_0797,VAR_1494,VAR_1913,VAR_0761,VAR_1879,VAR_1201,VAR_1862,VAR_0802,VAR_0953,VAR_0943,VAR_0763,VAR_1875,VAR_0308,VAR_1211,VAR_0483,VAR_0572,VAR_0782,VAR_1191,VAR_1186,VAR_0771,VAR_1565,VAR_0929,VAR_1218,VAR_0652,VAR_0312,VAR_1513,VAR_1146,VAR_1188,VAR_0578,VAR_0597,VAR_1243,VAR_1817,VAR_0339,VAR_0326,VAR_1253,VAR_0484,VAR_0939,VAR_1354,VAR_0788,VAR_0655,VAR_0744,VAR_1884,VAR_0272,VAR_0823,VAR_0489,VAR_1101,VAR_1208,VAR_1314,VAR_1924,VAR_1102,VAR_0896,VAR_0273,VAR_1248,VAR_1357,VAR_0940,VAR_0596,VAR_0739,VAR_0306,VAR_0488,VAR_0873,VAR_1323,VAR_0583,VAR_0745,VAR_1874,VAR_1206,VAR_1921,VAR_1312,VAR_1888,VAR_1504,VAR_0649,VAR_0971,VAR_0941,VAR_0573,VAR_0784,VAR_0877,VAR_0807,VAR_0520,VAR_1198,VAR_0650,VAR_0777,VAR_1378,VAR_1242,VAR_1325,VAR_1839,VAR_1901,VAR_1150,VAR_0952,VAR_0254,VAR_0327,VAR_0586,VAR_1885,VAR_0614,VAR_1262,VAR_1512,VAR_1317,VAR_0948,VAR_1876,VAR_0869,VAR_0554,VAR_0328,VAR_0255,VAR_0876,VAR_1228,VAR_1324,VAR_1189,VAR_0983,VAR_0894,VAR_0405,VAR_0351,VAR_0900,VAR_1896,VAR_0485,VAR_1195,VAR_1502,VAR_1241,VAR_1576,VAR_0902,VAR_0863,VAR_0287,VAR_1210,VAR_1566,VAR_0002,VAR_0003,VAR_0004,VAR_0532,VAR_0533,VAR_0534,VAR_0535,VAR_0536,VAR_0537,VAR_0538,VAR_0539,VAR_0540,VAR_0541,VAR_0542,VAR_0543,VAR_0544,VAR_0545,VAR_0546,VAR_0547,VAR_0548,VAR_0549,VAR_0550,VAR_0551,VAR_0552,VAR_0553,VAR_0556,VAR_0557,VAR_0558,VAR_0559,VAR_0560,VAR_0561,VAR_0562,VAR_0563,VAR_0564,VAR_0565,VAR_0566,VAR_0567,VAR_0568,VAR_0569,VAR_0570,VAR_0571,VAR_0574,VAR_0575,VAR_0576,VAR_0577,VAR_0579,VAR_0580,VAR_0581,VAR_0582,VAR_0584,VAR_0585,VAR_0587,VAR_0588,VAR_0590,VAR_0591,VAR_0592,VAR_0594,VAR_0595,VAR_0598,VAR_0599,VAR_0600,VAR_0601,VAR_0602,VAR_0603,VAR_0604,VAR_0605,VAR_0606,VAR_0607,VAR_0608,VAR_0609,VAR_0610,VAR_0611,VAR_0612,VAR_0613,VAR_0615,VAR_0616,VAR_0617,VAR_0618,VAR_0619,VAR_0620,VAR_0621,VAR_0622,VAR_0623,VAR_0624,VAR_0625,VAR_0626,VAR_0627,VAR_0628,VAR_0629,VAR_0630,VAR_0631,VAR_0632,VAR_0633,VAR_0634,VAR_0635,VAR_0636,VAR_0637,VAR_0638,VAR_0639,VAR_0640,VAR_0641,VAR_0642,VAR_0643,VAR_0644,VAR_0645,VAR_0646,VAR_0647,VAR_0651,VAR_0653,VAR_0654,VAR_0656,VAR_0657,VAR_0658,VAR_0659,VAR_0660,VAR_0661,VAR_0662,VAR_0663,VAR_0664,VAR_0665,VAR_0666,VAR_0667,VAR_0668,VAR_0669,VAR_0670,VAR_0671,VAR_0672,VAR_0673,VAR_0674,VAR_0675,VAR_0676,VAR_0677,VAR_0678,VAR_0679,VAR_0680,VAR_0681,VAR_0682,VAR_0684,VAR_0685,VAR_0686,VAR_0687,VAR_0688,VAR_0690,VAR_0691,VAR_0692,VAR_0693,VAR_0694,VAR_0695,VAR_0696,VAR_0697,VAR_0698,VAR_0699,VAR_0700,VAR_0701,VAR_0702,VAR_0703,VAR_0704,VAR_0705,VAR_0706,VAR_0707,VAR_0708,VAR_0709,VAR_0710,VAR_0711,VAR_0712,VAR_0713,VAR_0714,VAR_0715,VAR_0716,VAR_0717,VAR_0718,VAR_0719,VAR_0720,VAR_0721,VAR_0722,VAR_0723,VAR_0724,VAR_0725,VAR_0726,VAR_0727,VAR_0728,VAR_0729,VAR_0730,VAR_0731,VAR_0732,VAR_0734,VAR_0736,VAR_0737,VAR_0738,VAR_0740,VAR_0741,VAR_0742,VAR_0743,VAR_0746,VAR_0747,VAR_0748,VAR_0749,VAR_0750,VAR_0751,VAR_0752,VAR_0753,VAR_0754,VAR_0755,VAR_0756,VAR_0757,VAR_0758,VAR_0759,VAR_0760,VAR_0762,VAR_0764,VAR_0765,VAR_0766,VAR_0768,VAR_0769,VAR_0770,VAR_0772,VAR_0774,VAR_0775,VAR_0776,VAR_0778,VAR_0779,VAR_0780,VAR_0781,VAR_0783,VAR_0785,VAR_0786,VAR_0787,VAR_0789,VAR_0790,VAR_0791,VAR_0792,VAR_0793,VAR_0794,VAR_0795,VAR_0796,VAR_0798,VAR_0799,VAR_0800,VAR_0801,VAR_0803,VAR_0804,VAR_0805,VAR_0806,VAR_0808,VAR_0809,VAR_0810,VAR_0811,VAR_0812,VAR_0813,VAR_0814,VAR_0815,VAR_0816,VAR_0817,VAR_0818,VAR_0819,VAR_0820,VAR_0821,VAR_0822,VAR_0824,VAR_0825,VAR_0826,VAR_0827,VAR_0828,VAR_0829,VAR_0830,VAR_0831,VAR_0832,VAR_0833,VAR_0834,VAR_0835,VAR_0836,VAR_0837,VAR_0838,VAR_0839,VAR_0841,VAR_0842,VAR_0843,VAR_0844,VAR_0845,VAR_0846,VAR_0848,VAR_0849,VAR_0850,VAR_0851,VAR_0852,VAR_0853,VAR_0854,VAR_0855,VAR_0856,VAR_0857,VAR_0858,VAR_0859,VAR_0860,VAR_0861,VAR_0862,VAR_0864,VAR_0866,VAR_0867,VAR_0868,VAR_0870,VAR_0871,VAR_0872,VAR_0874,VAR_0875,VAR_0878,VAR_0879,VAR_0880,VAR_0882,VAR_0883,VAR_0884,VAR_0885,VAR_0887,VAR_0889,VAR_0890,VAR_0891,VAR_0892,VAR_0893,VAR_0895,VAR_0897,VAR_0898,VAR_0899,VAR_0901,VAR_0903,VAR_0904,...,VAR_1858,VAR_1859,VAR_1860,VAR_1861,VAR_1863,VAR_1864,VAR_1865,VAR_1866,VAR_1867,VAR_1868,VAR_1869,VAR_1870,VAR_1871,VAR_1872,VAR_1878,VAR_1880,VAR_1881,VAR_1883,VAR_1886,VAR_1887,VAR_1889,VAR_1890,VAR_1891,VAR_1892,VAR_1893,VAR_1894,VAR_1895,VAR_1897,VAR_1898,VAR_1899,VAR_1903,VAR_1904,VAR_1905,VAR_1906,VAR_1907,VAR_1908,VAR_1909,VAR_1910,VAR_1911,VAR_1912,VAR_1914,VAR_1915,VAR_1916,VAR_1917,VAR_1918,VAR_1919,VAR_1920,VAR_1922,VAR_1923,VAR_1925,VAR_1926,VAR_1927,VAR_1928,VAR_1929,VAR_1930,VAR_1931,VAR_1932,VAR_1933,VAR_0066,VAR_0457,VAR_0316,VAR_0423,VAR_0163,VAR_0444,VAR_0498,VAR_0107,VAR_0449,VAR_0360,VAR_0266,VAR_0133,VAR_0140,VAR_0248,VAR_0100,VAR_0281,VAR_0468,VAR_0371,VAR_0087,VAR_0007,VAR_0059,VAR_0101,VAR_0129,VAR_0465,VAR_0332,VAR_0345,VAR_0131,VAR_0126,VAR_0349,VAR_0448,VAR_0173,VAR_0184,VAR_0319,VAR_0227,VAR_0492,VAR_0251,VAR_0380,VAR_0195,VAR_0142,VAR_0348,VAR_0238,VAR_0437,VAR_0409,VAR_0180,VAR_0407,VAR_0454,VAR_0473,VAR_0062,VAR_0403,VAR_0362,VAR_0247,VAR_0192,VAR_0056,VAR_0456,VAR_0235,VAR_0384,VAR_0388,VAR_0356,VAR_0288,VAR_0309,VAR_0419,VAR_0359,VAR_0120,VAR_0318,VAR_0280,VAR_0378,VAR_0121,VAR_0429,VAR_0350,VAR_0376,VAR_0250,VAR_0193,VAR_0264,VAR_0428,VAR_0104,VAR_0363,VAR_0225,VAR_0410,VAR_0519,VAR_0320,VAR_0078,VAR_0507,VAR_0425,VAR_0322,VAR_0461,VAR_0249,VAR_0334,VAR_0377,VAR_0065,VAR_0048,VAR_0503,VAR_0015,VAR_0491,VAR_0105,VAR_0127,VAR_0047,VAR_0108,VAR_0383,VAR_0389,VAR_0134,VAR_0259,VAR_0267,VAR_0224,VAR_0228,VAR_0355,VAR_0037,VAR_0341,VAR_0263,VAR_0304,VAR_0315,VAR_0135,VAR_0336,VAR_0521,VAR_0162,VAR_0092,VAR_0452,VAR_0523,VAR_0244,VAR_0036,VAR_0141,VAR_0060,VAR_0427,VAR_0115,VAR_0307,VAR_0072,VAR_0398,VAR_0481,VAR_0146,VAR_0490,VAR_0261,VAR_0181,VAR_0443,VAR_0372,VAR_0375,VAR_0512,VAR_0497,VAR_0381,VAR_0509,VAR_0071,VAR_0035,VAR_0077,VAR_0109,VAR_0084,VAR_0475,VAR_0086,VAR_0402,VAR_0479,VAR_0017,VAR_0290,VAR_0400,VAR_0450,VAR_0182,VAR_0064,VAR_0220,VAR_0119,VAR_0175,VAR_0170,VAR_0268,VAR_0006,VAR_0436,VAR_0154,VAR_0474,VAR_0094,VAR_0442,VAR_0476,VAR_0091,VAR_0194,VAR_0289,VAR_0430,VAR_0518,VAR_0284,VAR_0396,VAR_0088,VAR_0150,VAR_0317,VAR_0153,VAR_0445,VAR_0258,VAR_0152,VAR_0278,VAR_0282,VAR_0515,VAR_0464,VAR_0406,VAR_0501,VAR_0054,VAR_0374,VAR_0477,VAR_0373,VAR_0161,VAR_0302,VAR_0439,VAR_0260,VAR_0333,VAR_0399,VAR_0241,VAR_0338,VAR_0471,VAR_0310,VAR_0070,VAR_0270,VAR_0459,VAR_0172,VAR_0212,VAR_0079,VAR_0416,VAR_0117,VAR_0301,VAR_0369,VAR_0097,VAR_0068,VAR_0099,VAR_0470,VAR_0514,VAR_0417,VAR_0517,VAR_0145,VAR_0257,VAR_0149,VAR_0292,VAR_0081,VAR_0387,VAR_0103,VAR_0426,VAR_0144,VAR_0063,VAR_0165,VAR_0297,VAR_0382,VAR_0050,VAR_0505,VAR_0052,VAR_0080,VAR_0414,VAR_0132,VAR_0524,VAR_0422,VAR_0447,VAR_0112,VAR_0102,VAR_0294,VAR_0219,VAR_0413,VAR_0110,VAR_0323,VAR_0469,VAR_0139,VAR_0185,VAR_0358,VAR_0123,VAR_0201,VAR_0049,VAR_0337,VAR_0478,VAR_0090,VAR_0051,VAR_0124,VAR_0340,VAR_0061,VAR_0098,VAR_0298,VAR_0045,VAR_0271,VAR_0511,VAR_0187,VAR_0111,VAR_0408,VAR_0453,VAR_0462,VAR_0279,VAR_0233,VAR_0522,VAR_0277,VAR_0424,VAR_0434,VAR_0311,VAR_0460,VAR_0508,VAR_0138,VAR_0128,VAR_0151,VAR_0013,VAR_0089,VAR_0114,VAR_0285,VAR_0046,VAR_0262,VAR_0269,VAR_0137,VAR_0295,VAR_0324,VAR_0401,VAR_0386,VAR_0122,VAR_0034,VAR_0395,VAR_0495,VAR_0183,VAR_0392,VAR_0496,VAR_0365,VAR_0016,VAR_0136,VAR_0329,VAR_0276,VAR_0067,VAR_0275,VAR_0303,VAR_0370,VAR_0397,VAR_0513,VAR_0085,VAR_0058,VAR_0155,VAR_0164,VAR_0432,VAR_0143,VAR_0458,VAR_0293,VAR_0435,VAR_0499,VAR_0385,VAR_0494,VAR_0076,VAR_0368,VAR_0506,VAR_0148,VAR_0463,VAR_0441,VAR_0265,VAR_0118,VAR_0440,VAR_0510,VAR_0113,VAR_0082,VAR_0455,VAR_0335,VAR_0412,VAR_0344,VAR_0069,VAR_0116,VAR_0391,VAR_0393,VAR_0321,VAR_0125,VAR_0096,VAR_0431,VAR_0357,VAR_0186,VAR_0057,VAR_0253,VAR_0171,VAR_0055,VAR_0083,VAR_0421,VAR_0252,VAR_0433,VAR_0130,VAR_0160,VAR_0366,VAR_0296,VAR_0480,VAR_0014,VAR_0053,VAR_0174,VAR_0093,VAR_0364,VAR_0415,VAR_0299,VAR_0502,VAR_0331,VAR_0313,VAR_0300,VAR_0525,VAR_0361,VAR_0379,VAR_0330,VAR_0231,VAR_0472,VAR_0347,VAR_0346,VAR_0367,VAR_0420,VAR_0256,VAR_0234,VAR_0418,VAR_0451,VAR_0147,VAR_0243,VAR_0390,VAR_0500,VAR_0033,target,VAR_0005_B,VAR_0005_C,VAR_0005_N,VAR_0005_S,VAR_0283_F,VAR_0283_H,VAR_0283_P,VAR_0283_R,VAR_0283_S,VAR_0283_U,VAR_0352_O,VAR_0352_R,VAR_0352_U,VAR_0325_F,VAR_0325_G,VAR_0325_H,VAR_0325_M,VAR_0325_P,VAR_0325_R,VAR_0325_S,VAR_0325_U,VAR_0305_H,VAR_0305_M,VAR_0305_P,VAR_0305_R,VAR_0305_S,VAR_0305_U,VAR_0001_H,VAR_0001_Q,VAR_0001_R,VAR_0353_O,VAR_0353_R,VAR_0353_U,VAR_0354_O,VAR_0354_R,VAR_0354_U,VAR_1934_BRANCH,VAR_1934_CSC,VAR_1934_IAPS,VAR_1934_MOBILE,VAR_1934_RCC,REGION_0237_Midwest,REGION_0237_Northeast,REGION_0237_South,REGION_0237_West,REGION_0274_Midwest,REGION_0274_Northeast,REGION_0274_South,REGION_0274_West
0,1.101461,-0.544576,-1.949119,-0.586556,0.566523,-0.554786,0.565057,1.80886,0.570337,-2.891785,0.576406,3.042406,-0.477953,0.564472,3.15162,-0.516705,-0.54477,1.903815,-0.556868,0.335153,-0.586556,0.582614,-1.833589,-0.481523,-0.507592,-0.622977,1.80886,-0.570083,0.583314,0.454878,0.367929,-0.543384,2.845839,1.569474,-0.688911,-0.542041,1.80886,1.769104,0.577679,-0.571921,-0.574502,-0.586452,-0.572013,0.97372,-0.531282,1.804494,1.105142,-0.570274,1.71024,-2.853926,0.940299,1.713022,0.574472,-0.5602,-0.570274,0.566815,1.80886,0.577144,0.565052,-0.527872,-0.530214,-0.720428,0.566823,1.93498,-0.559786,-0.570274,-0.519508,-0.420228,-0.518405,3.253268,-0.54031,0.56504,-0.564097,-0.551073,0.565048,-0.175144,0.5198,-0.544341,1.828459,1.561687,1.808049,1.903639,-0.534915,-0.521788,-0.56189,-0.548267,-0.925112,0.183243,-0.190757,-0.839072,3.437434,0.583413,-0.573223,-1.833589,1.80886,0.565037,-0.513631,1.451342,-0.18394,4.713894,-1.873448,-0.511883,-0.586514,0.515997,1.97226,1.572235,1.599366,-0.562621,-0.573326,-0.591531,-0.563361,-1.833589,1.592476,2.438843,-0.570083,-0.556973,-0.571498,-0.570274,-0.512767,-0.715183,0.515855,-0.586518,-0.510685,0.546654,1.83316,-0.972455,-0.66151,-0.532576,-0.570274,-0.6926,0.56681,0.594788,-0.535401,1.80886,-1.833589,-0.91856,-0.577716,-0.832822,0.553886,-0.488819,1.903773,1.809155,1.504892,-0.118064,-0.556972,-0.504749,-0.591191,0.514813,1.801856,-0.586492,0.557413,-0.528781,-1.833588,1.976004,-0.35254,1.327148,-1.833588,-0.56527,-0.607749,-0.538652,-0.110582,1.431601,-0.672787,0.9937,2.152625,-0.764741,3.318968,-0.538047,1.80886,-0.577548,-0.505671,-0.988358,0.565503,0.18161,-0.52529,-0.629552,1.147154,-1.905533,0.52516,4.04477,-0.150078,-0.281513,1.886201,-0.165134,-0.587356,-0.211731,-0.421831,0.965634,0.605181,-0.583208,0.800003,0.936629,-0.339077,0.292962,0.280504,0.381634,0.385451,-0.078266,0.048819,1.104544,1.505445,1.283215,0.585649,1.091853,0.979703,1.237845,-0.964954,-0.28294,-0.179801,-0.179538,-0.138329,-0.442805,-0.125784,-0.164605,-0.121347,0.345683,-0.343204,0.27599,0.752545,0.213472,0.162879,0.050394,-0.762867,-0.677157,-0.67565,-0.368615,-0.102604,-0.525149,-1.140475,-0.50731,-0.446655,-0.473552,-0.530254,-0.520751,0.157707,0.156631,0.196813,0.142317,0.152961,0.206293,-0.16704,-0.565799,-0.376015,-0.035431,-0.098767,0.095359,0.118757,-0.598194,-0.899085,-0.497816,-0.895776,-0.393593,-0.625571,-0.461742,-0.392752,-0.401335,-0.542701,-0.205481,-0.57204,-0.371585,-0.378063,-0.503863,-0.407428,-0.642488,-0.355789,-0.384445,-0.508099,0.995682,0.769979,0.421236,0.444092,0.593715,0.316097,0.184408,0.184479,0.184486,0.162737,0.138509,0.169792,0.127566,0.127567,2.237048,2.315246,1.156319,0.42568,0.299872,1.131801,-0.522152,0.657097,0.629035,0.632352,-1.100983,-3.445405,-3.477472,0.232334,0.232269,0.242462,0.202764,0.631275,0.16637,0.135802,0.166386,0.135808,0.588872,0.444761,0.166507,0.135933,0.588737,-6.167134,0.145343,0.442932,-3.142331,0.230327,0.600832,-0.516465,-0.735375,-0.349863,-0.252357,1.280294,1.146416,0.744957,0.806862,0.827087,0.829982,0.445039,0.445109,0.397957,0.371101,0.672609,0.368162,-0.179746,-0.303944,0.224462,0.684886,0.578682,0.013427,0.158319,-2.127796,-0.388414,0.710307,0.158301,-2.127796,-0.388414,0.710306,-1.033409,-1.393748,-0.360449,-0.309457,0.158325,0.158311,-0.946074,-0.514331,0.308862,0.219325,-0.524338,0.308862,0.219325,-1.546441,0.565114,0.308868,-0.973698,-1.0305,-1.345227,1.124019,0.219336,-0.474876,-0.170016,1.601073,0.308871,-1.345227,0.219343,-2.053435,-2.053435,-1.540776,-1.364597,-1.132817,-2.058926,-0.820297,-0.352336,-1.109819,-0.303863,-1.010638,0.308864,-0.973698,-1.0305,-1.345227,1.124019,0.219332,-1.365703,0.308863,-0.973698,-1.0305,-1.345227,1.124019,0.219329,-1.761275,-0.303944,0.308861,-0.973698,-1.0305,-1.345227,1.124019,0.219326,-0.181143,-1.496037,-1.496037,-1.713213,-1.468675,-1.383545,-0.591965,0.119706,-0.640227,-0.322944,0.442134,0.119705,-0.640227,-0.322944,0.442135,0.119705,-0.640227,-0.322944,0.442134,-0.179801,-0.501339,-0.320919,-0.341519,-0.963745,-0.917475,0.219332,-0.858704,-0.555941,-0.6254,-0.882627,-1.063611,1.026034,-0.762458,0.438229,-0.850502,-0.521747,-0.92213,1.307453,0.331144,-1.3326,-0.281996,-0.02437,-1.024544,0.455913,-2.053435,-1.540776,-1.364597,-2.053435,-1.540776,-1.364597,-1.132817,-1.496037,-1.496037,0.15831,0.15835,-2.127796,-0.388414,0.710307,-2.769931,-2.025929,-1.045098,-0.628826,-0.832829,-1.010007,-1.491273,0.017107,0.309256,-0.972572,-1.03033,-1.344863,1.124427,0.219878,0.012364,0.308905,-1.03033,-1.344863,0.219532,0.308765,0.219215,0.119645,-0.640227,-0.322944,0.442124,0.225671,0.183747,-0.640227,-0.322943,-0.639285,0.49411,-1.089594,0.521136,-0.640226,0.529129,0.349245,-0.488673,-0.640225,-0.322942,...,1.578639,1.178013,1.159802,1.207234,1.301638,0.631334,0.100829,-0.298222,0.396194,0.310374,0.199139,0.19903,0.19961,-0.643882,1.397269,1.435838,1.44083,1.442722,1.275862,1.261674,1.127039,1.107604,1.112343,1.097365,1.096663,1.094828,1.154097,1.211234,0.967038,0.956029,-0.448877,0.682687,0.691006,0.655116,0.681739,0.629984,0.682545,0.630506,0.630754,0.403723,0.404805,0.405799,0.590296,0.580379,0.328548,0.677435,-0.377838,0.336545,0.331506,-0.235074,0.359344,0.301285,0.301196,0.096351,0.238063,0.216614,0.071146,0.355819,-0.292354,-0.076244,1.047659,-0.203942,-0.086381,-0.203261,-0.099823,-0.012548,-0.051468,-0.069541,0.624873,-0.259063,-0.082183,-0.054314,-0.079662,0.469421,-0.032429,-0.041543,-0.525167,-0.597395,-0.29442,-0.272896,-0.218924,-0.425579,-0.956102,-0.300455,-0.034751,-0.069726,-0.280116,-0.151445,-0.114502,-0.135126,0.328699,-1.266627,4.398985,2.130846,-0.115867,-0.017436,-0.316957,-0.304498,-0.606718,-0.074666,-0.075461,-0.020344,-0.238048,-0.361005,-0.337828,-0.420162,3.964057,1.243108,-0.050693,-0.006432,-0.460334,-0.053546,-0.563589,-0.158853,-0.046845,-0.4194,1.042628,1.097694,-0.508208,-0.032908,-0.382205,-0.731825,0.104216,-0.05353,-0.43831,-0.057708,-0.313349,-0.415084,0.084714,-0.008612,-0.03205,-0.035511,-0.385473,5.120403,-0.150485,-0.084405,-0.109787,0.589832,-0.38564,-0.043039,-0.413421,0.860379,-0.191459,0.081545,-0.698545,-0.024262,-0.57676,-0.163379,-0.38464,-0.588885,-0.142418,-0.441884,-0.106862,-0.13852,-0.051867,1.218653,-0.063587,-0.30179,-0.076279,-0.427246,-0.259917,-1.266627,-0.261518,-0.429718,0.922417,1.11981,1.54414,1.101381,-0.32651,-0.399626,-0.037825,-0.18253,-3.287138,-0.047496,0.207994,0.066303,-0.509738,-0.271345,-0.358949,-0.315211,-0.035252,-0.17443,-0.542453,-0.009168,-0.182379,-0.094723,1.34602,-0.25561,-0.020344,-0.107477,-0.082264,-0.261026,-0.315126,-0.077979,-0.168438,-0.098309,-0.504712,-0.521207,-0.351557,-0.20036,-0.356174,-0.233499,-0.416663,-0.17126,-0.101439,-0.509808,2.512403,1.471537,-0.118326,-0.020344,-0.54869,-0.117768,-0.352794,-0.116825,-0.256739,1.931866,-0.598299,-0.238178,-0.192389,-0.215368,-3.255735,-0.320049,-0.04086,-3.285819,-0.012122,-0.483351,-0.078146,0.107747,-0.101002,-0.006483,-0.580605,-0.191284,1.06997,-0.079637,-0.069,2.460056,-0.180919,-0.067412,1.407461,-0.400889,-0.430773,-0.607648,5.132867,-0.331513,-0.180251,-0.062198,-0.121038,-0.257506,1.576168,-0.346946,1.076921,-0.668043,-0.014497,-0.603025,-0.627347,-0.13948,-0.393188,-0.457958,-0.036541,-0.006483,-0.244978,0.237972,-0.407028,-0.061121,-0.276862,0.583796,3.602469,-3.115389,-0.36322,-0.033236,-0.094041,0.689633,-0.483936,-0.460234,-0.423673,1.374297,-0.087346,1.576816,-0.533135,-0.035463,-0.354939,-0.295884,-0.370648,-0.515987,-0.088162,1.0212,-0.267202,-0.399795,-0.503681,-0.210049,-0.497872,-0.022154,-0.080007,-0.002275,-0.150954,-0.21248,-0.304738,-0.325943,0.724286,-0.115291,-0.014181,-0.250694,-0.837368,-0.062971,-0.033236,-0.16851,0.533678,0.107485,-0.414242,-0.292956,-0.396552,-0.07769,-3.285261,-0.414242,0.094368,0.008191,-0.397242,-0.012865,-0.766663,-0.0561,-0.041032,-0.213946,-0.276891,-0.276888,-0.678707,-0.129941,-0.132326,1.405387,-0.542333,0.130286,-0.031773,-0.279915,-0.486462,2.678276,-0.035722,-0.067093,-0.011141,-0.147101,-0.11084,-0.598299,-0.606718,-0.014384,1.156487,-0.105518,0.586038,-0.199659,-0.404073,1.044269,1.080182,3.094795,-0.023379,0.108393,-0.517447,-0.006483,-0.143901,-0.068931,-0.020503,-0.046479,4.591918,-0.466326,-0.353907,-0.839962,-0.070889,-0.335558,-0.03493,-0.014313,2.329212,-0.009168,-0.987934,-0.392963,-0.481157,-0.080904,-0.2267,-0.164738,-0.342738,-0.17989,-1.147963,-0.143958,-0.123152,-0.143769,1.667421,-0.300201,2.532657,-0.315126,-0.132143,-0.006483,-0.082525,-0.202555,-0.325732,-0.258885,-0.140459,-0.348888,-0.563627,-0.236762,-0.362794,-0.006483,-0.31374,-0.381484,-0.084071,-0.102517,-0.020057,0.552169,-0.008487,-3.199887,-0.109866,1.076921,-0.202786,-0.477399,0.391279,-0.267413,-0.358923,-0.301813,-0.115623,0.312131,-0.256912,-0.014384,-0.193674,1.583007,0.996227,-0.12718,-0.602915,-0.284598,-0.284717,-3.290664,1.833828,-0.035308,0.345215,-0.031773,-0.83172,-1.074577,0.624623,-0.208693,1.733452,-0.083292,-0.462925,-0.40023,-0.209887,-0.308843,-0.303305,1.269436,-0.072323,1.090653,-0.574052,-0.472175,-0.213074,-0.090584,-0.046108,-0.07753,-0.154008,-0.205586,1,False,True,False,False,False,False,False,False,True,False,True,False,False,False,False,False,False,False,False,True,False,False,False,False,False,True,False,True,False,False,False,False,True,True,False,False,False,False,True,False,False,False,False,True,False,False,False,True,False
1,0.551963,-0.544576,-1.910757,-0.586626,0.566429,-0.425424,0.565057,-0.552806,0.570337,0.460237,0.576406,1.035169,-0.513926,0.564472,0.450536,-0.516705,-0.54477,1.903815,-0.556979,1.530437,-0.586626,-1.717431,0.545374,-0.521907,-0.507592,0.777683,-0.552805,-0.570083,0.583314,0.577956,0.367929,-0.5186,1.570116,-0.204514,-0.56983,-0.542041,-0.552812,1.769104,-1.749874,-0.572059,-0.469368,-0.58663,-0.572059,1.236689,-0.531282,-0.554177,0.144303,-0.570274,-0.65975,0.13659,0.940299,1.713022,0.574472,-0.5602,-0.570274,0.566862,-0.552817,-1.732726,0.565052,-0.527872,-0.530169,-0.720428,0.566869,-0.626258,-0.578683,-0.570274,-0.442901,-0.420228,-0.540054,0.829061,-0.490808,0.565087,-0.513633,-0.526211,0.565071,1.512834,0.5198,-0.519887,-0.546937,1.561687,-0.559835,1.903639,-0.510216,-0.54749,-0.575187,-0.548444,-0.925112,0.183243,-0.190757,-0.839072,0.592633,0.583413,-0.573331,0.545409,-0.552834,0.56506,-0.513631,-0.609682,0.047808,0.681482,-1.873448,-0.537839,-0.586623,0.515997,-0.533881,0.544533,-0.65671,-0.562641,-0.573327,-0.567403,-0.575051,0.545379,1.592476,1.262945,-0.570083,-0.556974,-0.571612,-0.570274,-0.487297,-0.715183,0.515855,-0.586625,-0.485235,-1.829312,-0.545524,-0.972455,-0.66151,-0.534273,-0.570274,1.174964,0.566857,0.594788,-0.535401,-0.552834,0.545393,-0.91856,-0.577716,-0.832822,0.553886,-0.533456,1.903773,-0.519185,-0.1159,-0.118064,-0.556974,-0.504749,-0.571423,0.514813,-0.497668,-0.586626,0.557413,-0.400655,0.544742,1.976004,-0.35254,0.00472,0.544766,-0.565443,-0.607749,-0.513916,-0.179385,0.550926,0.7474,-0.935252,0.547231,-0.764741,0.534713,-0.538047,-0.552834,-0.57772,2.145447,0.942696,0.56408,0.18161,-0.540658,0.477119,-1.442531,0.233714,0.553262,-0.20255,-0.150078,-0.281513,1.194865,1.682147,-0.587356,-0.211731,-0.421831,0.538141,1.397153,-0.583208,1.36676,0.401764,-0.339077,0.292962,0.280504,0.381634,0.385451,0.563796,0.048819,1.104544,1.505445,1.283215,0.585649,1.091853,0.979703,1.237845,0.144305,-0.28294,-0.179801,-0.179538,-0.138329,2.258329,-0.125784,-0.164605,-0.121347,0.345627,-0.343204,0.27599,-1.285027,0.213472,0.162879,-0.260958,-0.273474,-0.288994,-0.287757,-0.574466,-0.544109,-0.569382,-0.735287,-0.50731,-0.935271,-0.972385,-0.074332,-0.068068,0.157707,0.156631,0.196813,0.144208,0.154275,0.207117,0.300977,0.180783,-0.376015,-0.422215,-0.469529,-0.38582,-0.269072,-0.339989,-0.657363,-0.227903,0.587729,-0.393593,-0.625571,-0.461742,-0.392752,-0.401335,-0.542701,-0.205481,-0.57204,-0.371585,-0.378063,-0.503863,-0.407428,-0.642488,-0.355789,-0.384445,-0.508099,0.995682,0.769979,0.421236,0.444092,0.593715,0.316097,0.184408,0.184479,0.184486,0.162737,0.138509,0.169792,0.127566,0.127567,-0.149741,-0.445173,-0.79923,-2.337533,-3.360681,-0.773302,-1.178977,-1.483199,-1.252958,-1.306744,-1.67522,0.298133,0.298058,0.232334,0.232269,0.253517,0.202764,-1.578992,0.16637,0.135802,0.166386,0.135808,-2.150223,0.444761,0.166507,0.135933,-2.15201,0.177181,0.145343,0.440682,0.295375,0.230327,-1.539129,0.893807,0.051037,-0.316717,-0.219553,-0.635609,-0.736457,-0.750807,-1.226929,-1.562057,-1.586733,0.445039,0.445109,0.397957,0.371101,-0.999187,0.368162,-0.179746,-0.303944,0.23847,0.602755,0.486191,0.008551,0.158337,0.469971,-0.388414,0.710309,0.158349,0.469968,-0.388414,0.710308,0.413373,0.457471,-0.360449,0.20634,0.158343,0.158341,0.311466,1.597928,0.308872,0.219356,1.591666,0.308872,0.219356,-1.546441,-0.628385,0.308864,-0.973698,0.970402,-1.345227,1.124019,0.219332,2.105811,-0.170016,0.559921,0.308864,-1.345227,0.219334,0.486989,0.48699,0.649023,0.732815,0.882752,0.485696,1.21907,-0.352336,0.90105,-0.303863,-0.062017,0.308864,-0.973698,0.970402,-1.345227,1.124019,0.219336,1.058051,0.308866,-0.973698,0.970402,-1.345227,1.124021,0.219343,1.158706,-0.303944,0.308868,-0.973698,0.970404,-1.345227,1.124021,0.219352,-0.181143,0.66843,0.668432,-0.796139,-0.579346,-0.498712,0.568817,0.119761,1.561947,-0.322944,0.442146,0.11976,1.561947,-0.322944,0.442145,0.11976,1.561948,-0.322944,0.442144,-0.179801,0.692341,1.356733,0.746144,-0.963745,-0.917475,0.219332,-0.858704,-0.555941,-0.6254,-0.215511,-0.439064,1.171503,-0.762458,0.438229,0.087033,-0.521747,-0.92213,0.806472,0.871271,0.623311,-0.281996,-0.02437,-1.024544,0.731162,0.48699,0.649023,0.732816,0.48699,0.649023,0.732815,0.882752,0.668431,0.66843,0.158358,0.158326,0.469963,-0.388414,0.710299,0.360988,0.493571,0.95681,-0.628826,-0.832829,-1.010007,0.670699,0.007772,0.308955,-0.972572,0.970376,-1.344863,1.124338,0.219473,0.008806,0.308804,0.970476,-1.344863,0.219377,0.30886,0.219418,0.11988,1.562047,-0.322944,0.442131,0.225636,0.240702,1.563186,-0.322943,1.690917,0.440639,0.935941,0.439783,1.562458,0.442352,0.87942,-0.488673,1.560214,-0.322942,...,-0.69666,-0.580717,-0.690949,-0.663274,-0.93833,0.631334,-1.135974,-0.298222,0.396194,0.310374,0.199139,0.19903,0.19961,0.593685,-0.323252,-0.777631,-0.741038,-0.728671,-0.677646,-0.348323,-0.608123,-0.724472,-0.777152,-0.901134,-0.67664,-0.667226,-0.694055,-0.993618,-0.513554,-0.5213,0.883624,-1.213048,-1.448928,-1.660744,-1.613631,-1.681307,-1.593695,-1.666385,-1.643284,-2.573741,-2.646448,-2.531849,-1.970221,-1.897152,0.328447,-1.163153,-0.377838,0.336545,0.331506,-0.235074,0.359344,0.301285,0.301196,0.096351,0.238063,0.216614,0.071146,0.355819,-0.292354,-0.076244,1.345301,-0.203942,-0.086381,-0.203261,-0.099823,-0.012548,-0.051468,0.464003,0.070577,-0.259063,-0.082183,-0.054314,-0.079662,0.469421,-0.032429,-0.041543,0.486095,-0.597395,-0.29442,-0.272896,-0.218924,2.44513,-0.956102,-0.300455,-0.034751,-0.069726,-0.280116,-0.151445,-0.114502,-0.135126,0.992757,0.850611,-0.250455,-0.409784,-0.115867,-0.017436,-0.316957,-0.304498,0.240596,-0.074666,-0.075461,-0.020344,-0.238048,-0.361005,2.944592,0.308171,-0.303688,1.243108,-0.050693,-0.006432,1.983543,-0.053546,0.312278,-0.158853,-0.046845,1.948891,-0.193,-0.127627,-0.508208,0.738512,-0.382205,-0.731825,0.104216,-0.05353,-0.43831,-0.057708,-0.313349,2.146934,0.084714,-0.008612,-0.03205,-0.035511,1.926846,0.888457,-0.150485,-0.084405,-0.109787,1.334013,0.220171,-0.043039,-0.413421,-0.601024,-0.191459,0.081545,-0.698545,-0.024262,0.278456,-0.163379,-0.38464,0.969475,-0.142418,1.540343,-0.106862,-0.13852,-0.051867,1.218653,-0.063587,-0.30179,-0.076279,1.172432,-0.259917,0.850611,-0.261518,-0.429718,-0.069257,0.995478,-1.157606,-0.83437,-0.32651,-0.399626,-0.037825,-0.18253,0.300867,-0.047496,0.207994,0.066303,-0.509738,-0.271345,0.555355,-0.315211,-0.035252,-0.17443,-0.542453,-0.009168,-0.182379,-0.094723,1.34602,3.725567,-0.020344,-0.107477,-0.082264,3.661302,-0.315126,-0.077979,-0.168438,-0.098309,-0.504712,-0.521207,0.327648,-0.20036,0.321492,-0.233499,0.145685,-0.17126,-0.101439,0.92424,2.512403,0.874821,-0.118326,-0.020344,0.353443,-0.117768,-0.352794,-0.116825,-0.256739,-0.44934,0.668063,-0.238178,-0.192389,-0.215368,0.223838,-0.320049,-0.04086,0.303672,-0.012122,-0.483351,-0.078146,0.107747,-0.101002,-0.006483,0.322958,-0.191284,1.355134,-0.079637,-0.069,1.287622,-0.180919,-0.067412,-0.890888,-0.400889,2.266011,0.494559,-0.21846,1.842133,5.347303,-0.062198,-0.121038,-0.257506,-1.175353,-0.346946,1.076921,-0.668043,-0.014497,1.297443,-0.002347,-0.13948,-0.393188,-0.457958,-0.036541,-0.006483,-0.244978,1.331284,0.157018,-0.061121,-0.276862,1.119438,3.492914,0.099437,-0.36322,-0.033236,-0.094041,0.689633,-0.483936,-0.460234,1.341312,0.847616,-0.087346,1.576816,0.154377,-0.035463,-0.354939,-0.295884,1.697508,0.465151,-0.088162,1.289052,-0.267202,1.323683,-0.503681,-0.210049,0.271102,-0.022154,-0.080007,-0.002275,-0.150954,-0.21248,2.251543,-0.325943,0.724425,-0.115291,-0.014181,-0.250694,-0.076084,-0.062971,-0.033236,-0.16851,0.533678,0.107485,1.231919,2.226902,-0.396552,-0.07769,0.30388,1.231919,0.094368,0.008191,0.394351,-0.012865,-0.766663,-0.0561,-0.041032,-0.213946,-0.276891,-0.276888,1.638551,-0.129941,-0.132326,-0.876689,0.34884,0.130286,-0.031773,-0.279915,-0.486462,2.678276,-0.035722,-0.067093,-0.011141,-0.147101,-0.11084,0.668063,0.240596,-0.014384,1.156487,-0.105518,-0.778218,-0.199659,0.686803,-0.884729,-0.351304,-0.31576,-0.023379,0.108393,-0.517447,-0.006483,-0.143901,-0.068931,-0.020503,-0.046479,1.427402,0.871066,0.954836,-0.839962,-0.070889,-0.335558,-0.03493,-0.014313,4.323121,-0.009168,0.843653,0.211157,1.715946,-0.080904,-0.2267,-0.164738,-0.342738,-0.17989,1.046735,-0.143958,-0.123152,-0.143769,-0.249253,-0.300201,2.638917,-0.315126,-0.132143,-0.006483,-0.082525,-0.202555,-0.325732,-0.258885,-0.140459,1.811124,0.080429,-0.236762,-0.362794,-0.006483,-0.31374,-0.381484,-0.084071,-0.102517,-0.020057,1.069699,-0.008487,0.168488,-0.109866,1.076921,-0.202786,1.756156,0.391279,-0.267413,1.610128,-0.301813,-0.115623,0.312131,-0.256912,-0.014384,-0.193674,1.385351,1.27629,-0.12718,-0.602915,2.351899,-0.284717,0.25727,-0.620399,-0.035308,1.056224,-0.031773,-0.83172,1.107608,1.382366,-0.208693,0.640692,-0.083292,-0.462925,-0.40023,-0.209887,-0.308843,-0.303305,0.178538,-0.072323,1.090653,0.289848,-0.472175,-0.213074,-0.090584,-0.046108,-0.07753,-0.154008,-0.205586,1,True,False,False,False,False,False,False,False,True,False,True,False,False,False,False,True,False,False,False,False,False,False,False,False,False,True,False,True,False,False,False,True,False,False,True,False,False,False,True,False,False,False,False,False,True,True,False,False,False


In [3]:
train_cb['target']

0        1
1        1
2        1
3        1
4        2
        ..
24200    1
24201    2
24202    2
24203    1
24204    1
Name: target, Length: 24205, dtype: int64

## 2. Feature Selection

As discovered in notebook, "data_understanding" there was a substantial class imbalance. To give the model a better understanding for its evaluation, first calculate the class weights.

In [4]:
# calculate class weights
class_counts=train_cb['target'].value_counts()
t_samples=len(train_cb)
num_classes=2

class_weights={cls:t_samples / (num_classes * count) for cls,count in class_counts.items()}

print(class_weights)

{1: 0.6478854389721628, 2: 2.1904977375565613}


In [5]:
def feature_selection_cb(train,target_col,test,top_n_features):
    # split into features and target
    X=train.drop(columns=[target_col])
    y=train[target_col]
    
    # split training df into train and validation
    X_train,X_val, y_train,y_val=train_test_split(X,y,test_size=0.2,random_state=42)
    
    # check for NaN values in target columns
    print(y_train.isna().sum(), y_val.isna().sum())  # Ensure these are 0
    
    # create Pools
    train_pool=Pool(X_train,y_train)
    val_pool=Pool(X_val,y_val)
    
    # catboost classifier
    model=CatBoostClassifier(
        iterations=100,
        depth=6,
        learning_rate=0.1,
        loss_function='Logloss',
        class_weights=list(class_weights.values()), # use list from calc class_weights
        verbose=False
    )
    
    # train model
    model.fit(train_pool,eval_set=val_pool,use_best_model=True,plot=False)
    
    # feature importance
    feature_importance=model.get_feature_importance(train_pool)
        # store features names to respective score
    feat_importance_df=pd.DataFrame({
        'Feature': X_train.columns,
        'Importance': feature_importance
    })
        # sort df
    feat_importance_df=feat_importance_df.sort_values(by='Importance',ascending=False)
    
    # select top features and apply selected feats to test and train df
    top_feats=feat_importance_df.head(top_n_features)['Feature'].tolist()
    X_train_feat=X_train[top_feats]
    X_val_feat=X_val[top_feats]
    test_feat=test[top_feats]
    return X_train_feat,X_val_feat,y_train,y_val,test_feat,top_feats

In [20]:
target_col='target'

X_train_feat,X_val_feat,y_train,y_val,test_feat,top_feats=feature_selection_cb(train_cb,target_col,test_cb,top_n_features=100)

print(X_train_feat.isna().sum().sum()) 
print(y_train.isna().sum())
print(f'Top Features:{top_feats}')

0 0
0
0
Top Features:['VAR_0795', 'VAR_0004', 'VAR_0137', 'VAR_0837', 'VAR_0853', 'VAR_0070', 'VAR_0007', 'VAR_0810', 'VAR_1042', 'VAR_1816', 'VAR_0807', 'VAR_0078', 'VAR_1747', 'VAR_0505', 'VAR_0806', 'VAR_1750', 'VAR_0711', 'VAR_0002', 'VAR_1377', 'VAR_0969', 'VAR_0855', 'VAR_0886', 'VAR_1029', 'VAR_1128', 'VAR_0144', 'VAR_0540', 'VAR_1329', 'VAR_1021', 'VAR_0884', 'VAR_1791', 'VAR_1114', 'VAR_1020', 'VAR_0304', 'VAR_0720', 'VAR_1130', 'VAR_0805', 'VAR_1426', 'VAR_1399', 'VAR_0885', 'VAR_1823', 'VAR_1424', 'VAR_0580', 'VAR_0708', 'VAR_0005_B', 'VAR_0758', 'VAR_0881', 'VAR_0535', 'VAR_1368', 'VAR_0802', 'VAR_1421', 'VAR_0234', 'VAR_0212', 'VAR_1743', 'VAR_1100', 'VAR_0712', 'VAR_0900', 'VAR_0241', 'VAR_1842', 'VAR_1392', 'VAR_0721', 'VAR_1120', 'VAR_1107', 'VAR_0088', 'VAR_0282', 'VAR_1531', 'VAR_1150', 'VAR_0003', 'VAR_1022', 'VAR_0764', 'VAR_0062', 'VAR_1129', 'VAR_0968', 'VAR_1407', 'VAR_0722', 'VAR_0279', 'VAR_0612', 'VAR_1406', 'REGION_0237_South', 'VAR_1089', 'VAR_1400', 'VAR_15

## 3. Train Model

In [21]:
# create train and test sets with selected features
train_selected=pd.concat([X_train_feat.reset_index(drop=True),y_train.reset_index(drop=True)],axis=1)
test_selected=test_feat

print(X_train_feat.shape, y_train.shape)  # Ensure these match

(19364, 100) (19364,)


In [22]:
train_selected['target']

0        2
1        2
2        2
3        2
4        2
        ..
19359    1
19360    1
19361    1
19362    1
19363    1
Name: target, Length: 19364, dtype: int64

In [23]:
print('model computing...')

target_col='target'

X_train_selected=train_selected.drop(columns=[target_col])
y_train_selected=train_selected[target_col]

X_train, X_val, y_train, y_val = train_test_split(X_train_selected, y_train_selected, test_size=0.2, random_state=42)

# catboost Pools
train_pool = Pool(X_train, y_train)
val_pool = Pool(X_val, y_val)
test_pool = Pool(test_selected)

# catboost classifier
model = CatBoostClassifier(
    iterations=1000,
    depth=6,
    learning_rate=0.01,
    loss_function='Logloss',
    class_weights=list(class_weights.values()),
    early_stopping_rounds=50,
    verbose=100
)

# train model
model.fit(train_pool, eval_set=val_pool, use_best_model=True, plot=False)

# evaluate model
preds = model.predict(val_pool)
accuracy = (preds == y_val).mean()
print(f'Validation Accuracy: {accuracy:.4f}')

# predict
test_preds=model.predict(test_pool)

print('Model is successful')

# test_predictions=pd.DataFrame(test_preds,columns=['Predicted'])
# test_predictions.to_csv('test_predictions.csv', index=False)

model computing...
0:	learn: 0.6916706	test: 0.6916964	best: 0.6916964 (0)	total: 6.62ms	remaining: 6.62s
100:	learn: 0.6149357	test: 0.6228558	best: 0.6228558 (100)	total: 822ms	remaining: 7.32s
200:	learn: 0.5869427	test: 0.6020463	best: 0.6020463 (200)	total: 1.44s	remaining: 5.71s
300:	learn: 0.5709300	test: 0.5922184	best: 0.5922184 (300)	total: 2.06s	remaining: 4.77s
400:	learn: 0.5594276	test: 0.5869324	best: 0.5869278 (399)	total: 2.69s	remaining: 4.03s
500:	learn: 0.5502790	test: 0.5834729	best: 0.5834729 (500)	total: 3.57s	remaining: 3.56s
600:	learn: 0.5424140	test: 0.5811472	best: 0.5811472 (600)	total: 4.19s	remaining: 2.78s
700:	learn: 0.5348393	test: 0.5791667	best: 0.5791667 (700)	total: 4.89s	remaining: 2.09s
800:	learn: 0.5276815	test: 0.5778715	best: 0.5778656 (799)	total: 5.61s	remaining: 1.39s
900:	learn: 0.5206312	test: 0.5766383	best: 0.5766383 (900)	total: 6.22s	remaining: 684ms
999:	learn: 0.5130476	test: 0.5759458	best: 0.5759458 (999)	total: 6.81s	remaining: 

In [24]:
validation_predictions = model.predict(val_pool,prediction_type='Class')
print(classification_report(y_val, validation_predictions))
print(confusion_matrix(y_val, validation_predictions))

              precision    recall  f1-score   support

           1       0.89      0.73      0.80      3020
           2       0.42      0.69      0.52       853

    accuracy                           0.72      3873
   macro avg       0.66      0.71      0.66      3873
weighted avg       0.79      0.72      0.74      3873

[[2204  816]
 [ 265  588]]
