In [14]:
import pandas as pd
import numpy as np
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score, confusion_matrix
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.model_selection import KFold
import xgboost as xgb
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score

In [15]:
data = pd.read_csv('final_data.csv')
X = data.iloc[:, 1: -1]
Y = data.iloc[:, -1]

In [16]:
sm = SMOTE(random_state=42)
X_train_res, y_train_res = sm.fit_resample(X, Y)
train, test, y_train, y_test = train_test_split(X_train_res,y_train_res, test_size=0.2, random_state=100)
train, val, y_train, y_val = train_test_split(train,y_train,test_size=0.25)

In [None]:
folds = KFold(n_splits=5)
folds.split(X)
xgb_c1 = XGBClassifier()
for train_index, test_index in folds.split(X):
    # Split train-test
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = Y[train_index], Y[test_index]
    xgb_c1.fit(X_train, y_train)
    y_pred = xgb_c1.predict(X_test)
    
    
    

In [17]:
xgb_c1 = XGBClassifier(subsample=0.5,samplingmethod='gradient_based')
xgb_c1.fit(train, y_train,eval_set=[(train, y_train), (val, y_val)])
y_pred = xgb_c1.predict(test)
    

precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
acc = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)
print('Precision:', precision)
print('Recall:', recall)
print('Accuracy:', acc)
print('Confusion Matrix:', cm)

[0]	validation_0-error:0.136286	validation_1-error:0.13402
[1]	validation_0-error:0.124885	validation_1-error:0.12401
[2]	validation_0-error:0.136085	validation_1-error:0.13386
[3]	validation_0-error:0.125533	validation_1-error:0.124367
[4]	validation_0-error:0.11778	validation_1-error:0.116044
[5]	validation_0-error:0.099698	validation_1-error:0.097933
[6]	validation_0-error:0.098832	validation_1-error:0.09664
[7]	validation_0-error:0.093451	validation_1-error:0.091383
[8]	validation_0-error:0.090069	validation_1-error:0.089031
[9]	validation_0-error:0.091337	validation_1-error:0.089844
[10]	validation_0-error:0.09034	validation_1-error:0.088588
[11]	validation_0-error:0.088547	validation_1-error:0.086975
[12]	validation_0-error:0.088181	validation_1-error:0.086827
[13]	validation_0-error:0.08331	validation_1-error:0.081988
[14]	validation_0-error:0.078085	validation_1-error:0.076608
[15]	validation_0-error:0.076103	validation_1-error:0.074625
[16]	validation_0-error:0.072569	validati

In [20]:
print('Roc-auc score',roc_auc_score(y_test, y_pred))

Roc-auc score 0.9750549125131733


In [28]:
y_pred

array([0, 0, 0, ..., 0, 1, 1], dtype=int64)

In [25]:
kfold = KFold(n_splits=10)
pred = []
train, test, y_train, y_test = train_test_split(X, Y, test_size = 0.1,random_state = 42)
for train_ind, test_ind in kfold.split(train):
    train_val, test_val, y_train_val, y_test_val = train.iloc[train_ind, :], train.iloc[test_ind, :],\
                                                   y_train.iloc[train_ind], y_train.iloc[test_ind]
    xgb_c1.fit(train_val, y_train_val,eval_set=[(train, y_train), (test_val, y_test_val)])
    prediction = xgb_c1.predict_proba(test)
    pred.append(
    prediction[:, 1]
    )
    

print('Roc-auc score with Catboost:',roc_auc_score(y_test, np.mean(pred, axis = 0)))

[0]	validation_0-error:0.052311	validation_1-error:0.053873
[1]	validation_0-error:0.052433	validation_1-error:0.054069
[2]	validation_0-error:0.051745	validation_1-error:0.053336
[3]	validation_0-error:0.051715	validation_1-error:0.053189
[4]	validation_0-error:0.051735	validation_1-error:0.053189
[5]	validation_0-error:0.053527	validation_1-error:0.054801
[6]	validation_0-error:0.053361	validation_1-error:0.054997
[7]	validation_0-error:0.053987	validation_1-error:0.055876
[8]	validation_0-error:0.056815	validation_1-error:0.058611
[9]	validation_0-error:0.057215	validation_1-error:0.058806
[10]	validation_0-error:0.057528	validation_1-error:0.059197
[11]	validation_0-error:0.056614	validation_1-error:0.058269
[12]	validation_0-error:0.056033	validation_1-error:0.057878
[13]	validation_0-error:0.055838	validation_1-error:0.058171
[14]	validation_0-error:0.054372	validation_1-error:0.056364
[15]	validation_0-error:0.054851	validation_1-error:0.056804
[16]	validation_0-error:0.055276	v

[36]	validation_0-error:0.045419	validation_1-error:0.045961
[37]	validation_0-error:0.045332	validation_1-error:0.045472
[38]	validation_0-error:0.045273	validation_1-error:0.045228
[39]	validation_0-error:0.044946	validation_1-error:0.045375
[40]	validation_0-error:0.044858	validation_1-error:0.045375
[41]	validation_0-error:0.04475	validation_1-error:0.045179
[42]	validation_0-error:0.044736	validation_1-error:0.045179
[43]	validation_0-error:0.044648	validation_1-error:0.044886
[44]	validation_0-error:0.044106	validation_1-error:0.044202
[45]	validation_0-error:0.044198	validation_1-error:0.044495
[46]	validation_0-error:0.043778	validation_1-error:0.043861
[47]	validation_0-error:0.043612	validation_1-error:0.043567
[48]	validation_0-error:0.043471	validation_1-error:0.043274
[49]	validation_0-error:0.043129	validation_1-error:0.042737
[50]	validation_0-error:0.042841	validation_1-error:0.042444
[51]	validation_0-error:0.042508	validation_1-error:0.042151
[52]	validation_0-error:0

[71]	validation_0-error:0.038337	validation_1-error:0.036876
[72]	validation_0-error:0.038205	validation_1-error:0.036778
[73]	validation_0-error:0.038	validation_1-error:0.036583
[74]	validation_0-error:0.037683	validation_1-error:0.036485
[75]	validation_0-error:0.037458	validation_1-error:0.035997
[76]	validation_0-error:0.037326	validation_1-error:0.035948
[77]	validation_0-error:0.037307	validation_1-error:0.036046
[78]	validation_0-error:0.037111	validation_1-error:0.03585
[79]	validation_0-error:0.036779	validation_1-error:0.035264
[80]	validation_0-error:0.036803	validation_1-error:0.03546
[81]	validation_0-error:0.036642	validation_1-error:0.035167
[82]	validation_0-error:0.036535	validation_1-error:0.034971
[83]	validation_0-error:0.036388	validation_1-error:0.035069
[84]	validation_0-error:0.036144	validation_1-error:0.034336
[85]	validation_0-error:0.036037	validation_1-error:0.03419
[86]	validation_0-error:0.036046	validation_1-error:0.034629
[87]	validation_0-error:0.0357

[7]	validation_0-error:0.058685	validation_1-error:0.06076
[8]	validation_0-error:0.059223	validation_1-error:0.061493
[9]	validation_0-error:0.057411	validation_1-error:0.059637
[10]	validation_0-error:0.055183	validation_1-error:0.05739
[11]	validation_0-error:0.05364	validation_1-error:0.056022
[12]	validation_0-error:0.054397	validation_1-error:0.056413
[13]	validation_0-error:0.05257	validation_1-error:0.054215
[14]	validation_0-error:0.051754	validation_1-error:0.053824
[15]	validation_0-error:0.050963	validation_1-error:0.052799
[16]	validation_0-error:0.049664	validation_1-error:0.050747
[17]	validation_0-error:0.050274	validation_1-error:0.051773
[18]	validation_0-error:0.050191	validation_1-error:0.052115
[19]	validation_0-error:0.050553	validation_1-error:0.052457
[20]	validation_0-error:0.050021	validation_1-error:0.051626
[21]	validation_0-error:0.050392	validation_1-error:0.052652
[22]	validation_0-error:0.05006	validation_1-error:0.05192
[23]	validation_0-error:0.049928	

[43]	validation_0-error:0.044325	validation_1-error:0.04474
[44]	validation_0-error:0.043983	validation_1-error:0.044349
[45]	validation_0-error:0.043749	validation_1-error:0.0443
[46]	validation_0-error:0.0437	validation_1-error:0.044447
[47]	validation_0-error:0.043656	validation_1-error:0.044251
[48]	validation_0-error:0.043607	validation_1-error:0.044251
[49]	validation_0-error:0.043383	validation_1-error:0.043861
[50]	validation_0-error:0.043246	validation_1-error:0.04347
[51]	validation_0-error:0.043104	validation_1-error:0.043421
[52]	validation_0-error:0.042665	validation_1-error:0.042981
[53]	validation_0-error:0.04243	validation_1-error:0.042737
[54]	validation_0-error:0.042113	validation_1-error:0.042493
[55]	validation_0-error:0.042054	validation_1-error:0.042591
[56]	validation_0-error:0.041786	validation_1-error:0.042395
[57]	validation_0-error:0.041541	validation_1-error:0.042249
[58]	validation_0-error:0.041527	validation_1-error:0.0422
[59]	validation_0-error:0.041532	

[78]	validation_0-error:0.036974	validation_1-error:0.037366
[79]	validation_0-error:0.036818	validation_1-error:0.037513
[80]	validation_0-error:0.036574	validation_1-error:0.037269
[81]	validation_0-error:0.036403	validation_1-error:0.037269
[82]	validation_0-error:0.036271	validation_1-error:0.037073
[83]	validation_0-error:0.036354	validation_1-error:0.037122
[84]	validation_0-error:0.036237	validation_1-error:0.037073
[85]	validation_0-error:0.036037	validation_1-error:0.037122
[86]	validation_0-error:0.035744	validation_1-error:0.036927
[87]	validation_0-error:0.035553	validation_1-error:0.036585
[88]	validation_0-error:0.03548	validation_1-error:0.036389
[89]	validation_0-error:0.035407	validation_1-error:0.036341
[90]	validation_0-error:0.035348	validation_1-error:0.036292
[91]	validation_0-error:0.03528	validation_1-error:0.036292
[92]	validation_0-error:0.035192	validation_1-error:0.035999
[93]	validation_0-error:0.034938	validation_1-error:0.035657
[94]	validation_0-error:0.

[13]	validation_0-error:0.056018	validation_1-error:0.055585
[14]	validation_0-error:0.054441	validation_1-error:0.053583
[15]	validation_0-error:0.05321	validation_1-error:0.052264
[16]	validation_0-error:0.052082	validation_1-error:0.051092
[17]	validation_0-error:0.051877	validation_1-error:0.050945
[18]	validation_0-error:0.050211	validation_1-error:0.048552
[19]	validation_0-error:0.049625	validation_1-error:0.048161
[20]	validation_0-error:0.049376	validation_1-error:0.04821
[21]	validation_0-error:0.049767	validation_1-error:0.049089
[22]	validation_0-error:0.049952	validation_1-error:0.048845
[23]	validation_0-error:0.049918	validation_1-error:0.048943
[24]	validation_0-error:0.049654	validation_1-error:0.048601
[25]	validation_0-error:0.049161	validation_1-error:0.048161
[26]	validation_0-error:0.049298	validation_1-error:0.047966
[27]	validation_0-error:0.048057	validation_1-error:0.046793
[28]	validation_0-error:0.047994	validation_1-error:0.046647
[29]	validation_0-error:0.

[48]	validation_0-error:0.044086	validation_1-error:0.044644
[49]	validation_0-error:0.043627	validation_1-error:0.043716
[50]	validation_0-error:0.043305	validation_1-error:0.043667
[51]	validation_0-error:0.043148	validation_1-error:0.042983
[52]	validation_0-error:0.042894	validation_1-error:0.042983
[53]	validation_0-error:0.042591	validation_1-error:0.042544
[54]	validation_0-error:0.042406	validation_1-error:0.042544
[55]	validation_0-error:0.042259	validation_1-error:0.042348
[56]	validation_0-error:0.041751	validation_1-error:0.041713
[57]	validation_0-error:0.041312	validation_1-error:0.041176
[58]	validation_0-error:0.040809	validation_1-error:0.040785
[59]	validation_0-error:0.04052	validation_1-error:0.04059
[60]	validation_0-error:0.040433	validation_1-error:0.040444
[61]	validation_0-error:0.039861	validation_1-error:0.039613
[62]	validation_0-error:0.039558	validation_1-error:0.03932
[63]	validation_0-error:0.039505	validation_1-error:0.039271
[64]	validation_0-error:0.0

In [27]:
pred

[array([0.96577   , 0.9890392 , 0.9629396 , ..., 0.05404733, 0.98394716,
        0.9895839 ], dtype=float32),
 array([0.9632713 , 0.98568106, 0.96334857, ..., 0.05304264, 0.9850036 ,
        0.9918046 ], dtype=float32),
 array([0.96326935, 0.98893857, 0.95778733, ..., 0.06200124, 0.9807358 ,
        0.9901419 ], dtype=float32),
 array([0.965991  , 0.99026275, 0.9519397 , ..., 0.04039519, 0.9854866 ,
        0.99223125], dtype=float32),
 array([0.9660719 , 0.9933989 , 0.95865345, ..., 0.05288278, 0.9848519 ,
        0.99245876], dtype=float32),
 array([0.964028  , 0.99009925, 0.9551687 , ..., 0.06963506, 0.98472184,
        0.99207944], dtype=float32),
 array([0.9657457 , 0.9898722 , 0.9732912 , ..., 0.05745121, 0.98587555,
        0.9913833 ], dtype=float32),
 array([0.96915764, 0.9930483 , 0.9639168 , ..., 0.04839014, 0.98587877,
        0.9933182 ], dtype=float32),
 array([0.96786755, 0.990559  , 0.9639224 , ..., 0.05167852, 0.98421496,
        0.9916121 ], dtype=float32),
 array([0.

In [26]:
precision = precision_score(y_test, pred)
recall = recall_score(y_test, pred)
acc = accuracy_score(y_test, pred)
cm = confusion_matrix(y_test, pred)
print('Precision:', precision)
print('Recall:', recall)
print('Accuracy:', acc)
print('Confusion Matrix:', cm)

ValueError: Found input variables with inconsistent numbers of samples: [22749, 10]

In [None]:
X_test_new = pd.read_csv('X_test.csv')

y_test_new = pd.read_csv('y_test.csv')

In [None]:
y_pred = xgb_c1.predict(X_test_new)
    

precision = precision_score(y_test_new, y_pred)
recall = recall_score(y_test_new, y_pred)
acc = accuracy_score(y_test_new, y_pred)
cm = confusion_matrix(y_test_new, y_pred)
print('Precision:', precision)
print('Recall:', recall)
print('Accuracy:', acc)
print('Confusion Matrix:', cm)