# Download Dataset

In [1]:
!kaggle datasets download -d laotse/credit-risk-dataset

Downloading credit-risk-dataset.zip to /media/hari31416/Hari_SSD/Users/harik/Desktop/Placement/Projects/Risk_Assesment
  0%|                                                | 0.00/368k [00:00<?, ?B/s]
100%|████████████████████████████████████████| 368k/368k [00:00<00:00, 5.10MB/s]


In [2]:
!unzip credit-risk-dataset.zip

Archive:  credit-risk-dataset.zip
  inflating: credit_risk_dataset.csv  


In [3]:
!rm credit-risk-dataset.zip

# Imports

In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as io

io.templates.default = "plotly_dark"
io.renderers.default = "notebook_connected"


In [56]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from catboost import CatBoostClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import optuna

# EDA

In [5]:
df = pd.read_csv("credit_risk_dataset.csv")
df.head()

Unnamed: 0,person_age,person_income,person_home_ownership,person_emp_length,loan_intent,loan_grade,loan_amnt,loan_int_rate,loan_status,loan_percent_income,cb_person_default_on_file,cb_person_cred_hist_length
0,22,59000,RENT,123.0,PERSONAL,D,35000,16.02,1,0.59,Y,3
1,21,9600,OWN,5.0,EDUCATION,B,1000,11.14,0,0.1,N,2
2,25,9600,MORTGAGE,1.0,MEDICAL,C,5500,12.87,1,0.57,N,3
3,23,65500,RENT,4.0,MEDICAL,C,35000,15.23,1,0.53,N,2
4,24,54400,RENT,8.0,MEDICAL,C,35000,14.27,1,0.55,Y,4


In [6]:
df.describe()

Unnamed: 0,person_age,person_income,person_emp_length,loan_amnt,loan_int_rate,loan_status,loan_percent_income,cb_person_cred_hist_length
count,32581.0,32581.0,31686.0,32581.0,29465.0,32581.0,32581.0,32581.0
mean,27.7346,66074.85,4.789686,9589.371106,11.011695,0.218164,0.170203,5.804211
std,6.348078,61983.12,4.14263,6322.086646,3.240459,0.413006,0.106782,4.055001
min,20.0,4000.0,0.0,500.0,5.42,0.0,0.0,2.0
25%,23.0,38500.0,2.0,5000.0,7.9,0.0,0.09,3.0
50%,26.0,55000.0,4.0,8000.0,10.99,0.0,0.15,4.0
75%,30.0,79200.0,7.0,12200.0,13.47,0.0,0.23,8.0
max,144.0,6000000.0,123.0,35000.0,23.22,1.0,0.83,30.0


In [17]:
df.dtypes

person_age                      int64
person_income                   int64
person_home_ownership          object
person_emp_length             float64
loan_intent                    object
loan_grade                     object
loan_amnt                       int64
loan_int_rate                 float64
loan_status                     int64
loan_percent_income           float64
cb_person_default_on_file      object
cb_person_cred_hist_length      int64
dtype: object

In [37]:
df.fillna(df.mean(), inplace=True)





In [38]:
df.isnull().sum()

person_age                    0
person_income                 0
person_home_ownership         0
person_emp_length             0
loan_intent                   0
loan_grade                    0
loan_amnt                     0
loan_int_rate                 0
loan_status                   0
loan_percent_income           0
cb_person_default_on_file     0
cb_person_cred_hist_length    0
dtype: int64

In [39]:
str_columns = df.dtypes[df.dtypes == "object"].index
str_columns

Index(['person_home_ownership', 'loan_intent', 'loan_grade',
       'cb_person_default_on_file'],
      dtype='object')

In [40]:
for col in str_columns:
    df[col] = df[col].apply(lambda x: str(x).strip())

In [41]:
df.dtypes

person_age                      int64
person_income                   int64
person_home_ownership          object
person_emp_length             float64
loan_intent                    object
loan_grade                     object
loan_amnt                       int64
loan_int_rate                 float64
loan_status                     int64
loan_percent_income           float64
cb_person_default_on_file      object
cb_person_cred_hist_length      int64
dtype: object

In [42]:
df["loan_status"].value_counts()/len(df)

0    0.781836
1    0.218164
Name: loan_status, dtype: float64

In [48]:
X = df.drop("loan_status", axis=1)
y = df["loan_status"]
cat_features = X.columns[np.where(X.dtypes != float)[0]].values.tolist()

In [49]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [50]:
catboost = CatBoostClassifier(iterations=1000, learning_rate=0.1, depth=6, loss_function="Logloss", eval_metric="AUC", random_seed=42, use_best_model=True)
catboost.fit(X_train, y_train, eval_set=(X_test, y_test), verbose=100, cat_features=cat_features)

0:	test: 0.8452308	best: 0.8452308 (0)	total: 107ms	remaining: 1m 46s
100:	test: 0.9570248	best: 0.9570248 (100)	total: 5.74s	remaining: 51s
200:	test: 0.9613269	best: 0.9613370 (193)	total: 12s	remaining: 47.7s
300:	test: 0.9625046	best: 0.9625046 (300)	total: 19s	remaining: 44.1s
400:	test: 0.9629026	best: 0.9630238 (363)	total: 24.8s	remaining: 37s
500:	test: 0.9636308	best: 0.9636308 (500)	total: 30.6s	remaining: 30.5s
600:	test: 0.9633975	best: 0.9636308 (500)	total: 36.7s	remaining: 24.4s
700:	test: 0.9631335	best: 0.9636308 (500)	total: 43.3s	remaining: 18.5s
800:	test: 0.9633454	best: 0.9636308 (500)	total: 50.4s	remaining: 12.5s
900:	test: 0.9629589	best: 0.9636308 (500)	total: 56.9s	remaining: 6.25s
999:	test: 0.9630222	best: 0.9636308 (500)	total: 1m 3s	remaining: 0us

bestTest = 0.9636308439
bestIteration = 500

Shrink model to first 501 iterations.


<catboost.core.CatBoostClassifier at 0x7f6e60d7de80>

In [51]:
catboost.get_best_score()

{'learn': {'Logloss': 0.11028521931653382},
 'validation': {'Logloss': 0.16496361740286555, 'AUC': 0.9636308438758692}}

In [52]:
y_pred = catboost.predict(X_test)

In [53]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.94      0.99      0.96      5072
           1       0.96      0.78      0.86      1445

    accuracy                           0.94      6517
   macro avg       0.95      0.88      0.91      6517
weighted avg       0.94      0.94      0.94      6517



In [54]:
accuracy_score(y_test, y_pred)

0.9427650759551941

In [57]:
#Fine tune using optuna

def objective(trial):
    params = {
        'iterations': 1000,
        'learning_rate': trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
        'depth': trial.suggest_int('depth', 3, 10),
        'loss_function': 'Logloss',
        'eval_metric': 'AUC',
        'random_seed': 42,
        'use_best_model': True,
        'verbose': 100
    }
    catboost = CatBoostClassifier(**params)
    catboost.fit(X_train, y_train, eval_set=(X_test, y_test), cat_features=cat_features)
    return catboost.get_best_score()['validation']['AUC']

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=100)

[32m[I 2023-01-11 12:07:12,623][0m A new study created in memory with name: no-name-45b2860b-2492-46ec-85de-fc9f3d571bfb[0m

suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use :func:`~optuna.trial.Trial.suggest_float` instead.



0:	test: 0.8427404	best: 0.8427404 (0)	total: 69.6ms	remaining: 1m 9s
100:	test: 0.8630100	best: 0.8632258 (91)	total: 3.84s	remaining: 34.2s
200:	test: 0.9161063	best: 0.9161063 (200)	total: 7.24s	remaining: 28.8s
300:	test: 0.9312711	best: 0.9312711 (300)	total: 10.2s	remaining: 23.7s
400:	test: 0.9370707	best: 0.9370707 (400)	total: 13.3s	remaining: 19.8s
500:	test: 0.9399149	best: 0.9399149 (500)	total: 16.6s	remaining: 16.6s
600:	test: 0.9418791	best: 0.9418791 (600)	total: 20.1s	remaining: 13.3s
700:	test: 0.9433868	best: 0.9433876 (699)	total: 23.8s	remaining: 10.1s
800:	test: 0.9449891	best: 0.9449891 (800)	total: 27.3s	remaining: 6.79s
900:	test: 0.9464747	best: 0.9464747 (900)	total: 31.2s	remaining: 3.42s


[32m[I 2023-01-11 12:07:48,702][0m Trial 0 finished with value: 0.9483293855675504 and parameters: {'learning_rate': 0.004941949914593827, 'depth': 4}. Best is trial 0 with value: 0.9483293855675504.[0m


999:	test: 0.9483294	best: 0.9483294 (999)	total: 35.2s	remaining: 0us

bestTest = 0.9483293856
bestIteration = 999

0:	test: 0.8375186	best: 0.8375186 (0)	total: 38.8ms	remaining: 38.8s
100:	test: 0.8450211	best: 0.8450218 (88)	total: 2.89s	remaining: 25.7s
200:	test: 0.8480512	best: 0.8480731 (197)	total: 6.33s	remaining: 25.2s
300:	test: 0.8579926	best: 0.8580121 (285)	total: 9.19s	remaining: 21.3s
400:	test: 0.8629460	best: 0.8633868 (399)	total: 12s	remaining: 17.9s
500:	test: 0.8631463	best: 0.8633868 (399)	total: 14.9s	remaining: 14.8s
600:	test: 0.8635892	best: 0.8636635 (525)	total: 17.7s	remaining: 11.7s
700:	test: 0.8962782	best: 0.8962782 (700)	total: 20.2s	remaining: 8.62s
800:	test: 0.9094312	best: 0.9094312 (800)	total: 22.8s	remaining: 5.67s
900:	test: 0.9144484	best: 0.9144484 (900)	total: 25.5s	remaining: 2.8s


[32m[I 2023-01-11 12:08:17,681][0m Trial 1 finished with value: 0.919175771997424 and parameters: {'learning_rate': 0.0012737888340198951, 'depth': 3}. Best is trial 0 with value: 0.9483293855675504.[0m


999:	test: 0.9191758	best: 0.9191758 (999)	total: 28.2s	remaining: 0us

bestTest = 0.919175772
bestIteration = 999

0:	test: 0.8452308	best: 0.8452308 (0)	total: 52.7ms	remaining: 52.6s
100:	test: 0.8677179	best: 0.8677751 (98)	total: 4.85s	remaining: 43.1s
200:	test: 0.8864250	best: 0.8864250 (200)	total: 9.56s	remaining: 38s
300:	test: 0.9302027	best: 0.9302027 (300)	total: 13.9s	remaining: 32.2s
400:	test: 0.9378196	best: 0.9378196 (400)	total: 18.5s	remaining: 27.6s
500:	test: 0.9410652	best: 0.9410652 (500)	total: 23.1s	remaining: 23s
600:	test: 0.9433487	best: 0.9433487 (600)	total: 27.8s	remaining: 18.4s
700:	test: 0.9450568	best: 0.9450568 (700)	total: 32.6s	remaining: 13.9s
800:	test: 0.9462930	best: 0.9462930 (800)	total: 37.6s	remaining: 9.35s
900:	test: 0.9473726	best: 0.9473726 (900)	total: 42.8s	remaining: 4.71s


[32m[I 2023-01-11 12:09:06,499][0m Trial 2 finished with value: 0.9484400412605198 and parameters: {'learning_rate': 0.0032856092034610454, 'depth': 6}. Best is trial 2 with value: 0.9484400412605198.[0m


999:	test: 0.9484400	best: 0.9484400 (999)	total: 48s	remaining: 0us

bestTest = 0.9484400413
bestIteration = 999

0:	test: 0.8452308	best: 0.8452308 (0)	total: 63.8ms	remaining: 1m 3s
100:	test: 0.9405241	best: 0.9405241 (100)	total: 4.35s	remaining: 38.7s
200:	test: 0.9483668	best: 0.9483668 (200)	total: 9.01s	remaining: 35.8s
300:	test: 0.9533287	best: 0.9533287 (300)	total: 14.4s	remaining: 33.4s
400:	test: 0.9549700	best: 0.9549700 (400)	total: 19.9s	remaining: 29.8s
500:	test: 0.9563495	best: 0.9563495 (500)	total: 25.6s	remaining: 25.5s
600:	test: 0.9570566	best: 0.9570566 (600)	total: 31.2s	remaining: 20.7s
700:	test: 0.9576058	best: 0.9576065 (698)	total: 36.9s	remaining: 15.7s
800:	test: 0.9580312	best: 0.9580312 (800)	total: 42.6s	remaining: 10.6s
900:	test: 0.9581281	best: 0.9581610 (839)	total: 48.4s	remaining: 5.31s


[32m[I 2023-01-11 12:10:01,722][0m Trial 3 finished with value: 0.9585215799067818 and parameters: {'learning_rate': 0.01629855372310976, 'depth': 6}. Best is trial 3 with value: 0.9585215799067818.[0m


999:	test: 0.9585216	best: 0.9585216 (999)	total: 54.3s	remaining: 0us

bestTest = 0.9585215799
bestIteration = 999

0:	test: 0.8427404	best: 0.8427404 (0)	total: 50.2ms	remaining: 50.2s
100:	test: 0.8566349	best: 0.8566862 (98)	total: 3.6s	remaining: 32.1s
200:	test: 0.8628699	best: 0.8631856 (177)	total: 7.34s	remaining: 29.2s
300:	test: 0.8649634	best: 0.8650202 (291)	total: 11.1s	remaining: 25.8s
400:	test: 0.9044571	best: 0.9044571 (400)	total: 13.9s	remaining: 20.8s
500:	test: 0.9216895	best: 0.9216895 (500)	total: 16.8s	remaining: 16.7s
600:	test: 0.9286136	best: 0.9286136 (600)	total: 19.8s	remaining: 13.1s
700:	test: 0.9330141	best: 0.9330141 (700)	total: 23s	remaining: 9.81s
800:	test: 0.9356292	best: 0.9356292 (800)	total: 26.3s	remaining: 6.54s
900:	test: 0.9376895	best: 0.9376895 (900)	total: 29.4s	remaining: 3.23s


[32m[I 2023-01-11 12:10:35,119][0m Trial 4 finished with value: 0.9390783786143888 and parameters: {'learning_rate': 0.0022394246633486914, 'depth': 4}. Best is trial 3 with value: 0.9585215799067818.[0m


999:	test: 0.9390784	best: 0.9390784 (999)	total: 32.6s	remaining: 0us

bestTest = 0.9390783786
bestIteration = 999

0:	test: 0.8375186	best: 0.8375186 (0)	total: 34.6ms	remaining: 34.6s
100:	test: 0.9394199	best: 0.9394199 (100)	total: 2.66s	remaining: 23.7s
200:	test: 0.9473808	best: 0.9473808 (200)	total: 5.93s	remaining: 23.6s
300:	test: 0.9528191	best: 0.9528191 (300)	total: 9.15s	remaining: 21.2s
400:	test: 0.9547787	best: 0.9547787 (400)	total: 12.2s	remaining: 18.2s
500:	test: 0.9561656	best: 0.9561656 (500)	total: 15.7s	remaining: 15.6s
600:	test: 0.9571401	best: 0.9571401 (600)	total: 19s	remaining: 12.6s
700:	test: 0.9579809	best: 0.9579809 (700)	total: 22.9s	remaining: 9.78s
800:	test: 0.9586850	best: 0.9586850 (800)	total: 26.4s	remaining: 6.57s
900:	test: 0.9591480	best: 0.9591485 (899)	total: 29.9s	remaining: 3.29s


[32m[I 2023-01-11 12:11:09,608][0m Trial 5 finished with value: 0.9596340039077423 and parameters: {'learning_rate': 0.037610313644443304, 'depth': 3}. Best is trial 5 with value: 0.9596340039077423.[0m


999:	test: 0.9596340	best: 0.9596340 (999)	total: 33.6s	remaining: 0us

bestTest = 0.9596340039
bestIteration = 999

0:	test: 0.8534399	best: 0.8534399 (0)	total: 186ms	remaining: 3m 5s
100:	test: 0.9564442	best: 0.9564442 (100)	total: 13.7s	remaining: 2m 2s
200:	test: 0.9609780	best: 0.9609780 (200)	total: 28.7s	remaining: 1m 54s
300:	test: 0.9620657	best: 0.9620978 (294)	total: 45.3s	remaining: 1m 45s
400:	test: 0.9626681	best: 0.9627127 (384)	total: 1m 2s	remaining: 1m 32s
500:	test: 0.9629969	best: 0.9630757 (494)	total: 1m 17s	remaining: 1m 17s
600:	test: 0.9631411	best: 0.9631897 (567)	total: 1m 33s	remaining: 1m 1s
700:	test: 0.9628981	best: 0.9631897 (567)	total: 1m 49s	remaining: 46.6s
800:	test: 0.9628661	best: 0.9631897 (567)	total: 2m 7s	remaining: 31.7s
900:	test: 0.9627743	best: 0.9631897 (567)	total: 2m 24s	remaining: 15.9s


[32m[I 2023-01-11 12:13:53,109][0m Trial 6 finished with value: 0.963189721982688 and parameters: {'learning_rate': 0.05572689809873393, 'depth': 10}. Best is trial 6 with value: 0.963189721982688.[0m


999:	test: 0.9628268	best: 0.9631897 (567)	total: 2m 42s	remaining: 0us

bestTest = 0.963189722
bestIteration = 567

Shrink model to first 568 iterations.
0:	test: 0.8530298	best: 0.8530298 (0)	total: 75.3ms	remaining: 1m 15s
100:	test: 0.9398996	best: 0.9398996 (100)	total: 6.53s	remaining: 58.1s
200:	test: 0.9473119	best: 0.9473119 (200)	total: 13.3s	remaining: 52.9s
300:	test: 0.9509558	best: 0.9509558 (300)	total: 20.6s	remaining: 48s
400:	test: 0.9544054	best: 0.9544054 (400)	total: 28.6s	remaining: 42.7s
500:	test: 0.9565561	best: 0.9565561 (500)	total: 34.8s	remaining: 34.6s
600:	test: 0.9578346	best: 0.9578346 (600)	total: 40.3s	remaining: 26.7s
700:	test: 0.9587415	best: 0.9587415 (700)	total: 45.5s	remaining: 19.4s
800:	test: 0.9593336	best: 0.9593336 (800)	total: 50.5s	remaining: 12.6s
900:	test: 0.9601727	best: 0.9601727 (900)	total: 55.6s	remaining: 6.1s


[32m[I 2023-01-11 12:14:54,999][0m Trial 7 finished with value: 0.9606408206258937 and parameters: {'learning_rate': 0.010709877570200562, 'depth': 8}. Best is trial 6 with value: 0.963189721982688.[0m


999:	test: 0.9606373	best: 0.9606408 (996)	total: 1m 1s	remaining: 0us

bestTest = 0.9606408206
bestIteration = 996

Shrink model to first 997 iterations.
0:	test: 0.8452308	best: 0.8452308 (0)	total: 38.6ms	remaining: 38.6s
100:	test: 0.9326687	best: 0.9326687 (100)	total: 2.65s	remaining: 23.6s
200:	test: 0.9435579	best: 0.9435579 (200)	total: 5.55s	remaining: 22.1s
300:	test: 0.9481687	best: 0.9481687 (300)	total: 8.44s	remaining: 19.6s
400:	test: 0.9511209	best: 0.9511209 (400)	total: 11.8s	remaining: 17.6s
500:	test: 0.9537035	best: 0.9537035 (500)	total: 15.2s	remaining: 15.2s
600:	test: 0.9556318	best: 0.9556318 (600)	total: 18.7s	remaining: 12.4s
700:	test: 0.9568294	best: 0.9568294 (700)	total: 22.1s	remaining: 9.44s
800:	test: 0.9575944	best: 0.9575962 (799)	total: 25.7s	remaining: 6.38s
900:	test: 0.9583737	best: 0.9583737 (900)	total: 29.3s	remaining: 3.22s


[32m[I 2023-01-11 12:15:28,617][0m Trial 8 finished with value: 0.9589794843526573 and parameters: {'learning_rate': 0.010565846981597665, 'depth': 6}. Best is trial 6 with value: 0.963189721982688.[0m


999:	test: 0.9589795	best: 0.9589795 (999)	total: 32.9s	remaining: 0us

bestTest = 0.9589794844
bestIteration = 999

0:	test: 0.8534399	best: 0.8534399 (0)	total: 101ms	remaining: 1m 40s
100:	test: 0.9505821	best: 0.9505821 (100)	total: 8.56s	remaining: 1m 16s
200:	test: 0.9569664	best: 0.9569664 (200)	total: 18.3s	remaining: 1m 12s
300:	test: 0.9595007	best: 0.9595007 (300)	total: 28.1s	remaining: 1m 5s
400:	test: 0.9608890	best: 0.9608898 (399)	total: 37.9s	remaining: 56.6s
500:	test: 0.9617414	best: 0.9617442 (499)	total: 49.1s	remaining: 48.9s
600:	test: 0.9623224	best: 0.9623224 (600)	total: 59.9s	remaining: 39.7s
700:	test: 0.9625996	best: 0.9626364 (679)	total: 1m 10s	remaining: 30s
800:	test: 0.9628932	best: 0.9629357 (757)	total: 1m 20s	remaining: 20s
900:	test: 0.9627845	best: 0.9629357 (757)	total: 1m 30s	remaining: 9.96s


[32m[I 2023-01-11 12:17:10,522][0m Trial 9 finished with value: 0.9630109809743159 and parameters: {'learning_rate': 0.029144686717713766, 'depth': 10}. Best is trial 6 with value: 0.963189721982688.[0m


999:	test: 0.9629961	best: 0.9630110 (953)	total: 1m 41s	remaining: 0us

bestTest = 0.963010981
bestIteration = 953

Shrink model to first 954 iterations.
0:	test: 0.8534399	best: 0.8534399 (0)	total: 97ms	remaining: 1m 36s
100:	test: 0.9575517	best: 0.9575751 (95)	total: 10.8s	remaining: 1m 35s
200:	test: 0.9606224	best: 0.9608819 (183)	total: 21.8s	remaining: 1m 26s
300:	test: 0.9601762	best: 0.9608819 (183)	total: 32s	remaining: 1m 14s
400:	test: 0.9602706	best: 0.9608819 (183)	total: 42.3s	remaining: 1m 3s
500:	test: 0.9605924	best: 0.9608819 (183)	total: 52.7s	remaining: 52.4s
600:	test: 0.9603030	best: 0.9608819 (183)	total: 1m 2s	remaining: 41.8s
700:	test: 0.9602978	best: 0.9608819 (183)	total: 1m 13s	remaining: 31.2s
800:	test: 0.9606792	best: 0.9608819 (183)	total: 1m 23s	remaining: 20.7s
900:	test: 0.9603971	best: 0.9608819 (183)	total: 1m 34s	remaining: 10.3s


[32m[I 2023-01-11 12:18:55,767][0m Trial 10 finished with value: 0.9608819163219193 and parameters: {'learning_rate': 0.09823668121981591, 'depth': 10}. Best is trial 6 with value: 0.963189721982688.[0m


999:	test: 0.9604606	best: 0.9608819 (183)	total: 1m 44s	remaining: 0us

bestTest = 0.9608819163
bestIteration = 183

Shrink model to first 184 iterations.
0:	test: 0.8534399	best: 0.8534399 (0)	total: 103ms	remaining: 1m 42s
100:	test: 0.9551600	best: 0.9551600 (100)	total: 9.44s	remaining: 1m 24s
200:	test: 0.9592072	best: 0.9592072 (200)	total: 19.8s	remaining: 1m 18s
300:	test: 0.9613217	best: 0.9613217 (300)	total: 31.8s	remaining: 1m 13s


[33m[W 2023-01-11 12:19:30,684][0m Trial 11 failed because of the following error: KeyboardInterrupt('')[0m
Traceback (most recent call last):
  File "/home/hari31416/anaconda3/envs/data-science/lib/python3.9/site-packages/optuna/study/_optimize.py", line 196, in _run_trial
    value_or_values = func(trial)
  File "/tmp/ipykernel_26136/780873226.py", line 15, in objective
    catboost.fit(X_train, y_train, eval_set=(X_test, y_test), cat_features=cat_features)
  File "/home/hari31416/anaconda3/envs/data-science/lib/python3.9/site-packages/catboost/core.py", line 5128, in fit
    self._fit(X, y, cat_features, text_features, embedding_features, None, sample_weight, None, None, None, None, baseline, use_best_model,
  File "/home/hari31416/anaconda3/envs/data-science/lib/python3.9/site-packages/catboost/core.py", line 2355, in _fit
    self._train(
  File "/home/hari31416/anaconda3/envs/data-science/lib/python3.9/site-packages/catboost/core.py", line 1759, in _train
    self._object._tra

KeyboardInterrupt: 

In [60]:
best_params = study.best_params

In [61]:
final_model = CatBoostClassifier(iterations=1000, learning_rate=best_params["learning_rate"], depth=best_params["depth"], loss_function="Logloss", eval_metric="AUC", random_seed=42, use_best_model=True)

In [62]:
final_model.fit(X_train, y_train, eval_set=(X_test, y_test), verbose=100, cat_features=cat_features)

0:	test: 0.8534399	best: 0.8534399 (0)	total: 109ms	remaining: 1m 48s
100:	test: 0.9564442	best: 0.9564442 (100)	total: 9.6s	remaining: 1m 25s
200:	test: 0.9609780	best: 0.9609780 (200)	total: 20s	remaining: 1m 19s
300:	test: 0.9620657	best: 0.9620978 (294)	total: 32.2s	remaining: 1m 14s
400:	test: 0.9626681	best: 0.9627127 (384)	total: 43.4s	remaining: 1m 4s
500:	test: 0.9629969	best: 0.9630757 (494)	total: 54.7s	remaining: 54.4s
600:	test: 0.9631411	best: 0.9631897 (567)	total: 1m 5s	remaining: 43.6s
700:	test: 0.9628981	best: 0.9631897 (567)	total: 1m 17s	remaining: 32.9s
800:	test: 0.9628661	best: 0.9631897 (567)	total: 1m 29s	remaining: 22.2s
900:	test: 0.9627743	best: 0.9631897 (567)	total: 1m 41s	remaining: 11.2s
999:	test: 0.9628268	best: 0.9631897 (567)	total: 1m 53s	remaining: 0us

bestTest = 0.963189722
bestIteration = 567

Shrink model to first 568 iterations.


<catboost.core.CatBoostClassifier at 0x7f6e59c78eb0>

In [63]:
y_pred = final_model.predict(X_test)
print(classification_report(y_test, y_pred))
accuracy_score(y_test, y_pred)

              precision    recall  f1-score   support

           0       0.94      0.99      0.96      5072
           1       0.96      0.76      0.85      1445

    accuracy                           0.94      6517
   macro avg       0.95      0.88      0.91      6517
weighted avg       0.94      0.94      0.94      6517



0.9398496240601504