In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from catboost import CatBoostClassifier
import os

base_dir = "./inputs"
files = os.listdir(base_dir)
files

['bank-full.csv', 'sample_submission.csv', 'test.csv', 'train.csv']

In [2]:
train_df = pd.read_csv(os.path.join(base_dir, files[3])).set_index("id")
target = train_df["y"]
train_df.drop("y", axis=1, inplace=True)
orig_df = pd.read_csv(os.path.join(base_dir, files[0]))
actual = orig_df["y"]
orig_df.drop("y", axis=1, inplace=True)

In [3]:
categories = train_df.select_dtypes(include=["object"]).columns
non_categories = train_df.select_dtypes(exclude=["object"]).columns

In [4]:
# Encode Categorical data
le = LabelEncoder()
for column in categories:
    train_df[column] = le.fit_transform(train_df[column])
    orig_df[column] = le.fit_transform(orig_df[column])
actual = le.fit_transform(actual)

In [5]:
# Scale non_categorical columns
sdt = StandardScaler()
# train_df[non_categories] = sdt.fit_transform(train_df[non_categories])
# orig_df[non_categories] = sdt.fit_transform(orig_df[non_categories])
train_df = sdt.fit_transform(train_df)
orig_df = sdt.fit_transform(orig_df)

In [12]:
X_train = train_df
X_test = orig_df
y_train = target
y_test = actual

In [13]:
model = CatBoostClassifier()
model.fit(X_train, y_train,inti_model="./saved_models/model.bin")
model.save_model("./saved_models/model.bin")
predictions = model.predict(X_test)

TypeError: CatBoostClassifier.fit() got an unexpected keyword argument 'inti_model'

In [8]:
accuracy = accuracy_score(y_test, predictions)
report = classification_report(y_test, predictions)
matrix = confusion_matrix(y_test, predictions)
print(accuracy)
print(report)
print(matrix)

0.8948707173033111
              precision    recall  f1-score   support

           0       0.95      0.93      0.94     39922
           1       0.55      0.60      0.57      5289

    accuracy                           0.89     45211
   macro avg       0.75      0.77      0.76     45211
weighted avg       0.90      0.89      0.90     45211

[[37276  2646]
 [ 2107  3182]]


In [9]:
X_train, X_test, y_train, y_test = train_test_split(train_df, target, test_size=0.2, random_state=42)

In [10]:
model = CatBoostClassifier()
model.fit(X_train, y_train,init_model="./saved_models/model.bin")
model.save_model("./saved_models/model.bin")
predictions = model.predict(X_test)

Learning rate set to 0.158199
0:	learn: 0.1350669	total: 69.2ms	remaining: 1m 9s
1:	learn: 0.1350506	total: 139ms	remaining: 1m 9s
2:	learn: 0.1350232	total: 200ms	remaining: 1m 6s
3:	learn: 0.1350073	total: 261ms	remaining: 1m 5s
4:	learn: 0.1349923	total: 338ms	remaining: 1m 7s
5:	learn: 0.1349789	total: 395ms	remaining: 1m 5s
6:	learn: 0.1349685	total: 461ms	remaining: 1m 5s
7:	learn: 0.1349404	total: 528ms	remaining: 1m 5s
8:	learn: 0.1349253	total: 605ms	remaining: 1m 6s
9:	learn: 0.1349116	total: 678ms	remaining: 1m 7s
10:	learn: 0.1349023	total: 747ms	remaining: 1m 7s
11:	learn: 0.1348868	total: 819ms	remaining: 1m 7s
12:	learn: 0.1348676	total: 886ms	remaining: 1m 7s
13:	learn: 0.1348538	total: 948ms	remaining: 1m 6s
14:	learn: 0.1348385	total: 1.01s	remaining: 1m 6s
15:	learn: 0.1348279	total: 1.08s	remaining: 1m 6s
16:	learn: 0.1348068	total: 1.15s	remaining: 1m 6s
17:	learn: 0.1347914	total: 1.22s	remaining: 1m 6s
18:	learn: 0.1347740	total: 1.28s	remaining: 1m 6s
19:	learn:

In [11]:
accuracy = accuracy_score(y_test, predictions)
report = classification_report(y_test, predictions)
matrix = confusion_matrix(y_test, predictions)
print(accuracy)
print(report)
print(matrix)

0.94082
              precision    recall  f1-score   support

           0       0.96      0.97      0.97    131795
           1       0.79      0.70      0.74     18205

    accuracy                           0.94    150000
   macro avg       0.87      0.84      0.85    150000
weighted avg       0.94      0.94      0.94    150000

[[128451   3344]
 [  5533  12672]]
