In [3]:
!pip install catboost

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting catboost
  Downloading catboost-1.1.1-cp37-none-manylinux1_x86_64.whl (76.6 MB)
[K     |████████████████████████████████| 76.6 MB 1.2 MB/s 
Installing collected packages: catboost
Successfully installed catboost-1.1.1


In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier
import catboost as cb
from sklearn.datasets import load_digits
from sklearn.metrics import f1_score

In [8]:
data = pd.read_csv("/content/sample_data/fashion-mnist_test.csv", delimiter=",")           #load_digits()
data = data.replace((np.inf, -np.inf, np.nan), 0).reset_index(drop=True)
predicts = data.drop(columns=['label'])
target = data['label']
print(predicts, "\n")
print(target, "\n")
print(predicts.shape)

print(predicts.isna().sum())
print(target.isna().sum())


      pixel1  pixel2  pixel3  pixel4  pixel5  pixel6  pixel7  pixel8  pixel9  \
0          0       0       0       0       0       0       0       9       8   
1          0       0       0       0       0       0       0       0       0   
2          0       0       0       0       0       0      14      53      99   
3          0       0       0       0       0       0       0       0       0   
4          0       0       0       0       0       0       0       0       0   
...      ...     ...     ...     ...     ...     ...     ...     ...     ...   
9995       0       0       0       0       0       0       0       0       0   
9996       0       0       0       0       0       0       0       0       0   
9997       0       0       0       0       0       0       0       0       0   
9998       0       1       3       0       0       0       0       0       0   
9999       0       0       0       0       0       0       0     140     119   

      pixel10  ...  pixel775  pixel776 

In [9]:
A_train, A_test, y_train, y_test = train_test_split(predicts, target, train_size=0.8)
print(A_train.shape, "\n")
print(A_test.shape, "\n")
print(y_train.shape, "\n")
print(y_test.shape)

(8000, 784) 

(2000, 784) 

(8000,) 

(2000,)


In [10]:
random_forest = RandomForestClassifier(max_depth=15, min_samples_split=10).fit(A_train, y_train)

In [11]:
y_preds_d = random_forest.predict(A_train)
print("F1 мера для тренировочных данных: ", f1_score(y_preds_d, y_train, average='macro'))

F1 мера для тренировочных данных:  0.9814470174322747


In [12]:
y_pred = random_forest.predict(A_test)
print("F1 мера для тестовых данных: ", f1_score(y_pred, y_test, average="macro"))

F1 мера для тестовых данных:  0.8483111507306716


In [13]:
random_forest = RandomForestClassifier()

params_grid = {
    "max_depth": [12, 18],
    "min_samples_leaf": [3, 10],
    "min_samples_split": [6, 12]
}

grid_search_random_forest = GridSearchCV(estimator=random_forest, param_grid=params_grid, scoring="f1_macro", cv = 4)

In [14]:
grid_search_random_forest.fit(A_train, y_train)

GridSearchCV(cv=4, estimator=RandomForestClassifier(),
             param_grid={'max_depth': [12, 18], 'min_samples_leaf': [3, 10],
                         'min_samples_split': [6, 12]},
             scoring='f1_macro')

In [15]:
best_model = grid_search_random_forest.best_estimator_

In [16]:
y_preds_d = best_model.predict(A_train)
print("F1 мера для тренировочных данных: ", f1_score(y_preds_d, y_train, average='macro'))

F1 мера для тренировочных данных:  0.9863513038115788


In [17]:
y_pred = best_model.predict(A_test)
print("F1 мера для тестовых данных: ", f1_score(y_pred, y_test, average="macro"))

F1 мера для тестовых данных:  0.8531535536679927


In [18]:
model_catboost_clf = cb.CatBoostClassifier(iterations=3000, task_type="GPU", devices='0')
model_catboost_clf.fit(A_train, y_train)

Learning rate set to 0.039726
0:	learn: 2.1568508	total: 89.1ms	remaining: 4m 27s
1:	learn: 2.0306829	total: 161ms	remaining: 4m 1s
2:	learn: 1.9244255	total: 225ms	remaining: 3m 45s
3:	learn: 1.8363547	total: 294ms	remaining: 3m 40s
4:	learn: 1.7574849	total: 344ms	remaining: 3m 25s
5:	learn: 1.6864554	total: 382ms	remaining: 3m 10s
6:	learn: 1.6226198	total: 427ms	remaining: 3m 2s
7:	learn: 1.5650288	total: 465ms	remaining: 2m 54s
8:	learn: 1.5131917	total: 515ms	remaining: 2m 51s
9:	learn: 1.4657487	total: 557ms	remaining: 2m 46s
10:	learn: 1.4197535	total: 596ms	remaining: 2m 41s
11:	learn: 1.3787258	total: 636ms	remaining: 2m 38s
12:	learn: 1.3407549	total: 681ms	remaining: 2m 36s
13:	learn: 1.3044738	total: 721ms	remaining: 2m 33s
14:	learn: 1.2716923	total: 760ms	remaining: 2m 31s
15:	learn: 1.2406094	total: 803ms	remaining: 2m 29s
16:	learn: 1.2100937	total: 841ms	remaining: 2m 27s
17:	learn: 1.1817900	total: 879ms	remaining: 2m 25s
18:	learn: 1.1555901	total: 920ms	remaining: 

<catboost.core.CatBoostClassifier at 0x7f553906d790>

In [19]:
y_preds_t = model_catboost_clf.predict(A_train, task_type="CPU")
print("F1 мера для тренировочных данных: ", f1_score(y_preds_t, y_train, average='macro'))

F1 мера для тренировочных данных:  0.9992491695129729


In [20]:
y_preds = model_catboost_clf.predict(A_test, task_type="CPU")
print("F1 мера для тестовых данных: ", f1_score(y_preds, y_test, average='macro'))

F1 мера для тестовых данных:  0.877116474944654
