In [1]:
import numpy as np
import os
import pandas as pd
from skimage.io import imread
from catboost import CatBoostClassifier

In [2]:
# Data and output locations, relative to the notebook's working directory.
# The trailing slashes matter: later cells build file paths with `path + filename`.
cropdat_path = './data/train/all_cropped/'  # training images; labels are parsed from their file names
augmented_path = './data/train/all_augmented/'  # NOTE(review): not used in the cells shown here — confirm it is still needed
cropped_testdat_path = './data/test_cropped/'  # test images; the file stem becomes the submission id
submissions_path = './submissions/'  # destination folder for the submission CSV files

### Load Data

In [3]:
%%time
# Label encodings. Training file names are parsed as `<prefix>-<type>-<energy>.<ext>`:
# the second dash-separated part must be a key of `conv_type`, and the third part
# (up to its first dot) must be a key of `conv_nrj`.
conv_type = {'ER': 1, 'NR': 0}
conv_nrj = {'1': 0, '3': 1, '6': 2, '10': 3, '20': 4, '30': 5}


def load_flat_images(dir_path):
    """Read every file in `dir_path` as an image and flatten it to a 1-D vector.

    Files are visited in sorted name order. `os.listdir` returns entries in
    arbitrary order, so without sorting the row order of the resulting matrix
    (and therefore the seeded model fit) could differ between machines.

    Parameters
    ----------
    dir_path : str
        Directory containing only image files readable by `imread`.

    Returns
    -------
    filenames : list of str
        File names in the order they were read.
    pixels : list of numpy.ndarray
        One flattened image per file, aligned with `filenames`.
    """
    filenames = sorted(os.listdir(dir_path))
    pixels = [imread(os.path.join(dir_path, name)).flatten() for name in filenames]
    return filenames, pixels


# --- training set: pixels plus the two targets parsed from the file names ---
train_files, X_crp = load_flat_images(cropdat_path)
y_crp_typ = []
y_crp_nrj = []
for filename in train_files:
    fn_parts = filename.split('-')
    y_crp_typ.append(conv_type[fn_parts[1]])
    y_crp_nrj.append(conv_nrj[fn_parts[2].split('.')[0]])
print(len(X_crp))
print(len(y_crp_typ))
print(len(y_crp_nrj))

# --- test set: pixels plus the file stem, which is used as the submission id ---
test_files, X_test_crp = load_flat_images(cropped_testdat_path)
X_test_labels = [filename.split('.')[0] for filename in test_files]
print(len(X_test_crp))
print(len(X_test_labels))

# Convert to arrays once, after all rows have been collected.
X_crp = np.array(X_crp)
y_crp_typ = np.array(y_crp_typ)
y_crp_nrj = np.array(y_crp_nrj)
X_test_crp = np.array(X_test_crp)

13404
13404
13404
16564
16564
Wall time: 9.21 s


# 1. Binary Classification

### CatBoost

In [4]:
%%time
# Hyper-parameters of the binary model (target: `y_crp_typ`, i.e. ER=1 / NR=0),
# collected in one mapping so they can be inspected before the estimator is built.
# NOTE(review): `fit` receives no eval set below, so `early_stopping_rounds`
# presumably never triggers — confirm against the CatBoost docs.
binary_cb_params = {
    'loss_function': 'Logloss',
    'eval_metric': 'AUC',
    'boosting_type': 'Plain',
    'depth': 6,
    'iterations': 800,
    'early_stopping_rounds': 500,
    'l2_leaf_reg': 1e-20,
    'leaf_estimation_iterations': 10,
    'logging_level': 'Silent',
    'random_state': 125,
}
clf2_cb = CatBoostClassifier(**binary_cb_params)
clf2_cb.fit(X_crp, y_crp_typ)

Wall time: 1min 7s


<catboost.core.CatBoostClassifier at 0x181016694c8>

In [5]:
%%time
# Binary-model predictions for every test image, in the same row order as `X_test_labels`.
# Presumably hard class labels (0/1) rather than probabilities — confirm CatBoost's default prediction type.
y2_cb = clf2_cb.predict(X_test_crp)

Wall time: 7.46 s


# 2. Six-class Classification

### CatBoost

In [6]:
%%time
# Hyper-parameters of the six-class model (target: `y_crp_nrj`, the encoded energy class),
# collected in one mapping so they can be inspected before the estimator is built.
# Unlike the binary model, this one is trained with task_type='GPU'.
# NOTE(review): `fit` receives no eval set below, so `early_stopping_rounds`
# presumably never triggers — confirm against the CatBoost docs.
six_class_cb_params = {
    'loss_function': 'MultiClass',
    'eval_metric': 'MultiClass',
    'boosting_type': 'Plain',
    'task_type': 'GPU',
    'depth': 6,
    'iterations': 800,
    'early_stopping_rounds': 500,
    'l2_leaf_reg': 1e-20,
    'leaf_estimation_iterations': 10,
    'logging_level': 'Silent',
    'random_state': 125,
}
clf6_cb = CatBoostClassifier(**six_class_cb_params)
clf6_cb.fit(X_crp, y_crp_nrj)

Wall time: 45.7 s


<catboost.core.CatBoostClassifier at 0x180aff7c188>

In [7]:
%%time
# Six-class predictions for every test image, in the same row order as `X_test_labels`.
# Presumably one encoded class (0..5) per row, returned as a column — confirm the output shape.
y6_cb = clf6_cb.predict(X_test_crp)

Wall time: 7.47 s


In [8]:
# Persist both fitted classifiers to disk.
# The target folder is created first so a fresh checkout does not fail on a missing directory.
saved_models_dir = './saved_models/'
os.makedirs(saved_models_dir, exist_ok=True)
clf6_cb.save_model(saved_models_dir + 'cat6.cbm')
clf2_cb.save_model(saved_models_dir + 'cat2.cbm')

# 3. Form Submission

In [None]:
# Five independent frames, each starting with a single `id` column built from the test file stems:
#   sub_bin / sub_six  -> per-model vote tables (binary / six-class)
#   sub_cat / sub_xgb / sub_mode -> submission tables
sub_bin, sub_six, sub_cat, sub_xgb, sub_mode = (
    pd.DataFrame(X_test_labels, columns=['id']) for _ in range(5)
)

In [None]:
# Flatten the six-class predictions to one value per test row.
# Selecting the first column gives the same result as the former `np.stack(..., axis=1)[0]`
# for column-shaped (n, k) input, and additionally accepts an already 1-D array.
y6_cat = np.asarray(y6_cb).reshape(len(y6_cb), -1)[:, 0]

In [None]:
# Inverse of `conv_nrj`: encoded energy class (0..5) -> energy value as it appears in file names.
conv_six = {0: 1, 1: 3, 2: 6, 3: 10, 4: 20, 5: 30}

# NOTE(review): y2_sgd, y2_rf, y2_xgb, y2_svc and y6_sgd, y6_rf, y6_xgb, y6_svc are not
# defined in the cells shown here; they must already exist in the kernel
# for this cell to run. Only the CatBoost predictions are produced by this notebook.

# Binary votes: one column per model, one row per test image.
sub_bin['y2_sgd'] = y2_sgd
sub_bin['y2_rf'] = y2_rf
sub_bin['y2_cb'] = y2_cb
sub_bin['y2_xgb'] = y2_xgb
sub_bin['y2_svc'] = y2_svc
# Keep only the vote columns so a row-wise mode can be taken later.
# `columns=` is used because pandas 2.0 no longer accepts `axis` positionally;
# `errors='ignore'` keeps the cell re-runnable after 'id' has already been dropped.
sub_bin = sub_bin.drop(columns='id', errors='ignore')

# Six-class votes, decoded back to energy values.
sub_six['y6_sgd'] = [conv_six[x] for x in y6_sgd]
sub_six['y6_rf'] = [conv_six[x] for x in y6_rf]
sub_six['y6_cb'] = [conv_six[x] for x in y6_cat]
sub_six['y6_xgb'] = [conv_six[x] for x in y6_xgb]
sub_six['y6_svc'] = [conv_six[x] for x in y6_svc]
sub_six = sub_six.drop(columns='id', errors='ignore')

sub_bin.head()

In [None]:
# Preview the first rows of the per-model six-class vote table.
sub_six.head()

In [None]:
# Single-model submissions: copy that model's binary and six-class columns
# from the vote tables into the corresponding submission frame.
for submission, bin_col, six_col in (
    (sub_cat, 'y2_cb', 'y6_cb'),
    (sub_xgb, 'y2_xgb', 'y6_xgb'),
):
    submission['classification_predictions'] = sub_bin[bin_col]
    submission['regression_predictions'] = sub_six[six_col]

# Ensemble submission: row-wise mode across all model columns.
# `mode(axis=1)` can return several columns when values tie; column 0 is the one kept.
bin_vote = sub_bin.mode(axis=1)[0]
six_vote = sub_six.mode(axis=1)[0]
sub_mode['classification_predictions'] = bin_vote.astype('int32')
sub_mode['regression_predictions'] = six_vote.astype('int32')
sub_mode.head()

In [None]:
# Preview the first rows of the CatBoost-only submission.
sub_cat.head()

In [None]:
# Preview the first rows of the XGBoost-only submission.
sub_xgb.head()

In [None]:
# Write the three submission tables as CSV files without the index column.
# The output folder is created first so the cell does not fail on a fresh checkout.
os.makedirs(submissions_path, exist_ok=True)
for submission, csv_name in (
    (sub_mode, 'basic_modes.csv'),
    (sub_cat, 'basic_cat.csv'),
    (sub_xgb, 'basic_xgb.csv'),
):
    submission.to_csv(submissions_path + csv_name, index=False)

In [None]:
# Random-forest-only submission: ids plus the rf columns of the two vote tables.
sub_rf = pd.DataFrame(X_test_labels, columns=['id'])
for target_col, votes, source_col in (
    ('classification_predictions', sub_bin, 'y2_rf'),
    ('regression_predictions', sub_six, 'y6_rf'),
):
    sub_rf[target_col] = votes[source_col]
sub_rf.head()

In [None]:
# Write the random-forest submission as CSV without the index column.
# The output folder is created first so the cell does not fail on a fresh checkout.
os.makedirs(submissions_path, exist_ok=True)
sub_rf.to_csv(submissions_path + 'basic_rf.csv', index=False)

## Yandex Contest Scores
- rf - 46.07
- cat - 663.1
- xgb - 579.88
- modes - 537.95