In [None]:
import numpy as np
import os
import pandas as pd
from skimage.io import imread
from sklearn.linear_model import SGDClassifier
from sklearn import svm
from sklearn.multiclass import OneVsRestClassifier
from sklearn.ensemble import RandomForestClassifier
from catboost import CatBoostClassifier
from xgboost import XGBClassifier

In [None]:
# Directory locations used by the notebook, written relative to the working
# directory. Each value keeps its trailing slash.

# Cropped training images; labels are parsed from the file names when loading.
cropdat_path = './data/train/all_cropped/'
# Augmented training images.
augmented_path = './data/train/all_augmented/'
# Cropped, unlabeled test images.
cropped_testdat_path = './data/test_cropped/'
# Output directory for submission files.
submissions_path = './submissions/'

### Load Data

In [None]:
%%time
# Label encodings for the two targets, keyed by the tokens found in the
# training file names.
conv_type = {'ER': 1, 'NR': 0}
conv_nrj = {'1': 0, '3': 1, '6': 2, '10': 3, '20': 4, '30': 5}


def _list_image_files(directory):
    """Return the names of the regular files in `directory`, sorted.

    os.listdir() yields entries in arbitrary, filesystem-dependent order, so
    the listing is sorted to make row order (and therefore every downstream
    fit and prediction) reproducible across machines. Sub-directories such as
    Jupyter's `.ipynb_checkpoints/` are skipped because they cannot be read
    as images.
    """
    return sorted(
        name for name in os.listdir(directory)
        if os.path.isfile(os.path.join(directory, name)))


# --- Training set ------------------------------------------------------------
# Each image is flattened into one feature row. The file name is split on '-':
# field 1 is looked up in conv_type and field 2 (with everything from the
# first '.' removed) is looked up in conv_nrj.
X_crp = []
y_crp_typ = []
y_crp_nrj = []
for filename in _list_image_files(cropdat_path):
    img = imread(os.path.join(cropdat_path, filename))
    X_crp.append(img.flatten())
    fn_parts = filename.split('-')
    y_crp_typ.append(conv_type[fn_parts[1]])
    y_crp_nrj.append(conv_nrj[fn_parts[2].split('.')[0]])
print(len(X_crp))
print(len(y_crp_typ))
print(len(y_crp_nrj))

# --- Test set ----------------------------------------------------------------
# Test images carry no labels; the part of the file name before the first '.'
# is kept as the sample id.
X_test_crp = []
X_test_labels = []
for filename in _list_image_files(cropped_testdat_path):
    img = imread(os.path.join(cropped_testdat_path, filename))
    X_test_crp.append(img.flatten())
    X_test_labels.append(filename.split('.')[0])
print(len(X_test_crp))
print(len(X_test_labels))

# Convert the accumulated Python lists to NumPy arrays for the estimators.
# NOTE(review): assumes all images have the same size so the rows stack into
# a rectangular 2-D array - confirm.
X_crp = np.array(X_crp)
y_crp_typ = np.array(y_crp_typ)
y_crp_nrj = np.array(y_crp_nrj)
X_test_crp = np.array(X_test_crp)

### Prepare Submission

In [None]:
# Submission frame: a single 'id' column holding the test sample ids, in the
# same order as the rows of the test feature matrix.
sub_all = pd.DataFrame(data=X_test_labels, columns=['id'])
# Preview the first five rows.
sub_all.head(n=5)

# 1. Binary Classification

### SGD

In [None]:
%%time
# Binary target (y_crp_typ): linear model trained with SGD, using hinge loss
# and an elastic-net penalty.
clf2_sgd = SGDClassifier(
    loss='hinge',
    penalty='elasticnet',
    l1_ratio=0.05,
    alpha=0.1,
    random_state=125,
    n_jobs=-1)
# Fit on the flattened training images, then label the test images.
clf2_sgd.fit(X_crp, y_crp_typ)
y2_sgd = clf2_sgd.predict(X_test_crp)

### Random Forest

In [None]:
%%time
# Binary target (y_crp_typ): random forest of 800 depth-limited trees.
clf2_rf = RandomForestClassifier(
    criterion='gini',
    max_depth=8,
    # 'sqrt' is exactly what the former 'auto' alias meant for classifiers.
    # 'auto' was deprecated in scikit-learn 1.1 and removed in 1.3, where it
    # raises an error; 'sqrt' is accepted by every version.
    max_features='sqrt',
    n_estimators=800,
    n_jobs=-1,
    random_state=125)
# Fit on the flattened training images, then label the test images.
clf2_rf.fit(X_crp, y_crp_typ)
y2_rf = clf2_rf.predict(X_test_crp)

### CatBoost

In [None]:
%%time
# Binary target (y_crp_typ): CatBoost gradient boosting trained on the GPU.
# NOTE(review): early_stopping_rounds and eval_metric act on a validation set,
# but fit() below receives no eval_set - presumably all 20000 iterations run.
# Confirm whether a hold-out split was intended.
clf2_cb = CatBoostClassifier(
    # objective / reported metric
    loss_function='Logloss',
    eval_metric='AUC',
    # boosting schedule
    iterations=20000,
    early_stopping_rounds=500,
    boosting_type='Plain',
    # tree shape and leaf estimation
    depth=6,
    l2_leaf_reg=1e-20,
    leaf_estimation_iterations=10,
    # runtime
    task_type='GPU',
    logging_level='Silent',
    random_state=125)
clf2_cb.fit(X_crp, y_crp_typ)
y2_cb = clf2_cb.predict(X_test_crp)

### XGBoost

In [None]:
%%time
# Binary target (y_crp_typ): XGBoost with 800 shallow trees, trained and
# evaluated on the GPU.
# NOTE(review): use_label_encoder, tree_method='gpu_hist' and predictor are
# believed to be legacy options in XGBoost 2.x - revisit when upgrading.
clf2_xgb = XGBClassifier(
    # ensemble size and tree shape
    n_estimators=800,
    max_depth=3,
    min_child_weight=1,
    gamma=1,
    # no row or column subsampling
    subsample=1.0,
    colsample_bytree=1.0,
    # metric and label handling
    eval_metric='auc',
    use_label_encoder=False,
    # GPU execution
    tree_method='gpu_hist',
    predictor='gpu_predictor',
    random_state=125)
clf2_xgb.fit(X_crp, y_crp_typ)
y2_xgb = clf2_xgb.predict(X_test_crp)

### SVM

In [None]:
%%time
# Binary target (y_crp_typ): RBF-kernel support vector classifier.
clf2_svc = svm.SVC(
    kernel='rbf',
    gamma='scale',
    C=0.1,
    random_state=125)
# Fit on the flattened training images, then label the test images.
clf2_svc.fit(X_crp, y_crp_typ)
y2_svc = clf2_svc.predict(X_test_crp)

# 2. Six-class Classification

### SGD

In [None]:
%%time
# Six-class target (y_crp_nrj): linear model trained with SGD, using logistic
# loss and an elastic-net penalty.
# NOTE(review): scikit-learn 1.1 renamed this loss to 'log_loss' and 1.3
# removed the 'log' spelling; switch the value when upgrading scikit-learn.
clf6_sgd = SGDClassifier(
    loss='log',
    penalty='elasticnet',
    l1_ratio=0.05,
    alpha=0.1,
    random_state=125,
    n_jobs=-1)
# Fit on the flattened training images, then label the test images.
clf6_sgd.fit(X_crp, y_crp_nrj)
y6_sgd = clf6_sgd.predict(X_test_crp)

### Random Forest

In [None]:
%%time
# Six-class target (y_crp_nrj): random forest of 800 depth-limited trees.
clf6_rf = RandomForestClassifier(
    criterion='gini',
    max_depth=8,
    # 'sqrt' is exactly what the former 'auto' alias meant for classifiers.
    # 'auto' was deprecated in scikit-learn 1.1 and removed in 1.3, where it
    # raises an error; 'sqrt' is accepted by every version.
    max_features='sqrt',
    n_estimators=800,
    n_jobs=-1,
    random_state=125)
# Fit on the flattened training images, then label the test images.
clf6_rf.fit(X_crp, y_crp_nrj)
y6_rf = clf6_rf.predict(X_test_crp)

### CatBoost

In [None]:
%%time
# Six-class target (y_crp_nrj): CatBoost gradient boosting trained on the GPU.
# NOTE(review): early_stopping_rounds acts on a validation set, but fit()
# below receives no eval_set - presumably all 20000 iterations run. Confirm
# whether a hold-out split was intended.
clf6_cb = CatBoostClassifier(
    # objective / reported metric
    loss_function='MultiClass',
    eval_metric='MultiClass',
    # boosting schedule
    iterations=20000,
    early_stopping_rounds=500,
    boosting_type='Plain',
    # tree shape and leaf estimation
    depth=6,
    l2_leaf_reg=1e-20,
    leaf_estimation_iterations=10,
    # runtime
    task_type='GPU',
    logging_level='Silent',
    random_state=125)
clf6_cb.fit(X_crp, y_crp_nrj)
# NOTE(review): for MultiClass models CatBoost's predict() is believed to
# return a column of shape (n_samples, 1) rather than a flat array - confirm
# before combining y6_cb with the other y6_* predictions.
y6_cb = clf6_cb.predict(X_test_crp)

### XGBoost

In [None]:
%%time
# Six-class target (y_crp_nrj): XGBoost with 500 shallow trees, trained and
# evaluated on the GPU.
#
# The previous `num_classes=6` keyword has been dropped: XGBoost has no such
# parameter (the booster option is spelled `num_class`), so it was forwarded
# as an unknown setting and had no effect. XGBClassifier derives the number of
# classes from the labels passed to fit(), so nothing needs to be supplied.
# NOTE(review): use_label_encoder, tree_method='gpu_hist' and predictor are
# believed to be legacy options in XGBoost 2.x - revisit when upgrading.
clf6_xgb = XGBClassifier(
    objective='multi:softmax',
    n_estimators=500,
    colsample_bytree=1.0,
    gamma=1,
    max_depth=3,
    min_child_weight=1,
    subsample=1.0,
    eval_metric='mlogloss',
    use_label_encoder=False,
    tree_method='gpu_hist',
    predictor='gpu_predictor',
    n_jobs=-1,
    random_state=125)
# Fit on the flattened training images, then label the test images.
clf6_xgb.fit(X_crp, y_crp_nrj)
y6_xgb = clf6_xgb.predict(X_test_crp)

### SVM

In [None]:
%%time
# Six-class target (y_crp_nrj): one RBF-kernel SVC per class, combined
# one-vs-rest. The per-class fits are dispatched with n_jobs=-1.
clf6_svc = OneVsRestClassifier(
    estimator=svm.SVC(
        kernel='rbf',
        gamma='scale',
        C=0.1,
        probability=True,
        random_state=125),
    n_jobs=-1)
# Fit on the flattened training images, then label the test images.
clf6_svc.fit(X_crp, y_crp_nrj)
y6_svc = clf6_svc.predict(X_test_crp)