In [1]:
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np

import livelossplot

from pandas import read_hdf
from pandas import set_option
from pandas import DataFrame

from sklearn.ensemble import VotingClassifier

from sklearn.feature_selection import f_regression
from sklearn.feature_selection import SelectFromModel
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import VarianceThreshold

from sklearn.metrics import accuracy_score

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split

from sklearn.pipeline import Pipeline

from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import PolynomialFeatures

from sklearn.utils import class_weight

from sklearn import mixture
from sklearn import svm

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


## Step: Load and prepare data

In [2]:
# Load training data HDF5 using Pandas
train_labeled = read_hdf("train_labeled.h5")
train_unlabeled = read_hdf("train_unlabeled.h5")

# Load training data into arrays Y and X
data_labeled = train_labeled.values
data_unlabeled = train_unlabeled.values
y, X = data_labeled[:, 0], data_labeled[:, 1:]
X_unlabeled = data_unlabeled

# Load test (challenge) data HDF5 using Pandas
test = read_hdf("test.h5")

# Load test (challenge) data into array test_X
X_challenge = test.values

print('Data loaded.')

Data loaded.


In [3]:
# Split given data into train and test set
validation_size = 0.1
seed = 7
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=validation_size, random_state=seed)

In [4]:
# Make pipeline: mandatory transformations plus other experiments
pipeline = Pipeline(memory=None,
                    steps=[('std_scaler', StandardScaler())#,
#                            ('poly_features', PolynomialFeatures())#,
#                            ('anova', SelectKBest(f_regression, k=64)), #best: 64 for RandomForest, 200 for Keras
                           ])

In [5]:
pipeline.fit(X_train, y_train)
X_train_transformed = pipeline.transform(X_train)
X_test_transformed = pipeline.transform(X_test)
X_unlabeled_transformed = pipeline.transform(X_unlabeled)
X_challenge_transformed = pipeline.transform(X_challenge)
print('Data ready.')

Data ready.


## Step: Label unlabeled data (DSL, XGB)

In [None]:
from sklearn.ensemble.forest import ExtraTreesClassifier as ExtremeRandomizedTrees
from sklearn.neighbors import KNeighborsClassifier as kNearestNeighbors
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble.forest import RandomForestClassifier
from xgboost.sklearn import XGBClassifier
from deepSuperLearner import *

In [None]:
ERT_learner = ExtremeRandomizedTrees(n_estimators=200, max_depth=None, max_features=1, n_jobs=-1)
kNN_learner = kNearestNeighbors(n_neighbors=11, n_jobs=-1)
LR_learner = LogisticRegression(n_jobs=-1)
RFC_learner = RandomForestClassifier(n_estimators=200, max_depth=None, n_jobs=-1)
XGB_learner = XGBClassifier(n_estimators=200, max_depth=3, learning_rate=1., n_jobs=-1)
Base_learners = {'ExtremeRandomizedTrees':ERT_learner, 'kNearestNeighbors':kNN_learner, 'LogisticRegression':LR_learner,
                 'RandomForestClassifier':RFC_learner, 'XGBClassifier':XGB_learner}

np.random.seed(100)

In [None]:
# DSL Labeling
DSL_learner = DeepSuperLearner(Base_learners, K=5)
DSL_learner.fit(X_train_transformed, y_train, max_iterations=100, sample_weight=None)
y_pred = DSL_learner.predict(X_test_transformed)
predictions = np.argmax(y_pred, axis=1)
accuracy = accuracy_score(y_test, predictions)
print("accuracy: {:.3f}%".format(accuracy*100))
DSL_learner.get_precision_recall(X_test_transformed, y_test, show_graphs=True)    

In [None]:
# XGB Labeling
XGB_model = XGBClassifier(nthread=8, subsample=0.75, n_estimators=225,  max_depth=5)
eval_set = [(X_test_transformed, y_test)]
XGB_model.fit(X_train_transformed, y_train, eval_set=eval_set, eval_metric="mlogloss", verbose=True)
y_pred = XGB_model.predict(X_test_transformed)
acc = accuracy_score(y_test, y_pred)
print(acc)

In [None]:
# Label the unlabeled
y_unlabeled_pred = DSL_learner.predict(X_unlabeled_transformed)
y_unlabeled = np.argmax(y_unlabeled_pred, axis=1)
# y_unlabeled = XGB_model.predict(X_unlabeled_transformed)

## Step: Label with confidence (SVC)

In [None]:
parameters = {'kernel':('linear', 'rbf', 'poly'), 'C':[1, 10, 100], 'degree':[2]}
svc = svm.SVC(gamma='auto', coef0=0.0, shrinking=True, probability=True,
       tol=0.001, cache_size=1024, class_weight='balanced', verbose=True, max_iter=-1,
       decision_function_shape='ovr',random_state=None)

svm_clf = GridSearchCV(svc, parameters, n_jobs=-1, verbose=2)
svm_clf.fit(X_train_transformed, y_train)

print(svm_clf.best_params_)
y_pred = svm_clf.predict(X_test_transformed)
accuracy = accuracy_score(y_test, y_pred)
print("accuracy: {:.3f}%".format(accuracy*100))

In [7]:
svm_clf = svm.SVC(C=2.0, kernel='rbf', degree=2, gamma='auto', coef0=0.0, shrinking=True, probability=True,
              tol=0.001, cache_size=1024, class_weight='balanced', verbose=True, max_iter=-1,
              decision_function_shape='ovo', random_state=None)
svm_clf.fit(X_train_transformed, y_train)
y_pred = svm_clf.predict(X_test_transformed)
accuracy = accuracy_score(y_test, y_pred)
print("accuracy: {:.3f}%".format(accuracy*100))

[LibSVM]accuracy: 93.889%


In [8]:
y_unlabeled_proba = svm_clf.predict_proba(X_unlabeled_transformed)
high_confidence_rows = [i for i in range(y_unlabeled_proba.shape[0]) if
     y_unlabeled_proba[i, np.argmax(y_unlabeled_proba[i])] > 0.9]

y_unlabeled_SVM  = np.argmax(y_unlabeled_proba[high_confidence_rows], axis=1)
X_unlabeled_transformed_SVM = X_unlabeled_transformed[high_confidence_rows]

## GMM: maybe

In [9]:
lowest_bic = np.infty
bic = []
n_components = 10
cv_types = ['spherical', 'tied', 'diag', 'full']
for cv_type in cv_types:
    # Fit a Gaussian mixture with EM
    gmm = mixture.GaussianMixture(n_components=n_components,
                                  covariance_type=cv_type, verbose=2)
    gmm.fit(np.vstack((X_unlabeled_transformed, X_train_transformed)))
    bic.append(gmm.bic(X_unlabeled_transformed))
    if bic[-1] < lowest_bic:
        lowest_bic = bic[-1]
        gmm_clf = gmm

bic = np.array(bic)

Initialization 0
  Iteration 0	 time lapse 0.92890s	 ll change inf
  Iteration 10	 time lapse 0.78200s	 ll change 0.05510
  Iteration 20	 time lapse 0.76606s	 ll change 0.00261
Initialization converged: True	 time lapse 2.79799s	 ll -158.44866
Initialization 0
  Iteration 0	 time lapse 1.48744s	 ll change inf
  Iteration 10	 time lapse 6.26224s	 ll change 0.14595
  Iteration 20	 time lapse 6.26192s	 ll change 0.10069
  Iteration 30	 time lapse 6.24351s	 ll change 0.03363
  Iteration 40	 time lapse 6.34742s	 ll change 0.00605
Initialization converged: True	 time lapse 32.23670s	 ll 67.78926
Initialization 0
  Iteration 0	 time lapse 1.28740s	 ll change inf
  Iteration 10	 time lapse 1.02835s	 ll change 0.00779
  Iteration 20	 time lapse 1.06568s	 ll change 0.00085
Initialization converged: True	 time lapse 3.38155s	 ll -156.14125
Initialization 0
  Iteration 0	 time lapse 2.17216s	 ll change inf
  Iteration 10	 time lapse 11.57017s	 ll change 1.18936
  Iteration 20	 time lapse 11.08355s



In [273]:
means_init = np.array([X_train[np.where(y_train == i)].mean(axis=0) for i in np.unique(y_train)])

In [263]:
weights_init = [0.1 for _ in range(10)]

In [391]:
gmm_clf = mixture.GaussianMixture(n_components=10,
                                  covariance_type='full',
                                  reg_covar=0,
#                                   means_init=means_init,
#                                   weights_init=weights_init,
                                  max_iter=500,
                                  verbose=2,
                                  warm_start=False)

In [392]:
gmm_clf.fit(np.vstack((X_unlabeled_pca, X_train_pca)))

Initialization 0
  Iteration 0	 time lapse 0.55503s	 ll change inf
  Iteration 10	 time lapse 2.20618s	 ll change 0.24864
  Iteration 20	 time lapse 2.22770s	 ll change 0.02131
  Iteration 30	 time lapse 2.63590s	 ll change 0.00593
  Iteration 40	 time lapse 2.82421s	 ll change 0.00242
Initialization converged: True	 time lapse 11.35379s	 ll 134.31449


GaussianMixture(covariance_type='full', init_params='kmeans', max_iter=500,
        means_init=None, n_components=10, n_init=1, precisions_init=None,
        random_state=None, reg_covar=0, tol=0.001, verbose=2,
        verbose_interval=10, warm_start=False, weights_init=None)

In [393]:
y_pred = gmm_clf.predict(X_test_pca)

In [398]:
for i in np.unique(y_train):
    print(i, '*****')
    idx = np.where(y_test == i)
    print(y_pred[idx])



0.0 *****
[0 2 2 0 7 0 0 0 0 8 7 0 0 7 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 2 0 0 7 0 7 7 0 0 0 0 0 7 0 0 7 0 0 0 0 0 0 8 0 0 7 0
 0]
1.0 *****
[2 1 4 1 4 2 8 1 8 1 1 2 8 8 1 2 1 8 8 1 2 2 2 1 1 2 3 1 4 1 2 4 1 1 1 2 1
 2 1 1 2 1 4 2 2 1 2 1 4 4 8 1 1 1 2 1 2 1 1 4 2 1 9 1 8 1 1 1 1 1 1 1 1 4
 4 1 8 4 1 2 1 4 1 2 4 1 2 1 4 1 1 2 1 1 8]
2.0 *****
[3 7 3 7 3 2 7 7 2 3 7 3 3 8 2 7 3 3 3 2 3 3 3 7 3 3 7 3 3 7 3 3 9 3 7 3 3
 7 3 3 3 8 3 7 7 8 7 3 3 7 7 7 7 3 7 7 7 3 3 7 3 7 3 7 3 7 7 3 3 7 3 7 3 3
 8 8 3 3 2 3 7 3 3 7 7 7 0 3 7 7]
3.0 *****
[8 8 3 3 3 8 3 8 7 3 3 3 2 2 3 3 2 3 8 8 3 3 3 3 3 8 3 8 3 3 3 3 8 2 7 3 2
 2 3 7 2 3 3 3 2 3 3 2 3 3 2 7 3 3 3 8 7 7 3 2 3 3 8 3 3 2 3 3 3 3 8 8 3 3
 3 8 3 3 8 3 3 3 7 3]
4.0 *****
[7 7 9 2 7 9 7 9 9 7 7 7 9 7 9 9 7 9 2 7 7 8 7 9 2 7 7 7 9 7 7 2 3 9 7 9 9
 9 9 9 9 9 9 2 9 9 7 7 2 9 7 7 2 9 2 7 9 9 9 7 7 7 9 9 9 9 7 9 9 7 8 9 9 2
 7 9 9 9 7 9 9 7 7 9 2 2 9 9 7 9 9 7 9 9 9 7 9 9 7 7]
5.0 *****
[7 7 8 7 7 8 8 7 7 8 8 8 8 3 8 8

In [388]:
from sklearn.decomposition import PCA
pca = PCA(n_components=32)

In [389]:
pca.fit(np.vstack((X_train, X_unlabeled)))

PCA(copy=True, iterated_power='auto', n_components=32, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)

In [390]:
X_train_pca = pca.transform(X_train)
X_unlabeled_pca = pca.transform(X_unlabeled)
X_test_pca = pca.transform(X_test)

In [347]:
accuracy = accuracy_score(y_test, y_pred)
print("accuracy: {:.3f}%".format(accuracy*100))

accuracy: 16.889%


In [397]:
y_pred[y_pred == 6] = 9

In [225]:
y_pred[y_pred == 2] = 6

In [226]:
y_pred[y_pred == 9] = 7

In [227]:
y_pred[y_pred == 5] = 8

In [218]:
y_gmm = gmm_clf.predict(X_train)

In [220]:
for i in np.unique(y_train):
    print(i, '*****')
    idx = np.where(y_train == i)
    for j in np.unique(y_train):
        if np.argmax(y_proba[idx]) > 0.995:
            print(j, np.count_nonzero(y_gmm[idx] == j))


0.0 *****
0.0 653
1.0 0
2.0 0
3.0 1
4.0 10
5.0 63
6.0 60
7.0 3
8.0 1
9.0 0
1.0 *****
0.0 0
1.0 829
2.0 0
3.0 0
4.0 23
5.0 30
6.0 0
7.0 0
8.0 5
9.0 1
2.0 *****
0.0 4
1.0 7
2.0 6
3.0 3
4.0 44
5.0 134
6.0 25
7.0 27
8.0 556
9.0 7
3.0 *****
0.0 1
1.0 5
2.0 9
3.0 404
4.0 34
5.0 327
6.0 9
7.0 36
8.0 14
9.0 2
4.0 *****
0.0 0
1.0 2
2.0 7
3.0 1
4.0 266
5.0 184
6.0 13
7.0 193
8.0 8
9.0 44
5.0 *****
0.0 1
1.0 6
2.0 11
3.0 245
4.0 257
5.0 127
6.0 39
7.0 23
8.0 3
9.0 7
6.0 *****
0.0 3
1.0 4
2.0 682
3.0 7
4.0 25
5.0 13
6.0 82
7.0 2
8.0 7
9.0 0
7.0 *****
0.0 2
1.0 9
2.0 0
3.0 0
4.0 22
5.0 141
6.0 8
7.0 47
8.0 3
9.0 618
8.0 *****
0.0 0
1.0 29
2.0 0
3.0 41
4.0 297
5.0 342
6.0 49
7.0 66
8.0 4
9.0 6
9.0 *****
0.0 4
1.0 3
2.0 0
3.0 4
4.0 34
5.0 253
6.0 9
7.0 168
8.0 0
9.0 346


In [212]:
y_proba = gmm_clf.predict_proba(X_train)
high_confidence_rows = [i for i in range(y_proba.shape[0]) if
     y_proba[i, np.argmax(y_proba[i])] > 0.99]

In [213]:
len(high_confidence_rows)

7034

In [204]:
np.argmax(y_proba[high_confidence_rows], axis=1)

array([7, 0, 0, 6, 8, 2, 7, 6, 3, 2, 1, 1, 3, 2, 7, 2, 2, 2, 7, 9, 9, 1,
       9, 5, 1, 7, 6, 2, 7, 1, 6, 4, 9, 6, 1, 0, 9, 0, 3, 9, 7, 6, 8, 9,
       1, 1, 1, 8, 3, 3, 8, 2, 9, 8, 3, 0, 7, 7, 3, 9, 2, 9, 6, 3, 4, 7,
       1, 3, 9, 6, 0, 0, 8, 1, 8, 6, 2, 4, 0, 7, 7, 3, 2, 9, 1, 2, 9, 1,
       7, 1, 7, 1, 9, 5, 9, 8, 4, 4, 5, 6, 9, 1, 1, 6, 1, 0, 0, 1, 3, 6,
       6, 2, 1, 2, 6, 4, 6, 7, 1, 5, 4, 1, 1, 4, 9, 2, 0, 0, 5, 6, 7, 7,
       4, 7, 7, 2, 9, 9, 7, 4, 6, 9, 1, 9, 8, 3, 7, 8, 9, 1, 0, 9, 8, 9,
       6, 2, 9, 9, 1, 1, 6, 1, 7, 1, 9, 0, 9, 9, 9, 1, 1, 0, 9, 9, 9, 9,
       4, 8, 4, 1, 9, 3, 1, 1, 6, 4, 2, 9, 7, 7, 7, 8, 8, 4, 8, 8, 7, 8,
       3, 1, 5, 9, 5, 4, 3, 7, 9, 7, 9, 8, 3, 8, 8, 9, 1, 3, 8, 7, 2, 8,
       6, 9, 9, 9, 9, 8, 1, 6, 7, 6, 1, 1, 3, 2, 6, 3, 3, 1, 9, 8, 3, 3,
       9, 4, 1, 9, 8, 3, 7, 7, 2, 3, 0, 6, 2, 4, 0, 2, 1, 0, 1, 2, 7, 8,
       2, 1, 3, 3, 8, 1, 6, 3, 6, 9, 3, 4, 9, 7, 7, 1, 1, 9, 2, 7, 8, 7,
       1, 1, 4, 1, 9, 7, 3, 1, 9, 3, 9, 3, 3, 1, 6,

In [130]:
b_gmm_clf = mixture.BayesianGaussianMixture(n_components=10,
                                            covariance_type='full',
                                            weight_concentration_prior_type='dirichlet_distribution',
                                            reg_covar=0,
                                            init_params='random',
                                            mean_precision_prior=.8,
                                            n_init=1,
                                            max_iter=2000,
                                            verbose=2,
                                            warm_start=True)
b_gmm_clf.weight_concentration_prior = 1

In [131]:
b_gmm_clf.fit(np.vstack((X_unlabeled_transformed, X_train_transformed)))

Initialization 0
  Iteration 0	 time lapse 1.61027s	 ll change inf
  Iteration 10	 time lapse 11.20518s	 ll change 18294.91827
  Iteration 20	 time lapse 11.24981s	 ll change 15150.65028
  Iteration 30	 time lapse 11.43823s	 ll change 1265.55470
  Iteration 40	 time lapse 11.49076s	 ll change 3266.55054
  Iteration 50	 time lapse 11.42150s	 ll change 2803.42967
  Iteration 60	 time lapse 11.39112s	 ll change 2291.23896
  Iteration 70	 time lapse 11.32693s	 ll change 1457.15225
  Iteration 80	 time lapse 16.48973s	 ll change 264.15707
  Iteration 90	 time lapse 12.02077s	 ll change 334.91379
  Iteration 100	 time lapse 12.16268s	 ll change 15.55278
  Iteration 110	 time lapse 11.66629s	 ll change 4.87165
  Iteration 120	 time lapse 11.52492s	 ll change 2.19827
  Iteration 130	 time lapse 12.88033s	 ll change 1.75626
  Iteration 140	 time lapse 12.69557s	 ll change 0.01897
  Iteration 150	 time lapse 12.31239s	 ll change 10.09222
  Iteration 160	 time lapse 23.75982s	 ll change 0.08845
 

BayesianGaussianMixture(covariance_prior=None, covariance_type='full',
            degrees_of_freedom_prior=None, init_params='random',
            max_iter=2000, mean_precision_prior=0.8, mean_prior=None,
            n_components=10, n_init=1, random_state=None, reg_covar=0,
            tol=0.001, verbose=2, verbose_interval=10, warm_start=True,
            weight_concentration_prior=1,
            weight_concentration_prior_type='dirichlet_distribution')

In [126]:
y_b_gmm = b_gmm_clf.predict(X_train_transformed)

In [127]:
for i in np.unique(y_train):
    idx = np.where(y_train == i)
    print(i, idx[:10])
    print(y_b_gmm[idx])

0.0 (array([  28,   34,   50,   57,   58,   80,   87,   93,  102,  104,  116,
        120,  124,  136,  140,  161,  170,  175,  177,  179,  183,  196,
        202,  215,  220,  229,  238,  242,  265,  272,  274,  278,  287,
        289,  316,  359,  363,  370,  373,  389,  402,  408,  414,  418,
        428,  440,  443,  471,  477,  487,  498,  499,  528,  550,  560,
        572,  603,  622,  645,  666,  674,  678,  695,  712,  723,  726,
        733,  754,  760,  766,  770,  777,  779,  792,  808,  817,  822,
        824,  825,  828,  833,  841,  848,  862,  880,  883,  911,  938,
        951,  962,  963,  984,  999, 1005, 1025, 1032, 1034, 1044, 1045,
       1059, 1065, 1070, 1089, 1101, 1105, 1112, 1120, 1155, 1196, 1203,
       1216, 1224, 1230, 1231, 1247, 1249, 1253, 1254, 1260, 1272, 1277,
       1279, 1285, 1297, 1334, 1341, 1342, 1349, 1405, 1406, 1443, 1444,
       1445, 1464, 1465, 1474, 1477, 1484, 1510, 1516, 1517, 1518, 1523,
       1524, 1527, 1553, 1563, 1566, 1575, 157

In [23]:
select = np.vectorize(lambda x: x==1.0)
y_train[select(1)]

array([[7., 3., 9., ..., 4., 9., 5.]], dtype=float32)

In [33]:
y_train[4]

1.0

## LightGBM

In [None]:
import lightgbm as lgb

In [None]:
lgb_clf = lgb.LGBMClassifier(boosting_type='gbdt', num_leaves=31, max_depth=-1,
                             learning_rate=0.1, n_estimators=250, subsample_for_bin=200000,
                             objective='multiclass', class_weight='balanced', min_split_gain=0.0,
                             min_child_weight=0.001, min_child_samples=20, subsample=1.0,
                             subsample_freq=0, colsample_bytree=1.0, reg_alpha=0,
                             reg_lambda=0, random_state=None, n_jobs=-1, silent=True)

In [None]:
lgb_clf.fit(X_train_transformed, y_train, eval_set=[(X_test_transformed, y_test)],
           early_stopping_rounds=4)

y_pred = lgb_clf.predict(X_test_transformed)
accuracy = accuracy_score(y_test, y_pred)
print("accuracy: {:.3f}%".format(accuracy*100))

In [None]:
y_unlabeled_proba = lgb_clf.predict_proba(X_unlabeled_transformed)
high_confidence_rows = [i for i in range(y_unlabeled_proba.shape[0]) if
     y_unlabeled_proba[i, np.argmax(y_unlabeled_proba[i])] > 0.9]

y_unlabeled_LGB  = np.argmax(y_unlabeled_proba[high_confidence_rows], axis=1)
X_unlabeled_transformed_LGB = X_unlabeled_transformed[high_confidence_rows]

## Step: Stack labeled and unlabeled

In [None]:
# Stack all training data together for DNN
X_both_transformed = np.vstack((X_train_transformed, X_unlabeled_transformed_SVM))#, X_unlabeled_transformed_LGB))
y_both = np.hstack((y_train, y_unlabeled_SVM))#, y_unlabeled_LGB))

## Step: NNs for combined data

### DNN

In [None]:
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, GaussianNoise
from keras.layers.normalization import BatchNormalization
from keras.optimizers import *
from keras.wrappers.scikit_learn import KerasClassifier

from livelossplot import PlotLossesKeras

y_both_categorical = keras.utils.to_categorical(y_both, num_classes=10)
y_test_categorical = keras.utils.to_categorical(y_test, num_classes=10)

callbacks = [keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=50, verbose=1, mode='auto'),
             PlotLossesKeras()]

def dnn_model():
    model = Sequential()

    model.add(GaussianNoise(0.0, input_shape=(128,)))

    model.add(Dense(1024, kernel_initializer='glorot_normal', bias_initializer='zeros'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(0.5))

#     model.add(GaussianNoise(0.5))
    
    model.add(Dense(2048, kernel_initializer='glorot_normal', bias_initializer='zeros'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(0.5))

#     model.add(GaussianNoise(0.5))
    
    model.add(Dense(1024, kernel_initializer='glorot_normal', bias_initializer='zeros'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(0.5))

    model.add(Dense(512, kernel_initializer='glorot_normal', bias_initializer='zeros'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    
    model.add(Dense(10, kernel_initializer='glorot_normal', bias_initializer='zeros'))
    model.add(Activation('softmax'))
    
    sgd = SGD(lr=0.025, decay=1e-6, momentum=0.9, nesterov=True)
    adam = Adam()
    adadelta = Adadelta()
    nadam = Nadam()
    model.compile(loss='categorical_crossentropy',
                  optimizer=sgd,
#                   optimizer=adam,
#                   optimizer=nadam,
                  metrics=['accuracy'])
    
    return model

In [None]:
model = dnn_model()

model.fit(X_both_transformed, y_both_categorical,
          epochs=100,
          callbacks=callbacks,
          validation_data=(X_test_transformed, y_test_categorical),
          batch_size=128,
          verbose=0)

score = model.evaluate(X_test_transformed, y_test_categorical, batch_size=64)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

In [None]:
lgb_clf.fit(X_both_transformed, y_both, eval_set=[(X_test_transformed, y_test)],
           early_stopping_rounds=10)

y_pred = lgb_clf.predict(X_test_transformed)
accuracy = accuracy_score(y_test, y_pred)
print("accuracy: {:.3f}%".format(accuracy*100))

In [None]:
dnn_list = [KerasClassifier(build_fn=dnn_model, epochs=75, batch_size=64, verbose=0, callbacks=callbacks,
            validation_data=(X_test_transformed, y_test_categorical))
            for _ in range(10)]

In [None]:
for dnn in dnn_list:
    dnn.fit(X_both_transformed, y_both_categorical)

In [None]:
y_predict = np.zeros((len(X_test_transformed),10))
for dnn in dnn_list:
    pred = dnn.predict_proba(X_test_transformed)
    y_predict += pred
y_predict /= len(dnn_list)
y_predict = np.argmax(y_predict, axis=1)
accuracy = accuracy_score(y_test, y_predict)
print("accuracy: {:.3f}%".format(accuracy*100))

### CNN

In [None]:
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv1D, MaxPooling1D
from keras import backend as K
from livelossplot import PlotLossesKeras

batch_size = 64
num_classes = 10
epochs = 25
patience = 1

# input dimensions
n_features = 128

if K.image_data_format() == 'channels_first':
    x_train_ = X_both_transformed.reshape(X_both_transformed.shape[0], 1, n_features)
    x_test_ = X_test_transformed.reshape(X_test_transformed.shape[0], 1, n_features)
    x_challenge_ = X_challenge_transformed.reshape(X_challenge_transformed.shape[0], 1, n_features)
    input_shape = (1, n_features)
else:
    x_train_ = X_both_transformed.reshape(X_both_transformed.shape[0], n_features, 1)
    x_test_ = X_test_transformed.reshape(X_test_transformed.shape[0], n_features, 1)
    x_challenge_ = X_challenge_transformed.reshape(X_challenge.shape[0], n_features, 1)
    input_shape = (n_features, 1)

print('x_train shape:', x_train_.shape)
print(x_train_.shape[0], 'train samples')
print(x_test_.shape[0], 'test samples')

# convert class vectors to binary class matrices
y_train_ = keras.utils.to_categorical(y_both, num_classes)
y_test_ = keras.utils.to_categorical(y_test, num_classes)

cnn = Sequential()

cnn.add(GaussianNoise(0.5, input_shape=input_shape))

cnn.add(Conv1D(128, kernel_size=8))
cnn.add(BatchNormalization())
cnn.add(Activation('relu'))

cnn.add(Conv1D(512, 4))
cnn.add(BatchNormalization())
cnn.add(Activation('relu'))

cnn.add(Conv1D(1024, 2))
cnn.add(BatchNormalization())
cnn.add(Activation('relu'))

cnn.add(MaxPooling1D(pool_size=2))

cnn.add(Dropout(0.5))

cnn.add(Flatten())



cnn.add(Dense(1024, kernel_initializer='glorot_normal', bias_initializer='zeros'))
cnn.add(BatchNormalization())
cnn.add(Activation('relu'))
cnn.add(Dropout(0.5))

cnn.add(Dense(512, kernel_initializer='glorot_normal', bias_initializer='zeros'))
cnn.add(BatchNormalization())
cnn.add(Activation('relu'))
cnn.add(Dropout(0.5))

cnn.add(Dense(num_classes, activation='softmax'))

cnn.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])

callbacks = [keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=patience, verbose=1, mode='auto'),
             PlotLossesKeras()]

cnn.fit(x_train_, y_train_,
          batch_size=batch_size,
          epochs=epochs,
          callbacks = [keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=patience, verbose=1, mode='auto'),
                       PlotLossesKeras()],
          verbose=1,
          validation_data=(x_test_, y_test_))
score = cnn.evaluate(x_test_, y_test_, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

## Step: Output

In [10]:
# Generate prediction
y_predict = svm_clf.predict(X_challenge_transformed)

# Write prediction to output file
filename = 'prediction_svm.csv'
result = DataFrame(y_predict)
result.index = result.index + len(data_labeled) + len(data_unlabeled)
result.to_csv(filename, index_label='Id', header=['y'])

In [None]:
# Generate prediction
y_predict = model.predict(X_challenge_transformed)

# y_predict = cnn.predict(x_challenge_)

# Write prediction to output file
filename = 'prediction_labeled.csv'
result = DataFrame(np.argmax(y_predict, axis=1))
result.index = result.index + len(data_labeled) + len(data_unlabeled)
result.to_csv(filename, index_label='Id', header=['y'])

In [None]:
y_predict = np.zeros((len(X_challenge_transformed),10))
for dnn in dnn_list:
    pred = dnn.predict_proba(X_challenge_transformed)
    y_predict += pred
y_predict /= len(dnn_list)

# Write prediction to output file
filename = 'prediction_labeled_bag.csv'
result = DataFrame(np.argmax(y_predict, axis=1))
result.index = result.index + len(data_labeled) + len(data_unlabeled)
result.to_csv(filename, index_label='Id', header=['y'])
