- ref. 

In [145]:
# load data
import pandas as pd, numpy as np
pd.set_option("display.width", 80)
pd.set_option("display.max_rows", 20)
import seaborn as sns, matplotlib.pyplot as plt
import multiprocessing as mp
n_cpus = mp.cpu_count()

# train_df = pd.read_csv("./train.csv")   # 42000
train_df = pd.read_csv("./train.csv", nrows=20000)
test_df = pd.read_csv("./test.csv")   # 28000
train_num = train_df.shape[0]
test_num = test_df.shape[0]
train_x = train_df.drop(['label'], axis=1)
train_y = train_df["label"]
test_x = test_df
all_df = pd.concat((train_x, test_df))

dfd = {'train': train_df, 'test': test_df, 'all': all_df}
for name,df in dfd.items():
    print(name, df.shape)
    print(df.head())

train (20000, 785)
   label  pixel0  pixel1  pixel2  pixel3  pixel4  pixel5  pixel6  pixel7  \
0      1       0       0       0       0       0       0       0       0   
1      0       0       0       0       0       0       0       0       0   
2      1       0       0       0       0       0       0       0       0   
3      4       0       0       0       0       0       0       0       0   
4      0       0       0       0       0       0       0       0       0   

   pixel8    ...     pixel774  pixel775  pixel776  pixel777  pixel778  \
0       0    ...            0         0         0         0         0   
1       0    ...            0         0         0         0         0   
2       0    ...            0         0         0         0         0   
3       0    ...            0         0         0         0         0   
4       0    ...            0         0         0         0         0   

   pixel779  pixel780  pixel781  pixel782  pixel783  
0         0         0         0

In [146]:
# check data
for name,df in dfd.items():
    print(name)
    print(df.info())
    #print(df.describe())   # doesn't show a lot of information

train
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20000 entries, 0 to 19999
Columns: 785 entries, label to pixel783
dtypes: int64(785)
memory usage: 119.8 MB
None
test
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 28000 entries, 0 to 27999
Columns: 784 entries, pixel0 to pixel783
dtypes: int64(784)
memory usage: 167.5 MB
None
all
<class 'pandas.core.frame.DataFrame'>
Int64Index: 48000 entries, 0 to 27999
Columns: 784 entries, pixel0 to pixel783
dtypes: int64(784)
memory usage: 287.5 MB
None


In [147]:
# check null
for name,df in dfd.items():
    print(name)
    isnull_ratio = df.isnull().sum()/df.isnull().count()
    print(isnull_ratio[isnull_ratio != 0].sort_values(ascending=False))

Series([], dtype: float64)


Series([], dtype: float64)
all


Series([], dtype: float64)
test


train


In [148]:
# check correlation (takes time)
# -> pixel436 has high correlation with label.. so what?
# correlations = train_df.astype(float).corr()['label'].sort_values(ascending=False)
# print(correlations.head())
# print(correlations.tail())

In [149]:
# check skewness (takes time)
# -> pixel367 has high skewness, but log/boxcox transform doesn't help
# from scipy.stats import skew
# skewness = all_df.apply(lambda x: skew(x)).sort_values(ascending=False)
# print(skewness.head())
# print(skewness.tail())
# all_df['pixel367'].hist(bins=50)
# plt.show()

In [150]:
# grid search
from sklearn.linear_model import LogisticRegression, Perceptron, SGDClassifier
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC, LinearSVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import GridSearchCV
train_x = all_df[:train_num]
test_x = all_df[train_num:]

models = []
C_list, gamma_list, degree_list = np.logspace(-2, 0, 3), np.logspace(-3, -1, 3), np.linspace(1, 3, 3)
grid = [
    {'C': C_list, 'kernel': ['poly'], 'gamma': gamma_list, 'degree': degree_list},
    # {'C': C_list, 'kernel': ['sigmoid'], 'gamma': gamma_list},
    # {'C': C_list, 'kernel': ['linear']},
    # {'C': C_list, 'kernel': ['rbf'], 'gamma': gamma_list},
    ]
models.append(('svm', GridSearchCV(SVC(), grid, cv=3, n_jobs=n_cpus)))
grid = [{'n_estimators': [1000], 'max_depth': [6,7,8], 'min_samples_leaf': [2,3], 'max_features' : ['sqrt']}]
models.append(('random_forest', GridSearchCV(RandomForestClassifier(), grid, cv=3, n_jobs=n_cpus)))
models.append(('extra_trees', GridSearchCV(ExtraTreesClassifier(), grid, cv=3, n_jobs=n_cpus)))
grid = {'n_estimators': [1000], 'learning_rate' : [0.25, 0.5, 0.75]}
models.append(('ada_boost', GridSearchCV(AdaBoostClassifier(), grid, cv=3, n_jobs=n_cpus)))
grid = {'n_estimators': [1000], 'max_depth': [6,7,8], 'min_samples_leaf': [2,3]}
models.append(('gradient_boosting', GridSearchCV(GradientBoostingClassifier(), grid, cv=3, n_jobs=n_cpus)))
grid = {'max_iter': [1000], 'tol': [0.001]}
models.append(('perceptron', GridSearchCV(Perceptron(max_iter=1000, tol=0.001), grid, cv=3, n_jobs=n_cpus)))
models.append(('sgd_classifier', GridSearchCV(SGDClassifier(max_iter=1000, tol=0.001), grid, cv=3, n_jobs=n_cpus)))
grid = {'n_neighbors': [2,3,4]}
models.append(('k-nearest_neighbors', GridSearchCV(KNeighborsClassifier(), grid, cv=3, n_jobs=n_cpus)))
grid = {}
models.append(('logistic_regression', GridSearchCV(LogisticRegression(), grid, cv=3, n_jobs=n_cpus)))
models.append(('gaussian_naive bayes', GridSearchCV(GaussianNB(), grid, cv=3, n_jobs=n_cpus)))
models.append(('linear_svc', GridSearchCV(LinearSVC(), grid, cv=3, n_jobs=n_cpus)))
models.append(('decision_tree', GridSearchCV(DecisionTreeClassifier(), grid, cv=3, n_jobs=n_cpus)))

params = {}
for name,model in models:
    model.fit(train_x, train_y)
    print(name, model.best_score_, model.best_params_)
    params[name] = model.best_params_
    # results = model.cv_results_
    # for mean, std, params in zip(results['mean_test_score'], results['std_test_score'], results['params']):
    #     print("{:0.3f} (+/-{:0.03f} for {}".format(mean, std, params))

models = [
       ('svm', SVC(**params['svm'])),
       ('random_forest', RandomForestClassifier(**params['random_forest'])),
       ('extra_trees', ExtraTreesClassifier(**params['extra_trees'])),
       ('ada_boost', AdaBoostClassifier(**params['ada_boost'])),
       ('gradient_boosting', GradientBoostingClassifier(**params['gradient_boosting'])),
       ('perceptron', Perceptron(**params['perceptron'])),
       ('sgd_classifier', SGDClassifier(**params['sgd_classifier'])),
       ('k-nearest_neighbors', KNeighborsClassifier(**params['k-nearest_neighbors'])),
       ('logistic_regression', LogisticRegression(**params['logistic_regression'])),
       ('gaussian_naive bayes', GaussianNB(**params['gaussian_naive bayes'])),
       ('linear_svc', LinearSVC(**params['linear_svc'])),
       ('decision_tree', DecisionTreeClassifier(**params['decision_tree'])),
       ]

decision_tree 0.8189 {}


linear_svc 0.8538 {}


gaussian_naive bayes 0.5606 {}


logistic_regression 0.8562 {}


k-nearest_neighbors 0.9543 {'n_neighbors': 3}


sgd_classifier 0.86715 {'max_iter': 1000, 'tol': 0.001}


perceptron 0.82985 {'max_iter': 1000, 'tol': 0.001}


gradient_boosting 0.9545 {'max_depth': 6, 'min_samples_leaf': 3, 'n_estimators': 1000}


ada_boost 0.71145 {'learning_rate': 0.25, 'n_estimators': 1000}


extra_trees 0.9146 {'max_depth': 8, 'max_features': 'sqrt', 'min_samples_leaf': 2, 'n_estimators': 1000}


random_forest 0.92315 {'max_depth': 8, 'max_features': 'sqrt', 'min_samples_leaf': 2, 'n_estimators': 1000}


svm 0.9645 {'C': 0.01, 'degree': 2.0, 'gamma': 0.001, 'kernel': 'poly'}


In [151]:
# cross validation
from sklearn.model_selection import KFold, cross_validate
from multiprocessing import Manager

k_fold = KFold(n_splits=3)
with Manager() as manager:
    results = manager.list()
    def validate_one_model(name, model):
         # Multiprocessing-backed parallel loops cannot be nested
         scores = cross_validate(model, train_x, train_y, cv=k_fold)
         result = {}
         result['model'] = name
         result['test_mean'] = np.mean(scores['test_score'])
         result['test_std'] = np.std(scores['test_score'])
         result['train_mean'] = np.mean(scores['train_score'])
         result['train_std'] = np.std(scores['train_score'])
         results.append(result)
    Parallel(n_jobs=n_cpus)(delayed(validate_one_model)(name, model) for name,model in models)
    results_merged = {'model': [], 'test_mean': [], 'test_std': [], 'train_mean': [], 'train_std': []}
    for result in results:
        for k,v in result.items():
            results_merged[k].append(v)
    print(pd.DataFrame(results_merged).sort_values(by='test_mean', ascending=False))

                   model  test_mean  test_std  train_mean  train_std
7                    svm   0.963950  0.002717    1.000000   0.000000
11     gradient_boosting   0.955100  0.003276    1.000000   0.000000
9    k-nearest_neighbors   0.954000  0.000835    0.977825   0.000655
6          random_forest   0.922550  0.004791    0.952575   0.001124
5            extra_trees   0.914250  0.004660    0.939175   0.000373
0             perceptron   0.867700  0.001187    0.891025   0.004880
1         sgd_classifier   0.865951  0.008922    0.900500   0.003892
3             linear_svc   0.861100  0.004611    0.937500   0.002822
10   logistic_regression   0.857900  0.001247    0.974175   0.002991
4          decision_tree   0.820500  0.002878    1.000000   0.000000
8              ada_boost   0.680353  0.052878    0.691624   0.047616
2   gaussian_naive bayes   0.561000  0.009317    0.567750   0.002612


In [152]:
# learn, predict and report
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.externals.joblib import Parallel, delayed, dump
test_id = np.arange(1, test_num+1)
results = {}
def process_one_model(name, model):
    model.fit(train_x, train_y)
    predict_y = model.predict(train_x)
    output = model.predict(test_x)
    results[name] = output
    submit = pd.DataFrame(data={'ImageId':test_id, 'Label':output})
    submit.to_csv('{}_submit.csv'.format(name), index=False)
    #dump(model, '{}.learn'.format(name))
    print(name)
    print(confusion_matrix(train_y, predict_y))
    print(classification_report(train_y, predict_y))
Parallel(n_jobs=n_cpus)(delayed(process_one_model)(name, model) for name,model in models)

[None, None, None, None, None, None, None, None, None, None, None, None]




             precision    recall  f1-score   support

          0       1.00      1.00      1.00      1932
          1       1.00      1.00      1.00      2266
          2       1.00      1.00      1.00      2010
          3       1.00      1.00      1.00      2080
          4       1.00      1.00      1.00      1971
          5       1.00      1.00      1.00      1794
          6       1.00      1.00      1.00      1982
          7       1.00      1.00      1.00      2070
          8       1.00      1.00      1.00      1915
          9       1.00      1.00      1.00      1980

avg / total       1.00      1.00      1.00     20000





[[1932    0    0    0    0    0    0    0    0    0]
 [   0 2266    0    0    0    0    0    0    0    0]
 [   0    0 2010    0    0    0    0    0    0    0]
 [   0    0    0 2080    0    0    0    0    0    0]
 [   0    0    0    0 1971    0    0    0    0    0]
 [   0    0    0    0    0 1794    0    0    0    0]
 [   0    0    0    0    0    0 1982    0    0    0]
 [   0    0    0    0    0    0    0 2070    0    0]
 [   0    0    0    0    0    0    0    0 1915    0]
 [   0    0    0    0    0    0    0    0    0 1980]]

gradient_boosting





             precision    recall  f1-score   support

          0       0.98      1.00      0.99      1932
          1       0.97      1.00      0.98      2266
          2       0.99      0.97      0.98      2010
          3       0.97      0.98      0.98      2080
          4       0.99      0.98      0.98      1971
          5       0.98      0.97      0.98      1794
          6       0.99      0.99      0.99      1982
          7       0.98      0.98      0.98      2070
          8       0.99      0.95      0.97      1915
          9       0.98      0.98      0.98      1980

avg / total       0.98      0.98      0.98     20000





[[1924    1    0    0    0    1    5    0    1    0]
 [   0 2259    2    0    0    0    0    3    1    1]
 [   6   17 1952    6    0    0    0   21    6    2]
 [   2    0   11 2044    0   11    0    2    7    3]
 [   0   18    0    0 1931    0    2    2    0   18]
 [   6    2    1   22    1 1746   11    0    2    3]
 [   2    3    0    0    4    8 1964    0    1    0]
 [   0   22    4    0    4    0    0 2035    0    5]
 [  10   16    5   20    5   15    3    4 1825   12]
 [   5    1    1    7   15    1    0   12    1 1937]]

k-nearest_neighbors





             precision    recall  f1-score   support

          0       1.00      1.00      1.00      1932
          1       1.00      1.00      1.00      2266
          2       0.95      0.94      0.95      2010
          3       0.93      0.93      0.93      2080
          4       0.96      0.97      0.97      1971
          5       0.93      0.92      0.93      1794
          6       0.98      0.98      0.98      1982
          7       0.97      0.96      0.96      2070
          8       0.91      0.92      0.92      1915
          9       0.93      0.94      0.93      1980

avg / total       0.96      0.96      0.96     20000





[[1931    0    0    0    0    1    0    0    0    0]
 [   0 2266    0    0    0    0    0    0    0    0]
 [   0    1 1891   17   15    6    9   17   49    5]
 [   0    0   38 1931    1   35    5    9   43   18]
 [   0    0    4    5 1912    3    3    3   10   31]
 [   0    0    6   48   11 1653   16    3   41   16]
 [   0    0    3    1    8   17 1944    0    9    0]
 [   0    1   17    3    4    2    0 1992    5   46]
 [   0    1   16   49    3   39    9    6 1769   23]
 [   0    0    6   21   33   13    0   33   15 1859]]

logistic_regression





             precision    recall  f1-score   support

          0       0.82      0.90      0.86      1932
          1       0.82      0.98      0.89      2266
          2       0.77      0.47      0.58      2010
          3       0.84      0.72      0.77      2080
          4       0.72      0.87      0.79      1971
          5       0.73      0.59      0.65      1794
          6       0.68      0.87      0.76      1982
          7       0.92      0.73      0.81      2070
          8       0.69      0.79      0.74      1915
          9       0.65      0.65      0.65      1980

avg / total       0.77      0.76      0.75     20000





[[1741    0   46    2    2   75   57    0    7    2]
 [   0 2212    5    6    2   11    3    5   20    2]
 [  98  157  942   41   57   26  541   31  106   11]
 [ 116   44   26 1501    8  142   62   14  135   32]
 [  11    7   25    1 1721    6   33   14   39  114]
 [ 105   38   12  146   14 1061   81    1  279   57]
 [  31   43   80    0   40   54 1724    0   10    0]
 [   3   14   35    8   41   14    0 1501   24  430]
 [  16  173   14   63   13   47   34    6 1519   30]
 [  12   16   39   27  478   10    0   59   61 1278]]

ada_boost





             precision    recall  f1-score   support

          0       1.00      1.00      1.00      1932
          1       1.00      1.00      1.00      2266
          2       1.00      1.00      1.00      2010
          3       1.00      1.00      1.00      2080
          4       1.00      1.00      1.00      1971
          5       1.00      1.00      1.00      1794
          6       1.00      1.00      1.00      1982
          7       1.00      1.00      1.00      2070
          8       1.00      1.00      1.00      1915
          9       1.00      1.00      1.00      1980

avg / total       1.00      1.00      1.00     20000





[[1932    0    0    0    0    0    0    0    0    0]
 [   0 2266    0    0    0    0    0    0    0    0]
 [   0    0 2010    0    0    0    0    0    0    0]
 [   0    0    0 2080    0    0    0    0    0    0]
 [   0    0    0    0 1971    0    0    0    0    0]
 [   0    0    0    0    0 1794    0    0    0    0]
 [   0    0    0    0    0    0 1982    0    0    0]
 [   0    0    0    0    0    0    0 2070    0    0]
 [   0    0    0    0    0    0    0    0 1915    0]
 [   0    0    0    0    0    0    0    0    0 1980]]

svm





             precision    recall  f1-score   support

          0       0.97      0.99      0.98      1932
          1       0.95      0.98      0.97      2266
          2       0.95      0.94      0.95      2010
          3       0.95      0.93      0.94      2080
          4       0.94      0.94      0.94      1971
          5       0.97      0.93      0.95      1794
          6       0.96      0.98      0.97      1982
          7       0.95      0.92      0.94      2070
          8       0.94      0.93      0.93      1915
          9       0.88      0.92      0.90      1980

avg / total       0.95      0.95      0.95     20000





[[1907    0    0    0    2    1    4    1   17    0]
 [   0 2230    7    7    1    4    7    3    3    4]
 [   5    9 1896    8   23    0   15   32   13    9]
 [   5   13   33 1940    4   18    7   18   19   23]
 [   0    4    3    0 1843    2   13    5    8   93]
 [  16   22    2   31    5 1665   20    4   13   16]
 [   7    7    1    0    7   12 1941    0    7    0]
 [   1   27   37    3   19    0    0 1911   11   61]
 [   4   31    5   30    9    5    8    5 1772   46]
 [  12    8    6   27   44    3    2   30   23 1825]]

random_forest





             precision    recall  f1-score   support

          0       1.00      1.00      1.00      1932
          1       1.00      1.00      1.00      2266
          2       1.00      1.00      1.00      2010
          3       1.00      1.00      1.00      2080
          4       1.00      1.00      1.00      1971
          5       1.00      1.00      1.00      1794
          6       1.00      1.00      1.00      1982
          7       1.00      1.00      1.00      2070
          8       1.00      1.00      1.00      1915
          9       1.00      1.00      1.00      1980

avg / total       1.00      1.00      1.00     20000





[[1932    0    0    0    0    0    0    0    0    0]
 [   0 2266    0    0    0    0    0    0    0    0]
 [   0    0 2010    0    0    0    0    0    0    0]
 [   0    0    0 2080    0    0    0    0    0    0]
 [   0    0    0    0 1971    0    0    0    0    0]
 [   0    0    0    0    0 1794    0    0    0    0]
 [   0    0    0    0    0    0 1982    0    0    0]
 [   0    0    0    0    0    0    0 2070    0    0]
 [   0    0    0    0    0    0    0    0 1915    0]
 [   0    0    0    0    0    0    0    0    0 1980]]

decision_tree





             precision    recall  f1-score   support

          0       0.96      0.98      0.97      1932
          1       0.91      0.98      0.95      2266
          2       0.95      0.92      0.94      2010
          3       0.92      0.92      0.92      2080
          4       0.93      0.93      0.93      1971
          5       0.97      0.89      0.92      1794
          6       0.95      0.98      0.96      1982
          7       0.92      0.92      0.92      2070
          8       0.94      0.90      0.92      1915
          9       0.89      0.89      0.89      1980

avg / total       0.93      0.93      0.93     20000





[[1900    0    1    0    1    1   13    1   15    0]
 [   0 2228    8   10    1    2    5    5    3    4]
 [   8   23 1856   12   27    0   22   48    8    6]
 [  11   23   36 1906    5   27    7   17   24   24]
 [   1   12    4    1 1834    1   12    9    9   88]
 [  19   35    4   68   17 1589   27   10    9   16]
 [   9   12    0    0    8    9 1936    2    6    0]
 [   4   46   37    3   21    0    1 1909    8   41]
 [   6   51    6   54    8    9   11    9 1714   47]
 [  15   16    6   27   59    6    2   59   19 1771]]

extra_trees





             precision    recall  f1-score   support

          0       0.98      0.99      0.98      1932
          1       0.98      0.99      0.99      2266
          2       0.98      0.80      0.88      2010
          3       0.87      0.91      0.89      2080
          4       0.94      0.94      0.94      1971
          5       0.89      0.80      0.84      1794
          6       0.97      0.96      0.96      1982
          7       0.90      0.96      0.93      2070
          8       0.76      0.90      0.82      1915
          9       0.89      0.87      0.88      1980

avg / total       0.92      0.91      0.91     20000





[[1919    0    1    1    1    3    0    0    6    1]
 [   1 2253    0    0    0    0    1    0   10    1]
 [   9    9 1613   94   35   30   21   35  157    7]
 [   2    5   11 1903    4   32    5   11   83   24]
 [   2    2    3    0 1853    7    1    7   15   81]
 [   5    5    1  110   11 1440   21    3  163   35]
 [  11    1    3    1   14   25 1897    3   27    0]
 [   1    6    5    5   13    1    0 1983   13   43]
 [   9   19    4   51    5   68    4   22 1714   19]
 [   7    1    0   14   43   13    1  133   55 1713]]

linear_svc





             precision    recall  f1-score   support

          0       0.63      0.94      0.76      1932
          1       0.76      0.96      0.85      2266
          2       0.86      0.24      0.37      2010
          3       0.71      0.42      0.53      2080
          4       0.84      0.14      0.23      1971
          5       0.70      0.09      0.15      1794
          6       0.60      0.95      0.74      1982
          7       0.91      0.34      0.49      2070
          8       0.34      0.60      0.43      1915
          9       0.40      0.94      0.56      1980

avg / total       0.68      0.57      0.52     20000





[[1818    3   11    7    3    5   47    0   21   17]
 [   1 2165    4   10    0    3   32    0   31   20]
 [ 270   66  476  228    7   11  563    7  352   30]
 [ 235  132   14  870    3    4  136   12  497  177]
 [  92   23   17   13  268   25  274   15  356  888]
 [ 349   60    9   43    6  153  124    2  875  173]
 [  30   33    6    3    1    6 1883    0   17    3]
 [  12   26    2   22   17    4   10  702   69 1206]
 [  48  308    9   22    5    9   55    4 1154  301]
 [  16   18    7    2    9    0    0   28   37 1863]]

gaussian_naive bayes





             precision    recall  f1-score   support

          0       0.98      0.98      0.98      1932
          1       0.99      0.92      0.96      2266
          2       0.97      0.85      0.91      2010
          3       0.87      0.87      0.87      2080
          4       0.96      0.90      0.93      1971
          5       0.94      0.66      0.77      1794
          6       0.96      0.96      0.96      1982
          7       0.81      0.98      0.89      2070
          8       0.67      0.92      0.78      1915
          9       0.86      0.82      0.84      1980

avg / total       0.90      0.89      0.89     20000





[[1894    0    1    0    2    8    3    6   14    4]
 [   0 2090   11    7    0    4    5   28  112    9]
 [   5    3 1718   59   26    5   15   59  110   10]
 [   2    0   24 1819    2   13    4   39  154   23]
 [   0    3    2    6 1772    3    8   45   38   94]
 [  17    1    5  129    9 1182   31   27  338   55]
 [  11    1    5    4    5   22 1899    8   27    0]
 [   0    0    7    3    2    0    0 2021    6   31]
 [   9    3    3   36    1   19    3   33 1771   37]
 [   4    0    1   17   26    3    0  228   74 1627]]

sgd_classifier





             precision    recall  f1-score   support

          0       0.93      0.98      0.95      1932
          1       0.96      0.95      0.95      2266
          2       0.87      0.88      0.87      2010
          3       0.95      0.75      0.84      2080
          4       0.85      0.92      0.88      1971
          5       0.98      0.57      0.72      1794
          6       0.93      0.96      0.94      1982
          7       0.74      0.96      0.84      2070
          8       0.58      0.94      0.71      1915
          9       0.94      0.49      0.64      1980

avg / total       0.87      0.84      0.84     20000





[[1888    1    8    3    2    3   11    2   13    1]
 [   0 2146   12    2    2    0    7   54   43    0]
 [  17   13 1766    4   31    2   28   44  104    1]
 [  23    8  151 1555    9    9   14   48  257    6]
 [   4    7    5    0 1807    0    5   31   98   14]
 [  45   16   46   64   66 1016   78   36  403   24]
 [  17    6   19    0   16    1 1904    5   14    0]
 [   7    3   16    2   14    0    1 1984   26   17]
 [  18   31   13   11    9    5   10   22 1793    3]
 [  16   10    4    2  161    1    0  453  360  973]]

perceptron


In [153]:
# TODO bagging
# from sklearn.ensemble import BaggingClassifier

In [154]:
# TODO stacking
# from mlxtend.classifier import StackingClassifier

In [155]:
!head svm_submit.csv

ImageId,Label
1,2
2,0
3,9
4,9
5,3
6,7
7,0
8,3
9,0
