## Load data

In [1]:
import pandas as pd

df_train = pd.read_csv("Datasets/train.csv")
y_train = df_train["label"]
X_train = df_train.drop("label", axis=1)

In [2]:
df_test = pd.read_csv("Datasets/test.csv")
y_test = []  # Empty no labels for test data
X_test = df_test

## Image processing

In [3]:
from sklearn.preprocessing import MinMaxScaler

scale = MinMaxScaler()

X_train = scale.fit_transform(X_train)

X_test = scale.transform(X_test)

## Stacked model creation

In [4]:
from sklearn.tree import DecisionTreeClassifier
from sklearn import svm
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import StackingClassifier, RandomForestClassifier

knn = KNeighborsClassifier(n_neighbors=4, weights='distance')
svc = svm.SVC(C=10)
rnd_forest = RandomForestClassifier(n_estimators=800)
dtclf = DecisionTreeClassifier(criterion='entropy', splitter='best')

sclf = StackingClassifier(estimators=[('knn', knn), ('svc', svc), ('rnd_forest', rnd_forest), ('dtclf', dtclf)])

## Stacked model training

In [5]:
print(sclf.fit(X_train, y_train))

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    6.4s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:   12.6s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:   31.4s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  2.2min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:  4.4min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed: 11.0min finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  2.2min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:  4.4min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed: 11.1min finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concur

StackingClassifier(estimators=[('knn',
                                KNeighborsClassifier(n_neighbors=4,
                                                     weights='distance')),
                               ('svc', SVC(C=10)),
                               ('rnd_forest',
                                RandomForestClassifier(n_estimators=800)),
                               ('dtclf',
                                DecisionTreeClassifier(criterion='entropy'))],
                   verbose=3)

## Stacked model test

In [6]:
y_pred_test = sclf.predict(X_test)

## Ouput file csv creation

In [20]:
df_pred = pd.DataFrame({'ImageId': list(range(1, len(y_pred_test) + 1)), 'Label': y_pred_test})
df_pred.to_csv("Im-Rises_stacked_model.csv", index=False)


In [None]:
## Score : 0.98107
