In [2]:
import pandas as pd

from sklearn.ensemble import BaggingClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder

In [3]:
df = pd.read_csv("kr-vs-kp.csv")

### Train / Test Split

In [4]:
le = LabelEncoder()
df["class"] = le.fit_transform(df["class"])

categories = ['bkblk','bknwy','bkon8','bkona','bkspr','bkxbq','bkxcr','bkxwp','blxwp','bxqsq','cntxt','dsopp','dwipd','hdchk','katri','mulch','qxmsq','r2ar8','reskd','reskr','rimmx','rkxwp','rxmsq','simpl','skach','skewr','skrxp','spcop','stlmt','thrsk','wkcti','wkna8','wknck','wkovl','wkpos','wtoeg']

OH_encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
OH_df = pd.DataFrame(OH_encoder.fit_transform(df[categories]))

enc_df = pd.concat([OH_df, df["class"]], axis=1)

In [5]:
train = enc_df.drop("class", axis=1)
test = enc_df["class"]

In [6]:
X_train, X_test, y_train, y_test = train_test_split(train, test, train_size=0.8)

### Create Model

In [15]:
estimator = MLPClassifier(
    hidden_layer_sizes=(30,),
    learning_rate_init=0.15,
    momentum=0.9,
    random_state=0,
    verbose=1
)

model = BaggingClassifier(
    estimator=estimator,
    n_estimators=25,
    random_state=0,
    verbose=1
)

model.fit(X_train, y_train)

Iteration 1, loss = 1.14216653
Iteration 2, loss = 0.30906904
Iteration 3, loss = 0.15620338
Iteration 4, loss = 0.11543704
Iteration 5, loss = 0.08030149
Iteration 6, loss = 0.07159124
Iteration 7, loss = 0.07743882
Iteration 8, loss = 0.06126738
Iteration 9, loss = 0.04834072
Iteration 10, loss = 0.04263645
Iteration 11, loss = 0.03963293
Iteration 12, loss = 0.04039949
Iteration 13, loss = 0.03543922
Iteration 14, loss = 0.02726571
Iteration 15, loss = 0.03748782
Iteration 16, loss = 0.06502317
Iteration 17, loss = 0.05480651
Iteration 18, loss = 0.04011286
Iteration 19, loss = 0.02564983
Iteration 20, loss = 0.01727032
Iteration 21, loss = 0.01537764
Iteration 22, loss = 0.01234271
Iteration 23, loss = 0.01309119
Iteration 24, loss = 0.01276728
Iteration 25, loss = 0.01074184
Iteration 26, loss = 0.00890489
Iteration 27, loss = 0.00748257
Iteration 28, loss = 0.00717161
Iteration 29, loss = 0.01589381
Iteration 30, loss = 0.14822786
Iteration 31, loss = 0.06370636
Iteration 32, los

### Test

In [16]:
model.score(X_test, y_test)

0.984375

### Size

In [12]:
import pickle
import sys

p = pickle.dumps(model)
print(f"{sys.getsizeof(p)} bytes")

1470425 bytes
