In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import log_loss

from tqdm.auto import tqdm

# Seed NumPy's legacy global RNG so a fresh "Restart & Run All" is repeatable.
# None of the scikit-learn calls in this notebook pass `random_state`, so (per
# scikit-learn's documented default) they draw from this global state; running
# cells out of order or more than once will therefore give different
# splits and initial weights.
np.random.seed(12345)

# ========== Data ==========

In [2]:
# Load the labelled data set. The displayed frame has an 'Unnamed: 0' column
# next to x1, x2 and y.
# NOTE(review): 'Unnamed: 0' looks like a row index that was written into the
# CSV; consider `index_col=0` if nothing downstream needs it -- confirm.
data = pd.read_csv('dataset2.csv')

In [3]:
# Show the full frame (10 rows in the recorded output) to sanity-check the load.
data

Unnamed: 0,x1,x2,y
0,2,1,B
1,2,-1,B
2,-2,2,A
3,1,2,B
4,-2,3,B
5,2,0,B
6,-1,-1,A
7,-2,1,A
8,0,0,A
9,1,-1,A


In [4]:
# Feature matrix: the two input columns x1 and x2.
X = data[['x1', 'x2']]

In [5]:
# Target vector: the class-label column ('A' / 'B' in the displayed data).
y = data['y']

In [6]:
# Hold out 20% of the rows, shuffled, as a validation set for model selection.
# No `random_state` is passed, so the shuffle depends on NumPy's global RNG
# state at the time this cell runs.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=True,
)

# ========== Model ==========

In [7]:
# Model selection: sweep the L2 penalty `alpha` of a small MLP and keep the
# fitted model with the lowest log-loss on the validation split.
#
# Reads:  X_train, y_train, X_val, y_val (from the split cell).
# Writes: best_loss (lowest validation log-loss seen) and best_clf (the fitted
#         MLPClassifier that achieved it).
# Raises: RuntimeError if no candidate beats the initial `np.inf` loss, so that
#         later cells never run against a missing or stale `best_clf`.
#
# No `random_state` is passed to MLPClassifier, so weight initialisation and
# SGD shuffling depend on NumPy's global RNG state when this cell runs.
best_loss = np.inf
best_clf = None  # reset explicitly: a re-run must not inherit an old model

# 100 candidate penalties, log-spaced from 1e-3 to 1e2.
alphas = np.logspace(-3, 2, 100)

for alpha in tqdm(alphas):
    # Two hidden layers (5 and 4 units), plain SGD with momentum disabled.
    clf = MLPClassifier(
        hidden_layer_sizes=(5, 4),
        activation='relu',
        solver='sgd',
        alpha=alpha,
        learning_rate_init=0.01,
        max_iter=4000,
        momentum=0,
    )

    clf.fit(X_train, y_train)
    y_pred_prob_val = clf.predict_proba(X_val)
    # The probability columns are ordered by `clf.classes_`; pass that exact
    # ordering so the loss is computed against the right columns even when the
    # validation split does not contain every class.
    loss = log_loss(y_val, y_pred_prob_val, labels=clf.classes_)

    # Strict '<' keeps the first (smallest-alpha) model on ties.
    if loss < best_loss:
        best_loss = loss
        best_clf = clf

if best_clf is None:
    raise RuntimeError(
        'alpha search did not select a model: no candidate produced a '
        'validation log-loss below infinity'
    )

  0%|          | 0/100 [00:00<?, ?it/s]

In [8]:
# Regularisation strength (alpha) of the model selected by the search above.
best_clf.alpha

0.011497569953977356

In [9]:
# Fitted bias vectors of the selected network, one array per layer
# (5, 4 and 1 entries in the recorded output).
best_clf.intercepts_

[array([-0.71729146,  1.22517304,  0.22826713,  0.35081379,  0.86342918]),
 array([ 0.45352865, -0.10851175, -0.34640978, -0.03165688]),
 array([-0.30286428])]

In [10]:
# Fitted weight matrices of the selected network, one array per layer
# (shapes 2x5, 5x4 and 4x1 in the recorded output).
best_clf.coefs_

[array([[-0.62087815, -0.40246921,  0.83935598,  1.10295279, -0.83386044],
        [ 0.16170955, -0.67452466,  1.06738902,  0.03262305,  0.63845978]]),
 array([[-0.69459846, -0.45069863,  0.39369494, -0.30881234],
        [ 0.96190999,  0.16663821, -0.17553722, -0.55169654],
        [-0.46617222,  0.72226741, -0.70970522,  1.1537416 ],
        [-0.06478017,  0.53289889, -0.31402257,  0.60338896],
        [ 0.30422113, -0.52096309, -0.10091839,  0.13001853]]),
 array([[-1.38339551],
        [ 1.03050366],
        [-0.87723591],
        [ 1.10537026]])]

# ========== Prediction ==========

In [11]:
# Load the incoming rows that the selected model will score.
# NOTE(review): this rebinds `X`, which until now held the training features
# taken from `data`. Re-running the train/validation split cell after this one
# would operate on the incoming rows instead -- consider a distinct name such
# as `X_incoming` (and update the predict cell to match).
# NOTE(review): presumably this file holds exactly the x1/x2 columns the model
# was fitted on -- confirm.
X = pd.read_csv('incoming_data2.csv')

In [12]:
# Predict a class label for every incoming row with the selected model.
best_clf.predict(X)

array(['B', 'A', 'B', 'A'], dtype='<U1')