In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import log_loss

from tqdm.auto import tqdm

np.random.seed(12345)

# ========== Data ==========

In [2]:
data = pd.read_csv('dataset3.csv')

In [3]:
data

Unnamed: 0,x1,x2,y
0,0,4,A
1,2,2,B
2,2,4,A
3,1,4,A
4,-1,3,A
5,0,2,A
6,3,1,B
7,-3,1,C
8,3,3,B
9,1,-2,C


In [4]:
X = data[['x1', 'x2']]

In [5]:
y = data['y']

In [6]:
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, shuffle=True)

# ========== Model ==========

In [7]:
best_loss = np.inf

alphas = np.logspace(-3, 2, 100)

for alpha in tqdm(alphas):
    clf = MLPClassifier(
        hidden_layer_sizes=(5, 4),
        activation='relu',
        solver='sgd',
        alpha=alpha,
        learning_rate_init=0.01,
        max_iter=4000,
        momentum=0,
    )

    clf.fit(X_train, y_train)    
    y_pred_prob_val = clf.predict_proba(X_val)
    loss = log_loss(y_val, y_pred_prob_val, labels=np.unique(y_train))
    
    if loss < best_loss:
        best_loss = loss
        best_clf = clf

  0%|          | 0/100 [00:00<?, ?it/s]

In [8]:
best_clf.alpha

0.007220809018385464

In [9]:
best_clf.intercepts_

[array([-0.8986319 , -0.8398944 , -0.20193145,  0.29374917, -0.41864623]),
 array([-4.00164447e-07, -6.37350072e-02,  2.08149910e+00,  3.98819230e-01]),
 array([ 1.45924876, -0.71792676, -1.00221092])]

In [10]:
best_clf.coefs_

[array([[-0.18546053, -0.29136837,  1.87503892,  0.24276985,  1.09367641],
        [ 0.62078149, -0.04066712, -0.73503435,  0.42618829,  0.81901472]]),
 array([[-0.02734675,  0.34392533, -0.53122686, -0.51773405],
        [ 0.5346701 , -0.41622939,  0.69466702, -0.25067969],
        [-0.00949117, -0.34389967,  0.28342475,  1.63220317],
        [-0.07748686, -0.47910281, -0.67482686, -0.06930984],
        [-0.5322452 ,  0.01065504, -1.13655979, -0.11728024]]),
 array([[-0.22736085,  0.79443889, -0.17231464],
        [-0.7400076 , -0.04822763, -0.73862905],
        [-0.62851268, -1.15419263,  2.17398804],
        [-1.32957056,  0.56685645, -0.60100815]])]

# ========== Prediction ==========

In [11]:
X = pd.read_csv('incoming_data3.csv')

In [12]:
best_clf.predict(X)

array(['C', 'A', 'B', 'A'], dtype='<U1')