In [1]:
# !pip install torch

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import log_loss

from tqdm.auto import tqdm
import copy

np.random.seed(12345)
torch.manual_seed(123)

<torch._C.Generator at 0x13751d8d0>

# ========== Data ==========

In [3]:
data = pd.read_csv('dataset2.csv')

In [4]:
data

Unnamed: 0,x1,x2,y
0,2,1,B
1,2,-1,B
2,-2,2,A
3,1,2,B
4,-2,3,B
5,2,0,B
6,-1,-1,A
7,-2,1,A
8,0,0,A
9,1,-1,A


In [5]:
X = data[['x1', 'x2']].to_numpy()

In [6]:
y = data['y'].to_numpy()

In [7]:
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.20, shuffle=True)

In [8]:
le = LabelEncoder()
y_le_train = le.fit_transform(y_train)
y_le_val = le.transform(y_train)

# ========== Model ==========

In [9]:
if torch.cuda.is_available():
  device = 'cuda:0'
else:
  device = 'cpu'

In [10]:
X_train = torch.from_numpy(X_train).float().to(device)
y_le_train = torch.from_numpy(y_le_train).float().to(device)

X_val = torch.from_numpy(X_val).float().to(device)
y_le_val = torch.from_numpy(y_le_val).float().to(device)

In [11]:
layers = []

layers.append(nn.Linear(X.shape[1], 5))
layers.append(nn.ReLU())

layers.append(nn.Linear(5, 4))
layers.append(nn.ReLU())

layers.append(nn.Linear(4, 1))

model = nn.Sequential(*layers).to(device)

In [12]:
best_loss = np.inf

alphas = np.logspace(-3, 2, 100)

for alpha in tqdm(alphas):
    optimizer = torch.optim.SGD(model.parameters(), lr=1)
    loss_function = nn.BCEWithLogitsLoss()
    for i in tqdm(range(500)):
        model.train()
        optimizer.zero_grad()

        y_pred_le_train = model(X_train)
        y_pred_le_train = y_pred_le_train.reshape(-1)

        l2_norm = 0
        for layer in model.children():
            if isinstance(layer, nn.Linear):
                for params in layer.parameters():
                    l2_norm += sum(p.pow(2.0).sum() for p in params)

        loss = loss_function(y_pred_le_train, y_le_train) + alpha*l2_norm
        loss.backward()

        optimizer.step()
        
    with torch.no_grad():
        model.eval()
        y_pred_le_val = torch.sigmoid(model(X_val))
        
    if np.isnan(y_pred_le_val.min()):
        continue
        
    loss = log_loss(y_val, y_pred_le_val, labels=np.unique(y))
    
    if loss < best_loss:
        best_loss = loss
        best_model = copy.deepcopy(model)
        best_alpha = alpha

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  loss = -(transformed_labels * np.log(y_pred)).sum(axis=1)
  loss = -(transformed_labels * np.log(y_pred)).sum(axis=1)


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/500 [00:00<?, ?it/s]

In [13]:
best_alpha

0.001

In [14]:
for name, param in best_model.named_parameters():
    print(name, param)
    print()

0.weight Parameter containing:
tensor([[-0.5778, -0.5711],
        [-0.0999, -0.0813],
        [-0.3036,  0.2430],
        [-1.1497, -1.1698],
        [ 0.8827,  0.8523]], requires_grad=True)

0.bias Parameter containing:
tensor([ 0.5913,  0.1194, -0.1613,  1.2366,  0.2471], requires_grad=True)

2.weight Parameter containing:
tensor([[-0.3677, -0.1001,  0.2786, -0.7094,  1.1804],
        [ 0.4273,  0.1034,  0.0241,  0.8338, -0.1881],
        [ 0.6600,  0.0503,  0.0704,  1.4898, -0.3490],
        [ 0.4885,  0.0647,  0.0122,  0.7720, -0.1222]], requires_grad=True)

2.bias Parameter containing:
tensor([0.7902, 0.1250, 0.2384, 0.0653], requires_grad=True)

4.weight Parameter containing:
tensor([[ 1.6462, -0.9332, -1.6642, -0.8857]], requires_grad=True)

4.bias Parameter containing:
tensor([1.2483], requires_grad=True)



# ========== Prediction ==========

In [15]:
X = pd.read_csv('incoming_data2.csv').to_numpy()

In [16]:
X = torch.from_numpy(X).float().to(device)

In [17]:
with torch.no_grad():
    best_model.eval()
    y_pred_le = torch.sigmoid(best_model(X))
    
y_pred_le

tensor([[9.9974e-01],
        [1.7454e-10],
        [9.9030e-01],
        [1.5141e-05]])

In [18]:
y_pred_le = np.round(y_pred_le).int()
y_pred = le.inverse_transform(y_pred_le.reshape(-1))

y_pred

array(['B', 'A', 'B', 'A'], dtype=object)