In [1]:
import numpy as np
from sklearn.manifold import Isomap, TSNE, SpectralEmbedding
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
import pandas as pd
import torch
from torch import nn
from torchvision.transforms import v2
import wandb

In [2]:
def load(f):
    return np.load(f)['arr_0']

x_train = load('kmnist-train-imgs.npz')
x_test = load('kmnist-test-imgs.npz')
y_train = load('kmnist-train-labels.npz')
y_test = load('kmnist-test-labels.npz')

In [3]:
train_aug = v2.Compose([
    v2.RandomRotation(5),
    v2.RandomResizedCrop(size=(28, 28), scale=(0.9, 1))
])



In [4]:
X_train_aug = train_aug(torch.from_numpy(x_train).unsqueeze(1)).squeeze(1).numpy()
x_train_aug = np.vstack([x_train, X_train_aug])
y_train_aug = np.hstack([y_train, y_train])

In [5]:
x_train_aug = x_train_aug.reshape(-1, 784)
x_test = x_test.reshape(-1, 784)

x_train_aug = x_train_aug / 255.0
x_test = x_test / 255.0

Попробуем добавить уменьшение размерности с помощью PCA в KNN

In [126]:
wandb.init(entity='andre7416', project="kmnist", name='PCA_with_KNN')
pca = PCA(n_components=50)
knn = KNeighborsClassifier()
low_dim_x = pca.fit_transform(x_train_aug)
knn.fit(low_dim_x, y_train_aug)

y_pred = knn.predict(pca.transform(x_test))

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")
print(classification_report(y_test, y_pred))

wandb.log({
   "accuracy": accuracy,
   "classification_report": classification_report(y_test, y_pred, output_dict=True)
})
wandb.finish()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mandre7416[0m to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Accuracy: 0.9376
              precision    recall  f1-score   support

           0       0.94      0.95      0.95      1000
           1       0.93      0.94      0.94      1000
           2       0.89      0.91      0.90      1000
           3       0.93      0.98      0.95      1000
           4       0.95      0.90      0.93      1000
           5       0.97      0.92      0.94      1000
           6       0.94      0.96      0.95      1000
           7       0.97      0.94      0.96      1000
           8       0.90      0.95      0.93      1000
           9       0.95      0.93      0.94      1000

    accuracy                           0.94     10000
   macro avg       0.94      0.94      0.94     10000
weighted avg       0.94      0.94      0.94     10000



0,1
accuracy,▁

0,1
accuracy,0.9376


Далее попробуем SVM

In [127]:
wandb.init(entity='andre7416', project="kmnist", name='SVM')
svm = SVC()
svm.fit(x_train_aug, y_train_aug)

y_pred = svm.predict(x_test)

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")
print(classification_report(y_test, y_pred))

wandb.log({
   "accuracy": accuracy,
   "classification_report": classification_report(y_test, y_pred, output_dict=True)
})
wandb.finish()

Accuracy: 0.9214
              precision    recall  f1-score   support

           0       0.94      0.95      0.94      1000
           1       0.92      0.91      0.91      1000
           2       0.87      0.87      0.87      1000
           3       0.92      0.97      0.94      1000
           4       0.90      0.91      0.90      1000
           5       0.98      0.89      0.93      1000
           6       0.88      0.95      0.92      1000
           7       0.96      0.91      0.93      1000
           8       0.91      0.94      0.93      1000
           9       0.95      0.92      0.94      1000

    accuracy                           0.92     10000
   macro avg       0.92      0.92      0.92     10000
weighted avg       0.92      0.92      0.92     10000



0,1
accuracy,▁

0,1
accuracy,0.9214


Затем попробуем классическую MLP

In [6]:
class MLP(nn.Module):
    def __init__(self, hidden_dim=512, num_layers=1, normalize=nn.BatchNorm1d, activation=nn.GELU, dropout=0.2):
        super().__init__()
        self.model = nn.ModuleList()
        self.model.add_module('start_proj', nn.Linear(784, hidden_dim))
        for i in range(num_layers):
            module = nn.Sequential(
                activation(), 
                normalize(hidden_dim),
                nn.Dropout(p=dropout),
                nn.Linear(hidden_dim, hidden_dim)
            )
            self.model.add_module(f'layer_{i}', module)
        out_module = nn.Sequential(
            activation(), 
            normalize(hidden_dim),
            nn.Dropout(p=dropout),
            nn.Linear(hidden_dim, 10)
        )
        self.model.add_module('out_proj', out_module)
    
    def forward(self, x):
        for module in self.model:
            x = module(x)
        return x

In [7]:
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
wandb.init(entity='andre7416', project="kmnist", name='MLP')
mlp = MLP()
mlp = mlp.to(device)
x_train_mlp = torch.from_numpy(x_train_aug).to(device, dtype=torch.float32)
y_train_mlp = torch.from_numpy(y_train_aug).to(device)
optimizer = torch.optim.Adam(mlp.parameters())
criterion = nn.CrossEntropyLoss()
for epoch in range(201):
    mlp.train()
    optimizer.zero_grad()
    X = train_aug(torch.from_numpy(x_train).unsqueeze(1)).squeeze(1).reshape(-1, 784).to(device=device, dtype=torch.float32) / 255
    y = torch.from_numpy(y_train).to(device)
    logits = mlp(X)
    y_pred = logits.argmax(-1).cpu().numpy()
    accuracy = accuracy_score(y_train, y_pred)
    loss = criterion(logits, y)
    loss.backward()
    optimizer.step()
    mlp.eval()
    test_logits = mlp(torch.from_numpy(x_test).to(device, dtype=torch.float32))
    test_loss = criterion(test_logits, torch.from_numpy(y_test).to(device))
    y_pred_test = test_logits.argmax(-1).cpu().numpy()
    accuracy_test = accuracy_score(y_test, y_pred_test)
    wandb.log({
        "train_loss": loss.item(),
        "train_accuracy": accuracy,
        "train_classification_report": classification_report(y_train, y_pred, output_dict=True),
        "loss": test_loss.item(),
        "accuracy": accuracy_test,
        "classification_report": classification_report(y_test, y_pred_test, output_dict=True),
    }, step=epoch)

wandb.finish()


[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mandre7416[0m to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


0,1
accuracy,▁▄▄▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇█████████████████████
loss,█▇▇▆▅▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_accuracy,▁▆▇▇▇▇▇▇▇▇██████████████████████████████
train_loss,█▄▄▄▅▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▂▂▁▁▂▁▁▂▂▁▁▁▁▁▁▁▁▁▁

0,1
accuracy,0.9306
loss,0.24935
train_accuracy,0.9784
train_loss,0.07266


В конце возьмем сверточную нейросеть

In [6]:
class CNN(nn.Module):
    def __init__(self, hidden_dim=256, chan_dim=32, normalize=nn.BatchNorm2d, activation=nn.GELU, dropout=0.2):
        super().__init__()
        self.model = nn.Sequential(
            nn.Conv2d(1, chan_dim, kernel_size=5),
            activation(),
            normalize(chan_dim),
            nn.Conv2d(chan_dim, chan_dim, kernel_size=5),
            nn.MaxPool2d(2),
            activation(),
            normalize(chan_dim),
            nn.Conv2d(chan_dim, 2 * chan_dim, kernel_size=5),
            nn.MaxPool2d(2),
            activation(),
            normalize(2 * chan_dim),
        )
        self.head = nn.Sequential(
            nn.Linear(18 * chan_dim, hidden_dim),
            activation(),
            nn.BatchNorm1d(hidden_dim),
            nn.Dropout(p=dropout),
            nn.Linear(hidden_dim, 10)
        )
    
    def forward(self, x):
        batch, _, _, _ = x.shape
        x = self.model(x)
        x = x.reshape(batch, -1)
        x = self.head(x)
        return x

In [7]:
x_train = load('kmnist-train-imgs.npz')
x_test = load('kmnist-test-imgs.npz')
y_train = load('kmnist-train-labels.npz')
y_test = load('kmnist-test-labels.npz')

In [8]:
from torch.utils.data import Dataset, DataLoader

class MyDataset(Dataset):
    def __init__(self, X, y, transform=None):
        self.X = torch.from_numpy(X).unsqueeze(1).to(dtype=torch.float32)
        self.y = torch.from_numpy(y)
        self.transform = transform

    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, index):
        image = self.X[index]
        label = self.y[index]
        
        if self.transform is not None:
            image = self.transform(image)
            
        return image / 255, label

In [9]:
train_dataset = MyDataset(x_train, y_train, train_aug)
train_loader = DataLoader(train_dataset, batch_size=512, pin_memory=True, num_workers=10)

In [11]:
from tqdm.notebook import tqdm
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
wandb.init(entity='andre7416', project="kmnist", name='CNN')
cnn = CNN()
cnn = cnn.to(device)
optimizer = torch.optim.Adam(cnn.parameters())
criterion = nn.CrossEntropyLoss()
cnn.train()
for epoch in range(51):
    cnn.train()
    train_loss = 0
    y_pred = []
    for X, y in tqdm(train_loader):
        optimizer.zero_grad()
        X = X.to(device)
        y = y.to(device)
        logits = cnn(X)
        y_pred.append(logits.argmax(-1).cpu().numpy())
        loss = criterion(logits, y)
        train_loss += loss.item() * X.shape[0]
        loss.backward()
        optimizer.step()
    cnn.eval()
    test_logits = cnn(torch.from_numpy(x_test).unsqueeze(1).to(device, dtype=torch.float32)) / 255
    test_loss = criterion(test_logits, torch.from_numpy(y_test).to(device))
    y_pred_test = test_logits.argmax(-1).cpu().numpy()
    accuracy_test = accuracy_score(y_test, y_pred_test)
    accuracy = accuracy_score(y_train, np.hstack(y_pred))
    
    wandb.log({
        "train_loss": train_loss / len(train_dataset),
        "train_accuracy": accuracy,
        "train_classification_report": classification_report(y_train, np.hstack(y_pred), output_dict=True),
        "loss": test_loss.item(),
        "accuracy": accuracy_test,
        "classification_report": classification_report(y_test, y_pred_test, output_dict=True),
    }, step=epoch)

wandb.finish()


  0%|          | 0/118 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

0,1
accuracy,▁▃▁▂▃▄▃▅▅▅▆▆▅▅▄▅▆▅▇▆▆▇▇▇▇▇▇▇█▇██▇▇██████
loss,█▇▇▇▆▆▆▅▅▄▃▃▃▄▆▄▃▄▃▃▃▂▂▂▂▂▂▁▁▂▁▁▂▂▁▁▁▁▁▁
train_accuracy,▁▆▇▇▇▇██████████████████████████████████
train_loss,█▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
accuracy,0.9341
loss,0.23238
train_accuracy,0.998
train_loss,0.00562


Будем перебирать гиперпараметры для 2 последних моделей

In [7]:
import wandb

sweep_config = {
    'method': 'random', 
    'metric': {
        'name': 'accuracy',
        'goal': 'maximize'   
    },
    'parameters': {
        'hidden_dim': {
            'values': [128, 256, 512]
        },
        'num_layers': {
            'values': [1, 2, 3]
        },
        'lr': {
            'min': 3e-4,
            'max': 1e-3
        },
        'normalize': {
            'values': ['BatchNorm1d', 'LayerNorm']
        },
        'activation': {
            'values': ['ReLU', 'GELU', 'LeakyReLU']
        }
    }
}

sweep_id = wandb.sweep(sweep_config, project="kmnist")

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Create sweep with ID: i57r7xxt
Sweep URL: https://wandb.ai/andre7416/kmnist/sweeps/i57r7xxt


In [8]:
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

def train(config=None):
    with wandb.init(config=config, project="kmnist") as run:
        config = wandb.config
        
        norm_dict = {
            'BatchNorm1d': nn.BatchNorm1d,
            'LayerNorm': nn.LayerNorm
        }
        act_dict = {
            'ReLU': nn.ReLU,
            'GELU': nn.GELU,
            'LeakyReLU': nn.LeakyReLU
        }
        
        mlp = MLP(
            hidden_dim=config.hidden_dim,
            num_layers=config.num_layers,
            normalize=norm_dict[config.normalize],
            activation=act_dict[config.activation]
        )
        mlp = mlp.to(device)
        optimizer = torch.optim.Adam(mlp.parameters(), lr=config.lr)
        criterion = nn.CrossEntropyLoss()
    
        for epoch in range(201):
            mlp.train()
            optimizer.zero_grad()
            X = train_aug(torch.from_numpy(x_train).unsqueeze(1)).squeeze(1).reshape(-1, 784).to(device=device, dtype=torch.float32) / 255
            y = torch.from_numpy(y_train).to(device)
            logits = mlp(X)
            y_pred = logits.argmax(-1).cpu().numpy()
            accuracy = accuracy_score(y_train, y_pred)
            loss = criterion(logits, y)
            loss.backward()
            optimizer.step()
            mlp.eval()
            test_logits = mlp(torch.from_numpy(x_test).to(device, dtype=torch.float32))
            test_loss = criterion(test_logits, torch.from_numpy(y_test).to(device))
            y_pred_test = test_logits.argmax(-1).cpu().numpy()
            accuracy_test = accuracy_score(y_test, y_pred_test)
            wandb.log({
                "train_loss": loss.item(),
                "train_accuracy": accuracy,
                "train_classification_report": classification_report(y_train, y_pred, output_dict=True),
                "loss": test_loss.item(),
                "accuracy": accuracy_test,
                "classification_report": classification_report(y_test, y_pred_test, output_dict=True),
            }, step=epoch)

In [9]:
wandb.agent(sweep_id, train, count=10)

[34m[1mwandb[0m: Agent Starting Run: xhbyqw7z with config:
[34m[1mwandb[0m: 	activation: GELU
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	lr: 0.0004795473906940611
[34m[1mwandb[0m: 	normalize: BatchNorm1d
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: Currently logged in as: [33mandre7416[0m to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize

0,1
accuracy,▁▁▂▃▄▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇█████████████████
loss,█▇▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_accuracy,▁▄▅▅▅▆▇▇▇▇▇▇▇▇▇█▇▇██▇██▇▇██▇████████████
train_loss,█▆▅▅▄▃▃▃▂▂▂▂▂▂▂▂▃▂▂▂▂▂▂▂▂▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁

0,1
accuracy,0.889
loss,0.37156
train_accuracy,0.9363
train_loss,0.20957


[34m[1mwandb[0m: Agent Starting Run: 39ueu88v with config:
[34m[1mwandb[0m: 	activation: GELU
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	lr: 0.0006016926506848145
[34m[1mwandb[0m: 	normalize: BatchNorm1d
[34m[1mwandb[0m: 	num_layers: 1


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


0,1
accuracy,▁▂▂▃▃▄▅▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇████████████████
loss,██▇▇▆▃▃▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_accuracy,▁▃▃▄▃▅▅▅▆▆▆▇▆▇▇▇▆▇▇▇▇▆▇▇▇▇▇██▇▇█▇██▇████
train_loss,█▆▅▄▄▄▃▃▂▃▂▂▃▃▂▂▂▂▂▂▁▂▂▂▂▂▁▃▂▁▁▁▁▁▂▁▁▁▁▁

0,1
accuracy,0.8945
loss,0.34687
train_accuracy,0.95668
train_loss,0.14731


[34m[1mwandb[0m: Agent Starting Run: vzmj5okv with config:
[34m[1mwandb[0m: 	activation: GELU
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	lr: 0.0005749151334635517
[34m[1mwandb[0m: 	normalize: LayerNorm
[34m[1mwandb[0m: 	num_layers: 3


0,1
accuracy,▁▃▃▄▄▄▅▆▆▆▆▆▇▇▆▇▇▇▇▇▇▇▇▇▇███████████████
loss,█▅▄▄▄▃▃▃▃▂▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_accuracy,▁▄▄▅▅▆▆▅▇▆▇▆▇▇▇▇▇▇▇▇██▇▇███████████▇████
train_loss,█▆▅▅▄▄▃▃▂▃▃▂▂▂▂▂▁▁▂▁▂▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
accuracy,0.9066
loss,0.33996
train_accuracy,0.95175
train_loss,0.15672


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: v6zzovht with config:
[34m[1mwandb[0m: 	activation: LeakyReLU
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	lr: 0.0006668872202251885
[34m[1mwandb[0m: 	normalize: BatchNorm1d
[34m[1mwandb[0m: 	num_layers: 3


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize

0,1
accuracy,▁▂▄▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇███████████████████
loss,█████▇▆▅▅▄▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_accuracy,▁▂▆▆▆▆▇▇▇▇▇▇▇███████▇█████████▇█▇███████
train_loss,███▇▆▅▅▄▄▄▃▂▃▂▂▂▃▂▃▂▂▂▂▁▂▂▂▂▁▂▁▂▁▂▁▂▁▁▁▂

0,1
accuracy,0.8776
loss,0.40642
train_accuracy,0.91423
train_loss,0.27628


[34m[1mwandb[0m: Agent Starting Run: 2u9a1zrb with config:
[34m[1mwandb[0m: 	activation: LeakyReLU
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	lr: 0.0008143556726045694
[34m[1mwandb[0m: 	normalize: BatchNorm1d
[34m[1mwandb[0m: 	num_layers: 3


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize

0,1
accuracy,▁▃▄▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇███████████████
loss,█████▄▄▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_accuracy,▁▃▄▇▇▇▇▇▇▇▇▇██▇██▇▇█████████▇███████████
train_loss,█▆█▆▅▃▃▃▃▃▂▂▃▂▃▂▃▃▂▂▂▂▂▂▂▂▃▂▂▂▂▁▂▁▁▁▂▁▂▁

0,1
accuracy,0.8938
loss,0.35997
train_accuracy,0.94247
train_loss,0.18835


[34m[1mwandb[0m: Agent Starting Run: q8sk4axr with config:
[34m[1mwandb[0m: 	activation: GELU
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	lr: 0.0009871253638310006
[34m[1mwandb[0m: 	normalize: LayerNorm
[34m[1mwandb[0m: 	num_layers: 1


0,1
accuracy,▁▃▃▄▄▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇███████████████
loss,█▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_accuracy,▁▂▂▄▄▄▅▆▆▆▅▆▇▆▆▆▆▆▆▇▇▆▇▇█▇▇█▇██▇▇███████
train_loss,█▆▆▅▄▄▄▃▄▃▄▃▃▃▂▂▂▂▂▂▂▁▃▁▁▁▁▁▁▁▁▂▁▁▂▁▁▁▁▁

0,1
accuracy,0.9006
loss,0.33173
train_accuracy,0.95665
train_loss,0.1466


[34m[1mwandb[0m: Agent Starting Run: 2lksgff5 with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	lr: 0.0007788027158068347
[34m[1mwandb[0m: 	normalize: BatchNorm1d
[34m[1mwandb[0m: 	num_layers: 1


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize

0,1
accuracy,▁▃▄▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇█████████████████████
loss,█▇▅▅▄▃▃▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_accuracy,▁▂▄▄▄▄▅▅▆▇▆▆▇▇▇▇▇▇▇▇▇▇█▇▇▇▇█▇▇▇▇████████
train_loss,█▇▇▅▅▅▃▃▄▃▃▃▄▄▃▂▂▂▂▂▂▂▁▂▁▁▂▂▂▁▂▁▁▁▁▁▁▁▁▁

0,1
accuracy,0.8671
loss,0.43564
train_accuracy,0.9231
train_loss,0.24737


[34m[1mwandb[0m: Agent Starting Run: 32pimypl with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	lr: 0.0008786721366967098
[34m[1mwandb[0m: 	normalize: LayerNorm
[34m[1mwandb[0m: 	num_layers: 3


0,1
accuracy,▁▂▂▃▄▅▅▅▅▆▆▆▆▆▆▇▇▇▇█████████████████████
loss,█▆▅▅▄▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_accuracy,▁▃▆▆▆▇▇▇▇▇▇▇▇████▇▇█████████████████████
train_loss,█▇▆▅▅▄▃▄▃▄▃▃▂▂▂▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▂▂▁▁▂▁▂▁▂▁

0,1
accuracy,0.9179
loss,0.3068
train_accuracy,0.95098
train_loss,0.15816


[34m[1mwandb[0m: Agent Starting Run: rljcffbx with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	lr: 0.0006440181083852854
[34m[1mwandb[0m: 	normalize: LayerNorm
[34m[1mwandb[0m: 	num_layers: 1


0,1
accuracy,▁▃▄▄▆▆▆▆▆▆▆▆▇▆▆▇▇▇▇▇▇▇▇▇████████████████
loss,█▆▆▆▅▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_accuracy,▁▂▃▄▂▅▅▆▆▆▆▆▆▆▇▅▇▇▇▇▇▇▇▇█▇██▇█▇████████▇
train_loss,█▆▃▄▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
accuracy,0.8985
loss,0.34166
train_accuracy,0.95537
train_loss,0.14782


[34m[1mwandb[0m: Agent Starting Run: kf9x73k4 with config:
[34m[1mwandb[0m: 	activation: GELU
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	lr: 0.0009567012809216408
[34m[1mwandb[0m: 	normalize: BatchNorm1d
[34m[1mwandb[0m: 	num_layers: 1


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


0,1
accuracy,▁▁▂▂▄▄▅▅▆▆▇▇▇▇▇▇▇▇▇▇▇▇██▇███████████████
loss,█▅▅▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_accuracy,▁▂▂▂▃▄▅▅▅▅▆▆▆▆▅▆▆▅▆▇▇▇▅▇▇▇▇▇▇▇█▇██▇▇▇███
train_loss,█▇▆▆▅▃▃▄▃▃▃▂▃▂▂▂▂▂▂▂▁▃▂▂▂▁▁▁▂▁▂▁▂▁▂▁▁▁▁▁

0,1
accuracy,0.8837
loss,0.37918
train_accuracy,0.92873
train_loss,0.22934


In [13]:
import wandb

sweep_config = {
    'method': 'bayes', 
    'metric': {
        'name': 'accuracy',
        'goal': 'maximize'   
    },
    'parameters': {
        'hidden_dim': {
            'values': [64, 128, 256, 512]
        },
        'chan_dim': {
            'values': [16, 32, 64]
        },
        'lr': {
            'min': 3e-4,
            'max': 1e-3
        },
        'activation': {
            'values': ['ReLU', 'GELU', 'LeakyReLU']
        }
    }
}

sweep_id = wandb.sweep(sweep_config, project="kmnist")

Create sweep with ID: 9oc2aqea
Sweep URL: https://wandb.ai/andre7416/kmnist/sweeps/9oc2aqea


In [14]:
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

def train(config=None):
    with wandb.init(config=config, project="kmnist") as run:
        config = wandb.config
        
        norm_dict = {
            'BatchNorm2d': nn.BatchNorm2d,
            'LayerNorm': nn.LayerNorm
        }
        act_dict = {
            'ReLU': nn.ReLU,
            'GELU': nn.GELU,
            'LeakyReLU': nn.LeakyReLU
        }
        
        cnn = CNN(
            hidden_dim=config.hidden_dim,
            chan_dim=config.chan_dim,
            activation=act_dict[config.activation]
        )
        cnn = cnn.to(device)
        optimizer = torch.optim.Adam(cnn.parameters(), lr=config.lr)
        criterion = nn.CrossEntropyLoss()
    
        for epoch in range(51):
            cnn.train()
            train_loss = 0
            y_pred = []
            for X, y in train_loader:
                optimizer.zero_grad()
                X = X.to(device)
                y = y.to(device)
                logits = cnn(X)
                y_pred.append(logits.argmax(-1).cpu().numpy())
                loss = criterion(logits, y)
                train_loss += loss.item() * X.shape[0]
                loss.backward()
                optimizer.step()
            cnn.eval()
            test_logits = cnn(torch.from_numpy(x_test).unsqueeze(1).to(device, dtype=torch.float32)) / 255
            test_loss = criterion(test_logits, torch.from_numpy(y_test).to(device))
            y_pred_test = test_logits.argmax(-1).cpu().numpy()
            accuracy_test = accuracy_score(y_test, y_pred_test)
            accuracy = accuracy_score(y_train, np.hstack(y_pred))
            
            wandb.log({
                "train_loss": train_loss / len(train_dataset),
                "train_accuracy": accuracy,
                "train_classification_report": classification_report(y_train, np.hstack(y_pred), output_dict=True),
                "loss": test_loss.item(),
                "accuracy": accuracy_test,
                "classification_report": classification_report(y_test, y_pred_test, output_dict=True),
            }, step=epoch)

In [15]:
wandb.agent(sweep_id, train, count=10)

[34m[1mwandb[0m: Agent Starting Run: 9vjumrqd with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	chan_dim: 16
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	lr: 0.0004539636301262055


0,1
accuracy,▁▂▄▃▅▅▆▅▆▆▆▆▆▆▇▇▇▆▆▆▆▇▇▇█▇▆▇▇▇▇▇███▇▇██▇
loss,█▇▆▆▅▅▅▄▃▄▃▄▄▄▃▃▃▂▃▃▃▂▂▂▂▁▂▃▂▁▂▂▁▁▁▂▂▁▁▁
train_accuracy,▁▆▇▇▇▇▇█████████████████████████████████
train_loss,█▄▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
accuracy,0.7725
loss,0.69651
train_accuracy,0.99173
train_loss,0.02553


[34m[1mwandb[0m: Agent Starting Run: gpqr8hcp with config:
[34m[1mwandb[0m: 	activation: LeakyReLU
[34m[1mwandb[0m: 	chan_dim: 64
[34m[1mwandb[0m: 	hidden_dim: 512
[34m[1mwandb[0m: 	lr: 0.0006723540461438908


0,1
accuracy,▄▃▁▁▃▄▄▅▆▆▆▆▆▆▆▇▇▇▇▆▇▇▇██▇▇██▇█▇▇██████▇
loss,█▄▅▆▅▄▄▄▃▂▃▃▂▃▂▁▂▂▂▂▂▂▂▂▁▁▁▁▂▁▂▁▁▁▁▁▁▁▁▁
train_accuracy,▁▆▇▇████████████████████████████████████
train_loss,█▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
accuracy,0.9132
loss,0.3315
train_accuracy,0.9987
train_loss,0.00392


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 152v5jmy with config:
[34m[1mwandb[0m: 	activation: LeakyReLU
[34m[1mwandb[0m: 	chan_dim: 64
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	lr: 0.0008797255661611143


0,1
accuracy,▁▃▅▆▆▆▅▆▇▅▇▆▆▇▇▇█▆▇▇▇▇▇▇▇▆▇▇▇▇▇▇█▇▇█████
loss,█▆▅▄▄▄▄▃▄▃▃▃▃▂▂▂▃▂▂▂▃▂▃▂▂▂▃▂▂▂▂▂▂▂▂▂▁▁▁▁
train_accuracy,▁▆▇▇▇███████████████████████████████████
train_loss,█▆▄▄▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
accuracy,0.904
loss,0.34781
train_accuracy,0.99837
train_loss,0.00543


[34m[1mwandb[0m: Agent Starting Run: wwdst1z7 with config:
[34m[1mwandb[0m: 	activation: LeakyReLU
[34m[1mwandb[0m: 	chan_dim: 64
[34m[1mwandb[0m: 	hidden_dim: 512
[34m[1mwandb[0m: 	lr: 0.0009628427292292732


0,1
accuracy,▁▃▆▅▆▆▆▆▇▆▇▇▇▇▆▇▆█▇█▇▆▇█▇▆▆▇▇█▇▇█▇███▇▇█
loss,█▅▃▄▄▃▃▃▂▃▂▂▁▂▂▂▂▃▁▁▂▃▂▂▁▂▃▃▂▁▁▁▂▁▂▂▁▂▂▁
train_accuracy,▁▃▅▆▆▇▇▇▇▇██▇███████████████████████████
train_loss,█▆▄▄▃▃▃▂▂▂▂▂▂▂▁▁▂▁▂▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁

0,1
accuracy,0.926
loss,0.25822
train_accuracy,0.99827
train_loss,0.00556


[34m[1mwandb[0m: Agent Starting Run: xibcbqk0 with config:
[34m[1mwandb[0m: 	activation: GELU
[34m[1mwandb[0m: 	chan_dim: 64
[34m[1mwandb[0m: 	hidden_dim: 512
[34m[1mwandb[0m: 	lr: 0.0009921449006443923


0,1
accuracy,▁▁▄▂▁▄▅▅▄▆▆▅▇▅▆▆▇▆▆▇▇▆▇▇███████▇█▇█▇▇▇██
loss,▇▇▅▆█▅▅▄▆▄▃▄▃▄▃▃▃▂▂▃▁▂▃▂▂▂▁▁▁▁▁▁▁▂▂▂▂▂▁▁
train_accuracy,▁▆▇▇▇███████████████████████████████████
train_loss,█▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
accuracy,0.9288
loss,0.26789
train_accuracy,0.99857
train_loss,0.00484


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 22ocjt80 with config:
[34m[1mwandb[0m: 	activation: LeakyReLU
[34m[1mwandb[0m: 	chan_dim: 32
[34m[1mwandb[0m: 	hidden_dim: 512
[34m[1mwandb[0m: 	lr: 0.0009849299510960284


0,1
accuracy,▁▃▄▄▅▅▅▃▄▃▁▃▄▅▃▅▆▃▃▅▅▄▇▇▇▇▆▇▇█▇▇█▇▇▇▇▄▆▇
loss,▆▅▄▅▃▃▇▅▆▇▆▆▄▇▅▄█▆▄▄█▂▂▅▂▁▂▂▁▂▂▁▂▂▂▂▄▆▃▂
train_accuracy,▁▆▇▇▇▇██████████████████████████████████
train_loss,█▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
accuracy,0.8298
loss,0.56524
train_accuracy,0.99878
train_loss,0.00393


[34m[1mwandb[0m: Agent Starting Run: bkx0np9c with config:
[34m[1mwandb[0m: 	activation: GELU
[34m[1mwandb[0m: 	chan_dim: 64
[34m[1mwandb[0m: 	hidden_dim: 512
[34m[1mwandb[0m: 	lr: 0.0009191885628344476


0,1
accuracy,▁▃▃▄▅▅▆▆▇▇▆▇▇▇▆▆▆▇▆▆▇▇▆▇▇█▇▇▇███▇███▇███
loss,█▇▇▇▄▃▄▃▃▂▃▂▂▂▃▃▃▂▃▃▂▃▂▂▂▁▂▂▁▁▂▁▂▁▁▂▂▁▁▁
train_accuracy,▁▆▇▇▇███████████████████████████████████
train_loss,█▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
accuracy,0.932
loss,0.27048
train_accuracy,0.99865
train_loss,0.0044


[34m[1mwandb[0m: Agent Starting Run: y1f0vtoo with config:
[34m[1mwandb[0m: 	activation: ReLU
[34m[1mwandb[0m: 	chan_dim: 64
[34m[1mwandb[0m: 	hidden_dim: 512
[34m[1mwandb[0m: 	lr: 0.0008945041304814919


0,1
accuracy,▁▃▄▆▅▆▆▄▆▅▆▇▆▇▆▇▆▇▇▇▆▇▇▇▇▇▇▇▇▇██████▇███
loss,█▅▅▃▄▃▃▆▃▃▃▃▃▂▃▂▂▃▂▂▂▂▂▂▂▂▂▂▂▁▁▂▂▁▁▁▁▂▁▁
train_accuracy,▁▆▇▇▇███████████████████████████████████
train_loss,█▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
accuracy,0.8685
loss,0.4385
train_accuracy,0.99917
train_loss,0.00283


[34m[1mwandb[0m: Agent Starting Run: z7dcsou3 with config:
[34m[1mwandb[0m: 	activation: LeakyReLU
[34m[1mwandb[0m: 	chan_dim: 64
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	lr: 0.0009457000558576332


0,1
accuracy,▁▃▄▅▆▄▃▅▆▅▆▇▆▆▇▆█▇▆▇█▇▇█▇▇▇▇▇▇███▆▇▇██▇█
loss,█▇▆▆▅▅▅▄▄▄▄▄▃▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▂▂▁▁▁▂▁
train_accuracy,▁▆▇▇▇███████████████████████████████████
train_loss,█▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
accuracy,0.904
loss,0.51205
train_accuracy,0.99907
train_loss,0.00293


[34m[1mwandb[0m: Agent Starting Run: 2uno9x3f with config:
[34m[1mwandb[0m: 	activation: GELU
[34m[1mwandb[0m: 	chan_dim: 64
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	lr: 0.0009989429440366035


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


0,1
accuracy,▁▃▄▃▅▅▆▅▆▇▆▆▆▆▅▇▆▇▆▇▇▆▇▇▇▇█▇███▇███▇████
loss,█▇▇▆▆▅▅▄▄▄▃▃▄▄▃▄▃▃▃▂▃▄▃▂▂▂▂▁▁▁▂▂▁▁▁▂▁▁▁▁
train_accuracy,▁▆▇▇▇███████████████████████████████████
train_loss,█▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
accuracy,0.9116
loss,0.32318
train_accuracy,0.99893
train_loss,0.00373
