In [1]:
import os
import torch
import numpy as np

from torch.utils.data import TensorDataset, DataLoader, Subset
import torchvision
from os import path

from src.utils import *
from src.models import *
from src.methods import *

# Dataset directory: the `project_data/` folder under the current working directory.
# The trailing slash is intentional; it is part of the literal joined here.
data_path = path.join(os.getcwd(), 'project_data/')

# Set to False to force CPU execution even when a CUDA device is present.
USE_GPU = True
# Use CUDA only when it is both requested and actually available; otherwise fall back to CPU.
device = torch.device('cuda' if (USE_GPU and torch.cuda.is_available()) else 'cpu')
print(device)

cuda


In [2]:
# Read the six .npy arrays stored under `data_path`: train/valid files first, then test files.
# Both label arrays are shifted down by 769 — presumably the raw class codes start at 769 and
# this maps them to 0-based class indices; TODO confirm against the dataset description.
X_train_valid = np.load(path.join(data_path, "X_train_valid.npy"))
y_train_valid = np.load(path.join(data_path, "y_train_valid.npy")) - 769
person_train_valid = np.load(path.join(data_path, "person_train_valid.npy"))

X_test = np.load(path.join(data_path, "X_test.npy"))
y_test = np.load(path.join(data_path, "y_test.npy")) - 769
person_test = np.load(path.join(data_path, "person_test.npy"))

In [3]:
# Sanity check: print the shape of every loaded array, one line per array,
# in the order data -> targets -> person ids.
for _template, _array in (
    ('Training/Valid data shape: {}', X_train_valid),
    ('Test data shape: {}', X_test),
    ('Training/Valid target shape: {}', y_train_valid),
    ('Test target shape: {}', y_test),
    ('Person train/valid shape: {}', person_train_valid),
    ('Person test shape: {}', person_test),
):
    print(_template.format(_array.shape))

Training/Valid data shape: (2115, 22, 1000)
Test data shape: (443, 22, 1000)
Training/Valid target shape: (2115,)
Test target shape: (443,)
Person train/valid shape: (2115, 1)
Person test shape: (443, 1)


In [4]:
# Insert a singleton axis at position 1, turning (N, 22, 1000) into (N, 1, 22, 1000).
# The `ndim` guard keeps the cell idempotent: without it every re-run would stack another
# axis onto the same variable and silently change the input rank seen by the models.
if X_train_valid.ndim == 3:
    X_train_valid = X_train_valid[:, np.newaxis, :, :]
if X_test.ndim == 3:
    X_test = X_test[:, np.newaxis, :, :]

In [5]:
# Convert the arrays to tensors (float inputs, integer class targets).
# `torch.as_tensor` accepts ndarrays as well as tensors, so re-running this cell after the
# names already hold tensors no longer raises the way `torch.from_numpy` would.
X_train_valid = torch.as_tensor(X_train_valid).float()
y_train_valid = torch.as_tensor(y_train_valid).long()
X_test = torch.as_tensor(X_test).float()
y_test = torch.as_tensor(y_test).long()

train_valid_dataset = TensorDataset(X_train_valid, y_train_valid)
test_dataset = TensorDataset(X_test, y_test)

# 80/20 train/validation split. A seeded generator makes the split reproducible, so every
# model in this notebook (and every re-run) sees the same training and validation samples.
SPLIT_SEED = 42
num_train = int(0.8 * len(train_valid_dataset))
num_valid = len(train_valid_dataset) - num_train
train_dataset, valid_dataset = torch.utils.data.random_split(
    train_valid_dataset,
    [num_train, num_valid],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)
# Kept for backward compatibility with code that inspects the split indices.
train_indices, valid_indices = train_dataset.indices, valid_dataset.indices

# Wrap each split with the project's GaussianNoisyDataset (mean 0, std 1).
# NOTE(review): the validation and test sets are wrapped too. If the wrapper draws fresh
# noise on each access (presumably — confirm in src), the reported validation/test
# accuracies are measured on perturbed inputs and vary between runs.
train_dataset_noisy = GaussianNoisyDataset(train_dataset, mean=0., std=1.)
valid_dataset_noisy = GaussianNoisyDataset(valid_dataset, mean=0., std=1.)
test_dataset_noisy = GaussianNoisyDataset(test_dataset, mean=0., std=1.)

# Only the training loader shuffles; evaluation loaders keep a fixed order.
batch_size = 32
train_loader = DataLoader(train_dataset_noisy, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_dataset_noisy, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset_noisy, batch_size=batch_size, shuffle=False)

print('Training/Valid data shape:', X_train_valid.shape)
print('Test data shape:', X_test.shape)

Training/Valid data shape: torch.Size([2115, 1, 22, 1000])
Test data shape: torch.Size([443, 1, 22, 1000])


In [6]:
# Baseline ConvNet: cross-entropy loss, Adam with weight decay, at most 50 epochs,
# followed by one evaluation on the test loader.
criterion = nn.CrossEntropyLoss()

# The model is placed on the target device before the optimizer captures its parameters.
model = ConvNet()
model = model.to(device)
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=0.001,
    betas=(0.9, 0.99),
    eps=1e-6,
    weight_decay=0.0005,
)

train(
    model, train_loader, valid_loader, criterion, optimizer,
    device=device, epochs=50,
)

test_accuracy = evaluate(model, test_loader, device=device)
print(f'Test Accuracy: {test_accuracy:.2f}%')

 20%|██        | 10/50 [00:21<01:26,  2.15s/it]

Epoch [10/50], Loss: 0.4634, Validation Accuracy: 52.25%


 40%|████      | 20/50 [00:44<01:09,  2.32s/it]

Epoch [20/50], Loss: 0.2118, Validation Accuracy: 51.06%


 48%|████▊     | 24/50 [00:55<00:59,  2.30s/it]

Early stopping!
Test Accuracy: 50.56%





In [6]:
# LSTM baseline: cross-entropy loss and Adam with weight decay.
# The model is moved to the target device BEFORE the optimizer is constructed, matching the
# ConvNet cell and the PyTorch recommendation that optimized parameters already live on
# their final device when the optimizer is created.
lstm = LSTMModel().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(lstm.parameters(), lr=0.001, betas=(0.9, 0.99), eps=1e-6, weight_decay=0.0005)

# No `epochs` argument: the default of `train` applies (the recorded run shows 20 epochs).
train(lstm, train_loader, valid_loader, criterion, optimizer, device=device)

test_accuracy = evaluate(lstm, test_loader, device=device)
print(f'Test Accuracy: {test_accuracy:.2f}%')

 50%|█████     | 10/20 [02:30<02:32, 15.29s/it]

Epoch [10/20], Loss: 1.2707, Validation Accuracy: 27.90%


100%|██████████| 20/20 [04:42<00:00, 14.12s/it]

Epoch [20/20], Loss: 1.0690, Validation Accuracy: 28.84%





Test Accuracy: 26.19%


In [14]:
# CNN+LSTM hybrid: cross-entropy loss and Adam (no weight decay in this configuration).
# The model is moved to the target device BEFORE the optimizer is constructed, matching the
# ConvNet cell and the PyTorch recommendation that optimized parameters already live on
# their final device when the optimizer is created.
cnn_lstm = CNN_LSTM().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(cnn_lstm.parameters(), lr=0.001, betas=(0.9, 0.99), eps=1e-6)

train(cnn_lstm, train_loader, valid_loader, criterion, optimizer, device=device, epochs=50)

test_accuracy = evaluate(cnn_lstm, test_loader, device=device)
print(f'Test Accuracy: {test_accuracy:.2f}%')

 20%|██        | 10/50 [00:06<00:27,  1.45it/s]

Epoch [10/50], Loss: 1.2336, Validation Accuracy: 39.01%


 40%|████      | 20/50 [00:16<00:33,  1.10s/it]

Epoch [20/50], Loss: 1.0758, Validation Accuracy: 47.75%


 60%|██████    | 30/50 [00:24<00:14,  1.37it/s]

Epoch [30/50], Loss: 0.9771, Validation Accuracy: 52.72%


 80%|████████  | 40/50 [00:33<00:07,  1.40it/s]

Epoch [40/50], Loss: 0.8258, Validation Accuracy: 52.72%


100%|██████████| 50/50 [00:41<00:00,  1.20it/s]

Epoch [50/50], Loss: 0.7014, Validation Accuracy: 52.72%
Test Accuracy: 53.95%





In [15]:
# UltimateConvNet with hand-picked hyperparameters: Adam with weight decay, up to 200 epochs
# (the recorded run stopped early), then one evaluation on the test loader.
# The model is moved to the target device BEFORE the optimizer is constructed, matching the
# ConvNet cell and the PyTorch recommendation that optimized parameters already live on
# their final device when the optimizer is created.
ult_cnn = UltimateConvNet().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(ult_cnn.parameters(), lr=0.001, betas=(0.9, 0.99), eps=1e-6, weight_decay=0.0005)

train(ult_cnn, train_loader, valid_loader, criterion, optimizer, device=device, epochs=200)

test_accuracy = evaluate(ult_cnn, test_loader, device=device)
print(f'Test Accuracy: {test_accuracy:.2f}%')

  5%|▌         | 10/200 [00:20<06:22,  2.01s/it]

Epoch [10/200], Loss: 1.0880, Validation Accuracy: 56.97%


 10%|█         | 20/200 [00:40<06:02,  2.02s/it]

Epoch [20/200], Loss: 0.9396, Validation Accuracy: 64.07%


 15%|█▌        | 30/200 [01:01<05:47,  2.04s/it]

Epoch [30/200], Loss: 0.8465, Validation Accuracy: 67.14%


 20%|██        | 40/200 [01:21<05:32,  2.08s/it]

Epoch [40/200], Loss: 0.8096, Validation Accuracy: 69.98%


 25%|██▌       | 50/200 [01:41<05:03,  2.02s/it]

Epoch [50/200], Loss: 0.7547, Validation Accuracy: 69.98%


 30%|███       | 60/200 [02:03<05:06,  2.19s/it]

Epoch [60/200], Loss: 0.7272, Validation Accuracy: 73.52%


 35%|███▌      | 70/200 [02:24<04:40,  2.16s/it]

Epoch [70/200], Loss: 0.7055, Validation Accuracy: 70.69%


 40%|████      | 80/200 [02:45<04:04,  2.04s/it]

Epoch [80/200], Loss: 0.6401, Validation Accuracy: 74.47%


 45%|████▌     | 90/200 [03:05<03:44,  2.04s/it]

Epoch [90/200], Loss: 0.6358, Validation Accuracy: 72.58%


 50%|█████     | 100/200 [03:25<03:24,  2.04s/it]

Epoch [100/200], Loss: 0.6173, Validation Accuracy: 69.27%


 55%|█████▌    | 110/200 [03:45<03:03,  2.04s/it]

Epoch [110/200], Loss: 0.5970, Validation Accuracy: 71.63%


 60%|██████    | 120/200 [04:06<02:43,  2.05s/it]

Epoch [120/200], Loss: 0.6042, Validation Accuracy: 71.87%


 65%|██████▌   | 130/200 [04:26<02:22,  2.04s/it]

Epoch [130/200], Loss: 0.5575, Validation Accuracy: 73.52%


 70%|███████   | 140/200 [04:46<02:02,  2.04s/it]

Epoch [140/200], Loss: 0.5397, Validation Accuracy: 73.29%


 72%|███████▎  | 145/200 [04:58<01:53,  2.06s/it]

Early stopping!
Test Accuracy: 71.56%





In [11]:
# Hyperparameter search for UltimateConvNet using Ray Tune with the ASHA scheduler.
import ray
from ray import tune
from ray import train as raytrain
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler
from src.methods_ray import train_ray_Ultimateconfig

# Search space: learning rate and weight decay, both sampled log-uniformly.
config = {
    'lr': tune.loguniform(1e-4, 1e-2),
    'weight_decay': tune.loguniform(1e-4, 1e-3)
}

# Restart the local Ray instance so the cell can be re-run in the same kernel.
ray.shutdown()
ray.init()
# Both the scheduler and the reporter work on the `loss` metric reported by the trainable.
scheduler = ASHAScheduler(metric='loss', mode='min')
reporter = CLIReporter(metric_columns=['loss'])

# Execute the hyperparameter search.
# One GPU per trial is requested only when CUDA is available; a hard-coded {'gpu': 1} would
# leave every trial unschedulable on a CPU-only machine.
# NOTE(review): confirm that train_ray_Ultimateconfig itself can run on CPU.
analysis = tune.run(
    train_ray_Ultimateconfig,
    resources_per_trial={'gpu': 1} if torch.cuda.is_available() else {'cpu': 1},
    config=config,
    num_samples=100,
    scheduler=scheduler,
    progress_reporter=reporter
)

# Get the best hyperparameters (lowest reported loss).
best_config = analysis.get_best_config(metric='loss', mode='min')
print('Best hyperparameters found are: ', best_config)

# `best_config` is consumed by the retraining cell that follows.


2024-03-08 10:31:40,056	INFO worker.py:1724 -- Started a local Ray instance.
2024-03-08 10:31:41,542	INFO tune.py:583 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949


== Status ==
Current time: 2024-03-08 10:31:41 (running for 00:00:00.35)
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 64.000: None | Iter 16.000: None | Iter 4.000: None | Iter 1.000: None
Logical resource usage: 0/40 CPUs, 0/8 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /home/banyh2000/ray_results/train_ray_Ultimateconfig_2024-03-08_10-31-41
Number of trials: 44/100 (44 PENDING)
+--------------------------------------+----------+-------+-------------+----------------+
| Trial name                           | status   | loc   |          lr |   weight_decay |
|--------------------------------------+----------+-------+-------------+----------------|
| train_ray_Ultimateconfig_1bb25_00000 | PENDING  |       | 0.000257568 |    0.000835305 |
| train_ray_Ultimateconfig_1bb25_00001 | PENDING  |       | 0.00295947  |    0.000181362 |
| train_ray_Ultimateconfig_1bb25_00002 | PENDING  |       | 0.000109542 |    0.000824365 |
| train_ray_Ultimateconfig_1bb25_00003 | PENDING  |       | 0

Trial name,loss
train_ray_Ultimateconfig_1bb25_00000,0.780561
train_ray_Ultimateconfig_1bb25_00001,0.775812
train_ray_Ultimateconfig_1bb25_00002,1.17109
train_ray_Ultimateconfig_1bb25_00003,1.34017
train_ray_Ultimateconfig_1bb25_00004,1.33698
train_ray_Ultimateconfig_1bb25_00005,1.3381
train_ray_Ultimateconfig_1bb25_00006,1.24223
train_ray_Ultimateconfig_1bb25_00007,1.37293
train_ray_Ultimateconfig_1bb25_00008,0.757649
train_ray_Ultimateconfig_1bb25_00009,1.35415


== Status ==
Current time: 2024-03-08 10:31:52 (running for 00:00:10.51)
Using AsyncHyperBand: num_stopped=4
Bracket: Iter 64.000: None | Iter 16.000: None | Iter 4.000: None | Iter 1.000: -1.3344119409579194
Logical resource usage: 0/40 CPUs, 8.0/8 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /home/banyh2000/ray_results/train_ray_Ultimateconfig_2024-03-08_10-31-41
Number of trials: 52/100 (44 PENDING, 4 RUNNING, 4 TERMINATED)
+--------------------------------------+------------+----------------------+-------------+----------------+---------+
| Trial name                           | status     | loc                  |          lr |   weight_decay |    loss |
|--------------------------------------+------------+----------------------+-------------+----------------+---------|
| train_ray_Ultimateconfig_1bb25_00000 | RUNNING    | 131.179.88.212:20088 | 0.000257568 |    0.000835305 | 1.27929 |
| train_ray_Ultimateconfig_1bb25_00001 | RUNNING    | 131.179.88.212:20089 | 0.00295947  |   

2024-03-08 10:36:35,600	INFO tune.py:1042 -- Total run time: 294.06 seconds (294.02 seconds for the tuning loop).


== Status ==
Current time: 2024-03-08 10:36:35 (running for 00:04:54.03)
Using AsyncHyperBand: num_stopped=100
Bracket: Iter 64.000: -0.7732875917115674 | Iter 16.000: -0.9874656955682921 | Iter 4.000: -1.186904677262543 | Iter 1.000: -1.3223708301571244
Logical resource usage: 0/40 CPUs, 1.0/8 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /home/banyh2000/ray_results/train_ray_Ultimateconfig_2024-03-08_10-31-41
Number of trials: 100/100 (100 TERMINATED)
+--------------------------------------+------------+----------------------+-------------+----------------+----------+
| Trial name                           | status     | loc                  |          lr |   weight_decay |     loss |
|--------------------------------------+------------+----------------------+-------------+----------------+----------|
| train_ray_Ultimateconfig_1bb25_00000 | TERMINATED | 131.179.88.212:20088 | 0.000257568 |    0.000835305 | 0.780561 |
| train_ray_Ultimateconfig_1bb25_00001 | TERMINATED | 131.179.8

In [12]:
# Retrain UltimateConvNet with the tuned hyperparameters.
# Requires `best_config` (keys 'lr' and 'weight_decay') from a hyperparameter-search cell.
# The model is moved to the target device BEFORE the optimizer is constructed, matching the
# ConvNet cell and the PyTorch recommendation that optimized parameters already live on
# their final device when the optimizer is created.
ult_cnn = UltimateConvNet().to(device)
criterion = nn.CrossEntropyLoss()
# float(...) normalises the tuned values to plain Python floats; the former `.real`
# attribute access was a no-op on real-valued numbers.
optimizer = torch.optim.Adam(
    ult_cnn.parameters(),
    lr=float(best_config['lr']),
    betas=(0.9, 0.99),
    eps=1e-6,
    weight_decay=float(best_config['weight_decay']),
)

# patience equals the epoch budget, which presumably keeps early stopping from triggering
# (the recorded run completed all 100 epochs) — confirm against `train`.
train(ult_cnn, train_loader, valid_loader, criterion, optimizer, device=device, epochs=100, patience=100)

test_accuracy = evaluate(ult_cnn, test_loader, device=device)
print(f'Test Accuracy: {test_accuracy:.2f}%')

  0%|          | 0/100 [00:00<?, ?it/s]

 10%|█         | 10/100 [00:11<01:48,  1.20s/it]

Epoch [10/100], Loss: 1.0858, Validation Accuracy: 60.05%


 20%|██        | 20/100 [00:24<01:41,  1.27s/it]

Epoch [20/100], Loss: 0.9935, Validation Accuracy: 65.01%


 30%|███       | 30/100 [00:35<01:23,  1.19s/it]

Epoch [30/100], Loss: 0.8718, Validation Accuracy: 65.96%


 40%|████      | 40/100 [00:47<01:11,  1.19s/it]

Epoch [40/100], Loss: 0.8201, Validation Accuracy: 68.79%


 50%|█████     | 50/100 [00:59<01:00,  1.20s/it]

Epoch [50/100], Loss: 0.8163, Validation Accuracy: 68.79%


 60%|██████    | 60/100 [01:10<00:47,  1.19s/it]

Epoch [60/100], Loss: 0.7358, Validation Accuracy: 70.21%


 70%|███████   | 70/100 [01:23<00:38,  1.28s/it]

Epoch [70/100], Loss: 0.6984, Validation Accuracy: 70.21%


 80%|████████  | 80/100 [01:34<00:23,  1.19s/it]

Epoch [80/100], Loss: 0.6616, Validation Accuracy: 69.74%


 90%|█████████ | 90/100 [01:46<00:11,  1.19s/it]

Epoch [90/100], Loss: 0.6502, Validation Accuracy: 72.34%


100%|██████████| 100/100 [01:58<00:00,  1.18s/it]

Epoch [100/100], Loss: 0.5971, Validation Accuracy: 71.39%
Test Accuracy: 69.75%





In [6]:
# Hyperparameter search for UltimateConvNet: Bayesian optimisation proposes configurations,
# ASHA schedules the trials.
import ray
from ray import tune
from ray import train as raytrain
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler
from src.methods_ray import train_ray_Ultimateconfig
from ray.tune.search.bayesopt import BayesOptSearch
# Restart the local Ray instance so the cell can be re-run in the same kernel.
ray.shutdown()
ray.init()
scheduler = ASHAScheduler(metric='loss', mode='min', max_t= 250)
reporter = CLIReporter(metric_columns=['loss'])
bayesopt = BayesOptSearch(
    metric='loss',
    mode='min',
)

# BayesOptSearch only uses the lower/upper bounds of a float domain and drops any
# non-uniform sampler, so a `tune.loguniform` declaration here was effectively searched on
# a linear scale. The space is declared as uniform to state what is actually explored.
# NOTE(review): a true log-scale search would need the trainable to accept log10 values.
config = {
    'lr': tune.uniform(1e-4, 1e-2),
    'weight_decay': tune.uniform(1e-4, 1e-3)
}


# One GPU per trial is requested only when CUDA is available; a hard-coded {'gpu': 1} would
# leave every trial unschedulable on a CPU-only machine.
# NOTE(review): confirm that train_ray_Ultimateconfig itself can run on CPU.
analysis = tune.run(
    train_ray_Ultimateconfig,
    resources_per_trial={'gpu': 1} if torch.cuda.is_available() else {'cpu': 1},
    config=config,
    num_samples=20,
    search_alg=bayesopt,
    scheduler=scheduler,
    progress_reporter=reporter
)

# Get the best hyperparameters (lowest reported loss).
best_config = analysis.get_best_config(metric='loss', mode='min')
print('Best hyperparameters found are: ', best_config)

2024-03-08 10:43:50,802	INFO worker.py:1724 -- Started a local Ray instance.
2024-03-08 10:43:52,179	INFO tune.py:583 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949


== Status ==
Current time: 2024-03-08 10:43:52 (running for 00:00:00.15)
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 64.000: None | Iter 16.000: None | Iter 4.000: None | Iter 1.000: None
Logical resource usage: 0/40 CPUs, 0/8 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /home/banyh2000/ray_results/train_ray_Ultimateconfig_2024-03-08_10-43-52
Number of trials: 1/1000 (1 PENDING)
+-----------------------------------+----------+-------+------------+----------------+
| Trial name                        | status   | loc   |         lr |   weight_decay |
|-----------------------------------+----------+-------+------------+----------------|
| train_ray_Ultimateconfig_b607210d | PENDING  |       | 0.00380795 |    0.000955643 |
+-----------------------------------+----------+-------+------------+----------------+


== Status ==
Current time: 2024-03-08 10:43:57 (running for 00:00:05.21)
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 64.000: None | Iter 16.000: None | Iter 4.000: N

Trial name,loss
train_ray_Ultimateconfig_01d2964c,1.06836
train_ray_Ultimateconfig_03330f67,1.33648
train_ray_Ultimateconfig_07348f16,1.35008
train_ray_Ultimateconfig_25c2624d,1.17291
train_ray_Ultimateconfig_3d59af18,1.18564
train_ray_Ultimateconfig_4413bb2b,1.34318
train_ray_Ultimateconfig_577edbc0,1.33977
train_ray_Ultimateconfig_663a1481,1.35791
train_ray_Ultimateconfig_b607210d,0.876586
train_ray_Ultimateconfig_ea1e2f23,1.17551


== Status ==
Current time: 2024-03-08 10:44:02 (running for 00:00:10.27)
Using AsyncHyperBand: num_stopped=1
Bracket: Iter 64.000: None | Iter 16.000: None | Iter 4.000: -1.1737329158095322 | Iter 1.000: -1.3364956967585475
Logical resource usage: 0/40 CPUs, 3.0/8 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /home/banyh2000/ray_results/train_ray_Ultimateconfig_2024-03-08_10-43-52
Number of trials: 4/1000 (1 PENDING, 2 RUNNING, 1 TERMINATED)
+-----------------------------------+------------+----------------------+-------------+----------------+---------+
| Trial name                        | status     | loc                  |          lr |   weight_decay |    loss |
|-----------------------------------+------------+----------------------+-------------+----------------+---------|
| train_ray_Ultimateconfig_b607210d | RUNNING    | 131.179.88.212:38956 | 0.00380795  |    0.000955643 | 1.17373 |
| train_ray_Ultimateconfig_ea1e2f23 | RUNNING    | 131.179.88.212:39150 | 0.00164458  |    

2024-03-08 10:44:35,144	INFO bayesopt_search.py:293 -- Skipping duplicated config: {'lr': 0.0064901757294809626, 'weight_decay': 0.00025939961146634403}.


== Status ==
Current time: 2024-03-08 10:44:37 (running for 00:00:45.56)
Using AsyncHyperBand: num_stopped=9
Bracket: Iter 64.000: None | Iter 16.000: -1.0353157251157377 | Iter 4.000: -1.172910097924812 | Iter 1.000: -1.3321297526289104
Logical resource usage: 0/40 CPUs, 1.0/8 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /home/banyh2000/ray_results/train_ray_Ultimateconfig_2024-03-08_10-43-52
Number of trials: 10/1000 (1 RUNNING, 9 TERMINATED)
+-----------------------------------+------------+----------------------+-------------+----------------+----------+
| Trial name                        | status     | loc                  |          lr |   weight_decay |     loss |
|-----------------------------------+------------+----------------------+-------------+----------------+----------|
| train_ray_Ultimateconfig_b607210d | RUNNING    | 131.179.88.212:38956 | 0.00380795  |    0.000955643 | 0.862208 |
| train_ray_Ultimateconfig_07348f16 | TERMINATED | 131.179.88.212:39032 | 0.0073467

2024-03-08 10:44:45,235	INFO bayesopt_search.py:293 -- Skipping duplicated config: {'lr': 0.0038249698967560662, 'weight_decay': 0.00017515064502880188}.


== Status ==
Current time: 2024-03-08 10:44:45 (running for 00:00:53.14)
Using AsyncHyperBand: num_stopped=9
Bracket: Iter 64.000: None | Iter 16.000: -1.0353157251157377 | Iter 4.000: -1.172910097924812 | Iter 1.000: -1.3321297526289104
Logical resource usage: 0/40 CPUs, 1.0/8 GPUs (0.0/1.0 accelerator_type:G)
Result logdir: /home/banyh2000/ray_results/train_ray_Ultimateconfig_2024-03-08_10-43-52
Number of trials: 10/1000 (1 RUNNING, 9 TERMINATED)
+-----------------------------------+------------+----------------------+-------------+----------------+----------+
| Trial name                        | status     | loc                  |          lr |   weight_decay |     loss |
|-----------------------------------+------------+----------------------+-------------+----------------+----------|
| train_ray_Ultimateconfig_b607210d | RUNNING    | 131.179.88.212:38956 | 0.00380795  |    0.000955643 | 0.852913 |
| train_ray_Ultimateconfig_07348f16 | TERMINATED | 131.179.88.212:39032 | 0.0073467

2024-03-08 10:44:45,715	INFO tune.py:1042 -- Total run time: 53.54 seconds (53.14 seconds for the tuning loop).
Resume experiment with: tune.run(..., resume=True)


Best hyperparameters found are:  {'lr': 0.003807947176588889, 'weight_decay': 0.0009556428757689245}


In [None]:
# Plot the loss curve of every Ray Tune trial, labelled by the trial's learning rate.
# Requires `analysis` from a hyperparameter-search cell.
dfs = analysis.trial_dataframes
import matplotlib.pyplot as plt

for d in dfs.values():
    # Trials that reported nothing have no curve and no first row to read the label from.
    if d.empty:
        continue
    # assumes each frame carries a `config/lr` column — TODO confirm, this cell has no recorded run
    plt.plot(d['training_iteration'], d['loss'], label=f"{d['config/lr'].iloc[0]:.2e}")

plt.xlabel('Iterations')
plt.ylabel('Loss')
plt.title('Trial Progress Over Time')
# Without a legend the per-trial labels set above are never displayed.
plt.legend(title='lr', fontsize='small')
plt.show()

In [10]:
# Vision Transformer: (22, 1) patches, 4 output classes, trained with SGD + momentum
# for up to 200 epochs (the recorded run stopped early).
# The model is moved to the target device BEFORE the optimizer is constructed, matching the
# ConvNet cell and the PyTorch recommendation that optimized parameters already live on
# their final device when the optimizer is created.
vit = ViT(patch_size=(22, 1), num_classes=4, dim=64, num_head=8, num_layers = 2).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(vit.parameters(), lr=0.01, momentum=0.9)

train(vit, train_loader, valid_loader, criterion, optimizer, device=device, epochs=200)

test_accuracy = evaluate(vit, test_loader, device=device)
print(f'Test Accuracy: {test_accuracy:.2f}%')

  0%|          | 0/200 [00:00<?, ?it/s]

  5%|▌         | 10/200 [01:38<31:33,  9.96s/it]

Epoch [10/200], Loss: 1.3946, Validation Accuracy: 24.59%


 10%|█         | 20/200 [03:16<29:49,  9.94s/it]

Epoch [20/200], Loss: 1.3668, Validation Accuracy: 28.37%


 15%|█▌        | 30/200 [04:55<28:12,  9.96s/it]

Epoch [30/200], Loss: 1.3423, Validation Accuracy: 26.71%


 20%|██        | 40/200 [06:33<26:34,  9.96s/it]

Epoch [40/200], Loss: 1.3213, Validation Accuracy: 31.68%


 25%|██▌       | 50/200 [08:12<24:55,  9.97s/it]

Epoch [50/200], Loss: 1.3151, Validation Accuracy: 35.46%


 30%|███       | 60/200 [09:50<23:14,  9.96s/it]

Epoch [60/200], Loss: 1.2410, Validation Accuracy: 37.12%


 33%|███▎      | 66/200 [10:58<22:17,  9.98s/it]

Early stopping!





Test Accuracy: 33.86%
