In [1]:
# NOTE: the original import order is kept on purpose. `from d2lvit import *`
# can shadow names imported before it, so reordering could change bindings.
from tree_dataset import TreeDataset
import model as m
from torch.utils.data import DataLoader
import os
import torchvision.transforms as transforms
from d2lvit import *
import copy
# `torch` is used directly below (torch.load, torch.Generator); import it
# explicitly instead of relying on the star import above to leak it.
import torch
import torch.nn as nn

In [2]:
# Image pipeline shared by both datasets: convert to grayscale, resize to 256,
# take a central 224 crop, then convert to a tensor.
preprocess = transforms.Compose([
    transforms.Grayscale(),
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor()
])
train_set = TreeDataset(os.path.join('..', 'data', 'extra_variety_4k'), preprocess)
val_set = TreeDataset(os.path.join('..', 'data', 'extra_variety_2k'), preprocess)
print(f'Train size: {len(train_set)} Val size: {len(val_set)}')

# Reshuffle the training samples every epoch. Without shuffling, every epoch
# sees the exact same batches in the exact same order. A dedicated seeded
# generator keeps the shuffle order reproducible across "Restart & Run All"
# without touching the global RNG state.
shuffle_rng = torch.Generator()
shuffle_rng.manual_seed(0)
train_loader = DataLoader(train_set, batch_size=32, shuffle=True, generator=shuffle_rng)
# Validation is only used for evaluation, so it keeps the dataset order.
val_loader = DataLoader(val_set, batch_size=32)
device = m.get_device()
# Selects which label field of a sample is used; this notebook starts with the
# digit labels and switches to the tree label further down.
config = {'labels_key': 'digit_labels'}

Train size: 4000 Val size: 2000
Identified CUDA device: NVIDIA GeForce RTX 3060


In [3]:
# Load the previously trained digit model. The checkpoint holds the whole
# pickled module (it is called and has its `.head` replaced below), not just a
# state_dict.
# `map_location=device` remaps the stored tensors onto the device selected by
# m.get_device(), so the load also works on a machine that lacks the device
# the checkpoint was saved from.
# NOTE(review): torch.load unpickles arbitrary objects -- only load checkpoints
# from a trusted source. On PyTorch >= 2.6 the default `weights_only=True`
# rejects whole-module checkpoints; pass `weights_only=False` there.
model = torch.load(
    os.path.join('..', 'models', 'd2lvit_3', 'digit-model.pt'),
    map_location=device,
)

In [4]:
# Score the loaded digit model on the training split. The fifth positional
# argument is the slot where later cells pass `digits_model`; none is supplied
# here.
train_acc = m.predict(
    model,
    train_loader,
    device,
    config,
    None,
)
print(train_acc)

1.0


In [5]:
# Same evaluation as for the training split, now on the validation split. The
# fifth positional argument is the slot where later cells pass `digits_model`;
# none is supplied here.
val_acc = m.predict(
    model,
    val_loader,
    device,
    config,
    None,
)
print(val_acc)

1.0


In [6]:
# Keep an independent snapshot of the digit model before `model` is modified
# below (its head is replaced and it is trained further). The deep copy shares
# no parameters with `model`; it is later handed to m.train / m.predict.
digits_model = copy.deepcopy(model)

In [7]:
# Replace the classification head with a freshly initialised one:
# LayerNorm over the 512 input features followed by a linear layer with 45
# outputs. eps=1e-05 and elementwise_affine=True are LayerNorm's defaults, so
# they are not spelled out here.
embed_dim, n_outputs = 512, 45
model.head = nn.Sequential(
    nn.LayerNorm(embed_dim),
    nn.Linear(embed_dim, n_outputs),
)
# The new head is created on the CPU; move the whole model (back) to `device`.
# Keep the assignment: a bare `model.to(device)` as the last expression would
# make the notebook print the full module repr.
model = model.to(device)

In [8]:
# Smoke test of the new head: push the first training image through the model
# as a batch of one (a leading batch axis is added) and display the raw output.
first_image = train_set[0]['image']
model(first_image.unsqueeze(0).to(device))

tensor([[ 0.8923,  0.2212,  0.0799, -0.0810,  0.2277, -0.6431, -0.0592, -0.4235,
         -0.2504, -0.3902,  0.0564,  0.3363,  0.5360, -0.1227, -0.5349, -0.7290,
          0.1480, -0.4880, -0.1359,  0.5826,  0.8987, -0.8298, -0.2213, -0.3091,
         -0.3172,  0.3534, -0.2759, -0.0949, -0.1481,  0.8064, -0.7120, -0.1012,
         -1.0263,  0.5950,  0.0548,  0.6043,  0.0860, -0.6375,  0.9210, -0.4749,
         -0.5395,  0.5700,  0.8247, -0.5015,  1.0631]], device='cuda:0',
       grad_fn=<AddmmBackward0>)

In [9]:
# Train the re-headed model; checkpoints are written under ../models/d2lvit_noprims
# (the next cell loads 'tree-model.pt' from there).
# NOTE(review): m.train takes everything positionally and its signature is not
# visible here -- the per-argument notes below are assumptions to confirm.
noprims_dir = os.path.join('..', 'models', 'd2lvit_noprims')
m.train(
    model,
    0.0001,        # presumably the learning rate -- TODO confirm
    0,             # presumably weight decay -- TODO confirm
    100,           # appears to be the epoch count (the log ends at "Epoch 100")
    train_loader,
    val_loader,
    device,
    noprims_dir,
    digits_model,  # snapshot of the digit model taken before the head swap
    False,         # meaning not visible from this notebook
    False,         # meaning not visible from this notebook
)

Epoch 10 done, train loss: 0.0017 val acc: 0.9930
Epoch 20 done, train loss: 0.0010 val acc: 0.9960
Epoch 30 done, train loss: 0.0010 val acc: 0.9965
Epoch 40 done, train loss: 0.0031 val acc: 0.9920
Epoch 50 done, train loss: 0.0001 val acc: 0.9985
Epoch 60 done, train loss: 0.0022 val acc: 0.9920
Epoch 70 done, train loss: 0.0000 val acc: 0.9990
Epoch 80 done, train loss: 0.0001 val acc: 0.9995
Epoch 90 done, train loss: 0.0018 val acc: 0.9965
Epoch 100 done, train loss: 0.0001 val acc: 0.9980


In [10]:
# Reload the tree model checkpoint from the training output directory,
# replacing the in-memory `model`. (Presumably this is the checkpoint selected
# by m.train rather than the last-epoch weights -- TODO confirm.)
# `map_location=device` remaps the stored tensors onto the device selected by
# m.get_device(), so the load also works on a machine that lacks the device
# the checkpoint was saved from.
# NOTE(review): torch.load unpickles arbitrary objects -- only load checkpoints
# from a trusted source. On PyTorch >= 2.6 the default `weights_only=True`
# rejects whole-module checkpoints; pass `weights_only=False` there.
model = torch.load(
    os.path.join('..', 'models', 'd2lvit_noprims', 'tree-model.pt'),
    map_location=device,
)

In [11]:
# From here on samples are scored against their tree label, not the digit
# labels. NOTE: this rebinds `config`, so re-running the earlier digit
# evaluation cells after this one would use the tree label key.
config = dict(labels_key='tree_label')
# The two trailing flags are positional; their meaning is not visible from this
# notebook (TODO confirm against m.predict).
train_acc = m.predict(
    model,
    train_loader,
    device,
    config,
    digits_model,
    True,
    False,
)
print(train_acc)

1.0


In [12]:
# Tree-label accuracy on the validation split, using the same arguments as the
# training-split evaluation. The two trailing flags are positional; their
# meaning is not visible from this notebook (TODO confirm against m.predict).
val_acc = m.predict(
    model,
    val_loader,
    device,
    config,
    digits_model,
    True,
    False,
)
print(val_acc)

tree_1852: tensor([1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       device='cuda:0')
0.9995
