In [3]:
import os
import sys
import torch
from skimage import io, transform
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
import torch.nn as nn
import torch.optim as optim
import math
from tensorboardX import SummaryWriter

import import_ipynb
from CustomDataset import ControlsDataset
from Model import ConvNet

REPORT_EVERY_ITER = 20

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device = "cpu"
print('using device', device)

importing Jupyter notebook from Model.ipynb
   Number  Angle
0       0      9
1       1      9
2       2      9
3       3      9
4       4      9
5       5      9
6       6      9
7       7      9
8       8      9
9       9      9
units after conv 512
conv parameters:  168224
fc parameters:  166804
input torch.Size([256, 3, 480, 640])
tensor([[ 1.6291, -0.4316,  0.9552,  ...,  0.2999,  1.8533,  0.4294],
        [ 1.8139, -0.9876,  1.0120,  ...,  0.1016,  1.9854,  0.2627],
        [ 1.8472, -0.6828,  1.0593,  ...,  0.1325,  1.7137,  0.4267],
        ...,
        [ 1.6707, -0.6002,  1.2496,  ...,  0.1934,  1.6989,  0.3790],
        [ 1.8006, -0.9413,  1.1329,  ...,  0.0251,  1.9035,  0.3072],
        [ 1.6474, -0.6626,  1.4250,  ...,  0.2373,  2.5900,  0.1302]],
       grad_fn=<AddmmBackward>)
output torch.Size([256, 20])
using device cpu


In [4]:
dataset = ControlsDataset()
dataset.convertTOClass()
print("Stack size", dataset.stack_size)
# dataloader = DataLoader(dataset, batch_size = 256, shuffle = True, num_workers = 0)
net = ConvNet().to(device)

print("Data size", dataset.data_frame.size)
print("number of parameters: ", sum(p.numel() for p in net.parameters()))

Stack size 1
units after conv 512
conv parameters:  168224
fc parameters:  166804
Data size 22038
number of parameters:  335028


In [5]:
torch.cuda.empty_cache()

In [6]:
print(dataset.data_frame.head(10))

   Number  Angle
0       0      9
1       1      9
2       2      9
3       3      9
4       4      9
5       5      9
6       6      9
7       7      9
8       8      9
9       9      9


In [7]:
validation_split = .2
dataset_size = len(dataset)
indices = list(range(dataset_size))
split = int(np.floor(validation_split * dataset_size))

# spliting the dataset
train_indices, val_indices = indices[split:], indices[:split]

# Creating PT data samplers and loaders:
train_sampler = SubsetRandomSampler(train_indices)
valid_sampler = SubsetRandomSampler(val_indices)


# Training data loader # NOTE had to remove shuffle
dataloader = DataLoader(dataset, batch_size = 256, num_workers = 0, sampler=train_sampler)

# Validation data loader # NOTE had to remove shuffle
validLoader = DataLoader(dataset, batch_size = 256, num_workers = 0, sampler=valid_sampler)

print("Total training stacks", len(dataloader))
print("Total validation stacks",len(validLoader))

Total training stacks 35
Total validation stacks 9


In [8]:
epochs = 25
optimizer = optim.Adam(net.parameters(), lr=0.0001)
criterion = nn.CrossEntropyLoss() # Changed from Mean-Squared to Cross-Entropy
writer = SummaryWriter()
iter_no = 0

torch.cuda.empty_cache()
for epoch in range(epochs):
    for i_batch, sampled_batch in enumerate(dataloader):
        iter_no += 1
        images = sampled_batch['image'].to(device).float()
        controls = sampled_batch['control'].to(device).long()
        print("imagesSize: ", images.size(), "controlsSize:", controls.size())
        optimizer.zero_grad()
        prediction = net(images)
        
        controls = torch.flatten(controls)
        print(prediction.size(), controls.size())
        loss = criterion(prediction, controls)
        loss.backward()
        optimizer.step()

        if iter_no % REPORT_EVERY_ITER == 0:
            writer.add_scalar("Loss", loss.item(), iter_no)
            torch.save(net.state_dict(), "snapshots/saved_model_{}".format(loss.item()))
        
        out = "{0},{1}\tLoss:{2}\tAllocated:{3}GB\tCached:{4}GB\n"
        print(out.format(str(epoch),
                        str(iter_no),
                        round(loss.item(),5),
                         'na', 'na'
#                         round(torch.cuda.memory_allocated(0)/1024**3,3),
#                         round(torch.cuda.memory_allocated(0)/1024**3,3)
                        ))
        torch.cuda.empty_cache()

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-1.3201, -1.0427,  0.6101,  ...,  1.7291,  0.8079,  1.9942],
        [-0.9376, -0.9584,  0.0901,  ...,  1.9816,  0.9418,  1.4599],
        [-1.0834, -0.5917,  0.6566,  ...,  1.7480,  1.1641,  1.2342],
        ...,
        [-1.2901, -0.7337,  0.0058,  ...,  2.0439,  0.4704,  1.2447],
        [-1.8945, -0.7674,  1.0277,  ...,  2.1893,  0.9297,  2.0602],
        [-1.1960,  0.1260,  0.9092,  ...,  2.4461,  0.9637,  2.0132]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
0,1	Loss:4.18003	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-1.0009, -0.7573,  0.0441,  ...,  1.1721,  0.8049,  1.3038],
        [-0.5639, -0.3504,  0.1465,  ...,  0.8900,  0.5261,  1.0396],
        [-0.9787, -0.3626,  0.1129,  ...,  1.0658,  0.6778,  1.1843],
        ...,
        [-1.2825, -0.4866,  0.1521,  ...,  1.9180,  0.4597,  1.4658]

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-1.9424, -0.8848,  0.1731,  ..., -0.2963, -0.2585, -0.1502],
        [-1.9990, -1.2782, -0.2111,  ..., -0.1948, -0.2326, -0.3675],
        [-2.3294, -0.8387,  0.0950,  ..., -0.2834, -0.1265, -0.1540],
        ...,
        [-1.8545, -0.9501,  0.0690,  ..., -0.0837, -0.3067, -0.0402],
        [-2.1890, -1.0995,  0.1068,  ...,  0.0164, -0.3966, -0.1311],
        [-1.6993, -0.7759,  0.0802,  ..., -0.0558, -0.1134, -0.2067]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
0,14	Loss:1.71105	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-1.8760, -0.8887, -0.0099,  ..., -0.0468, -0.2593, -0.1414],
        [-2.2995, -1.0018, -0.2224,  ..., -0.0068, -0.4063, -0.1236],
        [-2.0286, -0.8534,  0.1415,  ..., -0.1741, -0.1140, -0.1460],
        ...,
        [-1.7828, -0.7373, -0.2714,  ...,  0.2036, -0.4427,  0.1168

0,26	Loss:1.50029	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-1.9591, -0.6253, -0.2598,  ...,  0.0875, -0.4179,  0.0678],
        [-2.0714, -0.9434, -0.2955,  ...,  0.0877, -0.4835, -0.0627],
        [-2.2198, -0.6796, -0.6398,  ...,  0.2736, -0.2071,  0.0881],
        ...,
        [-1.5047, -0.4800, -0.2253,  ...,  0.0954, -0.2344,  0.1083],
        [-1.5642, -0.4675, -0.0445,  ...,  0.2860, -0.2869,  0.0707],
        [-2.3179, -0.7040, -0.5150,  ...,  0.2763, -0.2967,  0.0470]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
0,27	Loss:1.68	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-1.8439e+00, -6.4005e-01, -2.2125e-01,  ...,  1.5164e-01,
         -2.5174e-01,  3.7201e-02],
        [-1.6203e+00, -4.9709e-01, -1.6756e-01,  ...,  6.4422e-02,
         -2.2167e-01,  8.6003e-02],
        [-1.8324e+00, -6.2186e-01, -1.446

1,39	Loss:1.54487	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-1.6450, -0.7447,  0.0374,  ..., -0.5075, -0.4446,  0.0113],
        [-1.3570, -0.6363, -0.2921,  ..., -0.6095, -0.4753, -0.0735],
        [-1.9621, -0.9898, -0.0571,  ..., -0.3438, -0.7702, -0.1922],
        ...,
        [-1.6673, -0.6794, -0.3142,  ..., -0.4941, -0.8041,  0.0676],
        [-2.1399, -0.9171,  0.0235,  ..., -0.7098, -0.6324,  0.0966],
        [-1.5166, -0.5654,  0.2479,  ..., -0.5106, -0.5417,  0.1218]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
1,40	Loss:1.38809	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-2.1562, -0.8715, -0.7179,  ..., -0.5748, -0.7535, -0.4001],
        [-2.2241, -1.3391, -0.4482,  ..., -0.7447, -1.1837, -0.2162],
        [-2.2126, -0.6629, -0.3417,  ..., -0.5634, -0.8505, -0.2540],
        ...,
        [-1.5441, -0.

1,52	Loss:1.39393	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-2.0273, -0.8977, -0.4739,  ..., -0.4925, -1.0909, -0.0822],
        [-1.8751, -0.7146, -0.6106,  ..., -0.0616, -0.9621, -0.1525],
        [-2.0531, -0.9260, -0.4423,  ..., -0.4465, -1.1348, -0.0759],
        ...,
        [-2.4308, -1.8306, -0.7037,  ..., -0.7588, -1.9278,  0.0424],
        [-2.5542, -1.1892, -1.4750,  ..., -0.4700, -1.5224,  0.0469],
        [-2.0945, -1.3028, -0.8292,  ..., -0.6245, -1.1596,  0.0392]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
1,53	Loss:1.50191	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-1.7940, -1.3135, -0.2290,  ..., -0.4322, -1.4829, -0.0032],
        [-2.2180, -1.2882, -0.9367,  ..., -0.3761, -1.3766, -0.1874],
        [-2.4119, -1.0236, -0.9194,  ..., -0.1854, -1.6568,  0.0686],
        ...,
        [-1.8545, -1.

1,65	Loss:1.57605	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-1.6697, -1.0304, -0.1071,  ..., -1.0170, -1.5999, -0.1724],
        [-1.9880, -1.3619, -0.7204,  ..., -0.7516, -1.8133, -0.3537],
        [-1.8213, -1.3381, -0.4868,  ..., -0.4087, -1.3975, -0.3347],
        ...,
        [-2.4105, -1.8658, -1.1231,  ..., -0.8592, -2.0495, -0.5938],
        [-2.4329, -1.5092, -0.3081,  ..., -0.9537, -1.8660, -0.1350],
        [-2.0440, -1.1593, -0.3592,  ..., -0.5948, -1.5771,  0.0373]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
1,66	Loss:1.65027	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-2.5392, -1.2289, -1.0262,  ..., -0.3143, -1.6540, -0.3587],
        [-1.6246, -0.6208, -0.4848,  ..., -0.2021, -1.0796, -0.1624],
        [-2.3995, -1.4384, -0.5636,  ..., -1.1064, -1.7598,  0.0237],
        ...,
        [-2.4809, -1.

2,78	Loss:1.3379	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-1.2892, -0.1907, -0.6469,  ...,  0.0659, -0.6725, -0.1574],
        [-1.1237, -0.5902, -0.0156,  ..., -0.8483, -1.2721, -0.0243],
        [-1.6598, -0.4831, -0.1380,  ..., -0.5119, -1.1175,  0.1769],
        ...,
        [-2.6850, -1.3287, -2.3323,  ..., -1.1297, -1.5282, -0.0254],
        [-2.0452, -0.9574, -0.8678,  ..., -0.8196, -1.2258,  0.1131],
        [-1.9374, -0.7624, -0.6111,  ..., -0.6332, -1.5453, -0.1035]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
2,79	Loss:1.36457	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-1.5919, -0.2971, -1.0037,  ...,  0.1025, -0.7993, -0.0267],
        [-2.1417, -1.0409, -0.2561,  ..., -0.8569, -1.7520, -0.0836],
        [-2.1162, -1.0191, -0.2717,  ..., -1.0407, -1.3407,  0.1078],
        ...,
        [-2.6171, -1.8

2,91	Loss:1.42284	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-1.7458, -1.2603,  0.1421,  ..., -1.1618, -1.9885, -0.0079],
        [-2.2173, -1.2086, -0.7848,  ..., -0.9401, -1.7411, -0.1180],
        [-1.6804, -0.7138, -0.0211,  ..., -1.0803, -1.3878,  0.1354],
        ...,
        [-2.1458, -1.2588, -0.8770,  ..., -0.7724, -1.6714, -0.0630],
        [-1.7151, -0.8203,  0.2361,  ..., -0.9468, -1.7337, -0.0853],
        [-1.8408, -1.1437, -0.0109,  ..., -0.8081, -2.0071, -0.2366]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
2,92	Loss:1.42022	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-2.2147, -1.2359, -0.2550,  ..., -0.7393, -1.5440,  0.1024],
        [-1.6161, -0.7430, -0.0892,  ..., -0.9213, -1.2674,  0.0736],
        [-1.6567, -0.4049, -1.4905,  ...,  1.1007, -1.4431, -0.5929],
        ...,
        [-1.9801, -1.

2,104	Loss:1.40414	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([111, 3, 480, 640]) controlsSize: torch.Size([111, 1])
tensor([[-1.5430e+00, -8.6263e-01, -3.4428e-02,  ..., -1.4747e+00,
         -1.5044e+00, -5.3591e-01],
        [-1.7651e+00, -1.2633e+00, -5.1753e-01,  ..., -1.2204e+00,
         -1.3765e+00, -1.9535e-03],
        [-2.2820e+00, -1.4527e+00, -8.3263e-02,  ..., -1.1724e+00,
         -1.8084e+00, -2.1335e-03],
        ...,
        [-1.7125e+00, -1.6169e+00, -4.7062e-01,  ..., -9.2347e-01,
         -1.7181e+00, -5.3439e-01],
        [-2.4425e+00, -1.5410e+00, -1.1886e-01,  ..., -9.0654e-01,
         -1.6023e+00,  1.2422e-01],
        [-1.9043e+00, -8.4872e-01, -2.3421e-01,  ..., -8.6180e-01,
         -1.4095e+00,  4.7709e-01]], grad_fn=<AddmmBackward>)
torch.Size([111, 20]) torch.Size([111])
2,105	Loss:1.21812	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-1.6944, -1.2870, -0.0392,  ..., -1.1775, 

3,117	Loss:1.26345	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-1.7850, -0.5250, -0.7559,  ..., -0.5394, -1.6705, -0.2571],
        [-2.4179, -0.6708, -1.2172,  ...,  0.2386, -1.6270, -0.0627],
        [-1.8374, -1.0893, -0.0807,  ..., -1.1624, -1.6807, -0.3418],
        ...,
        [-2.6678, -2.1533, -1.5734,  ..., -0.6597, -2.3114, -0.2817],
        [-1.5918, -0.5383,  0.1266,  ..., -1.1043, -1.3824,  0.2299],
        [-1.3601, -0.2316,  0.6816,  ..., -0.8833, -0.7330,  0.3515]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
3,118	Loss:1.40213	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-1.6936, -1.5094,  0.4024,  ..., -1.3178, -2.0313, -0.1261],
        [-2.0407, -0.8302,  0.2436,  ..., -1.6140, -1.8290, -0.2658],
        [-1.1186, -0.6850,  0.0040,  ..., -0.9544, -1.3341, -0.2631],
        ...,
        [-1.3423, -

3,130	Loss:1.18683	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-2.5870, -0.4023, -1.0637,  ..., -0.7065, -1.3232,  0.4152],
        [-2.0871, -0.6553, -0.3888,  ...,  0.1502, -1.1817,  0.1166],
        [-2.1252, -0.6313,  0.2611,  ..., -0.9829, -2.1030, -0.1978],
        ...,
        [-1.7679, -0.8723, -0.0291,  ..., -0.2435, -1.3108, -0.1677],
        [-1.7332, -0.7943, -0.7023,  ..., -1.1834, -1.9659, -0.2252],
        [-2.1798, -0.8211, -0.2815,  ..., -0.2080, -1.4647, -0.2929]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
3,131	Loss:1.34943	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-2.5852, -1.8144, -1.2668,  ..., -1.7171, -3.3867, -0.8038],
        [-2.2664, -0.1998, -0.2775,  ..., -1.1734, -1.8668, -0.5543],
        [-1.0163, -0.8902, -0.1770,  ..., -1.0309, -1.7354, -0.3415],
        ...,
        [-3.0316,  

4,143	Loss:1.21917	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-0.9343, -1.0300, -1.1196,  ..., -1.3174, -1.6215, -0.8095],
        [-2.2369, -0.7504, -0.8156,  ..., -1.3337, -2.2592, -0.4256],
        [-2.1150, -1.1375, -1.2970,  ..., -1.0338, -1.9307,  0.4184],
        ...,
        [-2.8220, -0.7480, -0.9493,  ..., -1.5554, -2.3254, -0.9912],
        [-2.2158, -1.0279, -1.0882,  ..., -1.8831, -2.6826, -0.3629],
        [-2.3347, -0.6854, -2.1755,  ...,  0.0377, -1.8147, -0.3798]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
4,144	Loss:1.04431	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-1.9869, -0.7940,  0.0144,  ..., -1.4696, -1.2612,  0.2278],
        [-2.1672, -1.2891, -1.2812,  ..., -1.2295, -1.8449, -0.3271],
        [-3.1047, -0.7426, -3.2585,  ...,  0.7588, -2.8620, -0.8207],
        ...,
        [-1.0392, -

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-3.2797, -0.4007, -1.1194,  ..., -0.7526, -1.4983, -0.0511],
        [-2.5120, -0.7498, -0.7555,  ...,  0.1292, -2.3190, -0.2850],
        [-1.3400, -0.5952,  0.4982,  ..., -0.7117, -1.9335, -0.3222],
        ...,
        [-1.2601, -0.3823, -0.1742,  ...,  0.2412, -0.9704,  0.5650],
        [-1.8531, -0.3773, -1.3715,  ..., -0.8087, -1.4859,  0.9324],
        [-2.5042, -1.2783, -0.2913,  ..., -0.6050, -1.9558, -0.2838]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
4,156	Loss:1.29171	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-1.2558, -1.1652, -0.5017,  ..., -1.0032, -2.1786, -0.2542],
        [-1.8491, -0.3467,  0.3306,  ..., -0.2598, -2.3183, -1.4915],
        [-0.6786, -0.4197, -0.6494,  ..., -0.5577, -1.9344, -1.7170],
        ...,
        [-1.6732, -0.0636,  0.8542,  ..., -0.1695, -1.5673, -1.528

4,168	Loss:1.17365	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-0.8731, -0.6064, -0.1779,  ..., -0.6151, -1.7289, -1.3457],
        [-0.2335, -0.4191, -0.2450,  ..., -1.1295, -1.9033, -0.4845],
        [-0.7607, -0.9294,  0.2552,  ..., -1.0802, -1.8109, -0.2016],
        ...,
        [-0.1991,  0.3390, -0.6259,  ..., -0.9580, -1.4867, -1.4540],
        [-0.4907, -0.2118,  0.5046,  ..., -1.6550, -1.5350, -0.3047],
        [-1.2670, -0.2060, -2.3920,  ...,  0.7856, -0.0254, -0.0208]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
4,169	Loss:1.01607	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-1.6686, -1.2954, -0.7643,  ..., -1.1527, -3.0093,  0.1382],
        [-1.2275, -0.9833, -0.4377,  ..., -1.4022, -1.9121,  0.2701],
        [-1.2023, -0.7483, -0.4457,  ..., -1.3566, -1.6141,  0.3991],
        ...,
        [-0.4369, -

5,181	Loss:1.09385	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-2.5140, -1.9027, -0.9431,  ..., -0.3972, -1.5442,  0.4685],
        [-2.0062, -1.6880,  1.2992,  ..., -2.2814, -3.4776, -0.7252],
        [-1.1226, -0.1676, -0.6711,  ..., -1.0272, -2.9566, -0.6087],
        ...,
        [-1.8457, -1.5215, -0.3460,  ...,  0.0346, -1.7459, -0.8603],
        [-2.1857, -2.9043, -0.6366,  ..., -2.2416, -4.4318, -2.2855],
        [-2.0399, -1.2074, -1.7532,  ...,  0.1673, -1.9168, -1.5108]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
5,182	Loss:1.02371	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-1.7225, -1.6449,  0.7884,  ..., -2.9074, -2.0891, -2.4974],
        [-1.5522,  0.1814,  0.9398,  ..., -2.0237, -1.4266,  0.2313],
        [-1.3670, -0.9127,  1.6841,  ..., -2.6955, -2.5480, -0.8960],
        ...,
        [-1.6563, -

5,194	Loss:0.96993	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[ 0.2801, -0.2166,  0.5885,  ..., -1.4693, -1.2658, -1.5726],
        [-1.1012,  0.3274,  0.6095,  ..., -1.5068, -2.5546, -1.5551],
        [-1.2030,  0.9294,  0.3577,  ..., -2.4287, -0.5632,  0.3139],
        ...,
        [-0.6484,  0.1496,  2.0628,  ..., -2.0289, -2.2158, -3.1526],
        [-2.1527, -1.3668,  0.8979,  ..., -2.3895, -2.1150, -0.3081],
        [-1.7437, -1.1566, -1.5174,  ..., -0.1653, -2.0596, -1.2392]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
5,195	Loss:0.93243	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-2.1696,  0.2445, -0.0518,  ..., -1.6926, -2.6075, -2.8223],
        [ 0.1247, -0.1312, -0.0229,  ..., -0.8687, -1.8733, -2.6520],
        [-1.2392,  0.6246, -0.2464,  ..., -1.0181, -2.0606, -1.1307],
        ...,
        [-1.6890,  

5,207	Loss:1.04602	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-2.5984, -2.2434, -2.1330,  ...,  0.3436, -1.0276,  1.8177],
        [-0.4836,  0.4145,  0.9814,  ..., -2.4587, -1.1085, -1.4693],
        [-0.4450, -0.8747, -0.4739,  ..., -1.0568, -0.2286,  1.0336],
        ...,
        [-0.9109, -1.4752, -1.2483,  ...,  0.0878, -1.0775,  0.1743],
        [-0.2397, -1.0880, -4.0005,  ..., -0.9996, -1.3679,  0.2115],
        [-0.4537, -1.7838, -3.4656,  ...,  1.5846,  0.1833,  1.4682]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
5,208	Loss:1.05024	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-1.7082, -2.4249, -3.5550,  ..., -0.7123, -1.8469, -2.5277],
        [-2.1044, -0.2190, -2.1839,  ...,  0.4324,  0.3758,  1.6642],
        [-2.1812, -0.7360, -1.6476,  ..., -0.6918, -1.8813,  1.9788],
        ...,
        [-0.8072, -

6,220	Loss:0.90545	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-1.1782, -0.9515,  0.1697,  ..., -0.0295, -1.9399, -1.0704],
        [-1.5663, -0.6725,  0.9566,  ..., -1.1820, -2.0753, -0.9633],
        [-1.3285,  0.0336, -0.9836,  ..., -1.2472, -1.0910, -2.3253],
        ...,
        [-1.1544, -0.5675, -1.4511,  ..., -0.4948, -2.3172, -1.8065],
        [-2.3430, -0.6436, -0.6129,  ..., -0.5850, -1.4106,  1.5781],
        [-0.0202, -0.7612, -0.3677,  ..., -0.2661, -1.9957, -0.3360]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
6,221	Loss:0.94748	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-2.2742, -0.1220, -0.7394,  ..., -0.2433, -2.0895, -1.7897],
        [-3.1918, -1.0701, -2.7602,  ..., -0.5762, -1.5982,  0.7910],
        [-2.6378, -1.9539, -0.5070,  ..., -0.6701, -2.5237, -3.0714],
        ...,
        [-2.4619, -

6,233	Loss:0.8899	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-1.0674e+00, -8.9454e-01, -8.3323e-01,  ..., -5.6776e-01,
         -1.4966e+00,  6.8988e-04],
        [ 5.1802e-01,  2.3284e-01,  2.3707e+00,  ..., -6.6607e-01,
         -6.4632e-01, -2.6388e-01],
        [-6.4789e-01, -1.9566e+00, -1.0768e+00,  ..., -1.0727e+00,
         -1.5301e+00, -3.3461e-02],
        ...,
        [-2.4995e-01,  1.8746e-01,  2.6161e+00,  ..., -1.6589e+00,
         -6.9127e-01, -1.9555e-01],
        [-1.9408e+00, -6.0747e-01, -5.1623e-01,  ..., -1.1002e+00,
         -2.0227e+00, -1.6452e-01],
        [ 3.5672e-01,  6.8643e-01,  2.8923e-01,  ...,  3.4927e-01,
         -1.8521e+00,  3.3405e-01]], grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
6,234	Loss:0.84675	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[ 0.2819,  0.1529,  0.0042,  ..., -0.6258,  

7,246	Loss:0.73662	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-1.2482,  0.2449,  1.7827,  ..., -1.5609, -1.2169,  0.7142],
        [-3.3206, -1.9125, -0.8442,  ..., -0.3660, -0.9448,  0.0367],
        [ 0.0257,  0.0942,  1.4478,  ..., -2.3966, -0.9835, -0.1422],
        ...,
        [-1.8780, -0.6485, -0.8721,  ..., -0.7621, -1.5182,  2.0541],
        [-1.3817, -0.9953, -2.4842,  ..., -0.6177, -1.9676, -1.5601],
        [-1.9756, -1.4067, -1.8361,  ..., -0.7074, -1.6128,  0.6861]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
7,247	Loss:0.88298	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-0.8348, -0.5035,  0.4538,  ..., -0.4320, -1.7422,  0.3024],
        [-0.3523,  0.4532,  0.3194,  ..., -0.0966, -0.9401,  0.2143],
        [ 0.8950,  0.8696,  0.1954,  ..., -1.9512, -1.2328, -0.8489],
        ...,
        [-0.7162, -

7,259	Loss:0.71492	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-0.5881,  0.9860,  2.4894,  ..., -1.6337, -2.5104, -4.4275],
        [ 0.0104,  0.7185,  0.6126,  ..., -1.7834, -1.6572, -1.0430],
        [ 0.8040,  0.8947,  0.7269,  ..., -2.2041, -1.0721, -1.5428],
        ...,
        [-2.6300, -1.4707, -2.0822,  ...,  0.2526, -2.4091,  0.4085],
        [-0.3118, -1.3477,  1.1303,  ..., -0.9315, -2.8757, -2.5759],
        [-2.1382, -0.6110, -1.3116,  ..., -0.7632, -1.9232,  2.3763]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
7,260	Loss:0.76652	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-3.1107, -1.3694, -2.0685,  ...,  1.6062, -1.1633, -0.7905],
        [-2.4932, -0.5678, -1.9021,  ..., -1.1936, -2.2232,  2.8854],
        [-1.4337,  0.7907,  0.1820,  ..., -1.9359, -0.2466, -1.2681],
        ...,
        [-2.7742, -

7,272	Loss:0.70526	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-0.7400, -1.0718, -0.3830,  ..., -1.0419, -2.0567,  0.5011],
        [-1.8737,  0.4219, -1.0410,  ...,  0.9144, -0.8011,  1.6528],
        [-0.8150, -2.2607, -1.4178,  ..., -0.3993, -1.2291, -0.5065],
        ...,
        [-2.3651, -0.1765,  0.4123,  ..., -1.0391, -2.0728, -0.7778],
        [-0.2066, -0.4486,  0.6642,  ..., -0.1803, -1.3272, -1.0057],
        [ 0.1129, -1.5071, -2.5936,  ...,  0.8424, -1.9975, -0.4574]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
7,273	Loss:0.76894	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-1.5130e+00, -6.0004e-01, -4.0701e+00,  ...,  1.3691e+00,
         -1.9682e+00, -6.8611e-01],
        [ 1.0623e+00, -1.3987e+00, -1.3682e+00,  ...,  8.3515e-01,
         -9.4477e-01, -2.3291e+00],
        [-2.3470e+00, -5.0236e-01, -

8,285	Loss:0.68864	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-1.1319,  0.6181,  2.1076,  ..., -2.1701, -1.9613, -1.8881],
        [-2.3483, -3.0559, -2.2490,  ..., -2.0180,  0.2730,  0.1699],
        [-0.1187,  0.6733, -2.1040,  ...,  1.2971, -0.1354, -0.9522],
        ...,
        [-0.7145, -1.8623, -0.1901,  ..., -1.4154, -2.3169, -3.0855],
        [-0.3983, -0.0955,  3.4552,  ..., -3.7682, -0.6399,  0.6322],
        [-0.3696,  0.2674, -2.1768,  ...,  1.7736, -0.9601, -3.8596]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
8,286	Loss:0.73904	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-0.4022,  0.1646,  1.0850,  ..., -0.5668, -1.3699, -1.9152],
        [-0.7043,  0.7302,  2.4303,  ...,  0.4579, -1.2518, -1.3810],
        [-0.5672, -0.6934, -2.5922,  ..., -0.5088, -1.2286, -1.6147],
        ...,
        [-0.5465,  

8,298	Loss:0.64112	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-0.9739, -0.4623, -0.9353,  ..., -1.6283, -1.5913, -1.8047],
        [-0.0348, -3.6335, -0.2957,  ..., -3.8692, -2.6881, -5.0163],
        [ 0.7926, -0.6192,  1.5393,  ..., -2.7652, -3.1702, -1.8367],
        ...,
        [-1.5665, -0.6124,  0.9120,  ..., -0.0505, -1.7557, -0.9291],
        [ 0.3692, -0.5689,  0.8397,  ..., -1.2440, -3.5107, -1.8871],
        [-1.2474,  0.9227,  1.5127,  ..., -2.3705, -2.9422, -1.9895]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
8,299	Loss:0.85635	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-1.7874,  0.5823,  1.1098,  ..., -0.5297, -3.4328, -2.5671],
        [-1.5883,  0.3508,  1.6348,  ..., -1.2742, -0.4062, -1.0199],
        [-0.9752, -1.6242,  1.1179,  ..., -0.9692, -2.0704, -0.8217],
        ...,
        [ 0.0661,  

8,311	Loss:0.65299	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-0.1283,  0.4015,  2.4560,  ..., -1.5216, -0.4076, -0.5308],
        [ 0.5359,  0.9127,  2.1160,  ..., -2.3058, -1.3142, -0.9189],
        [ 0.3782, -3.1331, -3.2104,  ..., -0.4983, -3.3788, -4.1847],
        ...,
        [ 0.1096, -1.9837, -1.8232,  ...,  0.4852, -2.8111,  0.4876],
        [-0.4074,  0.3369,  2.2540,  ..., -2.6923, -1.4443, -2.7039],
        [-0.7462, -1.8157, -1.7878,  ..., -0.3625, -1.5095, -0.8282]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
8,312	Loss:0.58247	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-1.1682, -1.6603, -0.0760,  ...,  0.0289, -1.4959, -0.1940],
        [ 0.2072, -0.9237,  0.1029,  ..., -2.2669,  0.1771, -4.7530],
        [-1.9695,  0.6061,  5.5293,  ..., -1.7833, -0.7360, -0.1150],
        ...,
        [ 0.8389,  

9,324	Loss:0.68172	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-1.1170,  0.7807,  0.9126,  ..., -1.7639, -1.2416, -2.4862],
        [-2.1965,  2.7909,  1.2364,  ..., -0.5334, -1.4464, -1.2654],
        [-1.1247, -2.4368,  1.8274,  ..., -1.8666, -1.4868, -1.8788],
        ...,
        [-1.6949, -1.0667, -1.3369,  ..., -0.6653, -1.8538,  1.9118],
        [-0.1355,  0.8160,  2.0487,  ...,  0.1620, -1.5664, -1.0450],
        [-0.9562,  1.1771,  3.3282,  ..., -1.4614, -2.3809, -4.9005]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
9,325	Loss:0.60872	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-0.9576,  0.3601,  2.4167,  ..., -2.6445, -0.2806, -2.4562],
        [-0.1096, -0.5198, -0.7545,  ..., -4.0715, -1.3943, -3.8000],
        [-0.8284, -3.5117, -1.0472,  ..., -2.3761, -3.3202, -2.1843],
        ...,
        [-1.1676, -

9,337	Loss:0.50537	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-1.2465, -2.5570, -2.5648,  ..., -0.0801, -4.1951, -2.3915],
        [ 0.0305, -0.4789, -1.9670,  ...,  0.0933, -3.5289, -4.3750],
        [-0.4059, -2.0004, -3.4639,  ...,  1.1300, -1.8848, -3.1866],
        ...,
        [-1.6908, -1.7164, -2.0090,  ...,  0.5173, -1.9915, -0.4965],
        [-0.3689,  0.0641, -0.5444,  ..., -1.3766, -2.3573, -1.6986],
        [-1.0900, -1.3181, -0.0686,  ..., -1.5417,  0.3144,  1.8807]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
9,338	Loss:0.6138	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-0.9603,  0.0433,  1.6905,  ..., -1.9061, -1.1577, -1.2824],
        [ 0.2414, -0.1053, -1.7560,  ..., -2.2829, -1.2415, -1.9665],
        [-3.9075, -0.9361, -6.3942,  ..., -0.6777,  0.2218,  3.4954],
        ...,
        [ 0.2448, -0

9,350	Loss:0.59999	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[ 1.0263,  0.1352, -0.7051,  ..., -1.5908, -3.2734, -4.0422],
        [-1.3581,  0.0345, -0.8553,  ..., -1.3903, -2.0432, -4.9676],
        [-0.7039,  0.5733, -0.9742,  ..., -0.6990, -1.3004,  0.7167],
        ...,
        [-0.8999,  1.1741,  3.6173,  ...,  0.5658, -1.6354, -1.2116],
        [-0.2995, -1.3949, -2.1214,  ...,  0.2401, -3.3139, -2.9900],
        [-0.6049,  1.5093,  2.5557,  ..., -0.9484, -2.3011, -3.8801]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
10,351	Loss:0.63159	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-1.5594, -1.1453,  0.4067,  ...,  0.2691, -3.0213, -2.6325],
        [-2.1268,  0.4281, -4.5863,  ...,  1.9785, -2.3429, -0.0750],
        [-1.9533,  0.0659, -0.0462,  ..., -0.9979, -1.7997, -0.2402],
        ...,
        [-0.5778, 

10,363	Loss:0.6471	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-2.0663,  0.5652, -1.8147,  ...,  0.1829, -1.8386,  0.7619],
        [ 0.3917,  0.3438,  2.4069,  ..., -1.7493, -1.7979, -1.3062],
        [ 0.2980, -0.0922, -1.8378,  ...,  0.6284, -1.1336,  1.1197],
        ...,
        [-1.9712, -1.2691, -1.3760,  ..., -0.2398, -3.2849, -1.4965],
        [ 0.1614,  2.4146, -0.5027,  ..., -0.2056, -0.0679, -1.3420],
        [ 1.1275, -0.4281,  0.3142,  ..., -0.3925, -2.7871, -3.9304]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
10,364	Loss:0.50391	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-0.8953, -0.1509, -2.1343,  ..., -1.5136, -1.8521,  0.7609],
        [-0.7186, -1.2192, -0.6323,  ..., -2.8676, -2.8492, -6.4917],
        [-1.8864, -1.1882,  1.0568,  ..., -1.1901, -0.9146, -5.7751],
        ...,
        [-3.2919, 

10,376	Loss:0.64857	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[ 0.7552,  1.5382,  1.2316,  ..., -0.6247, -1.6353, -1.5479],
        [ 0.9801, -1.1490, -0.0261,  ..., -0.1980, -1.3939, -2.9304],
        [-1.3010,  1.4715, -3.7062,  ..., -0.5479,  0.5516,  1.9345],
        ...,
        [-0.2510, -3.2336, -5.8740,  ..., -1.3795, -0.8397, -4.0167],
        [ 2.5329, -2.9329, -2.9935,  ..., -1.1353, -2.8484, -2.1675],
        [-1.7949,  0.9918, -0.5539,  ..., -2.6719, -2.8731, -5.0708]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
10,377	Loss:0.4829	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-0.3995,  1.5917,  2.8731,  ..., -0.0836, -0.6271, -0.2163],
        [ 1.7812, -3.0978, -2.1329,  ..., -2.5005, -4.2559, -2.7456],
        [-0.6798, -2.2469, -3.7441,  ..., -0.0187, -0.3365, -1.0512],
        ...,
        [-0.9644, 

11,389	Loss:0.45311	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-2.5208,  0.4818, -3.1409,  ..., -0.0895,  1.9764,  3.8442],
        [-0.5333, -1.4871, -0.4593,  ..., -1.6641, -3.8094, -0.9138],
        [-1.3859,  0.5618,  0.2049,  ..., -1.5641, -1.7458, -5.3041],
        ...,
        [-0.8508, -0.8265, -0.4160,  ..., -0.1739, -4.5345, -3.2755],
        [-2.5115, -3.9040, -0.2808,  ...,  0.0770, -1.0031, -2.2928],
        [-5.5936, -1.6823, -2.0765,  ...,  1.5222,  1.3528,  0.2654]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
11,390	Loss:0.53512	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[ 0.0127, -0.3792,  0.4556,  ..., -2.6067, -1.6488, -3.8384],
        [-2.4844,  0.5781,  1.1438,  ..., -1.6707, -3.0157, -1.5967],
        [-2.9739, -0.2362, -5.7255,  ...,  0.8814, -4.5245,  0.5717],
        ...,
        [-2.4845,

11,402	Loss:0.39981	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-5.2349, -0.0354, -2.4483,  ...,  0.5990,  0.0540,  3.1859],
        [-4.4336, -5.6154,  0.2672,  ...,  2.5352, -0.1083, -1.8415],
        [-0.3386, -0.2974,  2.3269,  ...,  0.0522, -3.8888, -3.7411],
        ...,
        [-1.7948,  0.6210,  0.6667,  ..., -0.5358, -0.3615, -3.1146],
        [-3.6062, -1.5885, -0.1963,  ..., -2.0529, -2.4725, -1.5489],
        [-2.3237, -3.1636,  2.0044,  ..., -1.1075, -3.1250, -3.7246]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
11,403	Loss:0.4393	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-1.0232, -0.1823,  0.2986,  ..., -0.1791, -2.6491, -3.1673],
        [ 0.5238, -2.9344, -0.4795,  ...,  0.4695, -4.1214, -2.2865],
        [-1.4317, -1.7512,  1.1929,  ..., -0.3773, -3.8635, -2.4280],
        ...,
        [-1.2296, 

11,415	Loss:0.50486	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-1.1066, -1.0875, -0.7056,  ..., -1.7525, -3.8933,  0.2635],
        [ 2.3646,  2.0860,  1.5393,  ..., -3.4712, -0.8806, -3.2592],
        [-2.1449,  0.4388,  1.9870,  ..., -1.5080, -3.4961, -0.4067],
        ...,
        [-0.2273, -1.0142, -1.2964,  ..., -1.9198, -4.0205, -4.3489],
        [-0.3182,  1.0614,  1.1167,  ..., -0.0681,  0.5489, -3.3716],
        [-2.3055, -0.5980, -6.4554,  ..., -0.6314,  2.4442,  4.9548]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
11,416	Loss:0.61025	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-0.9858, -0.2432,  0.6283,  ..., -2.5737, -2.3987, -1.2113],
        [-3.2283, -2.2778, -0.1850,  ..., -5.0431, -1.2004,  3.8422],
        [-1.1398, -1.5616, -0.3494,  ...,  0.0293, -2.5065, -1.7356],
        ...,
        [-0.7311,

12,428	Loss:0.3759	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-1.6199,  0.0210,  1.4747,  ..., -0.3914,  1.2140, -3.1772],
        [-0.3415, -2.1272,  2.1812,  ..., -1.8505, -1.5956, -3.7246],
        [ 1.2629,  0.1519,  0.4329,  ..., -1.7849, -4.8435, -2.9980],
        ...,
        [-0.9932, -0.1857, -0.5881,  ...,  1.8871, -0.1980,  0.2959],
        [-0.9881, -1.0004,  0.1133,  ...,  0.4696, -2.3177, -2.5957],
        [ 0.5520, -0.1575,  0.9704,  ..., -2.7382, -2.1601, -4.7646]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
12,429	Loss:0.5093	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-0.8872, -0.0084, -2.3830,  ..., -1.6300, -1.6616, -0.2991],
        [-2.9694, -1.7353, -2.0547,  ..., -0.0925, -4.0311, -3.4878],
        [-0.1421,  1.1426,  0.6252,  ..., -0.1194, -1.3354, -2.8620],
        ...,
        [ 1.0819, -

12,441	Loss:0.5188	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-1.9738, -0.1238, -0.4457,  ..., -4.1769, -3.5699, -6.4952],
        [-2.1644, -0.9762, -0.1507,  ..., -0.0688, -3.7415, -0.8402],
        [ 1.3700,  0.8095,  3.6781,  ..., -2.9441, -0.6302, -0.7419],
        ...,
        [-3.6976, -0.4705, -2.7333,  ..., -0.5338, -1.7086,  0.0165],
        [-0.1530, -0.6469, -2.8173,  ..., -0.3014, -1.8817, -2.4326],
        [-5.0618, -4.5395, -5.2713,  ...,  2.4890, -2.5237,  5.1393]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
12,442	Loss:0.41758	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[ 0.1113,  0.7033,  1.4615,  ...,  0.8989,  0.0771,  0.4642],
        [-5.0803, -1.1745, -0.2049,  ...,  0.0201, -2.4020,  1.0762],
        [-2.2499, -0.3177, -2.9798,  ...,  0.7108,  0.6103,  1.2890],
        ...,
        [ 3.0902, 

12,454	Loss:0.46014	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([111, 3, 480, 640]) controlsSize: torch.Size([111, 1])
tensor([[-1.9576,  1.2364, -3.3618,  ..., -2.0172, -1.5134, -8.1444],
        [-4.7136, -0.7333, -2.8974,  ...,  1.8482,  0.7698,  3.4889],
        [ 0.7649, -1.3497,  4.3279,  ..., -4.2893, -1.6870,  0.5411],
        ...,
        [-0.9146, -5.3624, -4.2936,  ..., -3.0017, -4.6057, -3.4478],
        [ 1.6335,  0.1496,  1.6195,  ..., -3.3475, -2.1681, -6.1636],
        [-2.5885, -1.9678,  1.2068,  ..., -1.8278, -1.6431,  2.7346]],
       grad_fn=<AddmmBackward>)
torch.Size([111, 20]) torch.Size([111])
12,455	Loss:0.45628	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[ 0.9372,  0.2702, -1.9714,  ..., -0.9649, -2.1964, -6.1317],
        [-5.8945, -3.5665, -3.6636,  ...,  4.0870, -0.2263,  2.1852],
        [-1.0120, -2.3888, -5.7375,  ...,  1.1329, -1.0632, -1.8173],
        ...,
        [-0.6504,

13,467	Loss:0.42529	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[ -1.4795,  -3.6580,  -1.7788,  ...,   1.1014,  -3.0811,  -1.8711],
        [  1.5558,  -0.4799,   2.5334,  ...,  -1.8603,  -4.4370,  -2.7570],
        [ -0.6818,  -1.2030,  -0.0584,  ...,   0.3987,  -5.7624,  -6.3341],
        ...,
        [  0.5214,  -6.0837,  -1.9549,  ...,  -1.8200,  -2.8501,  -5.2732],
        [ -0.5173,  -0.4717,  -2.1922,  ...,   1.7440,   0.9598,  -2.6294],
        [  0.8665,  -2.2911,  -2.7680,  ...,  -3.9289,  -4.3137, -10.8043]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
13,468	Loss:0.55222	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[ 2.9581e+00,  3.2383e+00, -2.2078e+00,  ..., -8.8917e-01,
          6.2233e-01, -2.8793e+00],
        [ 1.1148e+00,  4.6414e-03,  3.9757e+00,  ..., -2.9202e+00,
         -3.2086e-01, -8.1280e-01]

13,480	Loss:0.39195	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-1.0312, -2.7194, -4.3654,  ..., -0.9214, -0.7520, -0.6736],
        [-2.1852, -0.5609, -2.5165,  ..., -0.0791, -2.0613,  3.3242],
        [-1.9754, -1.3449, -3.4496,  ...,  0.4274, -1.8556,  3.0235],
        ...,
        [-0.3174,  0.1369,  1.5737,  ..., -2.0931, -2.3792, -3.0405],
        [-0.5090, -1.6612, -2.5261,  ..., -0.2841, -2.9881, -1.9618],
        [-5.4907, -2.8855, -4.1383,  ...,  1.8323,  0.3193,  0.4637]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
13,481	Loss:0.44407	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-0.8070,  1.8940,  0.2289,  ...,  0.7904, -1.5579, -1.4971],
        [-1.3319,  0.9672,  2.5341,  ..., -3.7605, -3.4004, -4.3974],
        [-1.3503, -1.0583, -1.7230,  ..., -0.8820, -2.0963, -6.4944],
        ...,
        [ 1.5710,

14,493	Loss:0.34536	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-2.6990e+00, -2.2498e+00, -3.9247e+00,  ...,  8.7643e-01,
         -4.9451e+00,  1.3021e-01],
        [ 1.4000e+00, -4.6017e-01,  2.7439e+00,  ..., -1.8932e+00,
         -3.9041e+00, -2.2285e+00],
        [-3.7819e+00, -2.4819e+00, -5.3578e+00,  ...,  2.2343e+00,
          4.6406e-01,  5.4587e+00],
        ...,
        [ 5.9704e-01,  2.3802e+00, -7.3463e-01,  ..., -2.0426e+00,
         -1.3554e+00, -1.9367e+00],
        [-1.4785e+00, -2.8590e+00, -1.3990e+00,  ..., -3.0843e+00,
         -7.8792e-01, -7.5538e+00],
        [-5.9345e-04, -6.9371e-01,  2.4121e+00,  ...,  4.0981e-01,
         -1.0625e+00, -1.3198e+00]], grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
14,494	Loss:0.42974	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-4.7952, -4.9327,  0.8415,  ..., -0.2997

14,506	Loss:0.42953	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-0.5356,  1.9325, -4.0233,  ...,  0.4737, -1.1164, -0.7985],
        [ 0.0425,  0.0697, -0.4041,  ...,  0.8666, -1.3564, -3.5683],
        [ 1.6635, -3.2161, -3.6724,  ..., -0.3183, -3.9164, -3.1567],
        ...,
        [ 2.6054, -2.6580,  0.1336,  ..., -1.8487, -4.7377, -2.6523],
        [ 0.9760, -0.3832,  0.9615,  ..., -0.1299, -5.4919, -4.5125],
        [-0.1162,  2.0170,  0.8276,  ..., -1.6868,  0.6111, -1.3623]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
14,507	Loss:0.36769	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-0.7243, -2.8218, -3.4893,  ..., -2.0115, -3.4876, -6.8618],
        [-5.1477, -1.9447, -5.6256,  ...,  0.7050,  0.3032,  2.0877],
        [ 0.7388,  0.8364,  1.2516,  ..., -3.0855, -1.4062, -4.5473],
        ...,
        [-2.6899,

14,519	Loss:0.36791	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[ 0.6235, -1.8593, -0.8254,  ...,  0.0683, -2.8340, -5.2688],
        [-0.1968, -1.6409, -0.7330,  ..., -2.5004, -3.2182, -3.4377],
        [ 0.4075, -0.6122,  1.2671,  ..., -2.3879, -4.4979, -4.7197],
        ...,
        [-0.0766, -0.9475, -0.0961,  ..., -3.1032, -3.4814, -7.9357],
        [-3.1794, -3.0682, -6.0821,  ..., -1.0791, -6.3445, -1.3664],
        [-0.0513,  0.2165,  1.6035,  ..., -4.4356, -2.6920, -3.1497]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
14,520	Loss:0.35646	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-0.7770, -0.6128, -1.8512,  ..., -0.4992,  0.4835,  0.4997],
        [ 0.1358,  2.6938,  0.6439,  ..., -0.8141,  0.6957, -3.0198],
        [-1.2815,  5.5404,  2.5605,  ..., -3.8611, -1.1870, -0.3289],
        ...,
        [-2.0606,

15,532	Loss:0.40121	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[ 3.9054e-01, -1.0446e-01,  1.6388e+00,  ...,  1.3806e+00,
         -1.4553e+00, -3.3781e+00],
        [-2.8309e+00, -6.7774e-01,  3.6899e-01,  ...,  4.1640e+00,
          1.8126e+00, -2.0553e+00],
        [-2.4610e+00,  5.5431e-01,  4.2484e+00,  ..., -1.9310e+00,
         -2.3010e+00, -1.1800e+00],
        ...,
        [-9.8124e-01, -2.9206e-01,  1.3790e+00,  ..., -1.4107e-01,
         -2.3310e+00, -5.4383e+00],
        [-5.1618e+00, -3.9132e+00, -2.9185e+00,  ...,  2.2042e-03,
          7.4791e-01, -6.0685e-01],
        [-9.4570e-01, -2.5414e+00, -1.5878e+00,  ..., -1.0352e+00,
         -6.3765e+00, -2.8771e+00]], grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
15,533	Loss:0.32414	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-2.0425, -0.6798, -1.1309,  ...,  2.1047

15,545	Loss:0.41688	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-2.4451, -0.7442,  0.6835,  ..., -1.4471,  1.4517, -2.8582],
        [ 3.0667,  1.9978,  5.0860,  ..., -1.8123, -1.4663, -5.2029],
        [ 0.7297, -1.0534,  1.6750,  ..., -0.0465, -4.7639, -5.4233],
        ...,
        [-5.4279, -4.7346, -3.6248,  ..., -1.2337, -0.2510, -2.2764],
        [-1.0863, -0.2760,  0.6761,  ..., -1.2890, -3.1612, -9.9066],
        [-0.3821, -4.0130, -1.0496,  ..., -3.2731, -5.3168, -4.0599]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
15,546	Loss:0.30282	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-1.4780,  0.2244,  1.0742,  ..., -2.4528, -2.9774, -6.9323],
        [-0.4400,  0.1489, -4.7636,  ...,  0.9389, -1.3074, -0.8976],
        [-0.5391,  1.3872,  1.6830,  ..., -1.6851, -2.8770, -3.0316],
        ...,
        [ 0.4836,

15,558	Loss:0.30605	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-0.9362, -2.2369, -1.3961,  ...,  0.2368,  0.8178,  2.5121],
        [-0.0111, -1.2485,  0.6906,  ..., -4.0031, -4.8892, -3.7182],
        [ 1.1047, -0.7442,  0.5935,  ..., -5.0172, -5.4080, -5.0456],
        ...,
        [-1.7754, -2.4481, -3.4883,  ..., -1.9011, -3.4369, -1.6830],
        [-4.7346, -7.4597, -7.3343,  ...,  1.4681, -2.3403,  7.6088],
        [-0.2527, -1.0725,  3.1671,  ..., -1.5890,  0.7982, -3.6820]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
15,559	Loss:0.37737	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([111, 3, 480, 640]) controlsSize: torch.Size([111, 1])
tensor([[-1.9965, -0.6132, -3.2814,  ...,  0.2541, -3.3440,  3.6762],
        [-2.6805, -0.6191, -1.9919,  ..., -2.9560, -3.5487, -2.5207],
        [-6.3704, -4.9855, -5.9948,  ...,  2.9467, -2.6126,  6.5513],
        ...,
        [-1.3904,

16,571	Loss:0.34449	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-1.2498,  0.0965,  2.5925,  ..., -0.2159, -1.1206, -1.8342],
        [-2.9402, -1.8422, -3.7246,  ..., -0.3282, -1.4746,  6.0276],
        [-2.7744, -3.2279, -6.1143,  ..., -1.3225, -6.5437, -4.3482],
        ...,
        [-0.9488,  2.6224,  1.0605,  ..., -3.6225, -2.3171, -4.0469],
        [ 0.9590, -0.4065,  4.1771,  ..., -4.7939, -1.7396, -1.2580],
        [ 0.3726,  1.3886,  1.0935,  ..., -2.6976, -3.0639, -3.8852]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
16,572	Loss:0.33498	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[ 0.2145, -1.9309, -1.1902,  ..., -1.3293, -4.1402, -4.8027],
        [-2.3554, -1.1155, -0.0271,  ..., -2.1587, -4.1163, -2.3671],
        [-4.3345, -0.8803, -7.8451,  ..., -0.9241, -0.6072, -4.4186],
        ...,
        [ 0.1866,

16,583	Loss:0.4144	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-2.4437,  0.5299, -0.2005,  ..., -0.3960,  0.5556, -2.6117],
        [-2.1315, -1.3368, -1.2899,  ..., -3.1274,  0.8032,  1.9273],
        [ 0.0214, -0.4758,  0.9779,  ..., -0.4517, -2.9000, -5.8412],
        ...,
        [-3.6274, -3.1900,  0.9936,  ..., -1.4896, -4.5843, -0.9105],
        [ 3.6892, -0.9997, -0.5882,  ..., -1.7460, -0.8650, -4.4353],
        [-0.9349,  0.4795,  1.3133,  ..., -1.4229, -0.1287,  3.0345]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
16,584	Loss:0.37286	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[ -3.9045,  -4.7998,  -5.9338,  ...,  -0.0766,  -0.8157,   1.2255],
        [ -0.5806,  -3.3629,  -6.1958,  ...,  -4.1731,  -2.9721, -11.0397],
        [ -0.2897,   1.7250,   2.2908,  ...,  -2.4958,  -1.9380,  -4.0258],
        ...,


17,596	Loss:0.27588	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[ 2.5881,  2.3393, -2.3767,  ...,  2.8876,  4.7251, -2.9969],
        [ 0.7876,  1.6453,  1.9285,  ..., -4.0307, -3.2119, -6.2155],
        [ 1.9643,  0.8645,  2.2224,  ..., -3.0357, -3.0563, -5.4545],
        ...,
        [ 1.9687,  0.3582, -1.6943,  ..., -0.4375, -3.6831, -3.2186],
        [ 0.0439,  0.2922, -5.1269,  ..., -1.6344, -1.7641, -9.8982],
        [-6.2088, -5.1290, -7.4469,  ..., -0.6864, -0.0622,  8.7187]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
17,597	Loss:0.33454	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[ 2.0515,  1.2019,  0.4793,  ..., -1.3357, -2.5781, -4.1468],
        [-3.0527,  0.0395,  3.2692,  ..., -1.4264,  0.0577, -2.6287],
        [ 0.4287,  1.0919,  2.0938,  ..., -3.1711, -0.9477, -4.3862],
        ...,
        [ 1.1657,

17,609	Loss:0.29731	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-1.7058e+00, -2.5476e+00, -6.9382e-03,  ..., -3.2297e+00,
         -7.0486e+00, -4.9599e+00],
        [-4.9933e+00, -2.5334e+00, -4.4632e+00,  ...,  2.5174e-01,
          1.9998e-01,  6.8302e-01],
        [-1.9752e-01, -2.5358e+00, -1.3488e-01,  ...,  1.8611e-01,
         -2.4453e+00, -6.3776e+00],
        ...,
        [-1.5411e+00, -4.2498e+00, -1.0871e+00,  ..., -4.6320e+00,
         -2.5677e+00, -4.7976e+00],
        [-5.0954e-01, -4.8545e-01,  2.1676e+00,  ..., -1.5955e+00,
         -6.1302e+00, -1.6290e+00],
        [ 1.6222e-01,  1.4409e+00,  3.7770e+00,  ..., -2.8505e+00,
          1.0879e+00, -3.1729e+00]], grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
17,610	Loss:0.34358	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-3.1331, -2.9702,  0.3202,  ..., -0.1482

17,622	Loss:0.33051	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-4.4939, -6.8729, -2.5687,  ...,  1.1326, -3.5654,  2.5763],
        [-0.5183,  0.4094, -4.8045,  ...,  0.7032, -1.8269, -0.6785],
        [ 1.0355,  0.5966,  2.5106,  ..., -3.1043, -3.0967, -3.7502],
        ...,
        [-2.0427, -0.7118,  1.8244,  ..., -4.8174, -9.4054, -4.0036],
        [ 0.2060, -1.9264, -1.5758,  ...,  1.3813,  0.5398, -1.0646],
        [-2.0473, -2.7206, -1.2519,  ...,  1.0302, -2.5025,  0.0377]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
17,623	Loss:0.3094	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-0.7891,  0.5946,  0.6306,  ..., -3.2631, -1.2032, -0.7100],
        [-1.1939, -1.3553, -2.8373,  ..., -1.9027, -4.2067,  0.6117],
        [-2.0307, -2.4393, -1.0328,  ..., -1.6333, -5.7791, -0.4595],
        ...,
        [-0.7088, 

18,635	Loss:0.35116	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-6.2992, -9.6268, -7.5637,  ...,  3.5573, -0.8104,  5.3496],
        [ 1.6177, -1.2386, -0.7875,  ...,  1.0991, -2.4018, -5.7378],
        [ 0.8790, -0.8352, -2.8263,  ..., -0.8978, -4.8552, -9.6125],
        ...,
        [ 0.9561,  0.3139,  0.6601,  ...,  0.4547, -5.7798, -3.7277],
        [ 0.3077, -0.4049, -0.6856,  ..., -3.9559, -4.7355, -5.6422],
        [-0.3809, -1.9166,  2.0191,  ..., -3.8930,  0.3514, -0.9730]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
18,636	Loss:0.2714	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-5.3451, -4.8278, -4.5815,  ...,  4.2769, -2.7962, -3.5333],
        [ 2.8848,  5.1051,  4.0413,  ..., -2.0439, -2.1917, -2.3333],
        [-1.5472,  3.1258,  2.1268,  ..., -0.8519, -0.4245, -7.2487],
        ...,
        [-1.7414, 

18,648	Loss:0.36134	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-0.6497, -1.3378, -2.3631,  ..., -3.6132, -2.2047, -3.5468],
        [-3.2487, -3.0107,  2.5880,  ..., -2.7004, -9.5727, -8.3058],
        [-0.7338, -1.2294, -0.5398,  ..., -3.4525, -3.2937, -3.6074],
        ...,
        [ 1.2763,  0.0101,  2.2327,  ..., -4.1808, -3.4786, -3.8585],
        [-3.1201,  2.5120, -2.4838,  ...,  1.5966, -1.5477, -0.9461],
        [ 0.9904, -0.0572,  1.0679,  ..., -1.5857,  0.8900, -1.8187]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
18,649	Loss:0.32116	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-2.9555, -3.2901, -2.1236,  ..., -2.0269, -4.7123,  4.8672],
        [-2.2917, -0.5752, -1.9269,  ..., -1.4789, -2.6748,  0.1901],
        [-3.1591, -5.0694,  1.9106,  ..., -1.8068, -5.7675, -4.5726],
        ...,
        [-1.5612,

18,661	Loss:0.34532	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-4.4956e-01,  2.1461e+00,  1.7068e-01,  ..., -4.1206e+00,
         -2.6192e+00, -3.2118e+00],
        [-4.5844e+00, -1.5089e+00, -7.2601e+00,  ..., -2.6121e+00,
          3.8332e-01, -6.9925e+00],
        [-1.7189e+00, -1.2523e+00,  3.7261e+00,  ..., -3.0317e+00,
         -3.1675e+00, -1.0104e+00],
        ...,
        [-5.9870e+00, -5.3332e-01, -1.6921e+00,  ..., -3.1945e+00,
         -7.8122e-01, -4.0293e+00],
        [-8.8792e-01,  1.1354e+00, -6.6307e-03,  ..., -1.6445e+00,
         -2.5264e+00, -1.8790e+00],
        [ 2.2320e+00, -2.5327e-01, -1.7172e+00,  ..., -3.3550e+00,
         -3.0466e+00, -8.3431e+00]], grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
18,662	Loss:0.39129	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-1.9176e+00, -1.0732e+00, -3.0251e-01,  

19,673	Loss:0.26991	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-0.7194, -0.7549, -4.6317,  ..., -2.2830, -3.0852, -7.9312],
        [-0.8009, -1.2606, -3.3113,  ..., -0.8754, -3.6664,  1.1973],
        [ 1.9562,  1.0331,  0.1729,  ..., -4.9503, -1.6940, -4.8457],
        ...,
        [-2.5121, -1.9432, -0.2576,  ..., -7.2193, -1.6846,  5.2744],
        [ 0.7320, -2.8773,  2.0111,  ..., -2.3078, -5.8000, -4.2155],
        [ 0.9468, -1.9464, -2.2708,  ..., -1.1911, -3.1027, -6.3601]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
19,674	Loss:0.26538	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[  2.7020,   0.5003,   0.4730,  ...,  -2.6571,  -3.0295,   0.6667],
        [ -2.4264,  -1.3950,  -1.7655,  ...,  -2.7343,  -4.3123,  -3.4958],
        [  2.4852,   2.0332,   3.2631,  ...,   0.0398,  -2.5483,  -3.9367],
        ...,

19,686	Loss:0.23672	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-1.2725e+00, -4.0380e+00, -5.6181e+00,  ..., -2.1248e+00,
         -3.8289e-01, -5.0754e+00],
        [-1.4293e+00, -2.2986e+00, -3.0726e-03,  ..., -6.1374e+00,
         -4.8717e+00, -4.8633e+00],
        [-2.8837e+00, -3.2360e+00, -7.5021e+00,  ..., -9.2498e-01,
         -4.0791e+00, -5.2251e+00],
        ...,
        [ 1.0840e+00, -5.1102e+00, -4.2219e+00,  ..., -3.5787e+00,
         -2.7012e+00, -1.0227e+01],
        [ 7.2057e-01, -2.2322e+00, -8.4011e+00,  ..., -1.6808e+00,
         -1.7978e+00, -9.1299e+00],
        [-6.7207e+00, -5.1284e+00, -5.0991e-01,  ...,  2.4143e+00,
         -2.3800e+00, -4.3779e+00]], grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
19,687	Loss:0.26396	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[ 1.1973, -1.5882, -0.5171,  ...,  0.0463

tensor([[-1.6661,  0.6085, -0.0217,  ..., -2.6507, -7.2687, -5.7237],
        [ 0.2811, -0.7959,  0.3933,  ..., -4.3469, -3.9907, -4.1367],
        [-3.1151,  0.6233,  0.1404,  ..., -0.7705, -3.4585, -3.6320],
        ...,
        [-0.0576, -0.2090,  0.1438,  ...,  0.3763, -2.7610, -2.3066],
        [ 1.5434,  1.7281,  2.4904,  ..., -2.1477, -2.8201, -5.2663],
        [-4.3465, -8.1433, -2.1304,  ...,  1.4627, -3.8716,  1.8665]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
19,699	Loss:0.36063	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([111, 3, 480, 640]) controlsSize: torch.Size([111, 1])
tensor([[ 0.8448, -1.6717, -2.2214,  ..., -2.1580, -3.0650, -5.8367],
        [-3.3369, -2.6916, -1.5762,  ..., -1.7915, -9.2121, -4.4967],
        [-0.8379,  0.8907,  1.4785,  ...,  0.3899, -2.6566, -6.8461],
        ...,
        [-3.7081,  1.3010, -3.9686,  ...,  4.2742,  0.7702, -2.2148],
        [ 2.2788,  0.8927,  1.8003,  ..., -2.5097, -3.6665, -8.7014],
    

20,711	Loss:0.39138	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-2.4208,  0.0678, -1.2618,  ..., -0.1487, -3.2421, -5.1227],
        [-3.8227, -1.0548, -5.1832,  ...,  2.4717,  3.6301,  2.9077],
        [-0.2292,  0.9686,  0.8855,  ..., -0.3737, -2.5894, -6.7959],
        ...,
        [-2.7067,  2.3189, -0.4210,  ...,  0.4294, -0.1917, -3.7840],
        [-1.5398,  0.3305, -1.7180,  ..., -3.1443, -3.7831, -0.9571],
        [ 4.4713,  3.1566,  2.0575,  ...,  0.7639, -1.4374, -3.6597]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
20,712	Loss:0.34472	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-2.5880, -1.5276, -3.7346,  ..., -0.9705, -3.0536,  1.0441],
        [ 0.1662,  2.0752,  2.0766,  ..., -2.9261, -2.6208, -7.2803],
        [-1.4239, -3.0945, -2.8619,  ..., -0.8157, -1.6787, -1.6037],
        ...,
        [-1.1925,

20,724	Loss:0.24107	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[ 0.2898, -2.3941, -1.8871,  ..., -1.8900, -5.9694, -7.0946],
        [-0.0324,  1.0357,  2.6217,  ...,  0.3889, -2.2133, -2.3062],
        [ 2.0012, -0.1816,  5.5603,  ..., -5.3366, -0.8189,  0.1053],
        ...,
        [ 0.4073, -0.8435,  3.1238,  ..., -5.2784, -4.6865, -6.8436],
        [ 2.9357,  0.6373, -1.8780,  ..., -8.6915, -3.3123, -8.6142],
        [ 0.4070,  5.2614,  4.1939,  ..., -1.9962, -2.4190, -8.1621]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
20,725	Loss:0.32934	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[ -0.2568,  -1.5607,  -3.5974,  ...,  -1.0543,   0.5679,   0.1798],
        [ -2.7398,   0.6422,   1.8602,  ...,  -2.8949,  -2.2665,  -1.4281],
        [ -0.2038,   1.0210,   1.9742,  ...,  -2.3399,  -1.5984,   3.1002],
        ...,

21,737	Loss:0.26611	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-5.4439, -0.9657,  1.5176,  ..., -0.5982, -5.1008, -4.4559],
        [ 0.8696, -4.5007, -0.6649,  ..., -2.9072, -5.4899, -4.1252],
        [-1.5503, -2.2917, -2.2112,  ..., -4.3902,  2.8990, -3.0155],
        ...,
        [ 0.0332,  0.7889,  0.9835,  ...,  1.8812, -6.1897, -5.0132],
        [-2.5880,  2.0414,  7.7917,  ..., -6.6852, -4.6716, -6.0252],
        [-0.0449, -1.1007,  1.9562,  ...,  0.6449,  1.9251, -2.5971]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
21,738	Loss:0.2755	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[ -2.9571,  -0.4309,   2.5036,  ...,  -5.6630,  -3.2284, -11.5176],
        [ -3.2097,  -2.2657,  -5.9672,  ...,  -0.2385,  -6.4365,  -1.6259],
        [ -2.5205,  -1.5630,  -4.7367,  ...,   1.8038,  -0.5576,  -5.4906],
        ...,


21,750	Loss:0.23522	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-0.7226,  0.8107, -0.2562,  ..., -1.4162, -1.7470, -5.6023],
        [-1.9556, -0.4215, -1.3216,  ..., -0.9059, -4.7500, -7.9948],
        [ 0.5321,  4.2571,  1.4228,  ..., -1.4352, -0.6312, -3.7307],
        ...,
        [-4.2876, -4.7219, -9.1571,  ..., -0.2243, -4.5622,  2.5057],
        [ 2.3653,  3.7000,  4.0807,  ..., -4.7896, -0.5765, -3.9567],
        [ 1.0858,  2.8852,  2.0962,  ..., -2.2945, -2.1272, -4.5124]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
21,751	Loss:0.29694	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[  1.0025,   3.2870,  -8.1041,  ...,   1.9889,   0.0897,  -7.5076],
        [  0.2266,  -1.0278,  -0.1134,  ...,  -3.9532,  -3.0125,  -7.7419],
        [ -1.7760,  -1.4196,  -4.0949,  ...,  -1.0159,  -2.8921,   0.8886],
        ...,

21,763	Loss:0.25268	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[ 0.9257,  0.1474, -1.2038,  ..., -3.1644, -3.6363, -6.5397],
        [-3.7647, -1.6478, -4.3200,  ..., -0.4580, -3.1525, -4.9456],
        [-0.7658, -1.5462, -3.0936,  ..., -2.0556, -6.6724, -9.4119],
        ...,
        [-4.6893, -3.0846, -0.8509,  ..., -3.3008, -4.6058, -9.2490],
        [-9.2318, -3.0233, -2.7571,  ..., -0.6533, -5.5549, -0.7197],
        [ 4.3162,  3.6206,  3.1259,  ..., -2.0636, -2.1814, -6.3125]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
21,764	Loss:0.30122	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[ -6.1117,  -4.7031,   0.7158,  ...,  -2.8660,  -1.2225,   4.6011],
        [ -1.3045,  -0.4842,   0.5272,  ...,  -1.0702,  -4.2951,  -5.6869],
        [  2.4028,   0.2228,  -2.7787,  ...,  -2.4036,  -6.9326,  -3.9149],
        ...,

22,776	Loss:0.31881	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[ -3.7506,  -2.1749,  -6.7766,  ...,  -3.1141,  -2.1457,   2.5832],
        [ -4.5201,  -1.7299,  -0.8659,  ...,  -0.9921,  -4.6369,  -3.8930],
        [ -4.9676,   0.5330,  -1.4717,  ...,  -2.2183,  -1.0466,  -5.7132],
        ...,
        [ -2.8509,  -0.3109,  -4.2296,  ...,  -2.9317,  -4.1899, -10.7836],
        [  1.8541,  -0.5622,  -1.5486,  ...,  -2.5837,  -6.2655,  -4.1850],
        [ -0.4749,  -0.8582,   0.5992,  ...,  -1.6215,  -2.3204,  -7.9510]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
22,777	Loss:0.25227	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[ 0.6208,  2.1573,  2.0987,  ..., -1.7146, -0.1271, -5.9471],
        [-1.4464,  1.1734, -2.0335,  ..., -3.8101, -1.0797, -2.5515],
        [ 0.4114,  2.3939,  2.2813,  ..., -4.9048, -4.3154, -6.3

22,789	Loss:0.24842	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-3.4570,  1.0536, -0.6945,  ..., -3.4583, -3.2271,  0.4251],
        [ 0.2187,  2.9659,  1.4949,  ..., -5.8872, -4.0511, -4.9718],
        [-2.6809, -1.1916, -0.4913,  ..., -4.9960, -5.1116, -9.9956],
        ...,
        [-2.9163, -4.0375, -3.4693,  ..., -2.1584, -0.6811,  8.5459],
        [-2.4234, -0.4119, -0.5154,  ...,  1.2727, -4.6617, -3.4959],
        [-0.6432, -0.6713,  0.1956,  ..., -1.2522, -5.7561, -7.3731]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
22,790	Loss:0.29565	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-1.5067, -1.0553, -0.7091,  ..., -2.0701, -5.6997, -1.5088],
        [ 1.3965, -0.1470, -0.0698,  ..., -3.6013, -6.8061, -4.0397],
        [ 1.4306,  1.8704,  0.9849,  ..., -4.6031, -3.7887, -7.0701],
        ...,
        [-6.4784,

22,802	Loss:0.24399	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-0.9061, -1.2374, -0.6272,  ..., -3.3251, -5.8523, -2.1131],
        [-2.8440, -3.3014,  0.0235,  ..., -3.7942, -4.8846, -3.3366],
        [ 1.2000,  2.0168, -5.6880,  ..., -2.6126, -2.3479, -0.1197],
        ...,
        [ 0.2190, -0.3148, -1.3241,  ..., -0.2637, -4.3510, -5.4470],
        [-2.4198,  0.0535, -4.3622,  ..., -0.8204, -5.4377, -6.2526],
        [-0.1455,  2.1336,  0.7095,  ..., -3.1020, -7.6089, -0.6979]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
22,803	Loss:0.17165	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-6.7578, -2.1247, -6.7377,  ...,  2.8380,  0.7692,  2.8993],
        [-0.6716,  0.9249, -2.9146,  ..., -2.4635, -2.4452, -2.6599],
        [-0.0149,  0.7241,  0.2574,  ..., -3.3130, -1.6092, -4.3924],
        ...,
        [ 0.0842,

23,815	Loss:0.23085	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[ 1.0959e+00, -2.9047e-01,  9.3089e-02,  ..., -5.7194e+00,
         -7.4130e+00, -1.4443e+01],
        [-9.2942e+00, -2.6010e+00, -1.6787e+00,  ..., -1.4239e-02,
          2.0939e+00, -1.9798e-01],
        [-5.7690e-01, -4.0301e+00, -4.1677e-01,  ..., -4.2468e+00,
         -1.5390e+00,  1.7086e-01],
        ...,
        [-1.7541e+00,  1.6977e+00, -1.1676e+00,  ..., -3.3363e+00,
         -1.8094e+00, -2.4710e+00],
        [-1.4308e-01, -3.0935e-01, -1.3096e+00,  ..., -1.3062e+00,
         -2.4334e+00, -4.4601e+00],
        [-2.4202e+00, -6.0140e-01, -1.2904e+00,  ..., -3.5208e+00,
         -6.7978e+00, -2.3525e+00]], grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
23,816	Loss:0.22449	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[-1.0049,  1.8205,  4.2677,  ..., -8.6704

tensor([[-0.2203,  0.5973, -3.3846,  ..., -5.9831, -1.7167, -8.7610],
        [-0.2013, -1.7592,  0.2082,  ..., -0.4543, -4.1980, -3.5832],
        [-2.7191,  1.9293,  2.7738,  ..., -6.1411, -4.3295, -7.3851],
        ...,
        [ 1.5927, -1.1933,  0.5215,  ..., -6.3941, -3.2761, -3.1855],
        [-4.3231,  2.2425,  1.4830,  ..., -3.4888, -2.3000, -6.7106],
        [ 0.7013,  0.2893,  3.5985,  ..., -5.0844, -4.9242, -5.3300]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
23,828	Loss:0.24991	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[  2.9111,  -0.4911,  -2.9880,  ...,  -1.6185,  -5.8577,  -2.8936],
        [  0.6094,   2.6120,   2.0635,  ...,  -5.1792,  -5.8573, -10.1774],
        [ -0.3256,  -3.1672,   1.2981,  ...,  -1.4120,  -0.0269,   5.1511],
        ...,
        [ -4.2247,  -0.2490,  -1.5519,  ...,  -2.8247,  -4.2033, -11.8519],
        [ -3.2631,  -2.7235,   0.4042,  ...,  -1.9

23,840	Loss:0.23602	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[ -7.6410,   0.9081,   2.3015,  ...,  -2.9058,  -2.7804,   3.2553],
        [ -1.2043,  -2.7730,  -0.6438,  ...,  -2.5928,  -2.6708,  -2.7711],
        [ -3.3646,  -5.0735,  -3.0621,  ...,   2.0738,  -3.3621,  -1.2445],
        ...,
        [ -0.6886,   2.6577,   4.8764,  ...,  -1.6591,  -1.7456,  -3.0218],
        [  2.4930,  -2.4620,  -2.4147,  ...,  -3.2914,  -4.7507, -12.1533],
        [ -2.2250,   1.3429,   4.9236,  ...,  -1.5387,  -0.6908,  -8.0701]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
24,841	Loss:0.18659	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[ -0.1414,  -0.0161,  -2.0455,  ...,  -3.2574,  -3.5542,  -4.1972],
        [ -3.5733,  -2.2977,  -1.1891,  ...,   6.9901,   3.1256,  -2.4200],
        [ -1.4170,  -0.5229,  -7.0942,  ...,  -2.049

tensor([[ -3.5059,  -1.0561,   5.0466,  ...,  -3.8389,  -3.1166,  -2.0483],
        [ -0.9425,   1.7624,  -0.1243,  ...,  -3.7029,  -1.6296,  -2.2252],
        [ -0.5934,  -2.2027,   0.2227,  ...,  -2.9921,  -2.3343,  -4.5112],
        ...,
        [ -2.7094,  -1.0315,  -4.7181,  ...,  -3.3118,  -4.7219,  -7.2202],
        [ -1.8619,   2.0888,   4.5597,  ...,  -2.0677,  -1.2041,  -8.8758],
        [  1.5080,   2.3397,   0.8159,  ...,  -2.6810,  -4.5666, -10.4161]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
24,853	Loss:0.20592	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[  0.6331,  -1.8440,   1.0438,  ...,  -1.5349,  -3.4592,  -6.9997],
        [ -4.3179,  -1.5082,  -1.4036,  ...,  -5.6927,  -3.6841, -12.8835],
        [  1.8892,   0.2538,  -1.3224,  ...,  -8.1059,  -1.7854,  -5.7991],
        ...,
        [  3.6973,  -4.0230,  -2.9232,  ...,  -0.9497,  -3.5747,  -6.7058],
        [  1.1

24,865	Loss:0.20609	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[  3.1399,  -0.6419,   4.1570,  ...,  -5.1718,  -3.5008, -12.3602],
        [  0.8838,  -0.2689,  -3.1901,  ...,  -4.5282,  -4.9018,  -9.3290],
        [ -0.5990,   1.2585,   3.2127,  ...,  -2.9913,  -4.5203,  -8.3821],
        ...,
        [ -2.2188,   0.5083,   1.8662,  ...,  -6.1295,  -5.4267,  -5.8705],
        [ -0.1309,   0.7132,  -5.6675,  ...,   1.5287,  -4.3620,  -9.0279],
        [ -1.6144,   0.3544,   0.7053,  ...,  -1.7590,  -0.1771,  -1.0544]],
       grad_fn=<AddmmBackward>)
torch.Size([256, 20]) torch.Size([256])
24,866	Loss:0.27595	Allocated:naGB	Cached:naGB

imagesSize:  torch.Size([256, 3, 480, 640]) controlsSize: torch.Size([256, 1])
tensor([[ 1.9828,  0.9582, -1.0831,  ..., -5.9410, -1.3684, -6.1842],
        [-4.6123, -0.3180,  2.1778,  ..., -0.3070, -5.5771, -4.1759],
        [ 0.4784, -1.5815,  4.1823,  ..., -6.1979, -5.0542, -4.8

In [49]:
# Validation Test
iter_no = 0
total = 0
correct = 0
for i_batch, sampled_batch in enumerate(validLoader):
    iter_no += 1
    images = sampled_batch['image'].to(device).float()
    controls = sampled_batch['control'].to(device).long()
    controls = torch.flatten(controls)
    optimizer.zero_grad()
    prediction = net(images)
    
    maximum = torch.argmax(prediction,axis = 1)
    print(maximum)
    print(controls)
    shared = maximum == controls
    shared = 1 * shared
    correct += int(torch.sum(shared))
    print(correct)
    total += len(controls)
    print(correct/total)

tensor([[ -1.3430,  -0.7843,  -0.9322,  ...,  -2.3487,  -4.8785,  -5.7487],
        [ -7.0056,  -3.8967,  -3.1968,  ...,   4.9140,  -0.0906,  -0.5639],
        [ -2.5804,  -1.4535,  -2.2521,  ...,   0.2093,  -2.2434,   1.6080],
        ...,
        [ -5.5809,   3.9301,  -6.5104,  ...,  -2.5037,  -4.3072, -11.3393],
        [  0.5970,  -0.8343,   0.1480,  ...,  -3.8259,  -3.8487,  -2.7238],
        [ -4.4897,  -0.7815,  -3.4670,  ...,  -3.8485,  -5.1744,  -5.1709]],
       grad_fn=<AddmmBackward>)
tensor([ 8, 11, 14,  8,  9, 10,  8,  9,  9,  9,  9,  8,  9,  9,  9,  9, 10,  7,
         9, 10, 14,  9,  8, 11,  8,  9,  9,  9,  9,  9,  9, 12,  9,  7,  8,  9,
        10, 18,  9,  8,  8,  9, 10,  8,  9,  9,  9,  9,  8,  9,  9,  8, 19,  9,
         9,  9,  8,  2,  9,  9,  6,  9,  8, 10,  9,  9,  9,  9,  8,  9,  8,  9,
         9, 19,  9,  9,  9,  9, 18, 18, 10,  4,  9,  9,  9,  9,  9,  6, 10,  9,
         9,  9,  9,  9, 11, 19, 12,  9,  8,  9,  9,  9,  6,  7, 10,  8,  6,  9,
         9,  7,  9

KeyboardInterrupt: 

In [46]:
print(total)
print(correct)
print("Accuracy: {}".format(correct/total))

2203
1003
Accuracy: 0.45528824330458467


In [None]:
torch.cuda.empty_cache()

In [None]:
#Additional Info when using cuda
if device.type == 'cuda':
    print(torch.cuda.get_device_name(0))
    print('Memory Usage:')
    print('Allocated:', round(torch.cuda.memory_allocated(0)/1024**3,1), 'GB')
    print('Cached:   ', round(torch.cuda.memory_cached(0)/1024))