In [1]:
import torch
from torch.utils.data import TensorDataset, DataLoader
from torch import nn
from torch.nn import Linear, Conv2d, MaxPool2d, ReLU
from torchvision import transforms, datasets
import numpy as np

In [2]:
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")

Using cpu device


In [3]:
class LeBal_ConvNet(nn.Module):
	def __init__(self, num_channels, num_features, num_outputs):
		super(LeBal_ConvNet, self).__init__()

		self.conv1 = Conv2d(in_channels=num_channels, out_channels=20, kernel_size=(5, 5))
		self.relu1 = ReLU()
		self.maxpool1 = MaxPool2d(kernel_size=(2, 2), stride=(2, 2))

		self.conv2 = Conv2d(in_channels=20, out_channels=50, kernel_size=(5, 5))
		self.relu2 = ReLU()
		self.maxpool2 = MaxPool2d(kernel_size=(2, 2), stride=(2, 2))

		self.fc1 = Linear(in_features=400, out_features=500)
		self.relu3 = ReLU()

		self.fc2 = Linear(in_features=500, out_features=25)

		self.sent_fc1 = Linear(in_features=num_features, out_features=100)
		self.sent_relu1 = ReLU()
		self.sent_fc2 = Linear(in_features=100, out_features=50)
		self.sent_relu2 = ReLU()
		self.sent_fc3 = Linear(in_features=50, out_features=25)
		self.sent_relu3 = ReLU()

		self.output1 = Linear(in_features=50, out_features=25)
		self.output_relu1 = ReLU()
		self.output2 = Linear(in_features=25, out_features=num_outputs)

	def forward(self, img, sent):
		x = self.conv1(img)
		x = self.relu1(x)
		x = self.maxpool1(x)

		x = self.conv2(x)
		x = self.relu2(x)
		x = self.maxpool2(x)
		x = torch.flatten(x, 1)
		x = self.fc1(x)
		x = self.relu3(x)

		x = self.fc2(x)

		xs = self.sent_fc1(sent)
		xs = self.sent_relu1(xs)
		xs = self.sent_fc2(xs)
		xs = self.sent_relu2(xs)
		xs = self.sent_fc3(xs)
		xs = self.sent_relu3(xs)

		joined = torch.cat((x, xs), dim=1)

		xo = self.output1(joined)
		xo = self.output_relu1(xo)
		return self.output2(xo)



In [6]:
data = np.load("dataset.npz")
x_img = data['x_img'] / 255.
x_sentiment = data['x_sent']
y = data['y']
print(x_img.shape, x_sentiment.shape, y.shape)
print(x_img[0])
print(x_sentiment[0])
print(y[0])

(4551, 23, 31, 3) (4551, 3) (4551, 7)
[[[0.34117647 0.09803922 0.43529412]
  [0.37254902 0.10588235 0.45882353]
  [0.43921569 0.1254902  0.4745098 ]
  ...
  [0.43529412 0.12156863 0.48235294]
  [0.38823529 0.10980392 0.47058824]
  [0.24313725 0.07058824 0.38431373]]

 [[0.39215686 0.11372549 0.43921569]
  [0.41960784 0.11764706 0.48235294]
  [0.4745098  0.1372549  0.48235294]
  ...
  [0.48627451 0.1372549  0.49019608]
  [0.4        0.10980392 0.48627451]
  [0.31372549 0.09019608 0.42745098]]

 [[0.38039216 0.10588235 0.44313725]
  [0.43921569 0.12156863 0.49411765]
  [0.50980392 0.14901961 0.48235294]
  ...
  [0.47843137 0.13333333 0.49411765]
  [0.37647059 0.10196078 0.4627451 ]
  [0.31372549 0.09019608 0.41960784]]

 ...

 [[0.73333333 0.25882353 0.41568627]
  [0.77254902 0.27843137 0.44705882]
  [0.7254902  0.23921569 0.45490196]
  ...
  [0.85098039 0.31764706 0.41568627]
  [0.75294118 0.25490196 0.44705882]
  [0.56470588 0.17647059 0.47843137]]

 [[0.81568627 0.36470588 0.39215686]

In [7]:
x_img = np.moveaxis(x_img, -1, 1)
x_img.shape

(4551, 3, 23, 31)

In [8]:
def unison_shuffled_copies(a, b, c):
    assert len(a) == len(b) == len(c)
    p = np.random.permutation(len(a))
    return a[p], b[p], c[p]

sh_x_img, sh_x_sent, sh_y = unison_shuffled_copies(x_img, x_sentiment, y)
tt_split = 0.9

x_img_train, x_sent_train, y_train = sh_x_img[:int(sh_x_img.shape[0] * tt_split)], sh_x_sent[:int(sh_x_sent.shape[0] * tt_split)], sh_y[:int(sh_y.shape[0] * tt_split)]
x_img_test, x_sent_test, y_test = sh_x_img[int(sh_x_img.shape[0] * tt_split):], sh_x_sent[int(sh_x_sent.shape[0] * tt_split):], sh_y[int(sh_y.shape[0] * tt_split):]

In [9]:
tx_img_train = torch.Tensor(x_img_train)
tx_sentiment_train = torch.Tensor(x_sent_train)
ty_train = torch.Tensor(y_train)

tx_img_test = torch.Tensor(x_img_test)
tx_sentiment_test = torch.Tensor(x_sent_test)
ty_test = torch.Tensor(y_test)

In [10]:
print(tx_img_train.shape, tx_sentiment_train.shape, ty_train.shape)
print(tx_img_test.shape, tx_sentiment_test.shape, ty_test.shape)

torch.Size([4095, 3, 23, 31]) torch.Size([4095, 3]) torch.Size([4095, 7])
torch.Size([456, 3, 23, 31]) torch.Size([456, 3]) torch.Size([456, 7])


In [11]:
train_dataset = TensorDataset(tx_img_train, tx_sentiment_train, ty_train)
train_dataloader = DataLoader(train_dataset)

test_dataset = TensorDataset(tx_img_test, tx_sentiment_test, ty_test)
test_dataloader = DataLoader(test_dataset)

In [4]:
model = LeBal_ConvNet(3, 3, 7).to(device)


In [11]:
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters())

In [12]:
def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    for batch, (image, sentiment, y) in enumerate(dataloader):
        # Compute prediction and loss
        pred = model(image, sentiment)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            loss, current = loss.item(), (batch + 1) * len(image)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for image, sentiment, y in dataloader:
            pred = model(image, sentiment)
            test_loss += loss_fn(pred, y).item()

    test_loss /= num_batches
    correct /= size

In [13]:
epochs = 10
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, model, loss_fn, optimizer)
    test_loop(test_dataloader, model, loss_fn)

Epoch 1
-------------------------------
loss: 0.300130  [    1/ 4095]
loss: 0.028181  [  101/ 4095]
loss: 0.030573  [  201/ 4095]
loss: 0.139067  [  301/ 4095]
loss: 0.067394  [  401/ 4095]
loss: 0.064598  [  501/ 4095]
loss: 0.036743  [  601/ 4095]
loss: 0.056417  [  701/ 4095]
loss: 0.018677  [  801/ 4095]
loss: 0.094284  [  901/ 4095]
loss: 0.058457  [ 1001/ 4095]
loss: 0.032074  [ 1101/ 4095]
loss: 0.037947  [ 1201/ 4095]
loss: 0.041506  [ 1301/ 4095]
loss: 0.089511  [ 1401/ 4095]
loss: 0.066060  [ 1501/ 4095]
loss: 0.013697  [ 1601/ 4095]
loss: 0.162546  [ 1701/ 4095]
loss: 0.041129  [ 1801/ 4095]
loss: 0.050229  [ 1901/ 4095]
loss: 0.019073  [ 2001/ 4095]
loss: 0.127824  [ 2101/ 4095]
loss: 0.068418  [ 2201/ 4095]
loss: 0.036076  [ 2301/ 4095]
loss: 0.023897  [ 2401/ 4095]
loss: 0.117299  [ 2501/ 4095]
loss: 0.005497  [ 2601/ 4095]
loss: 0.105234  [ 2701/ 4095]
loss: 0.072570  [ 2801/ 4095]
loss: 0.024711  [ 2901/ 4095]
loss: 0.066459  [ 3001/ 4095]
loss: 0.090960  [ 3101/ 4095]


In [14]:
# sample predictions

with torch.no_grad():
	count = 0
	for image, sentiment, y in test_dataloader:
		if count == 10:
			break
		count += 1
		print('True:', np.around(y.numpy(), decimals=3))
		print('Pred:', np.around(model(image, sentiment).numpy(), decimals=3))
		print()

True: [[0.586 0.821 0.082 0.101 0.006 0.699 0.638]]
Pred: [[0.565 0.622 0.094 0.259 0.121 0.176 0.497]]

True: [[0.724 0.517 0.041 0.117 0.072 0.069 0.926]]
Pred: [[0.593 0.659 0.106 0.245 0.138 0.18  0.518]]

True: [[0.769 0.551 0.293 0.117 0.    0.216 0.897]]
Pred: [[0.566 0.682 0.098 0.23  0.177 0.181 0.477]]

True: [[0.737 0.217 0.031 0.77  0.    0.115 0.291]]
Pred: [[0.566 0.62  0.094 0.261 0.117 0.176 0.501]]

True: [[0.612 0.463 0.035 0.743 0.    0.141 0.535]]
Pred: [[0.488 0.824 0.124 0.12  0.305 0.217 0.383]]

True: [[0.441 0.241 0.041 0.838 0.    0.335 0.399]]
Pred: [[0.558 0.737 0.111 0.186 0.228 0.189 0.456]]

True: [[0.937 0.632 0.157 0.026 0.    0.111 0.542]]
Pred: [[0.571 0.669 0.099 0.239 0.159 0.181 0.491]]

True: [[0.5   0.908 0.042 0.017 0.    0.128 0.744]]
Pred: [[0.572 0.648 0.097 0.25  0.139 0.179 0.499]]

True: [[0.733 0.783 0.039 0.156 0.    0.286 0.634]]
Pred: [[0.537 0.773 0.118 0.157 0.26  0.199 0.43 ]]

True: [[0.376 0.39  0.047 0.866 0.921 0.081 0.172]]
Pre

In [12]:
import torchviz

iterator = iter(test_dataloader)
x1, x2, y = next(iterator)

pred = model(x1, x2)

torchviz.make_dot(pred.mean(), params=dict(model.named_parameters())).render("model_architecture", format="png")

'model_architecture.png'

In [13]:
import hiddenlayer as hl

hl.build_graph(model, args=(x1, x2))

TypeError: 'torch._C.Node' object is not subscriptable