In [None]:
from pathlib import Path
import os
import numpy as np
import matplotlib.pyplot as plt
import torch
from torch import nn
from torch import optim
from torch.nn import functional as F
from torch.utils.data import DataLoader
import torchvision
from shapeworld_data import load_raw_data, get_vocab, ShapeWorld

# Extract Image Data

In [None]:
# Locate the ShapeWorld data directory: <root>/data/shapeworld_np, where <root> is two
# levels above the current working directory.
root = Path(os.path.abspath('')).parent.parent.absolute()
# Join the components separately. The former "data\shapeworld_np" literal relied on the
# invalid escape sequence "\s" and on the Windows path separator, so it pointed at a
# non-existent entry on Linux/macOS.
data_path = os.path.join(root, "data", "shapeworld_np")
print(data_path)
# os.listdir returns entries in arbitrary order; sort them so that the train/test split
# derived from this list is the same on every run and machine.
data_list = sorted(os.listdir(data_path))
print(data_list)

In [None]:
# Load every entry of the data directory and stack all images into one
# (N, 3, 64, 64) array.
imgs = []
for data in data_list:
    d = load_raw_data(os.path.join(data_path, data))
    # NOTE(review): reshape only reinterprets the buffer, it does not move axes. This
    # assumes d["imgs"] is already stored channel-first -- confirm against the loader.
    img_set = d["imgs"].reshape(-1, 3, 64, 64)
    imgs.append(img_set)

if not imgs:
    raise ValueError(f"no data files found in {data_path}")

# Concatenate along the sample axis. np.array(imgs) only produced a usable array when
# every file held exactly the same number of images; concatenate gives the same result
# in that case and also handles files of different sizes.
imgs_data = np.concatenate(imgs, axis=0)
print(imgs_data.shape)

In [None]:
# Hold out the last 1000 images for evaluation; everything before them is training data.
batch_size = 128
split = -1000
train_set, test_set = imgs_data[:split], imgs_data[split:]

# Both loaders yield plain float image batches (no labels). The training loader shuffles;
# the test loader keeps the stored order and uses a smaller batch (batch_size // 10).
trainloader = DataLoader(
    torch.tensor(train_set, dtype=torch.float),
    batch_size=batch_size,
    shuffle=True,
)
testloader = DataLoader(
    torch.tensor(test_set, dtype=torch.float),
    batch_size=batch_size // 10,
    shuffle=False,
)


In [None]:
# Sanity check: print the shape of the first training batch, then stop iterating.
for data in trainloader:
    print(data.shape)
    #print(data)
    break

# Helper functions

In [None]:
def imshow(img):
    """Display a batch of images as one grid using matplotlib.

    Args:
        img: image batch passed to ``torchvision.utils.make_grid``. It may live on
            any device and may still be attached to an autograd graph.
    """
    grid = torchvision.utils.make_grid(img)
    # Linear rescale x -> x / 2 + 0.5, i.e. [-1, 1] is mapped onto [0, 1].
    # NOTE(review): this presumes the images are in [-1, 1]; the value range of the
    # dataset is not visible here -- confirm it matches.
    grid = grid / 2 + 0.5
    # detach() drops the autograd graph and cpu() copies CUDA tensors to host memory;
    # without cpu(), .numpy() raises for tensors that live on the GPU.
    npimg = grid.detach().cpu().numpy()
    # Move the channel axis last, which is the layout plt.imshow expects.
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()

In [None]:
def train(net, criterion, optimizer, epochs, trainloader, device="cpu"):
    """Train a reconstruction model in which every input batch is its own target.

    Args:
        net: module to optimise; it is moved to ``device``.
        criterion: loss, called as ``criterion(output, img)``.
        optimizer: optimizer whose ``step()`` updates ``net``'s parameters.
        epochs: number of passes over ``trainloader``.
        trainloader: iterable yielding image batches (tensors only, no labels).
        device: device on which the forward and backward passes run.

    Returns:
        A tuple ``(output_and_label, losses)``. ``output_and_label`` holds one
        ``(reconstruction, input)`` pair per epoch, taken from the last batch of
        that epoch and stored as detached CPU tensors. ``losses`` holds the mean
        of the per-batch losses for each epoch.

    Raises:
        ValueError: if ``trainloader`` yields no batches during an epoch.
    """
    losses = []
    output_and_label = []
    net = net.to(device)

    for epoch in range(1, epochs + 1):
        print(f'epoch: {epoch}, ', end='')
        running_loss = 0.0
        num_batches = 0
        output = img = None
        for img in trainloader:
            img = img.to(device)
            optimizer.zero_grad()
            output = net(img)
            loss = criterion(output, img)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            num_batches += 1

        # An empty loader used to surface as a NameError on the loop variables.
        if num_batches == 0:
            raise ValueError("trainloader yielded no batches")

        avg_loss = running_loss / num_batches
        losses.append(avg_loss)
        print('loss:', avg_loss)
        # Keep only detached CPU copies: storing the live tensors would pin one
        # autograd graph (and its device memory) per epoch for the whole run.
        output_and_label.append((output.detach().cpu(), img.detach().cpu()))
    print('finished')
    return output_and_label, losses

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

# Model

In [None]:
class AutoEncoder2(torch.nn.Module):
    """Autoencoder built by chaining an encoder module and a decoder module."""

    def __init__(self, enc, dec):
        """Register ``enc`` and ``dec`` as the submodules ``self.enc`` and ``self.dec``."""
        super().__init__()
        self.enc, self.dec = enc, dec

    def forward(self, x):
        """Encode ``x`` and return the decoder's output for that encoding."""
        latent = self.enc(x)
        return self.dec(latent)

# Experiments

## Model01

In [None]:
# Model01: two conv + max-pool stages, mirrored by two stride-2 transposed convolutions.
# Shape comments are for the [-1, 3, 64, 64] batches produced by the data cells.
enc1 = nn.Sequential(
    nn.Conv2d(3, 16, kernel_size=3, padding=1),   # [-1, 16, 64, 64]
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2),                  # [-1, 16, 32, 32]
    nn.Conv2d(16, 32, kernel_size=3, padding=1),  # [-1, 32, 32, 32]
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2),                  # [-1, 32, 16, 16]
)

dec1 = nn.Sequential(
    nn.ConvTranspose2d(32, 16, kernel_size=2, stride=2),  # [-1, 16, 32, 32]
    nn.ReLU(),
    nn.ConvTranspose2d(16, 3, kernel_size=2, stride=2),   # [-1, 3, 64, 64]
    nn.Sigmoid(),  # reconstruction values end up in (0, 1)
)

In [None]:
# Train Model01 on `device` with Adam (default settings) and a mean-squared
# reconstruction error.
EPOCHS = 10
net1 = AutoEncoder2(enc1, dec1).to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(net1.parameters())

output_and_label1, losses1 = train(
    net1, criterion, optimizer, EPOCHS, trainloader, device=device
)

In [None]:
# Last recorded (reconstruction, input) pair of Model01's training run:
# show the inputs first, then the reconstructions.
img, org = output_and_label1[-1]
for shown in (org, img):
    imshow(shown.to("cpu"))

In [None]:
# Persist Model01: the encoder on its own, and the full autoencoder.
# torch.save does not create missing parent directories, so make sure the target
# folder exists before writing.
os.makedirs("model_params", exist_ok=True)
# Whole modules are pickled here (not state_dicts), so the files hold the module
# objects themselves.
torch.save(net1.enc, "model_params/cnn_autoencoder3-16-32.cnnet")
torch.save(net1, "model_params/cnn_autoenc-dec3-16-32.cnnet")

In [None]:
# Visual check on held-out data: show inputs and reconstructions for the first five
# test batches. This is inference only, so autograd is disabled to avoid building and
# holding a graph for every batch.
with torch.no_grad():
    for counter, img in enumerate(testloader, 1):
        img = img.to(device)
        output = net1(img)
        print(output.shape, img.shape)
        imshow(img.to("cpu"))
        imshow(output.to("cpu"))
        if counter > 4:
            break

## Model02

In [None]:
# Model02: downsampling is done by stride-2 convolutions instead of pooling.
# Shape comments are for [-1, 3, 64, 64] input batches.
enc2 = nn.Sequential(
    nn.Conv2d(3, 16, kernel_size=4, stride=2, padding=1),   # [-1, 16, 32, 32]
    nn.ReLU(),
    nn.Conv2d(16, 32, kernel_size=4, stride=2, padding=1),  # [-1, 32, 16, 16]
    nn.ReLU(),
)

dec2 = nn.Sequential(
    nn.ConvTranspose2d(32, 16, kernel_size=4, stride=2, padding=1),  # [-1, 16, 32, 32]
    nn.ReLU(),
    nn.ConvTranspose2d(16, 3, kernel_size=4, stride=2, padding=1),   # [-1, 3, 64, 64]
    nn.Tanh(),  # reconstruction values end up in (-1, 1)
)

In [None]:
# Assemble Model02; the bare name on the last line makes the notebook render its layers.
net2 = AutoEncoder2(enc=enc2, dec=dec2)
net2

In [None]:
# Train Model02 with Adam (default settings) and a mean-squared reconstruction error.
EPOCHS = 10
criterion = nn.MSELoss()
optimizer = optim.Adam(net2.parameters())

# NOTE(review): no device is passed, so train() falls back to its default ("cpu"),
# unlike the Model01 and Model05 runs -- confirm this is intended.
output_and_label2, losses2 = train(
    net=net2,
    criterion=criterion,
    optimizer=optimizer,
    epochs=EPOCHS,
    trainloader=trainloader,
)

In [None]:
# Last recorded (reconstruction, input) pair of Model02's training run:
# show the inputs first, then the reconstructions.
img, org = output_and_label2[-1]
for shown in (org, img):
    imshow(shown)

## Model03

In [None]:
# Model03: four conv + batch-norm + max-pool stages followed by four transposed
# convolutions. Shape comments are for [-1, 3, 64, 64] input batches.
# BatchNorm2d and MaxPool2d use the library defaults, which are the values that were
# previously spelled out explicitly.
enc3 = nn.Sequential(
    nn.Conv2d(3, 64, kernel_size=3, padding=1, stride=1),
    nn.BatchNorm2d(64),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(kernel_size=2, stride=2),  # [-1, 64, 32, 32]
    nn.Conv2d(64, 64, kernel_size=3, padding=1, stride=1),
    nn.BatchNorm2d(64),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(kernel_size=2, stride=2),  # [-1, 64, 16, 16]
    nn.Conv2d(64, 64, kernel_size=3, padding=1, stride=1),
    nn.BatchNorm2d(64),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(kernel_size=2, stride=2),  # [-1, 64, 8, 8]
    nn.Conv2d(64, 64, kernel_size=3, padding=1, stride=1),
    nn.BatchNorm2d(64),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(kernel_size=2, stride=2),  # [-1, 64, 4, 4]
)

# Each layer must exactly double the spatial size so that 4x4 is restored to 64x64.
# For kernel_size=2, stride=2 that requires padding=0:
#     out = (in - 1) * stride - 2 * padding + kernel_size = 2 * in
# The earlier settings (padding=1 everywhere, stride=3 in the last layer) produced
# 4 -> 6 -> 10 -> 18 -> 51, so the reconstruction could not be compared with the
# 64x64 input by the loss.
dec3 = nn.Sequential(
    nn.ConvTranspose2d(64, 64, kernel_size=2, stride=2),  # [-1, 64, 8, 8]
    nn.ReLU(),
    nn.ConvTranspose2d(64, 64, kernel_size=2, stride=2),  # [-1, 64, 16, 16]
    nn.ReLU(),
    nn.ConvTranspose2d(64, 64, kernel_size=2, stride=2),  # [-1, 64, 32, 32]
    nn.ReLU(),
    nn.ConvTranspose2d(64, 3, kernel_size=2, stride=2),   # [-1, 3, 64, 64]
    nn.Tanh(),
)

In [None]:
# Model03 setup: model, loss and optimizer. The training call below is left
# commented out, exactly as before.
EPOCHS = 10
net3 = AutoEncoder2(enc=enc3, dec=dec3)
criterion = nn.MSELoss()
optimizer = optim.Adam(net3.parameters())

#output_and_label3, losses3 = train(net3, criterion, optimizer, EPOCHS, trainloader)

In [None]:
#img, org = output_and_label3[-1]
#imshow(org)
#imshow(img)

## Model04

In [None]:
# Model04: same conv + max-pool encoder layout as Model03 but without batch norm,
# paired with a kernel-4 / stride-2 / padding-1 transposed-convolution decoder.
# Shape comments are for [-1, 3, 64, 64] input batches.
enc4 = nn.Sequential(
    nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(kernel_size=2, stride=2),  # [-1, 64, 32, 32]
    nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(kernel_size=2, stride=2),  # [-1, 64, 16, 16]
    nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(kernel_size=2, stride=2),  # [-1, 64, 8, 8]
    nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(kernel_size=2, stride=2),  # [-1, 64, 4, 4]
)

dec4 = nn.Sequential(
    nn.ConvTranspose2d(64, 64, kernel_size=4, stride=2, padding=1),  # [-1, 64, 8, 8]
    nn.ReLU(inplace=True),
    nn.ConvTranspose2d(64, 64, kernel_size=4, stride=2, padding=1),  # [-1, 64, 16, 16]
    nn.ReLU(inplace=True),
    nn.ConvTranspose2d(64, 64, kernel_size=4, stride=2, padding=1),  # [-1, 64, 32, 32]
    nn.ReLU(inplace=True),
    nn.ConvTranspose2d(64, 3, kernel_size=4, stride=2, padding=1),   # [-1, 3, 64, 64]
    nn.Tanh(),
)

In [None]:
# Model04 setup: model, loss and optimizer. The training call below is left
# commented out, exactly as before.
EPOCHS = 10
net4 = AutoEncoder2(enc=enc4, dec=dec4)
criterion = nn.MSELoss()
optimizer = optim.Adam(net4.parameters())

#output_and_label4, losses4 = train(net4, criterion, optimizer, EPOCHS, trainloader)

In [None]:
#img, org = output_and_label4[-1]
#imshow(org)
#imshow(img)

## Model05

In [None]:
# Model05: no downsampling at all -- every layer is kernel 3 / stride 1 / padding 1, so
# the spatial size is unchanged throughout and the encoding is [-1, 64, 64, 64] for
# [-1, 3, 64, 64] input. Only two layers per side are active; the deeper and
# batch-norm variants that had been tried are not part of the model.
enc5 = nn.Sequential(
    nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1),
    nn.ReLU(inplace=True),
    nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
    nn.ReLU(inplace=True),
)

dec5 = nn.Sequential(
    nn.ConvTranspose2d(64, 64, kernel_size=3, stride=1, padding=1),
    nn.ReLU(inplace=True),
    nn.ConvTranspose2d(64, 3, kernel_size=3, stride=1, padding=1),
    nn.Tanh(),
)

In [None]:
# Train Model05 on `device` with Adam (default settings) and a mean-squared
# reconstruction error.
net5 = AutoEncoder2(enc5, dec5)
criterion = torch.nn.MSELoss()
optimizer = optim.Adam(net5.parameters())
EPOCHS = 10

output_and_label5, losses5 = train(net5, criterion, optimizer, EPOCHS, trainloader, device)
# Model05's results used to be stored only under the Model04 names. They now live under
# the matching "5" names; the old names are kept as aliases so that cells which still
# read `output_and_label4` / `losses4` keep working.
output_and_label4, losses4 = output_and_label5, losses5

In [None]:
# Last recorded (reconstruction, input) pair: show the inputs first, then the
# reconstructions.
# NOTE(review): despite the "4" suffix, `output_and_label4` is the variable assigned by
# the Model05 training cell.
img, org = output_and_label4[-1]
for shown in (org, img):
    imshow(shown.to("cpu"))

In [None]:
# Persist Model05's encoder (the whole module is pickled, not a state_dict).
# torch.save does not create missing parent directories, so make sure the target
# folder exists before writing.
os.makedirs("model_params", exist_ok=True)
torch.save(net5.enc,"model_params/cnn_autoencoder3-64-64_ver3.cnnet")

# Model read test

In [None]:
# Read a saved encoder back from disk to check that stored models can be reloaded.
# NOTE(review): this path has no "_ver3" suffix, so it is not the file written by the
# Model05 save cell in this notebook -- confirm which file is intended.
#
# map_location=device: place the weights on the device used by this session; the next
#   cell moves its batches to `device`, and a file saved on another device would
#   otherwise not match (or not load at all on a CPU-only machine).
# weights_only=False: the save cells in this notebook pickle whole modules, so this file
#   is presumably a full module too, which recent PyTorch releases refuse to load
#   under the default weights_only=True. Unpickling can execute arbitrary code --
#   only load files you created yourself.
cnn_encoder = torch.load(
    "model_params/cnn_autoencoder3-64-64.cnnet",
    map_location=device,
    weights_only=False,
)
cnn_encoder

In [None]:
# Smoke-test the reloaded encoder on a single training batch and print the encoding and
# input shapes. This is inference only, so autograd is disabled to avoid building a
# graph for the batch.
with torch.no_grad():
    for counter, img in enumerate(trainloader, 1):
        img = img.to(device)
        output = cnn_encoder(img)
        print(output.shape, img.shape)
        break
    