In [None]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
import torch.utils as utils
from torchvision import datasets, transforms
from PIL import Image
from io import BytesIO 
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
# Shared PIL-image -> tensor converter (torchvision's ToTensor transform).
im2tensor = transforms.ToTensor()

def imfile2tensor(filename):
    """Load an image file as a tensor whose alpha channel is all zeros.

    An alpha channel is added (or the existing one overwritten) so the
    channel layout matches Unity's RenderTexture format.

    Args:
        filename: Path (or file object) accepted by ``PIL.Image.open``.

    Returns:
        The tensor produced by ``im2tensor`` for the image with its
        zero-filled alpha channel.
    """
    # The context manager closes the underlying file once the pixels have
    # been converted.
    with Image.open(filename) as img:
        # A constant alpha value avoids building a separate mask image, so
        # this works for non-square images as well (a numpy mask shaped
        # ``img.size`` has width and height swapped).
        img.putalpha(0)
        return im2tensor(img)

In [None]:
class UnityDataset(torch.utils.data.Dataset):
    """In-memory dataset of rendered frames paired with a visibility label.

    Frames are read from ``<root>/Image<NNNN>.png`` (index zero-padded to four
    digits) and labels from ``<root>/Visibility.csv``, which is expected to
    hold one integer per line; line ``i`` is the label of frame ``i``.

    Args:
        begin: Index of the first frame to load (inclusive).
        end: Index one past the last frame to load (exclusive).
        transform: Optional callable applied to each image tensor when an
            item is fetched.
        root: Directory that contains the images and the label file.
            Defaults to ``'./trial'``.

    Raises:
        IndexError: If the label file has fewer than ``end`` lines.
        ValueError: If a label line cannot be parsed as an integer.
    """

    def __init__(self, begin, end, transform = None, root = './trial'):
        self.transform = transform
        # images: every frame in [begin, end) is loaded eagerly
        self.data = [
            imfile2tensor('{}/Image{}.png'.format(root, str(i).zfill(4)))
            for i in range(begin, end)
        ]
        # labels: one integer per line, indexed by frame number
        with open('{}/Visibility.csv'.format(root)) as label_file:
            labels = label_file.readlines()
        # Each label is stored as a 1-element float tensor.
        self.label = [
            torch.FloatTensor([int(labels[i].strip())])
            for i in range(begin, end)
        ]
        self.num_items = len(self.label)

    def __len__(self):
        """Return the number of (image, label) pairs held by the dataset."""
        return self.num_items

    def __getitem__(self, idx):
        """Return ``(image, label)`` for ``idx``, applying ``transform`` if set."""
        out_data = self.data[idx]
        out_label = self.label[idx]
        if self.transform:
            out_data = self.transform(out_data)
        return out_data, out_label

In [None]:
# Frames [0, 1800) are used for training; frames [1800, 2000) are held out
# for evaluation.
trainset = UnityDataset(0, 1800)
testset = UnityDataset(1800, 2000)

# Training batches: 100 samples each, reshuffled every epoch.
dataloader = torch.utils.data.DataLoader(trainset, batch_size=100, shuffle=True)

In [None]:
class Encoder(nn.Module):
    """Convolutional feature extractor: conv -> ReLU -> max-pool -> conv.

    Args:
        in_channels: Number of channels of the input images.
        num_hiddens: Number of output channels; the first convolution
            produces ``num_hiddens // 2`` channels.
    """

    def __init__(self,
                 in_channels : int,
                 num_hiddens : int):
        super().__init__()

        half_hiddens = num_hiddens // 2
        # 3x3 kernel, stride 1, padding 1: spatial size is unchanged.
        conv_kwargs = dict(kernel_size=3, stride=1, padding=1)

        self.conv1 = nn.Conv2d(in_channels, half_hiddens, **conv_kwargs)
        # Non-overlapping 3x3 pooling windows.
        self.pool1 = nn.MaxPool2d(kernel_size=3, stride=3)
        self.conv2 = nn.Conv2d(half_hiddens, num_hiddens, **conv_kwargs)

    def forward(self, inputs):
        """Return the feature map of ``inputs``; no activation follows the last convolution."""
        pooled = self.pool1(F.relu(self.conv1(inputs)))
        return self.conv2(pooled)

In [None]:
class Decoder(nn.Module):
    """Two-layer perceptron that maps a feature vector to one value in (0, 1).

    Args:
        num_features: Length of each input feature vector; the hidden layer
            has ``num_features // 2`` units.
    """

    def __init__(self, num_features):
        super().__init__()
        hidden_features = num_features // 2
        self.linear1 = nn.Linear(num_features, hidden_features)
        self.linear2 = nn.Linear(hidden_features, 1)

    def forward(self, inputs):
        """Apply linear -> ReLU -> linear -> sigmoid to ``inputs``."""
        hidden = F.relu(self.linear1(inputs))
        return torch.sigmoid(self.linear2(hidden))

In [None]:
class Detector(nn.Module):
    """Encoder followed by a decoder that outputs one score per image.

    Args:
        in_channels: Channel count handed to the encoder.
        num_hiddens: Number of encoder output channels.
        num_features: Length of the flattened encoder output; it has to match
            what the encoder produces for the chosen input size.
    """

    def __init__(self,
                 in_channels : int,
                 num_hiddens : int,
                 num_features : int):
        super().__init__()

        self.encoder = Encoder(in_channels, num_hiddens)
        self.decoder = Decoder(num_features)

    def forward(self, inputs):
        """Drop the alpha channel, encode, flatten per sample and decode."""
        rgb = inputs[:, 0:3, :, :] # remove alpha channel
        features = self.encoder(rgb).flatten(1)
        return self.decoder(features)

In [None]:
# 3 colour channels in, 6 encoder channels out. The decoder input size
# 6 * 21 * 21 is the flattened encoder output for 64x64 images (the 3x3,
# stride-3 pooling reduces 64 to 21).
model = Detector(in_channels=3, num_hiddens=6, num_features=6 * 21 * 21).to(device)
# Binary cross-entropy on the sigmoid output of the decoder.
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

In [None]:
# Train the detector and record the mean batch loss of every epoch in
# ``loss_list``; the final weights are written to 'sentis_weights.pth'.
model.train()
num_epochs = 50
loss_list = []
for i in range(num_epochs):
    losses = []
    for x, label in dataloader:
        optimizer.zero_grad()
        x = x.to(device)
        # The targets must live on the same device as the predictions,
        # otherwise the loss raises a device-mismatch error on CUDA.
        label = label.to(device)
        y = model(x)
        loss = criterion(y, label)
        loss.backward()
        optimizer.step()
        # .item() copies the scalar to the host as a plain Python float.
        losses.append(loss.item())
    epoch_loss = np.average(losses)
    loss_list.append(epoch_loss)
    print("EPOCH: {} loss: {}".format(i, epoch_loss))
torch.save(model.state_dict(), 'sentis_weights.pth')

In [None]:
# Batches of 100 held-out samples used for the accuracy check.
testloader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=True)

def discretize(y):
    """Threshold the first element of ``y`` at 0.5.

    Returns ``1.0`` when ``y[0] >= 0.5`` and ``0.0`` otherwise.
    """
    if y[0] >= 0.5:
        return 1.0
    return 0.0

# Measure accuracy on the held-out set: a prediction counts as correct when
# its score thresholded at 0.5 equals the 0/1 label.
model.eval()
num_total = 0
num_oks = 0
with torch.no_grad():
    for x, t in testloader:
        x = x.to(device)
        y = model(x)
        # Threshold the whole batch at once instead of looping over samples
        # in Python, then compare on the CPU where the labels live.
        predicted = (y >= 0.5).float().cpu()
        num_oks += int((predicted == t).sum().item())
        num_total += t.size(0)
    print(num_oks / num_total)

In [None]:
import onnx
import onnxsim

# Rebuild the network and restore the trained weights before exporting.
model = Detector(3, 6, 6 * 21 * 21).to(device)
# map_location lets a checkpoint saved on a GPU be loaded on a CPU-only
# machine (and vice versa).
checkpoint = torch.load('sentis_weights.pth', map_location=device)
model.load_state_dict(checkpoint)
# Export the inference-mode graph.
model.eval()

onnx_file = 'sentis.onnx'
torch.onnx.export(
    model=model,
    # Dummy 4-channel 64x64 input used for tracing; it has to be on the same
    # device as the model parameters.
    args=torch.randn((1, 4, 64, 64), device=device),
    f=onnx_file,
    opset_version=15,
    export_params=True,
    do_constant_folding=True,
    input_names=['inputs'],
    output_names=['output'],
)

# Shape/type inference (disabled)
#model_onnx1 = onnx.load(onnx_file)
#model_onnx1 = onnx.shape_inference.infer_shapes(model_onnx1)
#onnx.save(model_onnx1, onnx_file)

# Model structure optimization (disabled)
#model_onnx2 = onnx.load(onnx_file)
#model_simp, check = onnxsim.simplify(model_onnx2)
#onnx.save(model_simp, onnx_file)