In [1]:
import time, torch, torch, csv
import torch.nn as nn
import pandas as pd
import torch.optim as optim
import torch.nn.functional as F
import torch.utils.data as utils

from torchvision import transforms
from PIL import Image

## DeepStar network definition

DeepStar consists of five convolutional layers, each followed by a max-pooling layer. The flattened convolutional output is concatenated with the normalized start and stop points and then fed into two fully connected layers.

**Get Optim**: Currently we use the SGD optimizer.<br>
**Get Loss**: Currently we use the cross-entropy loss.<br>

In [3]:
class DeepStar(nn.Module):
    """Convolutional classifier over a single-channel map plus four point features.

    Five ``Conv2d`` + ReLU stages (each followed by a ``MaxPool2d`` with
    ``kernel_size=1, stride=1``, which leaves the tensor unchanged) reduce the
    image to a ``512 x 9 x 9`` feature map. The flattened features are
    concatenated with the four extra values in ``points`` and passed through
    two fully connected layers.

    Args:
        prediction_size: ``(px, py)`` pair. The hidden layer has ``px*py*2``
            units and the output layer has ``px*py + 1`` units.
    """

    def __init__(self, prediction_size):
        super().__init__()

        # Layers are registered in a fixed order so that state_dict keys and
        # seeded weight initialisation stay stable.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=4, stride=2, padding=0)
        self.pool1 = nn.MaxPool2d(kernel_size=1, stride=1, padding=0)

        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=2, padding=2)
        self.pool2 = nn.MaxPool2d(kernel_size=1, stride=1, padding=0)

        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=2, padding=1)
        self.pool3 = nn.MaxPool2d(kernel_size=1, stride=1, padding=0)

        self.conv4 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=2, padding=1)
        self.pool4 = nn.MaxPool2d(kernel_size=1, stride=1, padding=0)

        self.conv5 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=1, stride=2, padding=0)
        self.pool5 = nn.MaxPool2d(kernel_size=1, stride=1, padding=0)

        # Flattened size of the conv5 output: 512 channels on a 9x9 map
        # (a 256x256 input produces exactly this shape).
        self.conv_out = 512 * 9 * 9

        grid_w, grid_h = prediction_size
        hidden_units = grid_w * grid_h * 2
        output_units = grid_w * grid_h + 1

        # "+ 4" makes room for the point features concatenated in forward().
        self.fc1 = nn.Linear(self.conv_out + 4, hidden_units)
        self.dropout1 = nn.Dropout(p=0.25)
        self.fc2 = nn.Linear(hidden_units, output_units)

    def __name__(self):
        """Return the display name of this network."""
        return "DeepStar"

    def get_loss(self):
        """Return a new cross-entropy loss module."""
        return nn.CrossEntropyLoss()

    def get_optim(self, lr, momentum):
        """Return an SGD optimizer over this model's parameters."""
        return optim.SGD(self.parameters(), lr=lr, momentum=momentum)

    def forward(self, img, points):
        """Compute the output scores for a batch of images and point features.

        Args:
            img: image batch passed to ``forward_conv``.
            points: tensor concatenated to the flattened features along dim 1
                (``fc1`` leaves room for 4 such values per sample).

        Returns:
            The raw output of ``fc2`` (no softmax applied).
        """
        features = self.forward_conv(img).view(-1, self.conv_out)
        combined = torch.cat((features, points), 1)

        hidden = F.relu(self.fc1(combined))
        return self.fc2(self.dropout1(hidden))

    def forward_conv(self, img):
        """Run the five conv -> ReLU -> pool stages and return the feature map."""
        stages = (
            (self.conv1, self.pool1),
            (self.conv2, self.pool2),
            (self.conv3, self.pool3),
            (self.conv4, self.pool4),
            (self.conv5, self.pool5),
        )
        for conv, pool in stages:
            img = pool(F.relu(conv(img)))
        return img

## DataLoader

Because we use a custom dataset, we need a custom data loader. Currently we use only one map and one CSV file containing all paths generated on that map. The map is a grayscale heightmap normalized to values between 0 and 1. The data is a list of start, stop and midpoint coordinates. To make this network a classifier, we create a label for each grid position the midpoint can fall in, so there are roughly width*height labels (the network has width*height + 1 outputs).

In [4]:
class PathDataLoader(utils.Dataset):
    """Dataset yielding (map image, start/stop points, midpoint label) samples.

    Reads ``{data_dir}/data.csv`` once at construction; the columns
    ``Start``, ``Stop`` and ``Midpoint`` hold coordinate strings such as
    ``"(12, 34)"``. The image at ``{data_dir}/map.png`` is opened on every
    item access.

    Args:
        data_dir: directory containing ``map.png`` and ``data.csv``.
        prediction_size: ``(width, height)`` of the label grid.
    """

    def __init__(self, data_dir, prediction_size):
        self.map = f'{data_dir}/map.png'
        self.data_path = f'{data_dir}/data.csv'
        self.to_tensor = transforms.ToTensor()
        self.data = pd.read_csv(self.data_path, encoding = "UTF-8")
        self.size = prediction_size

    def __len__(self):
        """Return the number of rows in the CSV."""
        return len(self.data["Start"])

    def __getitem__(self, idx):
        """Return ``(img_tensor, pos_tensor, label_tensor)`` for row ``idx``.

        ``pos_tensor`` holds the start and stop coordinates divided by the
        image width/height; ``label_tensor`` is a 1-element ``LongTensor``
        with the grid label of the midpoint.

        Raises:
            IndexError: if ``idx`` is out of range.
        """
        # Positional lookup, done before any file I/O. An out-of-range index
        # raises IndexError (not KeyError), which is what Python's sequence
        # iteration protocol needs to stop `for sample in dataset` cleanly.
        row = self.data.iloc[idx]
        sx, sy = self.to_tuple(row["Start"])
        ex, ey = self.to_tuple(row["Stop"])
        mx, my = self.to_tuple(row["Midpoint"])
        width, height = self.size

        with Image.open(self.map) as img:
            imgWidth, imgHeight = img.size
            # Convert while the file is still open.
            img_tensor = self.to_tensor(img)

        # NOTE(review): the row stride is (width - 1), so distinct grid cells
        # can share a label, and for width > height the label can exceed
        # width*height. Left unchanged because any code that decodes
        # predictions must agree with this encoding -- confirm the intent.
        col = round((mx / imgWidth) * width)
        row_idx = round((my / imgHeight) * height)
        label = col + (width - 1) * row_idx

        pos_tensor = torch.FloatTensor([sx / imgWidth, sy / imgHeight, ex / imgWidth, ey / imgHeight])
        label_tensor = torch.LongTensor([label])

        return img_tensor, pos_tensor, label_tensor

    def to_tuple(self, t):
        """Parse a coordinate string like ``"(12, 34)"`` into a tuple of ints.

        Parentheses are removed and the remainder is split on commas;
        whitespace around each number is ignored, so ``"(12,34)"`` and
        ``"( 12 , 34 )"`` parse as well.

        Raises:
            ValueError: if a component is not an integer literal.
        """
        stripped = t.replace('(', '').replace(')', '')
        # int() tolerates surrounding whitespace, so splitting on a bare comma
        # accepts both "1, 2" and "1,2".
        return tuple(int(part) for part in stripped.split(','))