In [1]:
import torch
import torch.nn as nn
import torchvision
from torchvision import transforms
from torchvision import datasets
import torchvision.models as models
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import image
from torch.utils.data.dataloader import DataLoader
from torch.utils.data.dataset import Dataset
from torch.utils.data import random_split
import torch.nn.functional as F
import re
import shutil
import os
import pandas as pd
import gc
import scipy.misc
import sys
from torch.autograd import Variable
# numpy.set_printoptions(threshold=sys.maxsize)
torch.set_printoptions(threshold=100)

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
print(torch.cuda.device_count())

In [2]:
# Beta1 hyperparam for Adam optimizers
b1 = 0.5
b2 = 0.999

# Learning rate for optimizers
lr = 0.0002

In [5]:
def read_masks(filepath):
    '''
    Read masks from directory and tranform to categorical
    '''
    file_list = [file for file in os.listdir(filepath) if file.endswith('.png')]
    file_list.sort()
    n_masks = len(file_list)
    masks = np.empty((n_masks, 512, 512))

    for i, file in enumerate(file_list):
        mask = image.imread(os.path.join(filepath, file))
        mask = (mask >= 128).astype(int)
        mask = 4 * mask[:, :, 0] + 2 * mask[:, :, 1] + mask[:, :, 2]
        masks[i, mask == 3] = 0  # (Cyan: 011) Urban land 
        masks[i, mask == 6] = 1  # (Yellow: 110) Agriculture land 
        masks[i, mask == 5] = 2  # (Purple: 101) Rangeland 
        masks[i, mask == 2] = 3  # (Green: 010) Forest land 
        masks[i, mask == 1] = 4  # (Blue: 001) Water 
        masks[i, mask == 7] = 5  # (White: 111) Barren land 
        masks[i, mask == 0] = 6  # (Black: 000) Unknown 

    return masks

def mean_iou_score(pred, labels):
    '''
    Compute mean IoU score over 6 classes
    '''
    mean_iou = 0
    for i in range(6):
        tp_fp = np.sum(pred == i)
        tp_fn = np.sum(labels == i)
        tp = np.sum((pred == i) * (labels == i))
        iou = tp / (tp_fp + tp_fn - tp)
        mean_iou += iou / 6
        print('class #%d : %1.5f'%(i, iou))
    print('\nmean_iou: %f\n' % mean_iou)

    return mean_iou

In [6]:
class SegData(Dataset):
    def __init__(self, dir, transform = None):
        pattern = "[0-9]*_mask"
        imgs = []
        img_list = os.listdir(dir)
        img_list.sort()
        n_masks = len(img_list) // 2
        masks = np.empty((n_masks, 512, 512))
        # load the data from img_lst
        for index, i in enumerate(img_list):
            loc = os.path.join(dir, i)
            img = image.imread(loc)
            if re.match(pattern, i):
                mask = img
                mask = (mask >= 128).astype(int)
                mask = 4 * mask[:, :, 0] + 2 * mask[:, :, 1] + mask[:, :, 2]
                masks[index // 2, mask == 3] = 0  # (Cyan: 011) Urban land 
                masks[index // 2, mask == 6] = 1  # (Yellow: 110) Agriculture land 
                masks[index // 2, mask == 5] = 2  # (Purple: 101) Rangeland 
                masks[index // 2, mask == 2] = 3  # (Green: 010) Forest land 
                masks[index // 2, mask == 1] = 4  # (Blue: 001) Water 
                masks[index // 2, mask == 7] = 5  # (White: 111) Barren land 
                masks[index // 2, mask == 0] = 6  # (Black: 000) Unknown 
            else:
                imgs.append(img)
        self.imgs = np.array(imgs, dtype = np.float32)[:,::2,::2, :]
        self.lbls = masks[:,::2,::2]
        self.transform = transform
    def __getitem__(self, index):
        img = self.imgs[index]
        lbl = self.lbls[index]
        if self.transform is not None:
            img = self.transform(img)
        return img, lbl
    def __len__(self):
        return len(self.imgs)
    

In [8]:
transform = transforms.Compose( # composing several transforms together
    [transforms.ToTensor(), # to tensor object
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
datadir = "./hw1_data/p2_data/train"
test_dataset = SegData(datadir, transform)
valdir = "./hw1_data/p2_data/validation"
val_dataset = SegData(valdir, transform)

In [10]:
batch_size = 50
num_classes = 7
momentum = 0
w_decay = 1e-5

In [11]:
device = 'cuda:1' if torch.cuda.is_available() else "cpu"
train_dl = DataLoader(test_dataset, batch_size, shuffle = True)
val_dl = DataLoader(val_dataset, batch_size, shuffle = False)

In [None]:
test, lab = next(iter(val_dl))
# print(lab[0].shape)
print(lab[0].shape)

In [None]:
### Neural Network

In [12]:
vgg16 = models.vgg16(pretrained= True)
for param in vgg16.parameters():
    param.requires_grad = True

In [13]:
class FCN32(nn.Module):
    def __init__(self):
        super(FCN32, self).__init__()
        self.network = nn.Sequential(*(list(vgg16.children())[:-1]))
        self.convs =nn.Sequential(nn.Conv2d(512,4096,3),
                                nn.ReLU(inplace=True),
                                nn.Dropout(),
                                nn.Conv2d(4096,4096,1),
                                nn.ReLU(inplace=True),
                                nn.Dropout(),
                                nn.Conv2d(4096,64,1),
                                nn.ReLU(inplace=True),
                                nn.Dropout()
                                )
        self.upscore=nn.ConvTranspose2d(64,num_classes,80,108)

    def forward(self,x):
        x_size=x.size()
        x = self.network(x)
        pool=self.convs(x)
        upscore=self.upscore(pool)
        return upscore

In [14]:
model = FCN32().to(device)

lr = 2e-4 # 1e-3 behaves better
epochs = 40

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr, betas=(b1, b2))

In [18]:
def train_model(dataloader, model, loss_fn, optimizer):
    model.train()
    for batch, (data, label) in enumerate(dataloader, 0):
        # Compute prediction and loss
        model.zero_grad()
        label = Variable(label.type(torch.FloatTensor))    
        X = data.to(device)
        y = label.to(device)
        pred = model(X)
        pred = F.log_softmax(pred, dim= 1)
        print(pred.shape)
        pred = torch.argmax(pred, dim = 1)
        print(pred.shape)
        loss = loss_fn(pred, y)
        # Backpropagation
        loss.backward()
        optimizer.step()
        if batch % 10 == 0:
            # print out result every 100 mini batches
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}]")
            
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_model(train_dl, model, loss_fn, optimizer)

Epoch 1
-------------------------------
torch.Size([50, 7, 512, 512])
torch.Size([50, 512, 512])


RuntimeError: "log_softmax" not implemented for 'Long'

In [None]:
torch.save(model.state_dict(), "hw1p2_checkpoint.pth")

In [None]:
def test(model, testloader):
    model.eval()     
    pred = torch.FloatTensor()
    pred = pred.cuda()
    for batch, data in enumerate(testloader, 0):
        images, labels = data
        images = images.to(device)
        images = images.view(1,3,256,256)
        output = model(images)
        pred = torch.cat((pred,output.data),0)
        
    pred = np.argmax(pred,1) 
    pred_512 = np.array([resize(p,output_shape=(512,512), order=0,preserve_range=True,clip=True) for p in pred])
    mean_iou = mean_iou_score(pred_512, valid_y)
    print("mean iou score", mean_iou)  

test(model, val_dl)
# save the pred map from the mask
#     if epoch+1 in [1,10,20]: # save pred map
#         # decoding stage
#         n_masks = len(valid_X)
#         masks_RGB = np.empty((n_masks, 512, 512, 3))
#         for i, p in enumerate(pred_512):
#             masks_RGB[i, p == 0] = [0,255,255]
#             masks_RGB[i, p == 1] = [255,255,0]
#             masks_RGB[i, p == 2] = [255,0,255]
#             masks_RGB[i, p == 3] = [0,255,0]
#             masks_RGB[i, p == 4] = [0,0,255]
#             masks_RGB[i, p == 5] = [255,255,255]
#             masks_RGB[i, p == 6] = [0,0,0]
#         masks_RGB = masks_RGB.astype(np.uint8)

In [None]:
strides = 108
kernel_size = 80
padding = 0
input_size = 5
output_size = strides * (input_size-1) + kernel_size - 2*padding
print(output_size)
input_size = 512
output_size = ((input_size + (padding * 2) - kernel_size) / strides) + 1
print(output_size)