In [None]:
# Colab-only setup: mount the user's Google Drive at /content/drive.
# NOTE(review): the data paths configured later in this notebook are relative
# ('./data/...'), so it is unclear whether this mount is actually required — confirm.
from google.colab import drive
drive.mount('/content/drive')

In [1]:
import os
import numpy as np
import pandas as pd
import cv2

from tqdm import tqdm
# from copy import deepcopy

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torch import optim
from torchvision import transforms
from torchvision.io import read_image, ImageReadMode
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.autograd import Variable
from torch.utils.data.sampler import SubsetRandomSampler

In [2]:
# ---- Run configuration -------------------------------------------------------
n_fold = 5  # presumably the number of cross-validation folds; not referenced by any visible cell — TODO confirm
pad_left = 0   # only used below when composing `weight_name`
pad_right = 0  # only used below when composing `weight_name`
fine_size = 202 # only used below when composing `weight_name`; presumably a working image size — TODO confirm
batch_size = 64  # batch size handed to every DataLoader in this notebook
epochs = 10      # number of passes the training loop makes over the training loader
snapshot = 6 # not referenced by any visible cell; purpose unknown — TODO confirm or remove
max_lr = 0.012   # not referenced by any visible cell
min_lr = 0.001   # learning rate passed to all SGD optimizers below
momentum = 0.9       # NOTE(review): unused — the optimizers hard-code momentum=0.05
weight_decay = 1e-4  # NOTE(review): unused — no optimizer receives a weight_decay argument

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Directory intended for saved weights; its creation is currently disabled.
save_weight = '../rst/'

# if not os.path.isdir(save_weight):
#     os.makedirs(save_weight)
# Checkpoint base name: 'model_' followed by fine_size + pad_left + pad_right.
weight_name = 'model_' + str(fine_size + pad_left + pad_right)

# Dataset locations, relative to the notebook's working directory.
train_image_path = './data/SaltDataset/train/images'
train_mask_path = './data/SaltDataset/train/masks'
test_image_path = './data/SaltDataset/test/images'
train_file = './data/SaltDataset/train.csv'
depths_file = './data/SaltDataset/depths.csv'

Pipeline idea: Image --[GAN: get latent]--> use the depth --[get feature-map latent]--> (next step not yet written down)

In [3]:
class Generator(nn.Module):
    """Small convolutional network mapping a 1-channel image to a 1-channel map.

    The stack (stored as ``self.encoder``) is three
    ConvTranspose2d(3x3, stride 1, padding 1) -> BatchNorm2d -> ReLU stages
    widening the channels 1 -> 16 -> 32 -> 64, then a 2x2 max-pool, then a
    1x1 Conv2d back to a single channel.

    For an input of shape (N, 1, H, W) the output is (N, 1, H // 2 + 2, W // 2 + 2):
    the transposed convolutions keep the spatial size, the pool halves it, and
    the final 1x1 convolution is built with ``padding=1`` which adds one pixel
    on every side.
    NOTE(review): padding on a 1x1 convolution is unusual — confirm it is intended.
    """

    def __init__(self):
        super().__init__()

        channel_plan = (1, 16, 32, 64)
        stages = []
        # Layers are appended in the same order as a hand-written Sequential,
        # so sub-module indices (encoder.0 ... encoder.10) are unchanged.
        for n_in, n_out in zip(channel_plan[:-1], channel_plan[1:]):
            stages.append(
                nn.ConvTranspose2d(in_channels=n_in, out_channels=n_out, kernel_size=3, stride=1, padding=1)
            )
            stages.append(nn.BatchNorm2d(n_out))
            stages.append(nn.ReLU(inplace=True))

        stages.append(nn.MaxPool2d(2, 2))
        stages.append(nn.Conv2d(in_channels=64, out_channels=1, kernel_size=1, stride=1, padding=1))

        self.encoder = nn.Sequential(*stages)

    def forward(self, img):
        """Run ``img`` through the layer stack and return the result."""
        return self.encoder(img)


class Discriminator(nn.Module):
    """Convolutional regressor producing one scalar per 1-channel square image.

    Four Conv2d(3x3, padding 1) -> ReLU -> MaxPool2d(2) stages widen the
    channels 1 -> 32 -> 64 -> 128 -> 256 (BatchNorm2d follows the first three
    pools), then two linear layers reduce the flattened features to 128 and
    finally to 1 value.

    Args:
        img_shape: Side length (in pixels) of the square input images. It is
            used to size the first linear layer, so inputs to ``forward`` must
            have this height and width.

    Attributes:
        img_shape: Side length of the feature map after the four pools,
            i.e. the constructor argument floor-halved four times.
    """

    def __init__(self, img_shape):
        super(Discriminator, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),
            nn.BatchNorm2d(32),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),
            nn.BatchNorm2d(64),
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),
            nn.BatchNorm2d(128),
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),
        )
        # Each of the four 2x2 max-pools floor-halves the spatial side.
        self.img_shape = (((img_shape // 2) // 2) // 2) // 2
        # Flattened feature count = side * side * 256 channels.
        # Use `**` (power): `^` is bitwise XOR in Python and binds looser than `*`,
        # so `side ^2 * 256` evaluated to `side ^ 512` and mis-sized this layer.
        self.linear = nn.Linear(self.img_shape ** 2 * 256, 128)
        self.fc = nn.Linear(128, 1)

    def forward(self, img):
        """Return a tensor of shape (batch, 1) for a batch of images."""
        rst = self.conv(img)
        # Flatten everything except the batch dimension for the linear layers.
        rst = rst.view(img.shape[0], -1)
        rst = self.linear(rst)
        rst = self.fc(rst)
        return rst

In [4]:
# Load the training index table from the path defined in the configuration cell
# (instead of repeating the literal path) and display its row count.
#
# An earlier, now-disabled exploration in this cell sorted the depth table by `z`,
# assigned cyclic fold ids 0-4, merged it into `train_df`, and recorded the number
# of unique pixel values per training image in a `unique_pixels` column.
train_df = pd.read_csv(train_file)
len(train_df)


4000

In [5]:
from sklearn.model_selection import train_test_split

class SaltDataset(Dataset):
    """Dataset yielding ``(image, mask, depth)`` triples for the salt images.

    The training CSV (minus its ``rle_mask`` column) is merged with the depth
    CSV on their shared column(s); the merged frame is expected to hold the
    image id in column 0 and the depth in column 1.

    Args:
        img_path: Directory containing ``<id>.png`` input images.
        mask_path: Directory containing ``<id>.png`` mask images.
        train_file: CSV listing the training ids (must have an ``rle_mask`` column).
        depth_file: CSV providing a depth value per id.
        img_size: Side length the image and mask are resized to.
        mode: Accepted for backward compatibility; currently unused.
        transform: Optional callable applied separately to the image and to the
            mask. A random transform would therefore not be synchronised
            between the two.
    """

    def __init__(self, img_path, mask_path, train_file, depth_file, img_size=224, mode='train', transform=None):
        self.img_path = img_path
        self.mask_path = mask_path
        self.train_df = pd.read_csv(train_file).drop('rle_mask', axis=1)
        self.depth_df = pd.read_csv(depth_file)
        self.train_df = self.train_df.merge(self.depth_df)
        self.img_size = img_size
        self.transform = transform

    def __len__(self):
        """Number of rows left after merging the id table with the depth table."""
        return len(self.train_df)

    @staticmethod
    def _load_gray(path):
        """Read ``path`` as a grayscale image scaled to float32 in [0, 1].

        Raises:
            FileNotFoundError: if OpenCV cannot read the file (``imread``
                signals this by returning ``None`` rather than raising).
        """
        raw = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if raw is None:
            raise FileNotFoundError(f"Could not read image file: {path}")
        return raw.astype(np.float32) / 255

    def __getitem__(self, idx):
        """Return ``(image, mask, depth)`` for the row at position ``idx``."""
        # Positional lookup on the row avoids materialising the whole frame as
        # an object array (what `.values` does) for every single sample.
        sample_id = self.train_df.iloc[idx, 0]
        depth = self.train_df.iloc[idx, 1]

        image = self._load_gray(os.path.join(self.img_path, sample_id + '.png'))
        mask = self._load_gray(os.path.join(self.mask_path, sample_id + '.png'))

        # cv2.resize interpolates spatially. np.resize must not be used here: it
        # only repeats/truncates the flattened buffer, which scrambles the image.
        target = (self.img_size, self.img_size)
        image = cv2.resize(image, target, interpolation=cv2.INTER_LINEAR)
        # Nearest-neighbour keeps mask values from being blended at the edges.
        mask = cv2.resize(mask, target, interpolation=cv2.INTER_NEAREST)

        if self.transform:
            image = self.transform(image)
            mask = self.transform(mask)

        return image, mask, depth


# def trainImageFetch(image_len):
#     image_train = np.zeros((image_len, 101, 101), dtype=np.float32) / 255
#     mask_train = np.zeros((image_len, 101, 101), dtype=np.float32) / 255

# depths = pd.read_csv('./data/SaltDataset/depths.csv')
# train_df = pd.read_csv('./data/SaltDataset/train.csv')
# train_df = train_df.merge(depths)       # merge combines the tables like a database join; the key column is matched automatically

# for id in train_df.id.values:
#     img = cv2.imread(f'./data/SaltDataset/train/images/{id}.png', cv2.IMREAD_GRAYSCALE)
#     dist.append(np.unique(img).shape[0])
#     # print(img.shape)
#
#
# train_df['unique_pixels'] = dist

In [6]:
# Build the dataset once; both loaders draw disjoint index subsets from it.
dataset = SaltDataset(train_image_path, train_mask_path, train_file, depths_file, transform=transforms.ToTensor())

# Derive the indices from the dataset itself: its internal table is the result of
# a merge and is what __getitem__ indexes, so another DataFrame's row count is not
# guaranteed to match. The fixed seed makes the 80/20 split reproducible.
split_seed = 42
file_idx = list(range(len(dataset)))
train_idx, val_idx = train_test_split(file_idx, test_size=0.2, shuffle=True, random_state=split_seed)

# Each sampler visits only its own subset of indices, in random order.
train_sampler = SubsetRandomSampler(train_idx)
val_sampler = SubsetRandomSampler(val_idx)

train_loader = DataLoader(dataset, batch_size=batch_size, sampler=train_sampler)
val_loader = DataLoader(dataset, batch_size=batch_size, sampler=val_sampler)

In [7]:
# Networks for the GAN branch, created generator-first and moved to `device`.
g_model = Generator()
g_model = g_model.to(device)
d_model = Discriminator(img_shape=101)
d_model = d_model.to(device)

# One SGD optimizer per network, both with the same hyper-parameters.
G_optim, D_optim = (
    optim.SGD(net.parameters(), lr=min_lr, momentum=0.05)
    for net in (g_model, d_model)
)

# Separate mean-squared-error criteria for the two networks.
g_loss, d_loss = nn.MSELoss(), nn.MSELoss()

### U-net

In [8]:
from U_net import Unet

# Build the segmentation network from the local U_net module and move it to `device`.
# NOTE(review): Unet's signature is not visible in this notebook; the positional `1`
# is presumably a channel/class count — confirm against U_net.py.
model = Unet(1, pad=True).to(device)
# SGD at the minimum learning rate; momentum is hard-coded to 0.05 here rather than
# taken from the `momentum` constant in the configuration cell.
U_optim = optim.SGD(model.parameters(), lr=min_lr, momentum=0.05)
# Mean-squared error; the training loop applies it to (predictions, masks).
u_loss = nn.MSELoss()

In [9]:
def train(dataloader, model, epochs):
    """Placeholder for a reusable training routine; not implemented yet.

    The arguments are accepted but ignored, and the function returns None.
    """


# Train the U-Net on the training split.
# An earlier GAN experiment that lived in this loop (generator/discriminator
# regressing the depth, with G_optim/D_optim stepped together) is disabled; only
# the segmentation model is optimised here.
# NOTE(review): the saved output of this cell ends with a warning pointing into
# F.mse_loss, which usually means `preds` and `masks` differ in shape and are being
# broadcast — confirm the U-Net's output shape against the mask shape.
model.train()
for epoch in range(epochs):
    for imgs, masks, depths in train_loader:
        # Batches leave the DataLoader on the CPU while the model lives on
        # `device`; without this transfer the forward pass fails on a GPU.
        imgs = imgs.to(device)
        masks = masks.to(device)

        preds = model(imgs)
        loss = u_loss(preds, masks)

        # Standard step: clear stale gradients, back-propagate, update weights.
        U_optim.zero_grad()
        loss.backward()
        U_optim.step()
        print(f"loss: {loss}")

  return F.mse_loss(input, target, reduction=self.reduction)


loss: 0.6277228593826294
loss: 0.5458352565765381
loss: 0.6515582203865051


KeyboardInterrupt: 

In [None]:
# Scratch containers for eagerly loaded images and masks. The loop that would
# fill them is disabled below, so both lists stay empty.
img_list, mask_list = [], []
# for _, id in tqdm(enumerate(train_df.id.values), total=len(train_df)):
#     # print(id)
#     img = cv2.imread(f'./data/SaltDataset/train/images/{id}.png', cv2.IMREAD_GRAYSCALE).astype(np.float32) / 255
#     mask = cv2.imread(f'./data/SaltDataset/train/masks/{id}.png', cv2.IMREAD_GRAYSCALE).astype(np.float32) / 255
#     # print(img)
#     img_list.append(img)
#     mask_list.append(mask)
#
# print(img_list)

In [None]:
# Debug output: show the contents of `mask_list` (empty while the loading loop
# in the previous cell stays commented out).
print(mask_list)
# print(train_df)

In [None]:
# Debug output: number of rows in the training index table.
print(train_df.shape[0])

In [None]:

from torch.utils.data import SubsetRandomSampler
# train_data = np.concatenate((img_list, mask_list), axis=0)
# train_data = pd.DataFrame(data=img_list)
# print(train_data)

In [None]:
# Second pair of loaders over a fresh 80/20 index split.
# The loaders must wrap the SaltDataset instance, not the raw `train_df`: integer
# indexing on a DataFrame is a column lookup and raises KeyError as soon as the
# loader is iterated. The index lists are wrapped in SubsetRandomSampler so each
# epoch visits its subset in a new random order.
file_idx = list(range(len(dataset)))
train_idx, val_idx = train_test_split(file_idx, test_size=0.2, shuffle=True, random_state=42)
train_dl = DataLoader(dataset, batch_size=batch_size, sampler=SubsetRandomSampler(train_idx))
val_dl = DataLoader(dataset, batch_size=batch_size, sampler=SubsetRandomSampler(val_idx))

In [None]:
# Debug output: display the loader's instance attributes.
train_dl.__dict__

In [None]:
def do_kaggle_metric(predict,truth, threshold=0.5):
    """Score binary-mask predictions per sample over a sweep of IoU cut-offs.

    Both inputs are flattened to ``(N, -1)`` with ``N = len(predict)``.
    ``predict`` is binarised with ``> threshold`` and ``truth`` with ``> 0.5``.
    For each IoU cut-off in 0.50, 0.55, ..., 0.95 every sample falls into
    exactly one of five categories:

    * tp        - truth and prediction non-empty, IoU strictly above the cut-off
    * fp        - truth and prediction non-empty, IoU at or below the cut-off
    * fn        - truth non-empty, prediction empty
    * tn_empty  - truth and prediction both empty
    * fp_empty  - truth empty, prediction non-empty

    A sample scores 1 at a cut-off when it is ``tp`` or ``tn_empty`` and 0
    otherwise; its precision is the mean of those scores over the cut-offs.

    Args:
        predict: Array of predicted scores, first axis is the sample axis.
        truth: Array of ground-truth masks with the same number of samples.
        threshold: Cut-off used to binarise ``predict``.

    Returns:
        Tuple ``(precision, result, thresholds)`` where ``precision`` has shape
        ``(N,)``, ``result`` is a boolean array of shape ``(N, 5, 10)`` whose
        middle axis is ordered (tp, fp, fn, tn_empty, fp_empty), and
        ``thresholds`` is the array of the ten IoU cut-offs.
    """
    n_samples = len(predict)
    pred_mask = predict.reshape(n_samples, -1) > threshold
    true_mask = truth.reshape(n_samples, -1) > 0.5

    # Per-sample IoU; the epsilon keeps the empty/empty case at 0 instead of 0/0.
    overlap = (true_mask & pred_mask).sum(1)
    combined = (true_mask | pred_mask).sum(1)
    iou = overlap / (combined + 1e-8)

    truth_empty = (true_mask.sum(1) == 0)
    pred_empty = (pred_mask.sum(1) == 0)

    # These categories do not depend on the IoU cut-off.
    both_present = (~truth_empty) & (~pred_empty)
    fn = (~truth_empty) & pred_empty
    fp_empty = truth_empty & (~pred_empty)
    tn_empty = truth_empty & pred_empty

    iou_cutoffs = np.array([0.50, 0.55, 0.60, 0.65, 0.70, 0.75, 0.80, 0.85, 0.90, 0.95])
    category_tables = []
    scores = []
    for cutoff in iou_cutoffs:
        tp = both_present & (iou > cutoff)
        fp = both_present & (iou <= cutoff)

        # On boolean arrays `+` acts as logical OR; the quotient is a float 0/1.
        score = (tp + tn_empty) / (tp + tn_empty + fp + fp_empty + fn)

        category_tables.append(np.column_stack((tp, fp, fn, tn_empty, fp_empty)))
        scores.append(score)

    # (cut-off, sample, category) -> (sample, category, cut-off)
    result = np.array(category_tables).transpose(1, 2, 0)
    precision = np.column_stack(scores).mean(1)

    return precision, result, iou_cutoffs