In [1]:
from __future__ import print_function, division

import torchvision

import skimage
from PIL import Image

import os
import torch
import pandas as pd
from skimage import io, transform
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

from pprint import pprint
import json
from tqdm import tqdm
import subprocess
plt.ion()   # interactive mode

In [8]:
def pil_loader(path):
    """
    Load an image into PIL format and convert it into RGB    
    :param path: String, Complete path of the image file
    :return: PIL image
    """
    image = Image.open(path)
    return image.convert("RGB")
    
def show_tensor_image(tensor):
    """
    Take a tensor and show the corresponding image
    :param tensor: Pytorch Tensor, [channels, height, width]
    :return:
    """
    tensor = tensor.transpose(0, 1)
    tensor = tensor.transpose(1, 2)
    io.imshow(tensor.cpu().numpy())

def create_valid_train_set(csv_info_name, data_clean_dir, dataset_name, test_size):
    csv_info_name = os.path.join(data_clean_dir, dataset_name, csv_info_name)
    csv_info = pd.read_csv(csv_info_name)
    return train_test_split(csv_info, test_size=test_size,
                            random_state=42, stratify=csv_info["tag"])
    

In [9]:
class SatelliteImageDataset(Dataset):
    """Load a satellite dataset"""

    def __init__(self, X, transform=transforms.ToTensor(), device=torch.device("cpu")):
        """
        Create a satellite image dataset
        :param transform: torchvion transform function, Optional transform to be applied
                on an image.
        :device: Pytorch device: cpu or gpu to move the data into the good device
        """
        self.X = X
        self.L_image_path = list(self.X["image_clean_path"])
        lab_enc = LabelEncoder()
        self.X["tag"] = lab_enc.fit_transform(self.X["tag"])
        self.L_tag = list(self.X["tag"])
        self.classes_ = lab_enc.classes_
        self.transform = transform
        self.device = device

    def __len__(self):
        return len(self.L_image_path)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        image = pil_loader(self.L_image_path[idx])
        image = self.transform(image)
        return image.to(self.device), torch.tensor(self.L_tag[idx], dtype=torch.int)

In [10]:
data_dir = "/classification_speed_boat/data/"
data_clean_dir = "/classification_speed_boat/data_clean/"
dataset_name = "train"
csv_info_name = "info_boat.csv"
csv_clean_name = "info_boat.csv"
grid_box = [100, 150, 200]

model_name = "baseline"
prediction_dir = "/classification_speed_boat/prediction/"
csv_preds_name = "preds.csv"

# Parameters
test_size = 0.2

size = 128
transform = transforms.Compose([transforms.Resize((size, size)),
                                transforms.ToTensor()])
batch_size = 64
num_worker = 0
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

X_train, X_valid = create_valid_train_set(csv_info_name, data_clean_dir, dataset_name, test_size)

# Create dataset
train_dataset = SatelliteImageDataset(X_train, transform, device)
valid_dataset = SatelliteImageDataset(X_valid, transform, device)

# create loader
train_loader = DataLoader(train_dataset, batch_size=batch_size,
                          num_workers=num_worker, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size,
                          num_workers=num_worker, shuffle=True)

In [11]:
b, c = next(iter(train_loader))

In [13]:
c

tensor([1, 1, 1, 2, 1, 2, 0, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1,
        1, 1, 2, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 2, 1, 1, 0, 2, 1, 2, 1,
        2, 2, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=torch.int32)