# Installation des librairies

In [1]:
!pip install --upgrade pip
!pip install -q -q -q tifffile
!pip install -q -q -q tqdm
!pip install opencv-python-headless
!pip3 install torch==1.9.0+cu102 torchvision==0.10.0+cu102 torchaudio===0.9.0 -f https://download.pytorch.org/whl/torch_stable.html



Collecting pip
  Downloading pip-21.2.2-py3-none-any.whl (1.6 MB)
[K     |████████████████████████████████| 1.6 MB 1.8 MB/s eta 0:00:01
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 20.2.4
    Uninstalling pip-20.2.4:
      Successfully uninstalled pip-20.2.4
Successfully installed pip-21.2.2
Collecting opencv-python-headless
  Downloading opencv_python_headless-4.5.3.56-cp36-cp36m-manylinux2014_x86_64.whl (37.1 MB)
[K     |████████████████████████████████| 37.1 MB 15.5 MB/s eta 0:00:01
Installing collected packages: opencv-python-headless
Successfully installed opencv-python-headless-4.5.3.56
Looking in links: https://download.pytorch.org/whl/torch_stable.html
Collecting torch==1.9.0+cu102
  Downloading https://download.pytorch.org/whl/cu102/torch-1.9.0%2Bcu102-cp36-cp36m-linux_x86_64.whl (831.4 MB)
[K     |████████████████████████████████| 831.4 MB 14 kB/s  eta 0:00:011     |█████▌                          | 141.5 MB 18.1

In [65]:
!pip install --upgrade Pillow

Collecting Pillow
  Downloading Pillow-8.3.1-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl (3.0 MB)
[K     |████████████████████████████████| 3.0 MB 1.7 MB/s eta 0:00:01
[?25hInstalling collected packages: Pillow
  Attempting uninstall: Pillow
    Found existing installation: Pillow 8.0.1
    Uninstalling Pillow-8.0.1:
      Successfully uninstalled Pillow-8.0.1
Successfully installed Pillow-8.3.1


# Lecture des librairies et framework

In [2]:
%load_ext autoreload
%autoreload 2

import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torch.utils.data.dataset import Dataset
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import os
import s3fs
import shutil

from pathlib import Path
import random

import numpy as np
from tifffile import TiffFile
import matplotlib.pyplot as plt
import colorsys
import PIL
from PIL import Image 
from tqdm import tqdm
import matplotlib.patches as mpatches
import time

from framework.dataset import LandCoverData as LCD

device= torch.device("cuda:0" )#if torch.cuda.is_available() else "cpu")
# Assuming that we are on a CUDA machine, this should print a CUDA device:
print("Using {} device".format(device))
print("nom du GPU :", torch.cuda.get_device_name(device=None))
print("GPU initialisé : ", torch.cuda.is_initialized())

Using cuda:0 device
nom du GPU : Tesla T4
GPU initialisé :  True


# Téléchargement + lecture des images de la BDD

In [3]:
fs = s3fs.S3FileSystem(client_kwargs={'endpoint_url': 'https://minio.lab.sspcloud.fr'})
fs.get('projet-challengedata-ens/data/additional_files_earthcube_emu4zqr.zip', 'additional_files_earthcube_emu4zqr.zip')
shutil.unpack_archive('additional_files_earthcube_emu4zqr.zip')

In [4]:
start = time.perf_counter()

DATA_FOLDER_STR = 'dataset'
DATA_FOLDER = Path(DATA_FOLDER_STR).expanduser()
# path to the unzipped dataset: contains directories train/ and test/
DATASET_FOLDER = DATA_FOLDER

# get all train images and masks
train_images_paths = sorted(list(DATASET_FOLDER.glob('train/images/*.tif')))
train_masks_paths = sorted(list(DATASET_FOLDER.glob('train/masks/*.tif')))
# get all test images
test_images_paths = sorted(list(DATASET_FOLDER.glob('test/images/*.tif')))


# Trainning and Test

In [5]:
class CustomDataset(Dataset):
    def __init__(self, image_paths, mask_paths, train=True):   # initial logic happens like transform

        self.image_paths = image_paths
        self.mask_paths = mask_paths
        self.transforms = transforms.ToTensor()
        self.maskcol = np.empty((1,10))
    def __getitem__(self, idx):
        #tmask = np.empty((0,65536))
        if torch.is_tensor(idx):
            idx = idx.tolist()
        with TiffFile(self.image_paths[idx]) as tif :
            img = tif.asarray()
        if img.dtype == np.uint16:
            scale = 255/2200
            byte_im = (img)*scale
            byte_im = (byte_im.clip(0, 255) + 0.5).astype(np.uint8)
        image = byte_im
        maskcol = self.maskcol
        with TiffFile(self.mask_paths[idx]) as tif :
            mask = tif.asarray()
            maskcol[0][0] = np.count_nonzero(mask == 0)
            maskcol[0][1] = np.count_nonzero(mask == 1)
            maskcol[0][2] = np.count_nonzero(mask ==2)
            maskcol[0][3] = np.count_nonzero(mask == 3)
            maskcol[0][4] = np.count_nonzero(mask == 4)
            maskcol[0][5] = np.count_nonzero(mask == 5)
            maskcol[0][6] = np.count_nonzero(mask == 6)
            maskcol[0][7] = np.count_nonzero(mask == 7)
            maskcol[0][8] = np.count_nonzero(mask == 8)
            maskcol[0][9] = np.count_nonzero(mask == 9)    
            maskcol = maskcol/65536
        #tmask = np.concatenate((tmask, [mask]), axis = 0)
        t_image = self.transforms(image)
        t_mask = self.transforms(maskcol)
        #t_mask = torch.tensor(mask)#.type(torch.LongTensor)
        #print(idx)
        return t_image, t_mask

    def __len__(self):  # return count of sample we have
        return len(self.image_paths)

In [16]:
dataloader  = trainloader
size = len(dataloader.dataset)
for batch, (X, y) in enumerate(dataloader):
    X, y = X.to(device), y.to(device)
    print("Shape of X [N, C, H, W]: ", X.shape, X.dtype)
    print("Shape of y: ", y.shape, y.dtype)
    b = y.view(y.shape[0],y.shape[3]).float()*100
    print("shape of b:", b, b.shape, b.dtype)
    #loss = loss_fn(pred, y)
    break

Shape of X [N, C, H, W]:  torch.Size([20, 4, 256, 256]) torch.float32
Shape of y:  torch.Size([20, 1, 1, 10]) torch.float64
shape of b: tensor([[0.0000e+00, 0.0000e+00, 1.3855e+00, 4.9831e+01, 1.4943e+01, 5.3879e+00,
         2.5900e+01, 0.0000e+00, 0.0000e+00, 2.5528e+00],
        [0.0000e+00, 0.0000e+00, 4.9088e+00, 5.1393e+01, 1.0066e+01, 2.0065e+00,
         3.1625e+01, 0.0000e+00, 0.0000e+00, 0.0000e+00],
        [0.0000e+00, 0.0000e+00, 8.3893e+00, 5.1115e+01, 5.8334e+00, 1.0788e+00,
         3.3574e+01, 0.0000e+00, 0.0000e+00, 9.1553e-03],
        [0.0000e+00, 0.0000e+00, 1.2268e+00, 6.4474e+01, 1.4841e+01, 2.8000e+00,
         1.6658e+01, 0.0000e+00, 0.0000e+00, 0.0000e+00],
        [0.0000e+00, 0.0000e+00, 8.6823e-01, 5.4375e+01, 1.5834e+01, 2.1210e-01,
         2.8610e+01, 0.0000e+00, 0.0000e+00, 1.0071e-01],
        [0.0000e+00, 0.0000e+00, 1.1623e+01, 3.3682e+01, 1.9406e+01, 1.8738e+00,
         3.3415e+01, 0.0000e+00, 0.0000e+00, 0.0000e+00],
        [0.0000e+00, 0.0000e+0

In [36]:
for X, y in trainloader:
    #print(y)
    print("Shape of X [N, C, H, W]: ", X.shape)
    print("Shape of y: ", y.shape, y.dtype)
    #print(y)
    break
#print(y)

[[3 3 4 ... 6 6 6]]
[[6 6 6 ... 6 6 6]]
[[6 6 6 ... 6 6 6]]
[[3 3 3 ... 3 3 3]]
[[3 3 3 ... 6 6 6]]
[[4 6 6 ... 6 6 6]]
[[3 5 5 ... 6 6 6]]
[[6 6 6 ... 4 4 4]]
[[3 3 3 ... 3 3 3]]
[[3 3 3 ... 3 3 3]]
[[5 5 6 ... 3 3 3]]
[[6 4 4 ... 3 3 6]]
[[3 6 6 ... 6 6 6]]
[[5 5 5 ... 3 3 5]]
[[6 6 6 ... 5 5 5]]
[[6 6 6 ... 4 5 5]]
[[4 4 4 ... 3 3 3]]
[[4 5 5 ... 5 5 5]]
[[4 4 4 ... 4 4 4]]
[[3 3 3 ... 2 2 2]]
[[4 4 4 ... 6 6 6]]
[[3 3 3 ... 6 6 6]]
[[3 3 3 ... 6 6 6]]
[[4 4 4 ... 2 2 3]]
[[6 6 6 ... 4 4 4]]
[[7 7 7 ... 9 9 9]]
[[3 3 3 ... 6 6 6]]
[[3 3 3 ... 3 3 3]]
[[6 6 6 ... 6 6 6]]
Shape of X [N, C, H, W]:  torch.Size([20, 4, 256, 256])
Shape of y:  torch.Size([20, 1, 65536]) torch.int64
[[4 4 4 ... 4 4 4]]
[[3 3 3 ... 4 4 4]]
[[3 3 3 ... 6 6 6]]
[[4 4 6 ... 4 4 4]]
[[3 3 3 ... 3 3 3]]
[[6 4 5 ... 6 6 6]]
[[3 3 3 ... 3 3 3]]
[[3 3 3 ... 4 4 6]]
[[4 4 4 ... 6 6 6]]
[[2 2 6 ... 6 6 6]]
[[3 3 3 ... 3 3 3]]


In [72]:
def train(dataloader, model, loss_fn, optimizer, batch_size):
    size = len(dataloader.dataset)
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)
        y = y.view(y.shape[0],y.shape[3]).float().to(device)
        #print("shape of y:", y.shape, y.dtype)

        # Compute prediction error
        pred = model(X)
        #print(pred)
        #print("shape of pred:", pred.shape, pred.dtype)
        loss = loss_fn(pred, y)
        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if batch % 100 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")
            
def test(dataloader, model, loss_fn, batch_size):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss = 0 
    correct = 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            y = y.view(y.shape[0],y.shape[3]).float().to(device)        
            pred = model(X)
            #a = loss_fn(pred, y)
            #test_loss += a
            test_loss += loss_fn(pred,y).item()
            correct += ((pred - y)**2).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
    return pred, y

In [None]:
# Define model
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        self.conv1 = nn.Conv2d(in_channels= 4, out_channels= 6, kernel_size=3,stride=1)
        self.conv2 = nn.Conv2d(in_channels= 6, out_channels= 2, kernel_size=6,stride=3)
        self.maxpool = nn.MaxPool2d(3, stride=3)
        # an affine operation: y = Wx + b
        self.fc1 = nn.Linear(1458, 700)  # 5*5 from image dimension
        self.fc2 = nn.Linear(700, 100)
        self.fc3 = nn.Linear(100, 10)

    def forward(self, x):
        x = (self.conv1(x))
        x = (self.maxpool(x))
        # If the size is a square, you can specify with a single number
        x = (F.relu(self.conv2(x)))
        x = torch.flatten(x, 1) # flatten all dimensions except the batch dimension
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        return x
model = NeuralNetwork().to(device)
print(model)




In [10]:
params = list(model.parameters())
print(len(params))
print(params[0].size())  # conv1's .weight

2
torch.Size([4096, 65536, 3, 3])


In [26]:
# get some random training images
dataiter = iter(trainloader)
images,masks, labels = dataiter.next()

# show images
imshow(torchvision.utils.make_grid(image))
# print labels
print(' '.join('%5s' % classes[labels[j]] for j in range(batch_size)))

RuntimeError: DataLoader worker (pid(s) 5124) exited unexpectedly

In [8]:

# split these path using a certain percentage
len_data = len(train_images_paths)
print(len_data)
train_size = 0.7006651884700665
print(len_data*train_size)
train_image_paths = train_images_paths[:int(len_data*train_size)]
test_image_paths = train_images_paths[int(len_data*train_size):]

train_mask_paths = train_masks_paths[:int(len_data*train_size)]
test_mask_paths = train_masks_paths[int(len_data*train_size):]

batch = 20
#création du train
train_dataset = CustomDataset(train_image_paths, train_mask_paths, train=True)
trainloader = DataLoader(train_dataset, batch_size=batch, shuffle=True)
#création du test du train
test_dataset = CustomDataset(test_image_paths, test_mask_paths, train=False)
testloader = DataLoader(test_dataset, batch_size=batch, shuffle=False)
end = time.perf_counter()

print("temps en seconde : ", end - start)

18491
12956.0
temps en seconde :  3.492848784662783


In [48]:
for batch, (X, y) in enumerate(dataloader):
        #z = y.reshape(batch_size,65536).to(device)
        X, y = X.to(device), y.to(device)
        pred = model(X)
        print(torch.sum(pred, dim=1))
        y = torch.sum(y.view(y.shape[0],y.shape[3]).float(), dim=1)
        print(y)
        break

tensor([0.6360, 0.7237, 0.6196, 0.6543, 0.6652, 0.6639, 0.6048, 0.6124, 0.6456,
        0.6394, 0.6630, 0.5972, 0.6507, 0.6363, 0.6395, 0.7070, 0.6353, 0.6477,
        0.6423, 0.6091], device='cuda:0', grad_fn=<SumBackward1>)
tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1.], device='cuda:0')


In [75]:
start = time.perf_counter()
epochs = 1
loss_fn = nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(trainloader, model, loss_fn, optimizer, batch)
    pred, y = test(testloader, model, loss_fn, batch)
    #print(a.shape)
print("Done!")

end = time.perf_counter()
print("temps en seconde : ", end - start)

Epoch 1
-------------------------------
loss: 3.682000  [    0/12956]
loss: 2.867550  [ 4000/12956]
loss: 2.782336  [ 6000/12956]
loss: 2.047265  [ 8000/12956]
loss: 2.914593  [10000/12956]
loss: 2.581081  [12000/12956]
Test Error: 
 Accuracy: 23.5%, Avg loss: 2.637546 

Done!
temps en seconde :  1026.4305839156732


In [71]:
print("pred : ", pred)
A = ((pred-y).to('cpu')).numpy()
a = torch.sum(pred, dim=1)
print(a)
#print("mask : ", (pred-y)*100)

pred :  tensor([[0.0000, 0.0000, 0.0572, 0.3900, 0.0000, 0.0812, 0.2765, 0.0000, 0.0000,
         0.0000],
        [0.0000, 0.0000, 0.0592, 0.3627, 0.0000, 0.0815, 0.2658, 0.0000, 0.0000,
         0.0000],
        [0.0000, 0.0000, 0.0548, 0.3724, 0.0000, 0.0783, 0.2589, 0.0000, 0.0000,
         0.0000],
        [0.0000, 0.0000, 0.0595, 0.3527, 0.0000, 0.0752, 0.2561, 0.0000, 0.0000,
         0.0000],
        [0.0000, 0.0000, 0.0529, 0.3374, 0.0000, 0.0816, 0.2519, 0.0000, 0.0000,
         0.0000],
        [0.0000, 0.0000, 0.0546, 0.3452, 0.0000, 0.0784, 0.2547, 0.0000, 0.0000,
         0.0000],
        [0.0000, 0.0000, 0.0466, 0.3514, 0.0000, 0.0789, 0.2504, 0.0000, 0.0000,
         0.0000],
        [0.0000, 0.0000, 0.0566, 0.3577, 0.0000, 0.0828, 0.2586, 0.0000, 0.0000,
         0.0000],
        [0.0000, 0.0000, 0.0564, 0.3723, 0.0000, 0.0869, 0.2634, 0.0000, 0.0000,
         0.0000],
        [0.0000, 0.0000, 0.0630, 0.3628, 0.0000, 0.0862, 0.2590, 0.0018, 0.0000,
         0.0000],
  

RuntimeError: Can't call numpy() on Tensor that requires grad. Use tensor.detach().numpy() instead.

In [73]:
start = time.perf_counter()
epochs = 5
loss_fn2 = nn.CrossEntropyLoss()
optimizer2 = torch.optim.SGD(model.parameters(), lr=1e-3)

for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(trainloader, model, loss_fn2, optimizer2, batch)
    pred2, y2 = test(testloader, model, loss_fn2, batch)
    #print(a.shape)
print("Done!")

end = time.perf_counter()
print("temps en seconde : ", end - start)

Epoch 1
-------------------------------


RuntimeError: Expected object of scalar type Long but got scalar type Float for argument #2 'target' in call to _thnn_nll_loss_forward

In [59]:
# Pour avoir un élément du dataloader (ie un batch)
X,y  = next(iter(trainloader)) 
convolution = nn.Conv2d(in_channels= 4, out_channels= 6, kernel_size=3,stride=1)
m = nn.MaxPool2d(3, stride=3)
convolution2 = nn.Conv2d(in_channels= 6, out_channels= 2, kernel_size=6,stride=3)

X_2 = convolution(X)
M = m(X_2)
X_3 = convolution2(M)
print(X_2.shape)
print(M.shape)
print(X_3.shape)
print(X_3.shape[1]*X_3.shape[2]*X_3.shape[3])
print(F.relu(X_3).shape)

torch.Size([20, 6, 254, 254])
torch.Size([20, 6, 84, 84])
torch.Size([20, 2, 27, 27])
1458
torch.Size([20, 2, 27, 27])


In [75]:
dataloader = trainloader
for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)
        #z = y.reshape(batch_size,65536).to(device)
        print(y[:,0])
        print(y[3].shape)
        print(X.shape)
        break

tensor([[[0.0078, 0.0078, 0.0078,  ..., 0.0353, 0.0353, 0.0353],
         [0.0275, 0.0275, 0.0275,  ..., 0.0353, 0.0353, 0.0353],
         [0.0078, 0.0275, 0.0235,  ..., 0.0353, 0.0353, 0.0353],
         ...,
         [0.0157, 0.0157, 0.0157,  ..., 0.0235, 0.0235, 0.0235],
         [0.0235, 0.0235, 0.0235,  ..., 0.0235, 0.0235, 0.0235],
         [0.0118, 0.0118, 0.0118,  ..., 0.0235, 0.0235, 0.0235]],

        [[0.0118, 0.0118, 0.0118,  ..., 0.0118, 0.0118, 0.0118],
         [0.0118, 0.0118, 0.0118,  ..., 0.0118, 0.0118, 0.0118],
         [0.0118, 0.0118, 0.0118,  ..., 0.0118, 0.0118, 0.0118],
         ...,
         [0.0235, 0.0235, 0.0235,  ..., 0.0235, 0.0235, 0.0235],
         [0.0235, 0.0235, 0.0235,  ..., 0.0235, 0.0235, 0.0235],
         [0.0235, 0.0235, 0.0235,  ..., 0.0235, 0.0235, 0.0235]],

        [[0.0196, 0.0196, 0.0196,  ..., 0.0078, 0.0078, 0.0118],
         [0.0196, 0.0196, 0.0196,  ..., 0.0078, 0.0078, 0.0118],
         [0.0196, 0.0196, 0.0196,  ..., 0.0078, 0.0078, 0.

In [23]:
for k, idx in enumerate(random.sample(range(len(train_images_paths)), 1)):
    image_path = train_images_paths[idx]
    mask_path = train_masks_paths[idx]
    assert image_path.name == mask_path.name
    
    with TiffFile(image_path) as tif:
        arr = tif.asarray()        
    with TiffFile(mask_path) as tif:
        mask = tif.asarray()
maskt = torch.tensor(mask).type(torch.LongTensor)

print(mask)
print()
print(maskt)

[[6 6 6 ... 6 6 6]
 [6 6 6 ... 6 6 6]
 [6 6 6 ... 6 6 6]
 ...
 [3 3 3 ... 6 6 6]
 [3 3 3 ... 6 6 6]
 [3 3 3 ... 6 6 6]]

tensor([[6, 6, 6,  ..., 6, 6, 6],
        [6, 6, 6,  ..., 6, 6, 6],
        [6, 6, 6,  ..., 6, 6, 6],
        ...,
        [3, 3, 3,  ..., 6, 6, 6],
        [3, 3, 3,  ..., 6, 6, 6],
        [3, 3, 3,  ..., 6, 6, 6]])


In [36]:
12956%41

0