<a href="https://colab.research.google.com/github/AlexChrst/Amazon-data-analysis/blob/master/Easymodel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
!pip install rasterio albumentations segmentation-models-pytorch --quiet

  Preparing metadata (setup.py) ... [?25l[?25hdone
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/58.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.8/58.8 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m22.2/22.2 MB[0m [31m86.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m121.3/121.3 kB[0m [31m10.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m62.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m34.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
import os
import numpy as np
import pandas as pd
import rasterio
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as T
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import albumentations as A
from albumentations.pytorch import ToTensorV2
import segmentation_models_pytorch as smp
from tqdm.notebook import tqdm

**Configuration**

In [4]:
TRAIN_IMG_DIR = '/content/drive/MyDrive/LASCAR/data/train/images'
TRAIN_MASK_DIR = '/content/drive/MyDrive/LASCAR/data/train/masks'
PROPORTION_CSV = '/content/drive/MyDrive/LASCAR/data/train_labels_GY1QjFw.csv'

In [6]:
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(DEVICE)
BATCH_SIZE = 8
IMG_SIZE = 256
NUM_CLASSES = 7 # On enlève snow nuage et no_data pour l'instant

cuda


In [7]:
def load_tif(path):
    with rasterio.open(path) as src:
        img = src.read()
        img = np.transpose(img, (1, 2, 0))
    return img.astype(np.float32)

In [8]:
# Charger CSV
df = pd.read_csv(PROPORTION_CSV)
image_paths = [os.path.join(TRAIN_IMG_DIR, f"{str(f)}.tif") for f in df['sample_id']]
selected_classes = ['cultivated', 'herbaceous', 'broadleaf', 'coniferous', 'artificial', 'water', 'natural']  # à adapter à tes vrais noms de colonnes
NUM_CLASSES = len(selected_classes)

targets = df[selected_classes].values


In [9]:
# ================== TRANSFORMS ==================
train_transform = A.Compose([
    A.Resize(IMG_SIZE, IMG_SIZE),
    A.Normalize(),
    ToTensorV2(),
])

**Méthode 1: Régression c'est à dire on prédit les proportions directement**

In [10]:
# ================== DATASET 1 - Regression ==================
class ProportionDataset(Dataset):
    def __init__(self, image_paths, targets, transform=None):
        self.image_paths = image_paths
        self.targets = targets
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img = load_tif(self.image_paths[idx])
        if self.transform:
            img = self.transform(image=img)['image']
        target = torch.tensor(self.targets[idx], dtype=torch.float32)
        return img, target

# ================== MODEL 1 - Proportion Regression ==================
class SimpleCNN(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(4, 16, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(16, 32, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 3, padding=1), nn.ReLU(), nn.AdaptiveAvgPool2d(1)
        )
        self.fc = nn.Linear(64, num_classes)

    def forward(self, x):
        x = self.encoder(x)
        x = x.view(x.size(0), -1)
        return torch.softmax(self.fc(x), dim=1)

# ================== TRAINING LOOP ==================
def train_model(model, dataloader, criterion, optimizer, epochs=5):
    model.to(DEVICE)
    for epoch in range(epochs):
        model.train()
        total_loss = 0
        loop = tqdm(dataloader, desc=f"Epoch {epoch+1}/{epochs}")
        for x, y in loop:
            x, y = x.to(DEVICE), y.to(DEVICE)
            optimizer.zero_grad()
            preds = model(x)
            loss = criterion(preds, y)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            loop.set_postfix(loss=loss.item())
        print(f"Epoch {epoch+1}: Mean Loss = {total_loss / len(dataloader):.4f}")

In [None]:
# Split et Dataset
train_imgs, val_imgs, train_y, val_y = train_test_split(image_paths, targets, test_size=0.2)
train_imgs = train_imgs[:500]
train_y = train_y[:500]
val_imgs = val_imgs[:50]
val_y = val_y[:50]
train_ds = ProportionDataset(train_imgs, train_y, transform=train_transform)
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)

# Model
model1 = SimpleCNN(NUM_CLASSES)
optimizer = torch.optim.Adam(model1.parameters(), lr=1e-3)
criterion = nn.KLDivLoss(reduction='batchmean')

print("Training Model 1 (Regression)...")
train_model(model1, train_loader, criterion, optimizer, epochs=3)

Training Model 1 (Regression)...


Epoch 1/3:   0%|          | 0/63 [00:00<?, ?it/s]

  dataset = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)


In [None]:
# save model
torch.save(model1.state_dict(), 'model2.pth')

In [None]:
# ================== EVALUATION - KL Divergence ==================
def kl_divergence(y_true, y_pred, eps=1e-8):
    y_true = np.clip(y_true, eps, 1)
    y_pred = np.clip(y_pred, eps, 1)
    return np.sum(y_true * np.log(y_true / y_pred)) # Removed axis=1

# Validation Dataset & Loader
val_ds = ProportionDataset(val_imgs, val_y, transform=train_transform)
val_loader = DataLoader(val_ds, batch_size=1)

model1.eval()
kl_scores = []
with torch.no_grad():
    for img, true_prop in val_loader:
        img = img.to(DEVICE)
        pred = model1(img).cpu().numpy()[0]
        true_prop = true_prop.numpy()[0]
        kl = kl_divergence(true_prop, pred)
        kl_scores.append(kl)

avg_kl = np.mean(kl_scores)
print(f"\n🔍 KL Divergence on validation set: {avg_kl:.6f}")


**Modèle 2: Classification des pixels (on prédit les masks)**

> Ajouter une citation



In [None]:
# ================== DATASET 2 - Segmentation ==================
class SegmentationDataset(Dataset):
    def __init__(self, image_paths, mask_paths, transform=None):
        self.image_paths = image_paths
        self.mask_paths = mask_paths
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img = load_tif(self.image_paths[idx])
        mask = load_tif(self.mask_paths[idx])[:, :, 0]  # 2D mask
        if self.transform:
            augmented = self.transform(image=img, mask=mask)
            img = augmented['image']
            mask = augmented['mask']
        return img, mask.long()

# ================== MODEL 2 - Segmentation ==================
class SimpleSegNet(nn.Module):
    def __init__(self, in_channels, num_classes):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(in_channels, 16, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(16, 32, 3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
        )
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(32, 16, 2, stride=2), nn.ReLU(),
            nn.ConvTranspose2d(16, num_classes, 2, stride=2)
        )

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x

In [None]:
mask_paths = [os.path.join(TRAIN_MASK_DIR, f"{str(f)}.tif") for f in df['sample_id']]
train_imgs2, val_imgs2, train_masks, val_masks = train_test_split(image_paths, mask_paths, test_size=0.2)
train_imgs2 = train_imgs2[:200]
train_masks = train_masks[:200]
val_imgs2 = val_imgs2[:20]
val_masks = val_masks[:20]
train_ds2 = SegmentationDataset(train_imgs2, train_masks, transform=train_transform)
val_ds2 = SegmentationDataset(val_imgs2, val_masks, transform=train_transform)
train_loader2 = DataLoader(train_ds2, batch_size=BATCH_SIZE, shuffle=True)
val_loader2 = DataLoader(val_ds2, batch_size=1)

model2 = SimpleSegNet(in_channels=4, num_classes=NUM_CLASSES)
model2.to(DEVICE)
optimizer2 = torch.optim.Adam(model2.parameters(), lr=1e-3)
criterion2 = nn.CrossEntropyLoss()

print("Training Model 2 (Segmentation)...")
for epoch in range(3):
    model2.train()
    total_loss = 0
    loop = tqdm(train_loader2, desc=f"Epoch {epoch+1}/5")
    for img, mask in loop:
        img, mask = img.to(DEVICE), mask.to(DEVICE)
        optimizer2.zero_grad()
        out = model2(img)
        loss = criterion2(out, mask)
        loss.backward()
        optimizer2.step()
        total_loss += loss.item()
        loop.set_postfix(loss=loss.item())
    print(f"Epoch {epoch+1}: Loss = {total_loss / len(train_loader2):.4f}")

Training Model 2 (Segmentation)...


Epoch 1/5:   0%|          | 0/25 [00:00<?, ?it/s]

RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [None]:
# ================== POST-PROCESSING ==================
def mask_to_proportions(mask_pred, num_classes):
    flat = mask_pred.flatten()
    props = [(flat == i).sum() / len(flat) for i in range(num_classes)]
    return props

# ================== METRIC - KL Divergence on Val ==================
def kl_divergence(y_true, y_pred, eps=1e-8):
    y_true = np.clip(y_true, eps, 1)
    y_pred = np.clip(y_pred, eps, 1)
    return np.sum(y_true * np.log(y_true / y_pred))

model2.eval()
kl_scores = []
with torch.no_grad():
    for img, mask in tqdm(val_loader2, desc="Evaluating KL"):
        img = img.to(DEVICE)
        pred = model2(img)
        pred_mask = torch.argmax(pred.squeeze(0), dim=0).cpu().numpy()
        true_mask = mask.squeeze(0).numpy()

        pred_prop = mask_to_proportions(pred_mask, NUM_CLASSES)
        true_prop = mask_to_proportions(true_mask, NUM_CLASSES)

        kl = kl_divergence(np.array(true_prop), np.array(pred_prop))
        kl_scores.append(kl)

avg_kl_seg = np.mean(kl_scores)
print(f"\n🔍 KL Divergence on segmentation val set: {avg_kl_seg:.6f}")

In [None]:
# ================== MODEL 2 - Segmentation ==================
mask_paths = [os.path.join(TRAIN_MASK_DIR, f.replace('.tif', '_mask.tif')) for f in df['image_name']]
train_imgs2, val_imgs2, train_masks, val_masks = train_test_split(image_paths, mask_paths, test_size=0.2)

train_ds2 = SegmentationDataset(train_imgs2, train_masks, transform=train_transform)
train_loader2 = DataLoader(train_ds2, batch_size=BATCH_SIZE, shuffle=True)

model2 = smp.Unet(encoder_name="resnet18", in_channels=3, classes=NUM_CLASSES)
model2.to(DEVICE)
optimizer2 = torch.optim.Adam(model2.parameters(), lr=1e-3)
criterion2 = nn.CrossEntropyLoss()

print("Training Model 2 (Segmentation)...")
for epoch in range(5):
    model2.train()
    total_loss = 0
    for img, mask in train_loader2:
        img, mask = img.to(DEVICE), mask.to(DEVICE)
        optimizer2.zero_grad()
        out = model2(img)
        loss = criterion2(out, mask)
        loss.backward()
        optimizer2.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}: Loss = {total_loss / len(train_loader2):.4f}")


# ================== POST-PROCESSING ==================
# Pour convertir une prédiction de segmentation en proportions :
def mask_to_proportions(mask_pred, num_classes):
    flat = mask_pred.flatten()
    props = [(flat == i).sum() / len(flat) for i in range(num_classes)]
    return props
