In [1]:
import torch
import torch.nn as nn
from depth_anything_v2.dpt import DepthAnythingV2
from torch.utils.data import DataLoader, Dataset
import os
from sklearn.model_selection import train_test_split
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from torch.optim import AdamW
import torch.nn.functional as F
from torchvision import transforms

xFormers not available
xFormers not available


In [2]:
# Load the Depth Anything V2 model

# NOTE(review): this path follows the checkpoint layout shown in the Depth
# Anything V2 README for the ViT-L weights and assumes DepthAnythingV2() builds
# that variant by default — TODO confirm and adjust to the local setup.
CHECKPOINT_PATH = "checkpoints/depth_anything_v2_vitl.pth"

model = DepthAnythingV2()

# The constructor is called without any weights, so the pretrained checkpoint
# has to be loaded explicitly; otherwise the LoRA adapters added later would be
# trained on top of a network that still has its initial weights.
if os.path.isfile(CHECKPOINT_PATH):
    model.load_state_dict(torch.load(CHECKPOINT_PATH, map_location="cpu"))
else:
    print(f"WARNING: checkpoint '{CHECKPOINT_PATH}' not found; the model keeps its initial weights.")

In [3]:
# Intégration de LoRA

class LoRA(nn.Module):
    """Wrap a linear layer with a trainable low-rank additive update.

    The forward pass returns ``layer(x) + (x @ U) @ V`` where ``U`` has shape
    ``(in_features, rank)`` and ``V`` has shape ``(rank, out_features)``.

    ``V`` starts at zero so that, right after wrapping, the module produces
    exactly the same output as the wrapped layer; the update only becomes
    non-zero once training has moved ``V`` away from zero.

    Args:
        layer: The linear layer to adapt. It must expose ``in_features``,
            ``out_features`` and ``weight``.
        rank: Inner dimension of the low-rank factorisation.
    """

    def __init__(self, layer, rank=4):
        super().__init__()
        self.rank = rank
        self.layer = layer

        # Create the factors on the same device / dtype as the wrapped layer so
        # the wrapper keeps working when the layer was moved or cast beforehand.
        factory = {"device": layer.weight.device, "dtype": layer.weight.dtype}

        # Low-rank factors: U gets a small random init, V starts at zero.
        self.U = nn.Parameter(torch.randn(layer.in_features, rank, **factory) * 0.01)
        self.V = nn.Parameter(torch.zeros(rank, layer.out_features, **factory))

    def forward(self, x):
        """Return the wrapped layer's output plus the low-rank correction."""
        low_rank_update = torch.matmul(torch.matmul(x, self.U), self.V)
        return self.layer(x) + low_rank_update

# Apply LoRA to the linear layers of the Depth Anything model.
# The targets are collected first so the module tree is not modified while
# named_modules() is still iterating over it.
layers_to_modify = [
    (name, module)
    for name, module in model.named_modules()
    if isinstance(module, nn.Linear)
]

for name, module in layers_to_modify:
    if not name:
        # The root module itself cannot be replaced through its parent.
        continue

    # named_modules() yields dotted paths ("a.b.c"). setattr() on the root with
    # such a path would only register a new child literally called "a.b.c" and
    # leave the real nested layer untouched, so the replacement has to be done
    # on the direct parent using the last path component.
    parent_path, _, child_name = name.rpartition(".")
    parent = model.get_submodule(parent_path) if parent_path else model

    if isinstance(parent, LoRA):
        # This is the inner layer of an existing LoRA wrapper (the cell was run
        # before); wrapping it again would stack adapters.
        continue

    setattr(parent, child_name, LoRA(module))

In [4]:
# Freeze every parameter that is not a LoRA matrix

for name, param in model.named_parameters():
    # Compare the last component of the dotted parameter name. A substring test
    # such as `'U' in name` also matches any parameter whose module path merely
    # contains a capital U or V, which would leave that parameter trainable.
    is_lora_param = name.rsplit(".", 1)[-1] in ("U", "V")
    param.requires_grad = is_lora_param

In [5]:
# Création du dataset

# Dataset personnalisé
class DepthDataset(Dataset):
    """Dataset pairing RGB images with arrays loaded from ``.npy`` files.

    Item ``idx`` is built from ``image_files[idx]`` and
    ``point_cloud_files[idx]``, so both lists must be aligned.

    Args:
        image_dir: Directory containing the image files.
        image_files: File names of the images, relative to ``image_dir``.
        point_cloud_dir: Directory containing the target arrays.
        point_cloud_files: File names of the targets, relative to
            ``point_cloud_dir``.
        transform: Optional callable applied to the RGB PIL image.

    Raises:
        ValueError: If the two file lists do not have the same length.
    """

    def __init__(self, image_dir, image_files, point_cloud_dir, point_cloud_files, transform=None):
        if len(image_files) != len(point_cloud_files):
            raise ValueError(
                f"Got {len(image_files)} image files but "
                f"{len(point_cloud_files)} point cloud files."
            )
        self.image_dir = image_dir
        self.image_files = image_files
        self.point_cloud_dir = point_cloud_dir
        self.point_cloud_files = point_cloud_files
        # Keep the transform on the instance so __getitem__ does not depend on
        # a module-level variable that happens to be called `transform`.
        self.transform = transform

    def __len__(self):
        """Return the number of (image, target) pairs."""
        return len(self.image_files)

    def __getitem__(self, idx):
        """Return ``(image, target)`` for sample ``idx``.

        ``image`` is the transformed image converted to a NumPy array and
        ``target`` is a float32 tensor built from the loaded ``.npy`` array.
        """
        image_path = os.path.join(self.image_dir, self.image_files[idx])
        xyz_path = os.path.join(self.point_cloud_dir, self.point_cloud_files[idx])

        # convert() returns a new in-memory image, so the file handle can be
        # released as soon as the conversion is done.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert("RGB")
        if self.transform is not None:
            image = self.transform(image)
        image = np.array(image)
        xyz = np.load(xyz_path)

        return image, torch.tensor(xyz, dtype=torch.float32)

# Fonction de prétraitement pour redimensionner les images
def resize_to_multiple_of_patch(image, patch_size=14):
    """Resize ``image`` so that both sides are multiples of ``patch_size``.

    Each side is rounded down to the nearest multiple of ``patch_size``. A side
    shorter than ``patch_size`` would round down to zero, which cannot be
    resized to, so it is raised to exactly one patch instead.

    Args:
        image: Object exposing ``size`` as ``(width, height)`` and a
            ``resize((width, height), resample)`` method (a PIL image).
        patch_size: Side length the output dimensions must be divisible by.

    Returns:
        The resized image, using bicubic resampling.
    """
    width, height = image.size
    new_width = max(patch_size, (width // patch_size) * patch_size)
    new_height = max(patch_size, (height // patch_size) * patch_size)
    return image.resize((new_width, new_height), Image.BICUBIC)

# Preprocessing pipeline applied to every RGB image:
#   1. resize so both sides are multiples of the patch size (14)
#   2. convert the PIL image to a float tensor
#   3. normalise with the ImageNet mean / std; the reference Depth Anything V2
#      preprocessing applies the same normalisation before the forward pass
# NOTE(review): the resize keeps each image's own resolution, so images of
# different sizes cannot be stacked into one batch by the default collate
# function — confirm that all dataset images share one resolution.
transform = transforms.Compose([
    transforms.Lambda(lambda x: resize_to_multiple_of_patch(x, patch_size=14)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [6]:
# Load the data

# Settings
IMAGES_DIR = "dataset/images"
DEPTH_DIR = "dataset/depth"
TRAIN_RATIO = 0.8

# Sorted directory listings; pairing below relies on both lists sorting into
# the same order.
images_files = sorted(os.listdir(IMAGES_DIR))
depth_files = sorted(os.listdir(DEPTH_DIR))

# Sanity checks: same number of files, and matching names once the trailing
# 6 characters of each image stem and the trailing 9 characters of each depth
# stem are dropped (presumably per-type suffixes — verify against the dataset).
assert len(depth_files) == len(images_files), "Les dossiers images et depth doivent contenir le même nombre de fichiers."
assert all(
    image_name.split('.')[0][:-6] == depth_name.split('.')[0][:-9]
    for image_name, depth_name in zip(images_files, depth_files)
), "Les noms des fichiers images et depth doivent correspondre."

# Split the sample indices into train / validation with a fixed seed
train_indices, val_indices = train_test_split(
    range(len(images_files)),
    train_size=TRAIN_RATIO,
    random_state=42,
)

# File lists for each split, kept aligned through the shared indices
train_images = list(map(images_files.__getitem__, train_indices))
val_images = list(map(images_files.__getitem__, val_indices))
train_depth = list(map(depth_files.__getitem__, train_indices))
val_depth = list(map(depth_files.__getitem__, val_indices))

# Datasets
train_dataset = DepthDataset(
    image_dir=IMAGES_DIR,
    image_files=train_images,
    point_cloud_dir=DEPTH_DIR,
    point_cloud_files=train_depth,
    transform=transform,
)
val_dataset = DepthDataset(
    image_dir=IMAGES_DIR,
    image_files=val_images,
    point_cloud_dir=DEPTH_DIR,
    point_cloud_files=val_depth,
    transform=transform,
)

# Mini-batch loaders: only the training loader shuffles
train_loader = DataLoader(dataset=train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=16, shuffle=False)

In [None]:
# Optimise only the parameters of the LoRA layers.
# A parameter is selected when the last component of its dotted name is exactly
# "U" or "V"; a substring test would also pick up unrelated parameters whose
# module path merely contains one of those capital letters.
params_to_update = [
    param
    for name, param in model.named_parameters()
    if name.rsplit(".", 1)[-1] in ("U", "V") and param.requires_grad
]

optimizer = AdamW(params_to_update, lr=1e-4)

# Loss function (here: the depth error)
def depth_loss(pred, target):
    """Return the mean squared error between ``pred`` and ``target``."""
    mean_squared_error = F.mse_loss(input=pred, target=target, reduction="mean")
    return mean_squared_error

num_epochs = 1
# Training
model.train()
for epoch in range(num_epochs):
    running_loss = 0.0
    num_batches = 0

    for images, depth_maps in train_loader:  # images and their target depth maps
        optimizer.zero_grad()
        outputs = model(images)  # predict the depth map
        loss = depth_loss(outputs, depth_maps)  # compute the loss
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        num_batches += 1

    if num_batches == 0:
        # Without this guard the report below would fail on an undefined loss.
        raise RuntimeError("train_loader produced no batches; nothing to train on.")

    # Report the mean loss over the epoch rather than the last batch only,
    # which is a noisy estimate of how the epoch went.
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss / num_batches}")

In [None]:
# Performance evaluation

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

model.eval()
predictions = []
targets = []

# Evaluate on the validation loader; it is the only held-out loader this
# notebook creates.
with torch.no_grad():
    for images, depth_maps in val_loader:
        predictions.append(model(images))
        targets.append(depth_maps)

# Flatten to 1-D: the sklearn metrics below expect one label per sample, here
# one value per pixel.
predictions_flat = torch.cat(predictions, dim=0).cpu().numpy().ravel()
targets_flat = torch.cat(targets, dim=0).cpu().numpy().ravel()

if predictions_flat.size != targets_flat.size:
    raise ValueError(
        f"Predictions ({predictions_flat.size} values) and targets "
        f"({targets_flat.size} values) do not describe the same pixels."
    )

# Regression errors on the continuous depth values
absolute_error = np.abs(predictions_flat - targets_flat)
mae = float(absolute_error.mean())
rmse = float(np.sqrt((absolute_error ** 2).mean()))

# Binarise for the classification-style metrics.
# NOTE(review): the 0.5 threshold is kept from the original cell; whether it is
# meaningful depends on the depth units — confirm.
predictions = (predictions_flat > 0.5).astype(int)
targets = (targets_flat > 0.5).astype(int)

accuracy = accuracy_score(targets, predictions)
precision = precision_score(targets, predictions)
recall = recall_score(targets, predictions)
f1 = f1_score(targets, predictions)

print(f"MAE: {mae:.4f}")
print(f"RMSE: {rmse:.4f}")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-score: {f1:.4f}")

In [None]:
# Save the model weights (the complete state dict of `model`)

torch.save(obj=model.state_dict(), f="depth_anything_lora_finetuned.pth")