# Importation des librairies

In [1]:
import transformers
import torch.nn as nn
import cv2
import torch
from depth_anything_v2.dpt import DepthAnythingV2
from transformers import TrainingArguments, Trainer
from accelerate import Accelerator
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from PIL import Image
import os
from transformers import DefaultDataCollator
from torch.utils.data import Dataset
import numpy as np

xFormers not available
xFormers not available


# Chargement du modèle DepthAnything

In [2]:
DEVICE = 'cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu'

model_configs = {
    'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]},
    'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]},
    'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]},
    'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]}
}

encoder = 'vits' # or 'vits', 'vitb', 'vitg'

model = DepthAnythingV2(**model_configs[encoder])

In [3]:
model.load_state_dict(torch.load(f'depth_anything_v2_{encoder}.pth', map_location='cpu'))

  model.load_state_dict(torch.load(f'depth_anything_v2_{encoder}.pth', map_location='cpu'))


<All keys matched successfully>

# Création d'une classe pour utiliser LoRA dans le modèle

In [4]:
class LoRALayer(nn.Module):
    """Wraps a linear layer with LoRA-like adapter. Wraps an existing OPT linear layer"""
    def __init__(self, module: nn.Linear, rank: int):
        super().__init__()
        self.module = module  # pre-trained (frozen) linear layer
        self.adapter_A = nn.Parameter(torch.empty(module.in_features, rank, device=module.weight.device))
        nn.init.kaiming_uniform_(self.adapter_A, a=5 ** 0.5)
        self.adapter_B = nn.Parameter(torch.zeros(rank, module.out_features, device=module.weight.device))

    def forward(self, input):
        # Apply self.module and LoRA adapter, return the sum (self.module outputs + adapter outputs)
        adapter_output = input @ self.adapter_A @ self.adapter_B
        module_output = self.module(input)
        return module_output + adapter_output

In [5]:
lora_rank = 8

# Assurez-vous que le modèle possède un attribut 'model' (si c'est un modèle transformer, par exemple)
# Si le modèle a des blocs d'attention dans un autre sous-modèle, ajustez cette ligne pour correspondre à la structure de votre modèle

for name, module in model.named_modules():
    # Vérifier si le module est une couche d'attention, ici on cherche les projections QKV et proj
    if isinstance(module, nn.MultiheadAttention):  # Vérifie que le module est une couche d'attention multi-têtes
        print(f"Applying LoRA to {name}")

        # Appliquer LoRA sur les projections Q, K, V
        if hasattr(module, 'in_proj_weight'):
            # Le poids QKV pour la couche d'attention multi-têtes
            qkv_weight = module.in_proj_weight
            module.in_proj_weight = LoRALayer(module.in_proj_weight, rank=lora_rank).to(qkv_weight.device)

        # Appliquer LoRA sur la couche de projection de sortie
        if hasattr(module, 'out_proj'):
            out_proj_weight = module.out_proj.weight
            module.out_proj = LoRALayer(module.out_proj, rank=lora_rank).to(out_proj_weight.device)



# Classe pour parcourir le dataset

In [6]:
class DepthDataset(Dataset):
    def __init__(self, image_dir, depth_dir):
        self.image_dir = image_dir
        self.depth_dir = depth_dir
        self.image_files = sorted(os.listdir(image_dir))
        self.depth_files = sorted(os.listdir(depth_dir)) # sort pour assurer le traitement d'apres

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        image_path = os.path.join(self.image_dir, self.image_files[idx])
        depth_path = os.path.join(self.depth_dir, self.depth_files[idx])

        image = Image.open(image_path).convert("RGB")
        #depth = Image.open(depth_path).convert("L")  # Assuming depth maps are grayscale
        depth = np.load(depth_path)


        return {"image": image, "depth": depth}

# Classe pour faire je sais pas quoi

In [7]:
class DepthDataCollator(DefaultDataCollator):
    def __call__(self, features):
        images = [feature["images"] for feature in features]
        depths = [feature["depth"] for feature in features]
        return {"images": torch.stack(images), "depths": torch.stack(depths)}

# Fonction permettant de calculer des métriques pour analyser le modèle

In [8]:
def compute_classification_metrics(p):
    predictions, labels = p
    predictions = predictions.argmax(dim=1).detach().cpu().numpy()  # Prédictions des classes
    labels = labels.detach().cpu().numpy()

    # Calcul des métriques
    accuracy = accuracy_score(labels, predictions)
    precision = precision_score(labels, predictions, average='weighted', zero_division=1)
    recall = recall_score(labels, predictions, average='weighted', zero_division=1)
    f1 = f1_score(labels, predictions, average='weighted', zero_division=1)

    return {"accuracy": accuracy, "precision": precision, "recall": recall, "f1": f1}

# Code principal

In [9]:
img_dir = "C:/Users/yoyoc/Desktop/DATASET_DEVOIR/DATASET_DEVOIR/images"
dpt_dir = "C:/Users/yoyoc/Desktop/DATASET_DEVOIR/DATASET_DEVOIR/depth"

img_dir = "dataset/images"
dpt_dir = "dataset/depth"

train_dataset = DepthDataset(image_dir=img_dir, depth_dir=dpt_dir)

data_collator = DepthDataCollator()
device = "cuda" if torch.cuda.is_available() else "cpu"

training_args = TrainingArguments(
    fp16=True,
    output_dir='outputs',
    report_to=None
)
accelerator = Accelerator()
trainer = accelerator.prepare(Trainer(
    model=model.to(accelerator.device),
    train_dataset=train_dataset,
    args=training_args,
    data_collator=data_collator,
))

trainer.train()

ValueError: fp16 mixed precision requires a GPU (not 'mps').