# Étape 0 : Installation des dépendances

In [None]:
!pip install -U \
  ultralytics>=8.1.0 \
  roboflow \
  transformers \
  datasets \
  torchvision \
  paddlepaddle \
  paddleocr \
  spacy \
  fuzzywuzzy \
  opencv-python-headless

!python -m spacy download fr_core_news_md
!pip uninstall -y opencv-python

!pip install -U opencv-python-headless


In [None]:
import os
import shutil
import cv2
import numpy as np
import pandas as pd
from ultralytics import YOLO
from ultralytics.nn.tasks import DetectionModel
from paddleocr import PaddleOCR
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import TrOCRProcessor, VisionEncoderDecoderModel, Seq2SeqTrainer, Seq2SeqTrainingArguments
from fuzzywuzzy import process
import spacy
from datasets import load_dataset
from roboflow import Roboflow
import matplotlib.pyplot as plt

# Étape 2 : Téléchargement des données

In [None]:
# Mount Google Drive at /content/drive (Google Colab only) so that the
# /content/drive/MyDrive/... paths used by the following cells are reachable.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Base directory of the project on Google Drive; created if it does not exist yet.
DATASET_PATH = "/content/drive/MyDrive/pi_project_3"
os.makedirs(DATASET_PATH, exist_ok=True)


In [None]:
# Download the annotated dataset from Roboflow in YOLOv8 export format.
#
# The Roboflow API key is a secret and must not be stored in the notebook:
# it is read from the ROBOFLOW_API_KEY environment variable and, when that is
# not set, requested interactively without echoing it.
# NOTE: any key that was previously committed in this notebook should be
# considered leaked and rotated in the Roboflow account settings.
from getpass import getpass

from roboflow import Roboflow

roboflow_api_key = os.environ.get("ROBOFLOW_API_KEY") or getpass("Roboflow API key: ")

rf = Roboflow(api_key=roboflow_api_key)
project = rf.workspace("doxariaproject").project("eya_-2bh")
version = project.version(6)
dataset = version.download("yolov8")


# Copy the downloaded files into the working folder on Drive.
# dirs_exist_ok=True lets the cell be re-run over an existing copy.
dataset_dir = os.path.join(DATASET_PATH, "datasets")
os.makedirs(dataset_dir, exist_ok=True)
shutil.copytree(dataset.location, dataset_dir, dirs_exist_ok=True)



# Création du fichier data.yaml

In [None]:

YOLO_DATA_YAML = os.path.join(DATASET_PATH, "data.yaml")
yaml_content = f"""train: {os.path.join(dataset_dir, 'train/images')}
val: {os.path.join(dataset_dir, 'valid/images')}
test: {os.path.join(dataset_dir, 'test/images')}

nc: 2
names:
  0: dosage
  1: medicament
"""
with open(YOLO_DATA_YAML, 'w') as f:
    f.write(yaml_content)

print("Contenu de data.yaml :")
!cat {YOLO_DATA_YAML}

## Vérification des dossiers

In [None]:
import os


def count_files(directory, extensions):
    """Count the entries of `directory` whose name ends with one of `extensions`.

    The comparison is case-insensitive, so "photo.JPG" matches ".jpg".

    Args:
        directory: path of the folder to inspect.
        extensions: iterable of file suffixes (e.g. ('.jpg', '.png')).

    Returns:
        The number of matching entries, or None when `directory` does not
        exist (or is not a directory), so the caller can report it instead of
        crashing on os.listdir.
    """
    if not os.path.isdir(directory):
        return None
    wanted = tuple(ext.lower() for ext in extensions)
    return sum(1 for entry in os.listdir(directory) if entry.lower().endswith(wanted))


# NOTE(review): this rebinds DATASET_PATH from the project root to its
# "datasets" sub-folder. It is kept because other cells may rely on it, but
# re-running an earlier cell that reads DATASET_PATH after this one will use
# the new value.
DATASET_PATH = "/content/drive/MyDrive/pi_project_3/datasets"
for split in ['train', 'valid', 'test']:
    img_dir = os.path.join(DATASET_PATH, split, 'images')
    label_dir = os.path.join(DATASET_PATH, split, 'labels')
    print(f"\nVérification du dossier {split}:")
    checks = (
        ("Images", img_dir, ('.jpg', '.jpeg', '.png')),
        ("Labels", label_dir, ('.txt',)),
    )
    for label, directory, extensions in checks:
        n_files = count_files(directory, extensions)
        if n_files is None:
            # A missing split (e.g. no test set exported) must not abort the report.
            print(f"{label} : dossier introuvable ({directory})")
        else:
            print(f"{label} : {n_files}")

# Étape 4 : Entraînement de YOLOv8l

In [None]:
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu12

In [None]:
# Ask PyTorch to release the GPU memory held in its CUDA cache before the
# training cell runs (per the PyTorch docs this frees unoccupied cached blocks).
import torch
torch.cuda.empty_cache()


In [None]:
from ultralytics import YOLO

# Dataset description file, shared by the training and the evaluation call.
data_yaml_path = '/content/drive/MyDrive/pi_project_3/data.yaml'

# Keyword arguments forwarded verbatim to model.train().
train_config = dict(
    # Run length, batch size and optimiser settings.
    epochs=60,
    batch=16,
    optimizer='AdamW',
    lr0=0.001,
    lrf=0.1,
    momentum=0.937,
    weight_decay=0.0005,
    warmup_epochs=5,
    # Geometric, colour and flip settings.
    degrees=5,
    translate=0.1,
    scale=0.7,
    shear=2.0,
    hsv_h=0.005,
    hsv_s=0.5,
    hsv_v=0.2,
    flipud=0.0,
    fliplr=0.1,
    # Name given to this training run.
    name='yolo_medicament_dosage_60epochs',
)

# Start from the pre-trained large YOLOv8 checkpoint and train it.
model = YOLO('yolov8l.pt')
model.train(data=data_yaml_path, **train_config)

# Evaluate on the test split and print both mAP figures.
metrics = model.val(data=data_yaml_path, split='test')
print(f"mAP@50: {metrics.box.map50:.3f}, mAP@50:95: {metrics.box.map:.3f}")
