# Installation des bibliothèques

In [0]:
# Installer les bibliothèques nécessaires
%pip install requests pydicom pillow opencv-python tensorflow numpy matplotlib scikit-learn

In [0]:
pip install --upgrade threadpoolctl

# Importation des modules

In [0]:
import os
import requests
import zipfile
import pydicom
import matplotlib.pyplot as plt
import numpy as np
import cv2
from pyspark.dbutils import DBUtils
from pyspark.sql import SparkSession
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

# Chargement et configuration du fichier config.py

In [0]:
import importlib
import sys

config_path = "/Workspace/Users/alban.daniel@amaris.com/Databricks-project-computer-vision/config.py"

# Create a default config.py only when none exists, so an existing file is
# never overwritten.
if not os.path.exists(config_path):
    with open(config_path, "w", encoding="utf-8") as f:
        f.write('BASE_URL = "https://services.cancerimagingarchive.net/services/v4/TCIA/query"\n')
        f.write('COLLECTION_NAME = "LIDC-IDRI"\n')
    # A source file created while the interpreter is running may be missed by
    # the import system's cached directory listings.
    importlib.invalidate_caches()

# `import config` can only find the file if its directory is on sys.path;
# make that explicit instead of relying on the notebook's working directory.
config_dir = os.path.dirname(config_path)
if config_dir not in sys.path:
    sys.path.insert(0, config_dir)

# Import the configuration once the file is guaranteed to exist
import config
print(f"‚úÖ Config charg√© avec BASE_URL: {config.BASE_URL} et COLLECTION_NAME: {config.COLLECTION_NAME}")

# Initialisation de dbutils

In [0]:
# Build a DBUtils handle from the `spark` session object.
# `spark` is not assigned anywhere in the visible code; presumably it is
# injected by the Databricks runtime (TODO confirm). When the name is missing,
# the resulting NameError is caught and a warning is printed instead.
try:
    dbutils = DBUtils(spark)
except NameError:
    # NOTE(review): this branch leaves `dbutils` unassigned by this cell.
    print("‚ö† dbutils non disponible en dehors de Databricks.")

# Vérification des fichiers dans le projet

In [0]:
# List the project folder through dbutils and print the name of each entry.
project_dir = "file:/Workspace/Users/alban.daniel@amaris.com/Databricks-project-computer-vision"
entry_names = []
for entry in dbutils.fs.ls(project_dir):
    entry_names.append(entry.name)
print(entry_names)

# Récupération des patients

In [0]:
# Ask the REST API configured in config.py for the patients of the collection.
patients_url = f"{config.BASE_URL}/getPatient"
params = {"Collection": config.COLLECTION_NAME}
# Without a timeout, requests waits indefinitely when the server stays silent.
response = requests.get(patients_url, params=params, timeout=60)

if response.status_code == 200:
    patients = response.json()
    print(f"Nombre total de patients : {len(patients)}")
    # display() renders its argument itself and returns nothing, so nesting it
    # inside print() printed "Quelques patients : None". Emit the label first,
    # then render the sample.
    print("Quelques patients :")
    display(patients[:5])
else:
    print("‚ùå Erreur lors de la r√©cup√©ration des patients :", response.text)

# Récupération des séries d'un patient

In [0]:
# Ask the REST API for the imaging series of one patient.
series_url = f"{config.BASE_URL}/getSeries"
# Single source of truth for the patient ID: it is used both in the query
# and in the message below. Change it here to inspect another patient.
patient_id = "LIDC-IDRI-0001"
params = {"PatientID": patient_id}
# Without a timeout, requests waits indefinitely when the server stays silent.
response = requests.get(series_url, params=params, timeout=60)

if response.status_code == 200:
    series = response.json()
    print(f"Nombre de s√©ries pour {patient_id} : {len(series)}")
    # display() renders its argument itself and returns nothing, so it must not
    # be passed to print() (that printed a trailing "None").
    print("Aper√ßu des s√©ries :")
    display(series[:5])
else:
    print("‚ùå Erreur lors de la r√©cup√©ration des s√©ries :", response.text)

# Téléchargement des images DICOM

In [0]:
# Download the images of one series as a zip archive, streamed to disk.
image_url = f"{config.BASE_URL}/getImage"
params = {"SeriesInstanceUID": "1.3.6.1.4.1.14519.5.2.1.6279.6001.179049373636438705059720603192"}

# With stream=True the connection stays open until the body is consumed or the
# response is closed; the context manager closes it on every path, including
# the error branch and a failed write. The timeout avoids hanging forever on
# an unresponsive server.
with requests.get(image_url, params=params, stream=True, timeout=60) as response:
    if response.status_code == 200:
        with open("test_dicom.zip", "wb") as f:
            # Write the body in 8 KiB chunks so the archive never has to fit
            # in memory at once.
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
        print("‚úÖ T√©l√©chargement r√©ussi : test_dicom.zip")
    else:
        print("‚ùå Erreur lors du t√©l√©chargement :", response.text)

# Décompression et affichage d'une image DICOM

In [0]:
# Unpack the downloaded archive into the local "dicom_images" folder.
with zipfile.ZipFile("test_dicom.zip", "r") as archive:
    archive.extractall("dicom_images")

# Keep only the directory entries whose name ends with ".dcm".
dicom_files = list(filter(lambda name: name.endswith(".dcm"), os.listdir("dicom_images")))
print(f"üìÇ Nombre de fichiers DICOM extraits : {len(dicom_files)}")

if not dicom_files:
    print("‚ùå Aucun fichier DICOM trouv√© !")
else:
    # Read the first listed file and show its pixel data in grayscale.
    first_name = dicom_files[0]
    dicom_path = os.path.join("dicom_images", first_name)
    dicom_data = pydicom.dcmread(dicom_path)

    plt.imshow(dicom_data.pixel_array, cmap="gray")
    plt.title(f"Image DICOM : {first_name}")
    plt.axis("off")
    plt.show()

    # Print the dataset itself, i.e. its metadata elements.
    print(dicom_data)

# Détection d'anomalies dans les images DICOM

In [0]:
# Scan every .dcm file of the folder for simple quality problems (tiny image,
# nearly black / nearly white image, missing metadata) and show up to five of
# the flagged images.
dicom_dir = "dicom_images"
dicom_files = [f for f in os.listdir(dicom_dir) if f.endswith(".dcm")]
# Each entry: (file name, anomaly description, pixel array or None if unreadable)
anomalies = []
# Metadata attributes every file is expected to carry (loop-invariant).
required_fields = ["Modality", "BodyPartExamined"]

for file in dicom_files:
    try:
        dicom_path = os.path.join(dicom_dir, file)
        dicom_data = pydicom.dcmread(dicom_path)
        pixel_array = dicom_data.pixel_array

        # Two-name unpacking raises ValueError for anything but a 2-D array;
        # that case is reported through the except branch below.
        height, width = pixel_array.shape
        # Collect every finding instead of keeping only the last one, so a
        # later check can no longer silently overwrite an earlier result.
        issues = []

        if height < 64 or width < 64:
            issues.append("Image trop petite")
        else:
            mean_pixel = np.mean(pixel_array)
            if mean_pixel < 5:
                issues.append("Image quasi noire")
            elif mean_pixel > 250:
                issues.append("Image quasi blanche")

            for field in required_fields:
                # A missing attribute and an empty value are both treated as absent.
                if getattr(dicom_data, field, "") == "":
                    issues.append(f"M√©tadonn√©e manquante: {field}")

        if issues:
            anomalies.append((file, "; ".join(issues), pixel_array))

    except Exception as e:
        # Best effort: an unreadable file is recorded as an anomaly without an
        # image rather than aborting the whole scan.
        anomalies.append((file, f"Erreur de lecture DICOM: {str(e)}", None))

# Show the detected anomalies
if anomalies:
    print(f"‚ö†Ô∏è {len(anomalies)} anomalies d√©tect√©es.")

    num_display = min(5, len(anomalies))  # Show at most 5 images
    # squeeze=False always returns a 2-D array of Axes. With the default, a
    # single subplot comes back as a bare Axes object and axes[i] raises when
    # exactly one anomaly was found.
    fig, axes = plt.subplots(1, num_display, figsize=(15, 5), squeeze=False)
    axes = axes[0]

    for i, (file, msg, img) in enumerate(anomalies[:num_display]):
        # Unreadable files have no image; their panel only carries the title.
        if img is not None:
            axes[i].imshow(img, cmap="gray")
        axes[i].set_title(f"{file}\n{msg}", fontsize=8)
        axes[i].axis("off")

    plt.tight_layout()
    plt.show()
else:
    print("‚úÖ Aucune anomalie d√©tect√©e !")

# Clustering des images DICOM avec une ACP

In [0]:
# 📂 Charger les images DICOM
def load_dicom_images(folder, img_size=(128, 128)):
    """Load every ``.dcm`` file of *folder* as one flattened, resized row.

    Each file is read with pydicom, its pixel array is converted to float32,
    resized with ``cv2.resize(img, img_size)`` and, when its maximum is
    positive, divided by that maximum.

    Args:
        folder: Directory whose direct entries ending in ``.dcm`` are loaded.
        img_size: Size handed to ``cv2.resize`` as the target size.

    Returns:
        A 2-D array with one row per loaded file. For single-channel 2-D pixel
        data each row has ``img_size[0] * img_size[1]`` values (other pixel
        layouts are not handled specially here). When no ``.dcm`` file is
        found, an empty float32 array of shape ``(0, img_size[0] * img_size[1])``
        is returned so the result is always 2-D.
    """
    n_pixels = img_size[0] * img_size[1]
    images = []

    # os.listdir() returns entries in arbitrary order; sorting makes the row
    # order reproducible from one run to the next.
    for file in sorted(os.listdir(folder)):
        if not file.endswith(".dcm"):
            continue

        filepath = os.path.join(folder, file)
        dicom_data = pydicom.dcmread(filepath)
        img = dicom_data.pixel_array.astype(np.float32)
        img = cv2.resize(img, img_size)

        # Scale by the maximum; skipped when it is not positive to avoid a
        # division by zero on an all-black image.
        peak = np.max(img)
        if peak > 0:
            img /= peak

        images.append(img.flatten())

    if not images:
        # np.array([]) would be 1-D with shape (0,); keep the column count.
        return np.empty((0, n_pixels), dtype=np.float32)
    return np.array(images)

# 📂 Dossier contenant les images DICOM
# Load the flattened images, fit a full PCA on them and plot the explained
# variance (per component and cumulative) of the leading components.
folder_path = "./dicom_images"
image_data = load_dicom_images(folder_path)

# Fit the PCA with every available component
pca = PCA()
pca.fit(image_data)

# Explained-variance ratio of the first components (at most 20)
explained_variance = pca.explained_variance_ratio_[:20]
cumulative_variance = np.cumsum(explained_variance)
# The slice above can be shorter than 20 when the fitted PCA has fewer
# components. Derive the x positions from the actual length: a hard-coded
# range(1, 21) makes plt.bar fail on a shape mismatch in that case.
dimensions = range(1, len(explained_variance) + 1)

# Create the figure
plt.figure(figsize=(12, 5))

# Plot 1: explained variance per component
plt.subplot(1, 2, 1)
plt.bar(dimensions, explained_variance * 100, color="skyblue")
plt.xlabel("Dimensions")
plt.ylabel("Variance expliqu√©e (%)")
plt.title("Variance expliqu√©e")
plt.xticks(dimensions)
plt.grid(axis="y", linestyle="--", alpha=0.7)

# Plot 2: cumulative explained variance
plt.subplot(1, 2, 2)
plt.bar(dimensions, cumulative_variance * 100, color="lightcoral")
plt.xlabel("Dimensions")
plt.ylabel("Variance cumul√©e (%)")
plt.title("Variance expliqu√©e cumul√©e")
plt.xticks(dimensions)
plt.ylim(0, 100)
plt.grid(axis="y", linestyle="--", alpha=0.7)

# Show both plots
plt.tight_layout()
plt.show()

In [0]:
# 🏆 Appliquer l'ACP avec 2 composantes principales
# Project the flattened images onto their first two principal components.
pca = PCA(n_components=2)
image_pca = pca.fit_transform(image_data)

# Split the projected points into two K-Means clusters
# (fixed random_state, 10 initialisations).
kmeans = KMeans(n_clusters=2, random_state=42, n_init=10)
clusters = kmeans.fit_predict(image_pca)

# Scatter plot of the projection, coloured by cluster label.
fig, ax = plt.subplots(figsize=(8, 6))
points = ax.scatter(
    image_pca[:, 0],
    image_pca[:, 1],
    c=clusters,
    cmap="viridis",
    alpha=0.7,
)
fig.colorbar(points, ax=ax, label="Cluster")
ax.set_xlabel("Dim 1")
ax.set_ylabel("Dim 2")
ax.set_title("Clustering des images DICOM")
ax.grid(True)
plt.show()