<a href="https://colab.research.google.com/github/ArsenyevaDiana/ADS/blob/main/Arsenyeva_Diana_AML.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### **1. Data Preprocessing**

In [None]:
!pip install --upgrade tensorflow



Load dataset

In [None]:
import kagglehub
import os
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split
import tensorflow as tf

# Fetch the chest X-ray dataset through kagglehub and keep the returned path
path = kagglehub.dataset_download("paultimothymooney/chest-xray-pneumonia")

# Root folder that holds the splits (adjust the subfolder name if necessary)
dataset_dir = os.path.join(path, "chest_xray")

# One directory per split: training, validation and test
train_dir, val_dir, test_dir = (
    os.path.join(dataset_dir, split_name)
    for split_name in ("train", "val", "test")
)

# Define a function to load and preprocess data
def load_data(data_dir, target_size=(150, 150)):
    """Load every image under ``data_dir`` into arrays, one class per sub-directory.

    Class sub-directories are filtered and sorted by name *before* labels are
    assigned, so a given folder name always maps to the same integer label,
    independent of the order ``os.listdir`` returns and of stray non-directory
    entries sitting next to the class folders.

    Args:
        data_dir: Directory whose immediate sub-directories each hold the
            images of one class.
        target_size: Size passed to ``Image.resize`` for every image.

    Returns:
        A tuple ``(images, labels)`` of NumPy arrays. ``labels[i]`` is the index
        (in sorted folder-name order) of the class folder ``images[i]`` came from.
        Files that cannot be loaded are reported on stdout and skipped.
    """
    images = []
    labels = []

    # Only real directories are classes; numbering happens after filtering so a
    # stray file can never consume a label index.
    class_names = sorted(
        name for name in os.listdir(data_dir)
        if os.path.isdir(os.path.join(data_dir, name))
    )

    for label, class_name in enumerate(class_names):
        class_path = os.path.join(data_dir, class_name)
        # Sorted so the sample order is reproducible from run to run
        for file in sorted(os.listdir(class_path)):
            file_path = os.path.join(class_path, file)
            try:
                # The context manager closes the file once the pixels are copied;
                # convert("RGB") gives every image three channels.
                with Image.open(file_path) as img:
                    pixels = np.array(img.convert("RGB").resize(target_size))
            except Exception as e:
                # Best effort: report the unreadable file and carry on
                print(f"Error loading image {file_path}: {e}")
                continue
            images.append(pixels)
            labels.append(label)

    return np.array(images), np.array(labels)

# Load training and validation datasets
x_train, y_train = load_data(train_dir)
x_val, y_val = load_data(val_dir)

# Normalize the images to [0, 1].
# Cast to float32 first: dividing the uint8 arrays by 255.0 directly yields
# float64, which doubles the memory needed for the image arrays.
x_train = x_train.astype("float32") / 255.0
x_val = x_val.astype("float32") / 255.0

# Verify the data shapes
print("Training data shape:", x_train.shape)
print("Validation data shape:", x_val.shape)
print("Training labels shape:", y_train.shape)
print("Validation labels shape:", y_val.shape)



Downloading from https://www.kaggle.com/api/v1/datasets/download/paultimothymooney/chest-xray-pneumonia?dataset_version_number=2...


100%|██████████| 2.29G/2.29G [00:27<00:00, 90.0MB/s]

Extracting files...





Training data shape: (5216, 150, 150, 3)
Validation data shape: (16, 150, 150, 3)
Training labels shape: (5216,)
Validation labels shape: (16,)


### **2. Classification**

Code for Baseline Model using VGG16:

In [None]:
from keras.models import Model
from keras.layers import GlobalAveragePooling2D, Dense
from keras.applications import VGG16 # классификации и извлечения признаков из изображений
from keras.optimizers import Adam

# Pretrained VGG16 convolutional base (ImageNet weights) without its classifier head
base_model = VGG16(include_top=False, weights='imagenet', input_shape=(150, 150, 3))

# Walk through every layer of the base and freeze it, so only the new head trains
for vgg_layer in base_model.layers:
    vgg_layer.trainable = False

# New head: global average pooling followed by a single sigmoid unit
x = GlobalAveragePooling2D()(base_model.output)
output = Dense(1, activation='sigmoid')(x)

# Assemble and compile the baseline classifier
# NOTE(review): the inputs are scaled to [0, 1] earlier in the notebook; Keras
# documents `vgg16.preprocess_input` for ImageNet-pretrained VGG16 — confirm
# that the plain rescaling is intended.
model_baseline = Model(inputs=base_model.input, outputs=output)
model_baseline.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Fit the baseline for 5 epochs, validating on the validation split
history_baseline = model_baseline.fit(
    x_train,
    y_train,
    validation_data=(x_val, y_val),
    epochs=5,
    batch_size=32,
)

Epoch 1/5
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1414s[0m 9s/step - accuracy: 0.6424 - loss: 0.6153 - val_accuracy: 0.5000 - val_loss: 0.6994
Epoch 2/5
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1447s[0m 9s/step - accuracy: 0.7797 - loss: 0.4126 - val_accuracy: 0.6250 - val_loss: 0.5050
Epoch 3/5
[1m126/163[0m [32m━━━━━━━━━━━━━━━[0m[37m━━━━━[0m [1m5:12[0m 8s/step - accuracy: 0.8905 - loss: 0.3316

Code for Enhanced Model with BatchNormalization and Dropout:

In [None]:
from keras.layers import Dropout, BatchNormalization

# Build a separate VGG16 backbone for fine-tuning. `model_baseline` is built on
# the layer objects of `base_model`, so unfreezing and training those same
# layers here would also change the weights used by the trained baseline.
base_model_advanced = VGG16(weights='imagenet', include_top=False, input_shape=(150, 150, 3))

# Freeze the whole backbone, then unfreeze only its last 4 layers
for layer in base_model_advanced.layers:
    layer.trainable = False
for layer in base_model_advanced.layers[-4:]:
    layer.trainable = True

# Add additional layers: a dense block with batch normalization and dropout
x = base_model_advanced.output
x = GlobalAveragePooling2D()(x)
x = Dense(256, activation='relu')(x)
x = BatchNormalization()(x)
x = Dropout(0.4)(x)
output = Dense(1, activation='sigmoid')(x)

# Compile the enhanced model (lower learning rate than the baseline, since
# pretrained layers are being updated)
model_advanced = Model(inputs=base_model_advanced.input, outputs=output)
model_advanced.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy'])

# Train the enhanced model
history_advanced = model_advanced.fit(x_train, y_train, validation_data=(x_val, y_val), epochs=3, batch_size=32)

Epoch 1/5


KeyboardInterrupt: 

## **3. Localization (Detection)**

Code for YOLOv5 training. YOLOv5 is a powerful algorithm for object detection.

In [None]:
# Clone YOLOv5 repository
dataset_path = path + "/chest_xray"
yaml_content = f"""
train: {dataset_path}/train
val: {dataset_path}/val
test: {dataset_path}/test
nc: 2
names: ['Normal', 'Pneumonia']
"""

with open("pneumonia.yaml", "w") as file:
    file.write(yaml_content)
!python train.py --data pneumonia.yaml --weights yolov5s.pt --epochs 50 --batch-size 16



2024-11-19 05:06:42.560693: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1731992802.614973   11867 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1731992802.627824   11867 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: (1) Create a W&B account
[34m[1mwandb[0m: (2) Use an existing W&B account
[34m[1mwandb[0m: (3) Don't visualize my results
[34m[1mwandb[0m: Enter your choice: (30 second timeout) 
[34m[1mwandb[0m: W&B disabled due to login timeout.
[34m[1mtrain: [0mweights=yolov5s.pt, cfg=, data

Code for Faster R-CNN using Detectron2:

To install Detectron2, run the following commands in a Colab or Jupyter Notebook environment:

In [None]:
!sudo apt-get update
!sudo apt-get install -y python3.9 python3.9-venv python3.9-dev gcc g++ ninja-build

 #Clone the Detectron2 repository
!git clone https://github.com/facebookresearch/detectron2.git
%cd detectron2
#Set up Python environment (use Python 3.9 if necessary)
!python3.9 -m venv detectron_env
!source detectron_env/bin/activate

 #Install PyTorch (ensure compatibility with CUDA 12.1)
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121

 # Build Detectron2 from source
!python setup.py build develop

# Verify the installation
import detectron2
print("Detectron2 successfully installed!")


Get:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,626 B]
Get:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease [1,581 B]
Get:3 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
Hit:4 http://archive.ubuntu.com/ubuntu jammy InRelease
Get:5 https://r2u.stat.illinois.edu/ubuntu jammy InRelease [6,555 B]
Get:6 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]
Get:7 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  Packages [1,113 kB]
Get:8 http://archive.ubuntu.com/ubuntu jammy-backports InRelease [127 kB]
Hit:9 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Get:10 https://r2u.stat.illinois.edu/ubuntu jammy/main amd64 Packages [2,613 kB]
Hit:11 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Get:12 http://security.ubuntu.com/ubuntu jammy-security/main amd64 Packages [2,428 kB]
Hit:13 https://ppa.launchpadcontent.ne

ModuleNotFoundError: No module named 'detectron2'

In [None]:
from detectron2.config import get_cfg
from detectron2.engine import DefaultTrainer
from detectron2.data import DatasetCatalog, MetadataCatalog

# Prepare the dataset in COCO format
def get_rsna_dicts(data_dir):
    """Build the list of dataset records for the images under ``data_dir``.

    Placeholder: the dataset preparation logic has not been written yet.
    NOTE(review): presumably this should return one record per image with its
    bounding-box annotations in the format Detectron2 expects — confirm where
    the annotations come from.

    Args:
        data_dir: Directory containing the images to register.

    Raises:
        NotImplementedError: Always, until the loading logic is implemented.
    """
    # Fail loudly here: silently returning None would only surface later as an
    # unrelated-looking error when the registered dataset is consumed.
    raise NotImplementedError(
        f"get_rsna_dicts is not implemented yet; cannot build dataset records for {data_dir!r}"
    )

from detectron2 import model_zoo  # resolves the config files shipped with Detectron2

# Register the training set lazily (the lambda runs when the dataset is first
# requested) and declare its single class name
DatasetCatalog.register("pneumonia_train", lambda: get_rsna_dicts("./chest_xray/train"))
MetadataCatalog.get("pneumonia_train").set(thing_classes=["PNEUMONIA"])

# Configure Faster R-CNN
cfg = get_cfg()
# Look the base config up through the model zoo instead of a hard-coded
# "./detectron2_repo/..." path, which does not match the directory the
# repository is cloned into elsewhere in this notebook.
cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("pneumonia_train",)
cfg.DATASETS.TEST = ()
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.00025
cfg.SOLVER.MAX_ITER = 300
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # must match the length of thing_classes above

# Train Faster R-CNN
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)  # make sure the output directory exists
trainer = DefaultTrainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()

ModuleNotFoundError: No module named 'detectron2'

## **4. Segmentation**

Code for U-Net. U-Net extracts spatial features.

In [None]:
from keras.models import Model
from keras.layers import Input, Conv2D, MaxPooling2D, Conv2DTranspose, concatenate

# Define U-Net architecture
def build_unet(input_shape=(150, 150, 3)):
    """Build a small two-level U-Net that outputs a one-channel sigmoid mask.

    The encoder halves the spatial size twice. For sizes that are not divisible
    by 4 (such as the default 150x150), pooling with 'same' padding rounds up and
    each upsampled tensor is cropped back to the size of its skip connection, so
    the concatenations always line up. For sizes divisible by 4 no cropping
    takes place.

    Args:
        input_shape: ``(height, width, channels)`` of the input images.

    Returns:
        An uncompiled Keras ``Model`` mapping an image to a mask with the same
        height and width and a single channel.
    """
    # Local import: Cropping2D is not among the layers imported at the top of the cell
    from keras.layers import Cropping2D

    def _align_to_skip(upsampled, skip):
        """Crop `upsampled` at the bottom/right so its height and width equal `skip`'s."""
        up_h, up_w = upsampled.shape[1], upsampled.shape[2]
        skip_h, skip_w = skip.shape[1], skip.shape[2]
        if None in (up_h, up_w, skip_h, skip_w):
            # Unknown static size: nothing can be computed here, leave as is
            return upsampled
        extra_h, extra_w = up_h - skip_h, up_w - skip_w
        if extra_h == 0 and extra_w == 0:
            return upsampled
        return Cropping2D(cropping=((0, extra_h), (0, extra_w)))(upsampled)

    inputs = Input(input_shape)

    # Encoder: 'same' pooling rounds odd sizes up instead of dropping a row/column
    c1 = Conv2D(64, (3, 3), activation='relu', padding='same')(inputs)
    p1 = MaxPooling2D((2, 2), padding='same')(c1)
    c2 = Conv2D(128, (3, 3), activation='relu', padding='same')(p1)
    p2 = MaxPooling2D((2, 2), padding='same')(c2)

    # Bottleneck
    c3 = Conv2D(256, (3, 3), activation='relu', padding='same')(p2)

    # Decoder: upsample, align with the matching encoder feature map, concatenate
    u1 = Conv2DTranspose(128, (2, 2), strides=(2, 2), padding='same')(c3)
    u1 = concatenate([_align_to_skip(u1, c2), c2])
    c4 = Conv2D(128, (3, 3), activation='relu', padding='same')(u1)
    u2 = Conv2DTranspose(64, (2, 2), strides=(2, 2), padding='same')(c4)
    u2 = concatenate([_align_to_skip(u2, c1), c1])

    # Per-pixel probability
    outputs = Conv2D(1, (1, 1), activation='sigmoid')(u2)
    return Model(inputs=[inputs], outputs=[outputs])

# Instantiate the U-Net with its default input shape, then set optimiser, loss and metric
model_unet = build_unet()
model_unet.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Fit on image/mask pairs.
# NOTE(review): `y_train_masks` and `y_val_masks` are not defined in the visible
# cells of this notebook — confirm where the segmentation masks are created.
history_unet = model_unet.fit(
    x_train,
    y_train_masks,
    validation_data=(x_val, y_val_masks),
    epochs=50,
    batch_size=16,
)

Visualizing Segmentation Results (For U-Net)

In [None]:
import matplotlib.pyplot as plt  # `plt` is used below but was never imported

# NOTE(review): `x_test` and `y_test_masks` are not defined in the visible cells
# of this notebook — confirm where the test images and masks are loaded.

# Predict masks for the first 5 test images and threshold them at 0.5
y_pred_masks = model_unet.predict(x_test[:5])
y_pred_masks_binary = (y_pred_masks > 0.5).astype("int32")

# Display original images, ground truth masks, and predicted masks:
# one row per kind of image, one column per example
n_examples = 5
rows = (
    ("Original Image", x_test, None),
    ("Ground Truth Mask", y_test_masks, "gray"),
    ("Predicted Mask", y_pred_masks_binary, "gray"),
)

fig, axes = plt.subplots(len(rows), n_examples, figsize=(15, 10))

for row_axes, (title, images, cmap) in zip(axes, rows):
    for i, ax in enumerate(row_axes):
        # Masks carry a trailing channel axis that imshow does not accept
        image = images[i] if cmap is None else images[i].squeeze()
        ax.imshow(image, cmap=cmap)
        ax.set_title(title)
        ax.axis("off")

plt.tight_layout()
plt.show()

Segmentation Results:

Visual comparison between ground truth masks and model predictions.
Shows how well the model identifies pneumonia-affected regions.