In [None]:
# Install necessary libraries
!pip install ultralytics kagglehub -q

import kagglehub
import os
import shutil
import glob
import random
from ultralytics import YOLO
from google.colab import files
import matplotlib.pyplot as plt
from PIL import Image

print("Setup complete. Ready to download data.")

# Explanation

- Installs required libraries used in this notebook (`ultralytics` for YOLO, `kagglehub` to download Kaggle datasets).
- Imports standard Python modules for filesystem operations (`os`, `shutil`, `glob`, `random`) and imaging/plotting (`PIL`, `matplotlib`).
- Sets up the environment so subsequent cells can download, process, train, and visualize the dataset.
- The `print` confirms the setup step finished successfully.

In [None]:
# 1. Download the dataset (the returned value is used below as the local source folder)
print("Downloading dataset...")
raw_path = kagglehub.dataset_download("pacificrm/skindiseasedataset")

# 2. Define our working directory in Colab
working_dir = '/content/skin_dataset'

# Clean up previous runs to avoid errors: copytree will not copy into an existing folder
if os.path.exists(working_dir):
    shutil.rmtree(working_dir)

# Copy files to our working directory
print(f"Moving data to {working_dir}...")
shutil.copytree(raw_path, working_dir)

# 3. LOCATE THE ACTUAL DATA ROOT (Handling Nested Folders)
# The logs showed the data is inside "SkinDisease/SkinDisease"
# We look for the folder that actually contains 'Train' or 'train'
data_root = working_dir
# The third item yielded by os.walk is deliberately NOT named `files`: at cell level that
# would rebind the global `files` and hide the `google.colab.files` module imported in the
# setup cell, breaking any later `files.download(...)` call.
for root, dirs, _filenames in os.walk(working_dir):
    if 'Train' in dirs or 'train' in dirs:
        data_root = root
        break

print(f"Dataset root identified at: {data_root}")

# 4. Standardize Folder Names (Train -> train, Test -> val)
# The training and preview cells read the 'train' and 'val' folders under data_root
renames = {'Train': 'train', 'Test': 'val', 'test': 'val'}

for old_name, new_name in renames.items():
    old_path = os.path.join(data_root, old_name)
    new_path = os.path.join(data_root, new_name)

    # Rename if the old folder exists and the new one doesn't yet
    if os.path.exists(old_path) and not os.path.exists(new_path):
        print(f"Renaming '{old_name}' to '{new_name}'")
        os.rename(old_path, new_path)

# Surface a layout problem here rather than as a less obvious failure in a later cell
for expected_name in ('train', 'val'):
    if not os.path.isdir(os.path.join(data_root, expected_name)):
        print(f"Warning: expected folder '{expected_name}' not found in {data_root}")

print("\nFinal Data Structure:")
print(os.listdir(data_root))

# Explanation

- Downloads the skin disease dataset from Kaggle using `kagglehub` and defines a working directory for data processing.
- Removes any previous run folder to avoid conflicts and copies the downloaded files into the working directory.
- Searches the directory tree to find the actual dataset root (the first folder that contains a `Train` or `train` subfolder), handling nested structures.
- Normalizes folder names so they match YOLOv8's expected layout (`train` and `val`), and prints the resulting data structure for verification.

In [None]:
# Start from the medium ('m') YOLOv8 classification checkpoint rather than nano ('n')
model = YOLO('yolov8m-cls.pt')

# All training settings in one place; they are unpacked into model.train() below
train_settings = dict(
    data=data_root,              # dataset root located in the data-preparation cell
    epochs=15,                   # larger models usually need a few more epochs
    imgsz=224,                   # input image size
    project='/content/runs',     # parent folder for training runs
    name='skin_disease_medium',  # name of this run inside the project folder
)

results = model.train(**train_settings)

# Explanation

- Loads a medium-sized classification model (`yolov8m-cls.pt`). Using `m` (medium) is a trade-off between accuracy and training speed compared to `n` (nano).
- Calls `model.train(...)` to train on the dataset located at `data_root` with specified parameters:
  - `epochs`: number of full passes over the dataset.
  - `imgsz`: input image size used during training.
  - `project` and `name`: where to save training runs and a specific run name.
- The `results` object returned by `model.train(...)` holds the training metrics; the trained weights are saved in the `project/name/weights` folder.

In [None]:
# Robustly find a validation image
val_dir = os.path.join(data_root, 'val')
# Lower-case suffixes. File suffixes are lower-cased before the comparison so that names
# such as 'IMG_001.JPG' are found too (glob patterns like '*.jpg' are case-sensitive on Linux).
image_extensions = ('.jpg', '.jpeg', '.png', '.bmp')
found_images = []

# Walk through all class folders in 'val' to find images.
# A missing 'val' folder is treated like an empty one, so the message in the `else`
# branch below is printed instead of an unhandled FileNotFoundError.
class_folders = sorted(os.listdir(val_dir)) if os.path.isdir(val_dir) else []
for class_folder in class_folders:
    class_path = os.path.join(val_dir, class_folder)
    if not os.path.isdir(class_path):
        continue
    for file_name in sorted(os.listdir(class_path)):
        if file_name.startswith('.'):
            # Skip hidden files, as the previous glob-based search did
            continue
        file_path = os.path.join(class_path, file_name)
        file_suffix = os.path.splitext(file_name)[1].lower()
        if file_suffix in image_extensions and os.path.isfile(file_path):
            found_images.append(file_path)

if found_images:
    # 1. Pick a random image
    test_image_path = random.choice(found_images)
    print(f"Testing on image: {test_image_path}")

    # 2. Run Prediction
    results = model.predict(test_image_path, verbose=False)

    # 3. Get Top Result (index of the best class, its label, and its confidence)
    top_class_index = results[0].probs.top1
    prediction_name = results[0].names[top_class_index]
    confidence = results[0].probs.top1conf.item()

    # 4. Display Result
    print(f"\nAI PREDICTION: {prediction_name.upper()}")
    print(f"CONFIDENCE: {confidence * 100:.2f}%")

    # 5. Show Image (the context manager closes the image file once the plot is drawn)
    with Image.open(test_image_path) as img:
        plt.imshow(img)
        plt.axis('off')
        plt.title(f"Pred: {prediction_name} ({confidence:.2f})")
        plt.show()
else:
    print("Error: Still could not find any images in the validation folder.")

# Explanation

- Gathers image files from the `val` (validation) directory across all class subfolders to find a sample image for quick qualitative testing.
- Selects a random image and runs `model.predict(...)` to obtain predictions on that image.
- Extracts the top predicted class index and its confidence score, then prints the predicted label and confidence.
- Displays the chosen image with a title summarizing the model prediction so you can visually inspect the result.
- If no validation images are found, it prints a helpful error message to guide debugging.

In [None]:
# Location of best.pt for the first training run in a fresh session
best_weight_path = '/content/runs/skin_disease_medium/weights/best.pt'

# By default Ultralytics does not overwrite an existing run folder: a repeated training run
# is saved under an incremented name (e.g. 'skin_disease_medium2'), which would leave the
# path above pointing at stale weights. Prefer the checkpoint path recorded by the trainer
# of the model trained in this session, and keep the path above as the fallback.
trained_model = globals().get('model')
recorded_best = getattr(getattr(trained_model, 'trainer', None), 'best', None)
if recorded_best is not None and os.path.exists(recorded_best):
    best_weight_path = str(recorded_best)

# Re-import files to ensure it refers to google.colab.files and not a local list variable
from google.colab import files

if os.path.exists(best_weight_path):
    print("Downloading model weights...")
    files.download(best_weight_path)
else:
    print("Model file not found. Training might have failed.")

# Explanation

- Defines the expected path to the best model weights produced by training (`best.pt`).
- Uses `google.colab.files` to download the `best.pt` file to your local machine (Colab download dialog) if training succeeded and the file exists.
- If the weights are not found, prints a message indicating training may have failed or the path is incorrect — helpful for troubleshooting.