# Computational Learning: Classification, Task A

## Code setup

*This section prepares the environment on the user's computer so that the notebook runs easily and the results presented in this work can be reproduced.*

In [1]:
# Basic libraries that come with every Python installation
import os
import logging
import subprocess
import sys
import importlib
from typing import Union, List, Dict

These are global variables that can be changed by the user to customize the execution of the code.

In [10]:
# Root folder for the downloaded datasets. create_folder_structure() builds the
# "data/train/metadata", "data/train/ground_truth" and "data/test" subfolders beneath it.
# A relative or an absolute path may be used.
# NOTE(review): the value below is a machine-specific absolute path; change it before running elsewhere.
INPUT_DATA_FOLDER = "/home/mariopasc/Uni/Computational_Learning/Lab2/taskA_output"

# Root folder for the results of the study. create_folder_structure() builds a
# "results" subfolder beneath it.
OUTPUT_FOLDER = "."

### Error management output

Errors raised by the helper functions in this notebook are recorded in a log file (`errors.log`).

In [12]:
# Send records of severity ERROR and above to "errors.log";
# each entry carries a timestamp, the level name and the message.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.ERROR,
    filename="errors.log",
)

### Directory tree

Set up the directory folder tree

In [14]:
# Create the folder structure needed for the analysis of the results.
def create_folder_structure(input_data_folder: Union[str, os.PathLike], output_folder: Union[str, os.PathLike]) -> None:
    """
    Creates the folder trees used by the notebook.

    Under `input_data_folder` it creates "data/train/metadata",
    "data/train/ground_truth" and "data/test". Under `output_folder` it creates
    "results". Folders that already exist are left untouched.

    Parameters:
    input_data_folder (Union[str, os.PathLike]): The directory under which the "data" tree is created.
    output_folder (Union[str, os.PathLike]): The directory under which the "results" folder is created.

    Returns:
    None

    Raises:
    Nothing for directory-creation failures: an OSError raised while creating a
    folder is caught and reported through `logging.error`. Folders listed after
    the failing one are not created.
    """
    # Paths are built outside the try block: joining paths cannot raise OSError.
    data_root = os.path.join(input_data_folder, "data")
    folders = [
        os.path.join(data_root, "train", "metadata"),
        os.path.join(data_root, "train", "ground_truth"),
        os.path.join(data_root, "test"),
        os.path.join(output_folder, "results"),
    ]

    try:
        for folder in folders:
            # exist_ok=True makes the call idempotent, so re-running the cell is safe.
            os.makedirs(folder, exist_ok=True)

    except OSError as e:
        # Name both roots: the failure may come from either of them.
        logging.error(
            f"Failed to create folder structure "
            f"(input: {input_data_folder}, output: {output_folder}): {e}"
        )

# Build the "data" tree under INPUT_DATA_FOLDER and the "results" folder under OUTPUT_FOLDER.
create_folder_structure(input_data_folder=INPUT_DATA_FOLDER, output_folder=OUTPUT_FOLDER)

### Install necessary libraries

The following code checks whether the user already has the libraries required for execution installed. If not, it proceeds with their installation.

In [20]:
def install_libraries(libraries: List[str]) -> None:
    """
    Makes sure every library in `libraries` can be imported, installing the missing ones with pip.

    Each name is first imported; only if that import fails is
    `<current interpreter> -m pip install <name>` executed. The same command is
    used everywhere (local Jupyter, Google Colab, plain Python), because it
    installs into the environment of the running interpreter. The IPython
    `!pip ...` syntax is not a valid shell command and cannot be run through
    `subprocess`.

    Note: each name is used both as the import name and as the pip package name,
    so the two must coincide (as they do for "requests").

    Parameters:
    libraries (List[str]): A list of library names to install.

    Returns:
    None

    Raises:
    Nothing for installation failures: a `subprocess.CalledProcessError` from pip
    is caught and reported through `logging.error`, and the remaining libraries
    are still processed.
    """
    for library in libraries:
        try:
            # Check if the library is already installed by trying to import it
            importlib.import_module(library)
            print(f"Library {library} already installed.")
            continue
        except ImportError:
            # Not importable: fall through to the installation step
            pass

        try:
            # Run pip through the current interpreter so the package lands in
            # the environment this kernel is actually using.
            subprocess.check_call([sys.executable, "-m", "pip", "install", library])
        except subprocess.CalledProcessError as e:
            # Log any errors during installation and move on to the next library
            logging.error(f"Failed to install {library}: {e}")
            continue

        # Let the import system notice packages added after the interpreter started.
        importlib.invalidate_caches()
        print(f"Installed {library} successfully.")

In [21]:
# Third-party packages this notebook needs; each name is used for both the import check and pip.
LIBRARIES = ['requests']

# Install whatever is missing before the import below is attempted.
install_libraries(libraries=LIBRARIES)

# Imported here rather than in the first cell because it may have been installed just above.
import requests

Library requests already installed.


## Introduction

## Systems and Methods

### Materials

Download the ISIC 2019 challenge dataset. The data can be found at this [link](https://challenge.isic-archive.com/data/#2019).

In [25]:
def download_isic_dataset(output_folder: Union[str, os.PathLike], timeout: float = 30.0) -> Dict[str, str]:
    """
    Downloads the ISIC 2019 Challenge CSV files into a "data" tree under `output_folder`.

    Three files are fetched: the training metadata, the training ground truth
    and the test metadata. They are stored in "data/train/metadata",
    "data/train/ground_truth" and "data/test" respectively; missing destination
    folders are created.

    Parameters:
    output_folder (Union[str, os.PathLike]): The directory under which the "data" tree with the files is written.
    timeout (float): Seconds to wait for the server before giving up on a request. Defaults to 30.

    Returns:
    Dict[str, str]: Maps "train_metadata", "train_ground_truth" and "test_metadata"
    to the destination path of the corresponding file. Every key is always
    present, even if its download failed, so check the error log when a file
    is missing.

    Raises:
    Nothing for download failures: any exception raised while fetching or
    writing a file is caught and reported through `logging.error`, and the
    remaining files are still attempted.
    """
    # URLs for the ISIC Challenge dataset
    urls = {
        "train_metadata": "https://isic-challenge-data.s3.amazonaws.com/2019/ISIC_2019_Training_Metadata.csv",
        "train_ground_truth": "https://isic-challenge-data.s3.amazonaws.com/2019/ISIC_2019_Training_GroundTruth.csv",
        "test_metadata": "https://isic-challenge-data.s3.amazonaws.com/2019/ISIC_2019_Test_Metadata.csv"
    }

    # File paths where each file should be saved
    paths = {
        "train_metadata": os.path.join(output_folder, "data", "train", "metadata", "ISIC_2019_Training_Metadata.csv"),
        "train_ground_truth": os.path.join(output_folder, "data", "train", "ground_truth", "ISIC_2019_Training_GroundTruth.csv"),
        "test_metadata": os.path.join(output_folder, "data", "test", "ISIC_2019_Test_Metadata.csv")
    }

    for key, url in urls.items():
        destination = paths[key]
        try:
            # Without a timeout a stalled connection would block the notebook forever.
            response = requests.get(url, timeout=timeout)
            response.raise_for_status()  # Raises HTTPError for bad responses (4xx, 5xx)

            # Do not rely on the folder tree having been created beforehand.
            os.makedirs(os.path.dirname(destination), exist_ok=True)

            # Write file content to the destination path
            with open(destination, "wb") as file:
                file.write(response.content)
            print(f"Downloaded {key} to {destination}.")

        except Exception as e:
            # Best effort: log the failure and continue with the next file
            logging.error(f"Failed to download {key} from {url}: {e}")

    return paths

# Datasets go under INPUT_DATA_FOLDER, where create_folder_structure() built the "data" tree.
data_paths = download_isic_dataset(output_folder=INPUT_DATA_FOLDER)

Downloaded train_metadata to /home/mariopasc/Uni/Computational_Learning/Lab2/taskA_output/data/train/metadata/ISIC_2019_Training_Metadata.csv.
Downloaded train_ground_truth to /home/mariopasc/Uni/Computational_Learning/Lab2/taskA_output/data/train/ground_truth/ISIC_2019_Training_GroundTruth.csv.
Downloaded test_metadata to /home/mariopasc/Uni/Computational_Learning/Lab2/taskA_output/data/test/ISIC_2019_Test_Metadata.csv.


### Methodology