In [None]:
from google.colab import drive
drive.mount('/content/drive')
!pip install kaggle

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
from google.colab import files

uploaded = files.upload()

Saving kaggle.json to kaggle.json


In [None]:
import os

os.makedirs('/root/.kaggle', exist_ok=True)
!mv kaggle.json /root/.kaggle/
!chmod 600 /root/.kaggle/kaggle.json

In [None]:
!kaggle datasets download -d kmader/skin-cancer-mnist-ham10000 -p /content/


Dataset URL: https://www.kaggle.com/datasets/kmader/skin-cancer-mnist-ham10000
License(s): CC-BY-NC-SA-4.0
skin-cancer-mnist-ham10000.zip: Skipping, found more recently modified local copy (use --force to force download)


In [None]:
import zipfile

with zipfile.ZipFile('/content/skin-cancer-mnist-ham10000.zip', 'r') as zip_ref:
    zip_ref.extractall('/content/ham10000')

%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
from fastai.vision.all import *
from fastai.metrics import *
import pandas as pd
from pathlib import Path

path = Path('../content/ham10000')
Path.BASE_PATH = path

short_to_full_name_dict = {
    "akiec" : "Bowen's disease", # very early form of skin cancer
    "bcc" : "basal cell carcinoma" , # basal-cell cancer or white skin cancer
    "bkl" : "benign keratosis-like lesions", # non-cancerous skin tumour
    "df" : "dermatofibroma", # non-cancerous rounded bumps
    "mel" : "melanoma", # black skin cancer
    "nv" : "melanocytic nevi", # mole non-cancerous
    "vasc" : "vascular lesions", # skin condition
}

In [None]:
csv_path = "/content/ham10000/HAM10000_metadata.csv"
skin_df = pd.read_csv(csv_path)
skin_df.sort_values(by="image_id")
img_to_class_dict = skin_df.loc[:, ["image_id", "dx"]]
img_to_class_dict = img_to_class_dict.to_dict('list')
img_to_class_dict = {img_id : short_to_full_name_dict[disease] for img_id,disease in zip(img_to_class_dict['image_id'], img_to_class_dict['dx']) }
[x for x in img_to_class_dict.items()][:5]

def get_label_from_dict(path):
    return img_to_class_dict[path.stem]

dblock = DataBlock(
    blocks = (ImageBlock, CategoryBlock),
    get_items = get_image_files,
    splitter = RandomSplitter(valid_pct=0.2, seed=42),
    get_y = get_label_from_dict,
    item_tfms=[Resize(448), DihedralItem()],
    batch_tfms=RandomResizedCrop(size=224, min_scale=0.75, max_scale=1.0))



In [None]:
!pip install deap

import random
import torch
import fastai
from fastai.vision.all import *
from deap import base, creator, tools, algorithms
import numpy as np

NUM_CLASSES = 7
NUM_EPOCHS = 5
BATCH_SIZE = 64
LEARNING_RATE_RANGE = (0.0001, 0.01)
BATCH_SIZE_RANGE = (16, 128)
POPULATION_SIZE = 20
NUM_GENERATIONS = 10

path = "/content/ham10000"
dls = ImageDataLoaders.from_folder(path, valid_pct=0.2, bs=BATCH_SIZE, item_tfms=Resize(224), batch_tfms=aug_transforms())

def evaluate_fitness(params,dls):
    lr, batch_size = params

    learn = cnn_learner(dls, resnet18, metrics=accuracy, opt_func=ranger)
    learn.fine_tune(NUM_EPOCHS)

    accuracy1 = learn.validate()[1]
    return (accuracy1,)

creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", list, fitness=creator.FitnessMax)

toolbox = base.Toolbox()
toolbox.register("attr_float", random.uniform, LEARNING_RATE_RANGE[0], LEARNING_RATE_RANGE[1])
toolbox.register("attr_int", random.randint, BATCH_SIZE_RANGE[0], BATCH_SIZE_RANGE[1])
toolbox.register("individual", tools.initCycle, creator.Individual,
                 (toolbox.attr_float, toolbox.attr_int),
                 n=1)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

toolbox.register("evaluate", evaluate_fitness)
toolbox.register("mate", tools.cxBlend, alpha=0.5)
toolbox.register("mutate", tools.mutPolynomialBounded, eta=0.1, low=[LEARNING_RATE_RANGE[0], BATCH_SIZE_RANGE[0]],
                 up=[LEARNING_RATE_RANGE[1], BATCH_SIZE_RANGE[1]], indpb=0.2)
toolbox.register("select", tools.selTournament, tournsize=3)

pop = toolbox.population(n=POPULATION_SIZE)
hof = tools.HallOfFame(1)
stats = tools.Statistics(lambda ind: ind.fitness.values)
stats.register("max", np.max)

for gen in range(NUM_GENERATIONS):


        fitness_results = []
        for ind in pop:
            fitness = toolbox.evaluate(ind, dls)
            fitness_results.append(fitness)

            ind.fitness.values = fitness

        hof.update(pop)

        offspring = toolbox.select(pop, len(pop))
        offspring = list(map(toolbox.clone, offspring))

        for child1, child2 in zip(offspring[::2], offspring[1::2]):
            if random.random() < 0.5:
                toolbox.mate(child1, child2)
                del child1.fitness.values
                del child2.fitness.values

        for mutant in offspring:
            if random.random() < 0.2:
                toolbox.mutate(mutant)
                del mutant.fitness.values

        pop[:] = offspring

best_params = hof[0]
best_lr, best_batch_size = best_params[1], best_params[2]
best_accuracy = toolbox.evaluate(best_params)[0]





In [8]:
print(f"\nBest Parameters - Learning Rate: {best_lr}, Batch Size: {best_batch_size}")
print(f"Best Accuracy: {best_accuracy}")



Best Parameters - Learning Rate: 0.0047, Batch Size: 58
Best Accuracy: 0.68
