In [1]:
import os
import shutil
import pandas as pd
import random
from tqdm import tqdm

# Paths to the metadata CSV and the two folders holding the original images
metadata_path = 'HAM10000_metadata.csv'
image_dir_1 = 'HAM10000_images_part_1'
image_dir_2 = 'HAM10000_images_part_2'

# Output folder for the selected subset
output_dir = 'selected_dataset'
os.makedirs(output_dir, exist_ok=True)

# Load metadata
df = pd.read_csv(metadata_path)

# Map image id -> source path across both image folders. The id is the file
# name without its extension; os.path.splitext strips only the final
# extension, so a file name containing extra dots still yields its full id.
# Entries from the second folder override same-named entries from the first.
all_images = {}
for image_dir in (image_dir_1, image_dir_2):
    for f in os.listdir(image_dir):
        if f.endswith('.jpg'):
            all_images[os.path.splitext(f)[0]] = os.path.join(image_dir, f)

# All unique labels found in the 'dx' column
labels = df['dx'].unique()

# Copy up to 70 sampled images per label (fixed random_state for repeatability)
missing_images = []
for label in labels:
    label_df = df[df['dx'] == label]
    selected_rows = label_df.sample(n=min(70, len(label_df)), random_state=42)

    label_folder = os.path.join(output_dir, label)
    os.makedirs(label_folder, exist_ok=True)

    for image_id in tqdm(selected_rows['image_id'], total=len(selected_rows), desc=f'Copying {label}'):
        src_path = all_images.get(image_id)

        if src_path and os.path.exists(src_path):
            dst_path = os.path.join(label_folder, f"{image_id}.jpg")
            shutil.copy(src_path, dst_path)
        else:
            # Best effort: keep copying, but remember the gap so a label that
            # ends up with fewer files than requested does not go unnoticed.
            missing_images.append((label, image_id))

if missing_images:
    print(f"Warning: {len(missing_images)} selected image(s) were not found and were skipped:")
    for missing_label, missing_id in missing_images:
        print(f"  {missing_label}: {missing_id}")


Copying bkl: 100%|██████████| 70/70 [00:01<00:00, 61.35it/s]
Copying nv: 100%|██████████| 70/70 [00:01<00:00, 69.18it/s]
Copying df: 100%|██████████| 70/70 [00:01<00:00, 57.65it/s]
Copying mel: 100%|██████████| 70/70 [00:01<00:00, 61.74it/s]
Copying vasc: 100%|██████████| 70/70 [00:01<00:00, 58.52it/s]
Copying bcc: 100%|██████████| 70/70 [00:01<00:00, 55.06it/s]
Copying akiec: 100%|██████████| 70/70 [00:01<00:00, 58.59it/s]


In [None]:
import os
import cv2
import random
import shutil
import numpy as np
from tqdm import tqdm

# Input: the selected dataset (up to 70 images per label); output: preprocessed copies
input_dir = 'selected_dataset'
output_dir = 'preprocessed_dataset'
target_size = (128, 128)  # size passed to cv2.resize; free to change

# Create the output folder
os.makedirs(output_dir, exist_ok=True)

# Dedicated seeded generator so the per-label sample is the same on every run.
sampler = random.Random(42)
unreadable_images = []

# Sorted listings make both the processing order and the seeded sample
# independent of the order in which the file system returns entries.
for label in sorted(os.listdir(input_dir)):
    label_input_path = os.path.join(input_dir, label)
    if not os.path.isdir(label_input_path):
        # Stray files next to the label folders would make os.listdir() fail.
        continue

    label_output_path = os.path.join(output_dir, label)
    os.makedirs(label_output_path, exist_ok=True)

    image_files = sorted(f for f in os.listdir(label_input_path) if f.endswith('.jpg'))
    selected_files = sampler.sample(image_files, k=min(70, len(image_files)))

    for filename in tqdm(selected_files, desc=f"Processing {label}"):
        img_path = os.path.join(label_input_path, filename)
        img = cv2.imread(img_path)

        if img is None:
            # The image could not be loaded: skip it, but record it so the
            # shortfall is reported instead of silently shrinking the class.
            unreadable_images.append(img_path)
            continue

        # Preprocessing: grayscale -> resize -> histogram equalization
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        resized = cv2.resize(gray, target_size)
        equalized = cv2.equalizeHist(resized)

        # Save the result under the same file name
        output_path = os.path.join(label_output_path, filename)
        cv2.imwrite(output_path, equalized)

if unreadable_images:
    print(f"Warning: {len(unreadable_images)} image(s) could not be read and were skipped:")
    for skipped_path in unreadable_images:
        print(f"  {skipped_path}")


Processing akiec: 100%|██████████| 70/70 [00:02<00:00, 32.76it/s]
Processing bcc: 100%|██████████| 70/70 [00:01<00:00, 44.21it/s]
Processing bkl: 100%|██████████| 70/70 [00:01<00:00, 48.46it/s]
Processing df: 100%|██████████| 70/70 [00:01<00:00, 52.69it/s]
Processing mel: 100%|██████████| 70/70 [00:01<00:00, 53.14it/s]
Processing nv: 100%|██████████| 70/70 [00:01<00:00, 51.48it/s]
Processing vasc: 100%|██████████| 70/70 [00:01<00:00, 41.48it/s]


In [5]:
import os
import cv2 as cv
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_predict
from sklearn.metrics import accuracy_score, classification_report
from skimage.feature import graycomatrix, graycoprops
from scipy.stats import entropy
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
from sklearn.metrics import (confusion_matrix, ConfusionMatrixDisplay)
import seaborn as sns

In [3]:
# Directory holding the preprocessed images, one sub-folder per label
preprocessed_dir = 'preprocessed_dataset'
dataPreprocessed = []
file_name = []
labels = []

# Sorted listings give a stable row order regardless of how the file system
# enumerates entries, so the extracted feature table is reproducible.
for label in sorted(os.listdir(preprocessed_dir)):
    label_path = os.path.join(preprocessed_dir, label)
    if not os.path.isdir(label_path):
        # Ignore stray files sitting next to the label folders.
        continue
    for fname in sorted(os.listdir(label_path)):
        if not fname.endswith('.jpg'):
            continue
        img_path = os.path.join(label_path, fname)
        # Use the `cv` alias bound by this notebook's analysis import cell
        # (`import cv2 as cv`) rather than relying on `cv2` having been
        # imported by an earlier, unrelated cell.
        img = cv.imread(img_path, cv.IMREAD_GRAYSCALE)
        if img is None:
            continue
        dataPreprocessed.append(img)
        file_name.append(fname)
        labels.append(label)

# `data` holds the same images as `dataPreprocessed` in a separate list;
# later cells use it for its length.
data = list(dataPreprocessed)

In [6]:
def glcm(image, derajat):
    """Build the gray-level co-occurrence matrix of `image` for one direction.

    Args:
        image: image array handed unchanged to `graycomatrix`.
        derajat: direction in degrees; must equal 0, 45, 90 or 135.

    Returns:
        Whatever `graycomatrix` returns for distance list [1], the single
        matching angle (in radians), 256 levels, symmetric=True, normed=True.

    Raises:
        ValueError: if `derajat` is not one of the four supported angles.
    """
    # (degrees, radians) pairs checked in order with `==`.
    supported_angles = (
        (0, 0),
        (45, np.pi / 4),
        (90, np.pi / 2),
        (135, 3 * np.pi / 4),
    )
    for degrees, radians in supported_angles:
        if derajat == degrees:
            return graycomatrix(image, [1], [radians], 256, symmetric=True, normed=True)
    raise ValueError("Invalid angle. It should be one of the following: 0, 45, 90, 135.")

In [7]:
def correlation(matriks):
    """Return the [0, 0] entry of graycoprops(matriks, 'correlation')."""
    props = graycoprops(matriks, 'correlation')
    return props[0, 0]

In [8]:
def dissimilarity(matriks):
    """Return the [0, 0] entry of graycoprops(matriks, 'dissimilarity')."""
    props = graycoprops(matriks, 'dissimilarity')
    return props[0, 0]

In [9]:
def homogenity(matriks):
    """Return the [0, 0] entry of graycoprops(matriks, 'homogeneity')."""
    props = graycoprops(matriks, 'homogeneity')
    return props[0, 0]

In [10]:
def contrast(matriks):
    """Return the [0, 0] entry of graycoprops(matriks, 'contrast')."""
    props = graycoprops(matriks, 'contrast')
    return props[0, 0]

In [11]:
def ASM(matriks):
    """Return the [0, 0] entry of graycoprops(matriks, 'ASM')."""
    props = graycoprops(matriks, 'ASM')
    return props[0, 0]

In [12]:
def energy(matriks):
    """Return the [0, 0] entry of graycoprops(matriks, 'energy')."""
    props = graycoprops(matriks, 'energy')
    return props[0, 0]

In [13]:

def entropyGlcm(matriks):
    """Return the entropy of all entries of `matriks`, flattened to 1-D.

    The flattened array is passed to `entropy` with no other arguments, so
    that function's defaults apply.
    """
    flattened = matriks.reshape(-1)
    return entropy(flattened)

In [14]:
# One GLCM per preprocessed image for each of the four directions,
# kept in the same order as `dataPreprocessed`.
Derajat0 = [glcm(image, 0) for image in dataPreprocessed]
Derajat45 = [glcm(image, 45) for image in dataPreprocessed]
Derajat90 = [glcm(image, 90) for image in dataPreprocessed]
Derajat135 = [glcm(image, 135) for image in dataPreprocessed]

In [15]:
# One independent empty list per (feature, angle) pair; the cells below fill them.
Kontras0, Kontras45, Kontras90, Kontras135 = ([] for _ in range(4))
dissimilarity0, dissimilarity45, dissimilarity90, dissimilarity135 = ([] for _ in range(4))
homogenity0, homogenity45, homogenity90, homogenity135 = ([] for _ in range(4))
entropy0, entropy45, entropy90, entropy135 = ([] for _ in range(4))
ASM0, ASM45, ASM90, ASM135 = ([] for _ in range(4))
energy0, energy45, energy90, energy135 = ([] for _ in range(4))
correlation0, correlation45, correlation90, correlation135 = ([] for _ in range(4))

In [16]:
# Correlation of every image's GLCM at each angle. The lists are rebuilt from
# the GLCM lists instead of being appended to, so re-running this cell cannot
# duplicate entries and leave the feature columns with mismatched lengths.
correlation0 = [correlation(matrix) for matrix in Derajat0]
correlation45 = [correlation(matrix) for matrix in Derajat45]
correlation90 = [correlation(matrix) for matrix in Derajat90]
correlation135 = [correlation(matrix) for matrix in Derajat135]

In [17]:
# Contrast of every image's GLCM at each angle. The lists are rebuilt from
# the GLCM lists instead of being appended to, so re-running this cell cannot
# duplicate entries and leave the feature columns with mismatched lengths.
Kontras0 = [contrast(matrix) for matrix in Derajat0]
Kontras45 = [contrast(matrix) for matrix in Derajat45]
Kontras90 = [contrast(matrix) for matrix in Derajat90]
Kontras135 = [contrast(matrix) for matrix in Derajat135]

In [18]:

# Dissimilarity of every image's GLCM at each angle. The lists are rebuilt from
# the GLCM lists instead of being appended to, so re-running this cell cannot
# duplicate entries and leave the feature columns with mismatched lengths.
dissimilarity0 = [dissimilarity(matrix) for matrix in Derajat0]
dissimilarity45 = [dissimilarity(matrix) for matrix in Derajat45]
dissimilarity90 = [dissimilarity(matrix) for matrix in Derajat90]
dissimilarity135 = [dissimilarity(matrix) for matrix in Derajat135]

In [19]:
# Homogeneity of every image's GLCM at each angle. The lists are rebuilt from
# the GLCM lists instead of being appended to, so re-running this cell cannot
# duplicate entries and leave the feature columns with mismatched lengths.
homogenity0 = [homogenity(matrix) for matrix in Derajat0]
homogenity45 = [homogenity(matrix) for matrix in Derajat45]
homogenity90 = [homogenity(matrix) for matrix in Derajat90]
homogenity135 = [homogenity(matrix) for matrix in Derajat135]

In [20]:
# Entropy of every image's GLCM at each angle. The lists are rebuilt from
# the GLCM lists instead of being appended to, so re-running this cell cannot
# duplicate entries and leave the feature columns with mismatched lengths.
entropy0 = [entropyGlcm(matrix) for matrix in Derajat0]
entropy45 = [entropyGlcm(matrix) for matrix in Derajat45]
entropy90 = [entropyGlcm(matrix) for matrix in Derajat90]
entropy135 = [entropyGlcm(matrix) for matrix in Derajat135]

In [21]:
# ASM of every image's GLCM at each angle. The lists are rebuilt from
# the GLCM lists instead of being appended to, so re-running this cell cannot
# duplicate entries and leave the feature columns with mismatched lengths.
ASM0 = [ASM(matrix) for matrix in Derajat0]
ASM45 = [ASM(matrix) for matrix in Derajat45]
ASM90 = [ASM(matrix) for matrix in Derajat90]
ASM135 = [ASM(matrix) for matrix in Derajat135]

In [22]:
# Energy of every image's GLCM at each angle. The lists are rebuilt from
# the GLCM lists instead of being appended to, so re-running this cell cannot
# duplicate entries and leave the feature columns with mismatched lengths.
energy0 = [energy(matrix) for matrix in Derajat0]
energy45 = [energy(matrix) for matrix in Derajat45]
energy90 = [energy(matrix) for matrix in Derajat90]
energy135 = [energy(matrix) for matrix in Derajat135]

In [23]:
# Feature lists grouped by column-name prefix; each tuple is ordered to match
# the angle suffixes 0, 45, 90, 135.
feature_columns = {
    'Contrast': (Kontras0, Kontras45, Kontras90, Kontras135),
    'Homogeneity': (homogenity0, homogenity45, homogenity90, homogenity135),
    'Dissimilarity': (dissimilarity0, dissimilarity45, dissimilarity90, dissimilarity135),
    'Entropy': (entropy0, entropy45, entropy90, entropy135),
    'ASM': (ASM0, ASM45, ASM90, ASM135),
    'Energy': (energy0, energy45, energy90, energy135),
    'Correlation': (correlation0, correlation45, correlation90, correlation135),
}

# Identification columns first, then one column per (feature, angle) pair.
dataTable = {'Filename': file_name, 'Label': labels}
for prefix, per_angle_values in feature_columns.items():
    for angle, values in zip((0, 45, 90, 135), per_angle_values):
        dataTable[f'{prefix}{angle}'] = values

# Write the table to disk without the index column, then read it back.
df = pd.DataFrame(dataTable)
df.to_csv('hasil_ekstraksi_1.csv', index=False)

hasilEkstrak = pd.read_csv('hasil_ekstraksi_1.csv')
hasilEkstrak

Unnamed: 0,Filename,Label,Contrast0,Contrast45,Contrast90,Contrast135,Homogeneity0,Homogeneity45,Homogeneity90,Homogeneity135,...,ASM90,ASM135,Energy0,Energy45,Energy90,Energy135,Correlation0,Correlation45,Correlation90,Correlation135
0,ISIC_0024329.jpg,akiec,367.222687,469.572137,282.712660,454.703391,0.096718,0.085964,0.106788,0.084762,...,0.000139,0.000115,0.011187,0.010640,0.011783,0.010738,0.966358,0.956963,0.974170,0.958324
1,ISIC_0024463.jpg,akiec,576.685470,798.817720,481.384473,701.828880,0.076725,0.068014,0.084267,0.070703,...,0.000117,0.000103,0.010431,0.009925,0.010801,0.010125,0.947452,0.927224,0.956024,0.936062
2,ISIC_0024511.jpg,akiec,455.411479,408.061194,347.993356,618.809288,0.106285,0.105754,0.123065,0.093169,...,0.000148,0.000117,0.011382,0.011508,0.012170,0.010797,0.958490,0.962829,0.968425,0.943638
3,ISIC_0024562.jpg,akiec,1563.463091,1642.868126,1411.006521,2135.386385,0.067790,0.066276,0.072613,0.060347,...,0.000101,0.000086,0.009716,0.009497,0.010050,0.009275,0.858784,0.851305,0.872101,0.806706
4,ISIC_0024646.jpg,akiec,1145.157972,1538.028210,1064.853285,1476.134168,0.059001,0.053009,0.063337,0.053462,...,0.000131,0.000108,0.011057,0.010269,0.011427,0.010404,0.897286,0.862080,0.904120,0.867630
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
485,ISIC_0033458.jpg,vasc,335.616572,370.450679,260.427350,357.895096,0.124875,0.124056,0.168390,0.113445,...,0.000198,0.000144,0.012582,0.012222,0.014085,0.011985,0.969287,0.966099,0.976301,0.967248
486,ISIC_0033817.jpg,vasc,209.045952,285.950338,227.933686,314.615289,0.132959,0.113150,0.135702,0.111985,...,0.000169,0.000143,0.012947,0.012153,0.012988,0.011969,0.980775,0.973667,0.979136,0.971026
487,ISIC_0033969.jpg,vasc,240.111405,253.511377,194.070066,317.528923,0.120587,0.113251,0.147600,0.109704,...,0.000192,0.000140,0.012715,0.012119,0.013874,0.011814,0.978215,0.976983,0.982445,0.971171
488,ISIC_0033991.jpg,vasc,369.832247,468.120404,285.414616,416.838428,0.122326,0.114161,0.141678,0.121054,...,0.000256,0.000194,0.014501,0.013625,0.016015,0.013938,0.966820,0.958130,0.974449,0.962716
