# ===============================
# Melanoma Classification - Tabular Metadata
# Using SMOTE for Balancing
# ===============================


# Import libraries

In [15]:
import pandas as pd
import os
from glob import glob
from PIL import Image
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
from imblearn.over_sampling import SMOTE
import xgboost as xgb

# Configuration & Data Loading

In [1]:


# Paths
base_path = r"C:\Users\lenovo\OneDrive\Desktop\Melanoma Disease Classification\cleaned_dataset"
folders = ["melanoma_in_situ", "melanoma_invasive", "nevus"]

# Metadata
metadata = pd.read_csv(os.path.join(base_path, "metadata.csv"))
metadata


Unnamed: 0,isic_id,age_approx,anatom_site_general,sex,label,image_path
0,1,30,posterior torso,female,melanoma_invasive,C:\Users\lenovo\OneDrive\Desktop\Melanoma Dise...
1,1,55,lower extremity,female,melanoma_in_situ,C:\Users\lenovo\OneDrive\Desktop\Melanoma Dise...
2,2,30,lower extremity,female,melanoma_invasive,C:\Users\lenovo\OneDrive\Desktop\Melanoma Dise...
3,3,55,lower extremity,female,melanoma_invasive,C:\Users\lenovo\OneDrive\Desktop\Melanoma Dise...
4,4,70,upper extremity,male,melanoma_invasive,C:\Users\lenovo\OneDrive\Desktop\Melanoma Dise...
...,...,...,...,...,...,...
11751,9973,65,anterior torso,male,nevus,C:\Users\lenovo\OneDrive\Desktop\Melanoma Dise...
11752,9974,35,anterior torso,female,nevus,C:\Users\lenovo\OneDrive\Desktop\Melanoma Dise...
11753,9975,30,anterior torso,female,nevus,C:\Users\lenovo\OneDrive\Desktop\Melanoma Dise...
11754,9976,50,upper extremity,female,nevus,C:\Users\lenovo\OneDrive\Desktop\Melanoma Dise...


# Explratory Data Analysis

In [2]:
metadata.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11756 entries, 0 to 11755
Data columns (total 6 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   isic_id              11756 non-null  int64 
 1   age_approx           11756 non-null  int64 
 2   anatom_site_general  11756 non-null  object
 3   sex                  11756 non-null  object
 4   label                11756 non-null  object
 5   image_path           11756 non-null  object
dtypes: int64(2), object(4)
memory usage: 551.2+ KB


In [4]:
metadata.duplicated().sum()

0

In [6]:
metadata.isnull().sum()

isic_id                0
age_approx             0
anatom_site_general    0
sex                    0
label                  0
image_path             0
dtype: int64

In [11]:
metadata['sex'].unique()

array(['female', 'male'], dtype=object)

In [10]:
metadata['anatom_site_general'].unique()

array(['posterior torso', 'lower extremity', 'upper extremity',
       'anterior torso', 'head/neck', 'palms/soles', 'lateral torso',
       'oral/genital'], dtype=object)

In [9]:
metadata['age_approx'].unique()

array([30, 55, 70, 25, 65, 35, 60, 80, 85, 50, 75, 45, 40, 20, 15, 10,  5],
      dtype=int64)

In [8]:
metadata['label'].unique()

array(['melanoma_invasive', 'melanoma_in_situ', 'nevus'], dtype=object)

In [12]:
metadata['label'].value_counts()

label
nevus                9976
melanoma_in_situ      983
melanoma_invasive     797
Name: count, dtype: int64

# 3️⃣ Encode Target Label


In [16]:
le = LabelEncoder()
metadata['label_encoded'] = le.fit_transform(metadata['label'])
# Classes mapping
print(dict(zip(le.classes_, le.transform(le.classes_))))

{'melanoma_in_situ': 0, 'melanoma_invasive': 1, 'nevus': 2}


# 4️⃣ Features & Labels


In [17]:
X = metadata[['age_approx', 'sex', 'anatom_site_general']]
y = metadata['label_encoded']


# 5️⃣ Binary Encode Sex


In [18]:
X['sex'] = X['sex'].map({'female': 0, 'male': 1})

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X['sex'] = X['sex'].map({'female': 0, 'male': 1})


# 6️⃣ One-Hot Encode anatomical site


In [19]:
X = pd.get_dummies(X, columns=['anatom_site_general'], drop_first=True)

In [20]:
X

Unnamed: 0,age_approx,sex,anatom_site_general_head/neck,anatom_site_general_lateral torso,anatom_site_general_lower extremity,anatom_site_general_oral/genital,anatom_site_general_palms/soles,anatom_site_general_posterior torso,anatom_site_general_upper extremity
0,30,0,False,False,False,False,False,True,False
1,55,0,False,False,True,False,False,False,False
2,30,0,False,False,True,False,False,False,False
3,55,0,False,False,True,False,False,False,False
4,70,1,False,False,False,False,False,False,True
...,...,...,...,...,...,...,...,...,...
11751,65,1,False,False,False,False,False,False,False
11752,35,0,False,False,False,False,False,False,False
11753,30,0,False,False,False,False,False,False,False
11754,50,0,False,False,False,False,False,False,True


**label col(target vector)**

    {'melanoma_in_situ': 0, 'melanoma_invasive': 1, 'nevus': 2}

In [21]:
y

0        1
1        0
2        1
3        1
4        1
        ..
11751    2
11752    2
11753    2
11754    2
11755    0
Name: label_encoded, Length: 11756, dtype: int32

# 7️⃣ Scale Numerical Feature


In [22]:
scaler = StandardScaler()
X['age_approx'] = scaler.fit_transform(X[['age_approx']])

# 8️⃣ Split Data (Stratified)


In [23]:
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42, stratify=y_temp
)

print("Original training class distribution:\n", y_train.value_counts())

Original training class distribution:
 label_encoded
2    6983
0     688
1     558
Name: count, dtype: int64


In [29]:
X_train

Unnamed: 0,age_approx,sex,anatom_site_general_head/neck,anatom_site_general_lateral torso,anatom_site_general_lower extremity,anatom_site_general_oral/genital,anatom_site_general_palms/soles,anatom_site_general_posterior torso,anatom_site_general_upper extremity
141,1.221998,0,False,False,True,False,False,False,False
4835,-0.501244,1,False,False,False,False,False,True,False
2188,0.647584,0,False,False,False,False,False,True,False
3883,-0.501244,0,False,False,True,False,False,False,False
4818,2.083619,0,False,False,True,False,False,False,False
...,...,...,...,...,...,...,...,...,...
4480,0.647584,1,False,False,False,False,False,True,False
2027,0.647584,0,True,False,False,False,False,False,False
5134,0.073170,1,False,False,True,False,False,False,False
4933,-0.214037,1,False,False,False,False,False,False,False


In [25]:
y_train

141     1
4835    2
2188    2
3883    2
4818    2
       ..
4480    2
2027    0
5134    2
4933    2
72      2
Name: label_encoded, Length: 8229, dtype: int32

# 9️⃣ Handle Imbalance with SMOTE


In [24]:
smote = SMOTE(random_state=42)
X_train_bal, y_train_bal = smote.fit_resample(X_train, y_train)

print("Balanced training class distribution:\n", pd.Series(y_train_bal).value_counts())


Balanced training class distribution:
 label_encoded
1    6983
2    6983
0    6983
Name: count, dtype: int64


In [28]:
X_train_bal

Unnamed: 0,age_approx,sex,anatom_site_general_head/neck,anatom_site_general_lateral torso,anatom_site_general_lower extremity,anatom_site_general_oral/genital,anatom_site_general_palms/soles,anatom_site_general_posterior torso,anatom_site_general_upper extremity
0,1.221998,0,False,False,True,False,False,False,False
1,-0.501244,1,False,False,False,False,False,True,False
2,0.647584,0,False,False,False,False,False,True,False
3,-0.501244,0,False,False,True,False,False,False,False
4,2.083619,0,False,False,True,False,False,False,False
...,...,...,...,...,...,...,...,...,...
20944,2.083619,1,False,False,False,False,False,True,False
20945,1.221998,1,False,False,False,False,False,True,False
20946,1.796412,0,True,False,False,False,False,False,False
20947,2.083619,1,False,False,False,False,False,False,True


In [27]:
y_train_bal

0        1
1        2
2        2
3        2
4        2
        ..
20944    1
20945    1
20946    1
20947    1
20948    1
Name: label_encoded, Length: 20949, dtype: int32

In [31]:
X_train_bal['age_approx'].max()

2.083618551338663

In [32]:
X_train_bal['age_approx'].min()

-2.5116927508832134

# 🔟 Train XGBoost Classifier


In [33]:
model = xgb.XGBClassifier(
    objective='multi:softmax', 
    num_class=len(le.classes_),
    eval_metric='mlogloss',
    use_label_encoder=False,
    random_state=42
)
model.fit(X_train_bal, y_train_bal)

# 1️⃣1️⃣ Evaluate on Validation Set


In [34]:
y_val_pred = model.predict(X_val)
print("Validation Classification Report:\n", classification_report(y_val, y_val_pred, target_names=le.classes_))
print("Validation Confusion Matrix:\n", confusion_matrix(y_val, y_val_pred))


Validation Classification Report:
                    precision    recall  f1-score   support

 melanoma_in_situ       0.24      0.51      0.33       147
melanoma_invasive       0.18      0.46      0.26       120
            nevus       0.95      0.72      0.82      1496

         accuracy                           0.69      1763
        macro avg       0.46      0.56      0.47      1763
     weighted avg       0.83      0.69      0.74      1763

Validation Confusion Matrix:
 [[  75   31   41]
 [  43   55   22]
 [ 192  221 1083]]


# 1️⃣2️⃣ Evaluate on Test Set


In [35]:
y_test_pred = model.predict(X_test)
print("Test Classification Report:\n", classification_report(y_test, y_test_pred, target_names=le.classes_))
print("Test Confusion Matrix:\n", confusion_matrix(y_test, y_test_pred))

Test Classification Report:
                    precision    recall  f1-score   support

 melanoma_in_situ       0.27      0.61      0.37       148
melanoma_invasive       0.15      0.40      0.21       119
            nevus       0.95      0.70      0.80      1497

         accuracy                           0.67      1764
        macro avg       0.45      0.57      0.46      1764
     weighted avg       0.84      0.67      0.73      1764

Test Confusion Matrix:
 [[  90   34   24]
 [  40   48   31]
 [ 206  246 1045]]


---
        ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

# ===============================
# Melanoma Classification - Tabular Metadata
# Using Under-sampling for Balancing
# ===============================


In [38]:
# 1️⃣ Imports
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix
import xgboost as xgb
import numpy as np

In [39]:
# 2️⃣ Load Metadata
base_path = r"C:\Users\lenovo\OneDrive\Desktop\Melanoma Disease Classification\cleaned_dataset"
metadata = pd.read_csv(base_path + r"\metadata.csv")


In [40]:
# 3️⃣ Encode Target Label
le = LabelEncoder()
metadata['label_encoded'] = le.fit_transform(metadata['label'])
print("Class mapping:", dict(zip(le.classes_, le.transform(le.classes_))))

Class mapping: {'melanoma_in_situ': 0, 'melanoma_invasive': 1, 'nevus': 2}


In [41]:
# 4️⃣ Under-sample Majority Class (nevus)
minority_class_count = max(metadata['label'].value_counts().loc[['melanoma_in_situ','melanoma_invasive']])
# For simplicity, use ~1000 samples for nevus
nevus_samples = metadata[metadata['label'] == 'nevus'].sample(n=1000, random_state=42)
minority_samples = metadata[metadata['label'] != 'nevus']
balanced_metadata = pd.concat([nevus_samples, minority_samples]).reset_index(drop=True)

print("Balanced class distribution:\n", balanced_metadata['label'].value_counts())

Balanced class distribution:
 label
nevus                1000
melanoma_in_situ      983
melanoma_invasive     797
Name: count, dtype: int64


In [42]:
# 5️⃣ Features & Labels
X = balanced_metadata[['age_approx', 'sex', 'anatom_site_general']]
y = balanced_metadata['label_encoded']


In [43]:
# 6️⃣ Encode Sex Binary (female=0, male=1)
X['sex'] = X['sex'].map({'female': 0, 'male': 1})


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X['sex'] = X['sex'].map({'female': 0, 'male': 1})


In [44]:
# 7️⃣ One-Hot Encode Anatomical Site
X = pd.get_dummies(X, columns=['anatom_site_general'], drop_first=True)


In [45]:
from sklearn.preprocessing import MinMaxScaler

# 8️⃣ Scale Numerical Feature using MinMaxScaler
scaler = MinMaxScaler()
X['age_approx'] = scaler.fit_transform(X[['age_approx']])


# 8️⃣ Scale Numerical Feature
# scaler = StandardScaler()
# X['age_approx'] = scaler.fit_transform(X[['age_approx']])

In [46]:
# 9️⃣ Split Data (Stratified)
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42, stratify=y_temp
)

print("Training class distribution:\n", y_train.value_counts())

Training class distribution:
 label_encoded
2    700
0    688
1    558
Name: count, dtype: int64


In [47]:
# 🔟 Train XGBoost Classifier
model = xgb.XGBClassifier(
    objective='multi:softmax',
    num_class=len(le.classes_),
    eval_metric='mlogloss',
    use_label_encoder=False,
    random_state=42
)
model.fit(X_train, y_train)

# 1️⃣1️⃣ Evaluate on Validation Set
y_val_pred = model.predict(X_val)
print("Validation Classification Report:\n", classification_report(y_val, y_val_pred, target_names=le.classes_))
print("Validation Confusion Matrix:\n", confusion_matrix(y_val, y_val_pred))

Validation Classification Report:
                    precision    recall  f1-score   support

 melanoma_in_situ       0.57      0.60      0.58       147
melanoma_invasive       0.49      0.37      0.42       120
            nevus       0.60      0.69      0.64       150

         accuracy                           0.57       417
        macro avg       0.55      0.55      0.55       417
     weighted avg       0.56      0.57      0.56       417

Validation Confusion Matrix:
 [[ 88  24  35]
 [ 42  44  34]
 [ 24  22 104]]


In [48]:
# 1️⃣2️⃣ Evaluate on Test Set
y_test_pred = model.predict(X_test)
print("Test Classification Report:\n", classification_report(y_test, y_test_pred, target_names=le.classes_))
print("Test Confusion Matrix:\n", confusion_matrix(y_test, y_test_pred))

Test Classification Report:
                    precision    recall  f1-score   support

 melanoma_in_situ       0.58      0.59      0.58       148
melanoma_invasive       0.52      0.38      0.44       119
            nevus       0.63      0.75      0.68       150

         accuracy                           0.59       417
        macro avg       0.58      0.57      0.57       417
     weighted avg       0.58      0.59      0.58       417

Test Confusion Matrix:
 [[ 88  27  33]
 [ 41  45  33]
 [ 24  14 112]]


# ===============================
# Melanoma Classification - Tabular Metadata
# Perfectly Balanced Dataset (797 per class)
# ===============================


In [49]:
# 1️⃣ Imports
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix
import xgboost as xgb

# 2️⃣ Load Metadata
base_path = r"C:\Users\lenovo\OneDrive\Desktop\Melanoma Disease Classification\cleaned_dataset"
metadata = pd.read_csv(base_path + r"\metadata.csv")

# 3️⃣ Encode Target Label
le = LabelEncoder()
metadata['label_encoded'] = le.fit_transform(metadata['label'])
print("Class mapping:", dict(zip(le.classes_, le.transform(le.classes_))))
# e.g., {'melanoma_in_situ': 0, 'melanoma_invasive': 1, 'nevus': 2}

# 4️⃣ Under-sample All Classes to Match Smallest Class (797 entries)
sample_size = 797
balanced_metadata = pd.concat([
    metadata[metadata['label'] == 'melanoma_in_situ'].sample(n=sample_size, random_state=42),
    metadata[metadata['label'] == 'melanoma_invasive'].sample(n=sample_size, random_state=42),
    metadata[metadata['label'] == 'nevus'].sample(n=sample_size, random_state=42)
]).reset_index(drop=True)

print("Balanced class distribution:\n", balanced_metadata['label'].value_counts())


Class mapping: {'melanoma_in_situ': 0, 'melanoma_invasive': 1, 'nevus': 2}
Balanced class distribution:
 label
melanoma_in_situ     797
melanoma_invasive    797
nevus                797
Name: count, dtype: int64


In [50]:
# 5️⃣ Features & Labels
X = balanced_metadata[['age_approx', 'sex', 'anatom_site_general']]
y = balanced_metadata['label_encoded']

# 6️⃣ Encode Sex Binary (female=0, male=1)
X = X.copy()  # Avoid SettingWithCopyWarning
X['sex'] = X['sex'].map({'female': 0, 'male': 1})

# 7️⃣ One-Hot Encode Anatomical Site
X = pd.get_dummies(X, columns=['anatom_site_general'], drop_first=True)

# 8️⃣ Scale Numerical Feature using MinMaxScaler
scaler = MinMaxScaler()
X['age_approx'] = scaler.fit_transform(X[['age_approx']])

# 9️⃣ Split Data (Stratified)
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42, stratify=y_temp
)

print("Training class distribution:\n", y_train.value_counts())

Training class distribution:
 label_encoded
1    558
0    558
2    557
Name: count, dtype: int64


In [51]:
# 🔟 Train XGBoost Classifier
model = xgb.XGBClassifier(
    objective='multi:softmax',
    num_class=len(le.classes_),
    eval_metric='mlogloss',
    use_label_encoder=False,
    random_state=42
)
model.fit(X_train, y_train)

# 1️⃣1️⃣ Evaluate on Validation Set
y_val_pred = model.predict(X_val)
print("Validation Classification Report:\n", classification_report(y_val, y_val_pred, target_names=le.classes_))
print("Validation Confusion Matrix:\n", confusion_matrix(y_val, y_val_pred))

# 1️⃣2️⃣ Evaluate on Test Set
y_test_pred = model.predict(X_test)
print("Test Classification Report:\n", classification_report(y_test, y_test_pred, target_names=le.classes_))
print("Test Confusion Matrix:\n", confusion_matrix(y_test, y_test_pred))

Validation Classification Report:
                    precision    recall  f1-score   support

 melanoma_in_situ       0.62      0.63      0.62       119
melanoma_invasive       0.50      0.48      0.49       120
            nevus       0.67      0.68      0.67       120

         accuracy                           0.60       359
        macro avg       0.60      0.60      0.60       359
     weighted avg       0.60      0.60      0.60       359

Validation Confusion Matrix:
 [[75 31 13]
 [34 58 28]
 [12 26 82]]
Test Classification Report:
                    precision    recall  f1-score   support

 melanoma_in_situ       0.54      0.53      0.53       120
melanoma_invasive       0.59      0.54      0.56       119
            nevus       0.65      0.72      0.69       120

         accuracy                           0.60       359
        macro avg       0.59      0.60      0.59       359
     weighted avg       0.59      0.60      0.59       359

Test Confusion Matrix:
 [[63 33 24]
 

# ==============================
# Metadata-Only Melanoma Classification do strong feature engineering, hyperparamter tunning, CV k-fold
# Balanced Dataset (797 per class)
# ==============================

In [53]:
# 1️⃣ Imports
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, StratifiedKFold, GridSearchCV
from sklearn.preprocessing import MinMaxScaler, OrdinalEncoder
from sklearn.metrics import classification_report, confusion_matrix
import xgboost as xgb

# 2️⃣ Load Metadata
base_path = r"C:\Users\lenovo\OneDrive\Desktop\Melanoma Disease Classification\cleaned_dataset"
metadata = pd.read_csv(base_path + r"\metadata.csv")

# 3️⃣ Ordinal Encode Labels: nevus=0, melanoma_in_situ=1, melanoma_invasive=2
label_mapping = {'nevus':0, 'melanoma_in_situ':1, 'melanoma_invasive':2}
metadata['label_encoded'] = metadata['label'].map(label_mapping)

# 4️⃣ Under-sample All Classes to Match Smallest Class (797 entries)
sample_size = 797
balanced_metadata = pd.concat([
    metadata[metadata['label'] == 'melanoma_in_situ'].sample(n=sample_size, random_state=42),
    metadata[metadata['label'] == 'melanoma_invasive'].sample(n=sample_size, random_state=42),
    metadata[metadata['label'] == 'nevus'].sample(n=sample_size, random_state=42)
]).reset_index(drop=True)

# 5️⃣ Features & Labels
X = balanced_metadata[['age_approx', 'sex', 'anatom_site_general']].copy()
y = balanced_metadata['label_encoded']

# 6️⃣ Transform age into bins
bins = [0, 20, 40, 60, 80, 100]
labels_age = [0, 1, 2, 3, 4]
X['age_bin'] = pd.cut(X['age_approx'], bins=bins, labels=labels_age)
X.drop('age_approx', axis=1, inplace=True)  # remove original age

# 7️⃣ Encode categorical features
X['sex'] = X['sex'].map({'female':0, 'male':1})

# Anatomical site encoding using ordinal (arbitrary, consistent mapping)
site_order = {
    'posterior torso':0, 'lower extremity':1, 'upper extremity':2, 'anterior torso':3,
    'head/neck':4, 'palms/soles':5, 'lateral torso':6, 'oral/genital':7
}
X['anatom_site_general'] = X['anatom_site_general'].map(site_order)

# 8️⃣ Scale numerical features (age_bin)
scaler = MinMaxScaler()
X['age_bin'] = scaler.fit_transform(X[['age_bin']])

# 9️⃣ Split Data (Stratified)
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42, stratify=y_temp
)


In [54]:
# 10️⃣ XGBoost with Hyperparameter Tuning using Stratified K-Fold CV
xgb_clf = xgb.XGBClassifier(
    objective='multi:softmax',
    num_class=3,
    eval_metric='mlogloss',
    use_label_encoder=False,
    random_state=42
)

param_grid = {
    'max_depth':[3,4,5,6],
    'n_estimators':[100,200,300],
    'learning_rate':[0.05,0.1,0.2],
    'subsample':[0.8,1.0],
    'colsample_bytree':[0.8,1.0]
}

cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

grid = GridSearchCV(estimator=xgb_clf, param_grid=param_grid, cv=cv, scoring='accuracy', n_jobs=-1)
grid.fit(X_train, y_train)

print("Best Hyperparameters:", grid.best_params_)

best_model = grid.best_estimator_

# 11️⃣ Evaluate on Validation Set
y_val_pred = best_model.predict(X_val)
print("Validation Classification Report:\n", classification_report(y_val, y_val_pred, target_names=label_mapping.keys()))
print("Validation Confusion Matrix:\n", confusion_matrix(y_val, y_val_pred))

# 12️⃣ Evaluate on Test Set
y_test_pred = best_model.predict(X_test)
print("Test Classification Report:\n", classification_report(y_test, y_test_pred, target_names=label_mapping.keys()))
print("Test Confusion Matrix:\n", confusion_matrix(y_test, y_test_pred))

Best Hyperparameters: {'colsample_bytree': 0.8, 'learning_rate': 0.05, 'max_depth': 6, 'n_estimators': 100, 'subsample': 0.8}
Validation Classification Report:
                    precision    recall  f1-score   support

            nevus       0.67      0.65      0.66       119
 melanoma_in_situ       0.50      0.51      0.50       120
melanoma_invasive       0.40      0.41      0.40       120

         accuracy                           0.52       359
        macro avg       0.52      0.52      0.52       359
     weighted avg       0.52      0.52      0.52       359

Validation Confusion Matrix:
 [[77 12 30]
 [16 61 43]
 [22 49 49]]
Test Classification Report:
                    precision    recall  f1-score   support

            nevus       0.65      0.66      0.66       120
 melanoma_in_situ       0.55      0.55      0.55       119
melanoma_invasive       0.44      0.44      0.44       120

         accuracy                           0.55       359
        macro avg       0.55  