# Skin Lesion Multiclass Classification

## Libraries

In [None]:
from Dataset.Data_Loaders import get_loader
from Conventional.Preprocessing import transforms
from Conventional.Feature_extraction import Entropy, StdBGR, contrast_entropy, calculate_kurtosis, calculate_lbp, calculate_glcm, calculate_gabor, color_features

from scipy.stats import skew, kurtosis
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import cv2

from scipy.stats import skew
from PIL import Image
from skimage.filters import gabor
from skimage.util import img_as_ubyte
from skimage.feature import local_binary_pattern, hog, graycomatrix, graycoprops
from skimage.feature import graycomatrix
from skimage.color import rgb2gray
from skimage.feature import local_binary_pattern
from skimage.filters import gabor_kernel
from skimage import data, filters, color
from skimage.transform import radon
import pywt
from skimage.transform import rescale

from PIL import Image

from monai.data import pad_list_data_collate

from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from imblearn.over_sampling import SMOTE, BorderlineSMOTE 
from sklearn.metrics import accuracy_score, cohen_kappa_score, f1_score, make_scorer
from sklearn.feature_selection import RFE
from sklearn.model_selection import StratifiedKFold, cross_val_score

## Feature extraction

In [None]:
# Root folder of the multiclass skin-lesion dataset (machine-specific path).
# Alternative location, kept for reference:
# Data_path=r"C:\Users\moham\My Files\Education\UDG\CAD\skin_binary_dataset"
Data_path = r"E:/7th year/Girona courses/CAD/Project/Skin_Multi_Data"

# One loader per split, all sharing the same preprocessing transforms.
# Shuffling is disabled only for the test split.
train_loader = get_loader(Data_path, mode="train", shuffle=True, transforms=transforms)
val_loader = get_loader(Data_path, mode="val", shuffle=True, transforms=transforms)
test_loader = get_loader(Data_path, mode="test", shuffle=False, transforms=transforms)

# Accumulators for the per-image feature vectors and their labels.
features, Labels = [], []
features_val, Labels_val = [], []
features_test = []  # no label list is created for the test split

In [None]:
# Build one hand-crafted feature vector per training image and record its label.
# Disabled experiments (not part of the vector): histogram colour, DFT, radon,
# wavelet, fractal-dimension, Fourier-region and HOG features.
for batch in train_loader:
    batch_size = len(batch['label'])
    for idx in range(batch_size):
        # Move axis 0 to the end before handing the array to the extractors
        # (presumably channel-first -> channel-last; TODO confirm).
        pixels = batch['image'][idx].numpy().transpose(1, 2, 0)

        # Extractors are invoked in the same order as before.
        color_vec = color_features(pixels)
        glcm_vec = calculate_glcm(pixels)
        lbp_vec = calculate_lbp(pixels)
        entropy_vec = Entropy(pixels)
        kurtosis_vec = calculate_kurtosis(pixels)
        contrast_vec = contrast_entropy(pixels)
        gabor_vec = calculate_gabor(pixels)

        # Column order of the final vector; it must be the same for every split.
        ordered_parts = (
            entropy_vec,
            kurtosis_vec,
            color_vec,
            gabor_vec,
            glcm_vec,
            lbp_vec,
            contrast_vec,
        )
        features.append(np.concatenate([np.array(part) for part in ordered_parts]))
        # Labels are stored as strings.
        Labels.append(f"{batch['label'][idx].item()}")

In [None]:
# Build one hand-crafted feature vector per validation image and record its label.
# Disabled experiments (not part of the vector): histogram colour, DFT, radon,
# wavelet, fractal-dimension, Fourier-region and HOG features.
for batch in val_loader:
    batch_size = len(batch['label'])
    for idx in range(batch_size):
        # Move axis 0 to the end before handing the array to the extractors
        # (presumably channel-first -> channel-last; TODO confirm).
        pixels = batch['image'][idx].numpy().transpose(1, 2, 0)

        # Extractors are invoked in the same order as before.
        color_vec = color_features(pixels)
        glcm_vec = calculate_glcm(pixels)
        lbp_vec = calculate_lbp(pixels)
        entropy_vec = Entropy(pixels)
        kurtosis_vec = calculate_kurtosis(pixels)
        contrast_vec = contrast_entropy(pixels)
        gabor_vec = calculate_gabor(pixels)

        # Column order of the final vector; it must be the same for every split.
        ordered_parts = (
            entropy_vec,
            kurtosis_vec,
            color_vec,
            gabor_vec,
            glcm_vec,
            lbp_vec,
            contrast_vec,
        )
        features_val.append(np.concatenate([np.array(part) for part in ordered_parts]))
        # Labels are stored as strings.
        Labels_val.append(f"{batch['label'][idx].item()}")

In [None]:
# Build one hand-crafted feature vector per test image; no labels are read here.
# Disabled experiments (not part of the vector): histogram colour, DFT, radon,
# wavelet, fractal-dimension, Fourier-region and HOG features.
for batch in test_loader:
    batch_size = len(batch['image'])
    for idx in range(batch_size):
        # Move axis 0 to the end before handing the array to the extractors
        # (presumably channel-first -> channel-last; TODO confirm).
        pixels = batch['image'][idx].numpy().transpose(1, 2, 0)

        # Extractors are invoked in the same order as before.
        color_vec = color_features(pixels)
        glcm_vec = calculate_glcm(pixels)
        lbp_vec = calculate_lbp(pixels)
        entropy_vec = Entropy(pixels)
        kurtosis_vec = calculate_kurtosis(pixels)
        contrast_vec = contrast_entropy(pixels)
        gabor_vec = calculate_gabor(pixels)

        # Column order of the final vector; it must be the same for every split.
        ordered_parts = (
            entropy_vec,
            kurtosis_vec,
            color_vec,
            gabor_vec,
            glcm_vec,
            lbp_vec,
            contrast_vec,
        )
        features_test.append(np.concatenate([np.array(part) for part in ordered_parts]))

## ML models

In [None]:
# Final model: train on train+val, estimate kappa with 5-fold CV, then predict
# the unlabeled test split and export the predictions.

# Local import so this cell stays self-contained. imbalanced-learn's Pipeline
# applies samplers during fit only, which lets the oversampling run inside
# each cross-validation fold instead of before the split.
from imblearn.pipeline import Pipeline as ImbPipeline

X_train1 = features
X_train2 = features_val
Y_train1 = Labels
Y_train2 = Labels_val
X_test = features_test

# Combine train and val
X_train_combined = np.concatenate((X_train1, X_train2), axis=0)
Y_train_combined = np.concatenate((Y_train1, Y_train2), axis=0)

# Pipeline steps: upsampling -> standardizing -> PCA -> SVM.
# Using best params from GridSearch on SVM
borderline_smote = BorderlineSMOTE(random_state=42)
scaler = StandardScaler()
pca = PCA(n_components=40)
svm_params = {'C': 10, 'gamma': 'auto', 'kernel': 'rbf'}
svm = SVC(**svm_params)

svm_pipeline = ImbPipeline(steps=[
    ("oversample", borderline_smote),
    ("scale", scaler),
    ("pca", pca),
    ("svm", svm),
])

# 5-fold CV on the RAW combined features. Every fold refits SMOTE, the scaler
# and PCA on its own training part, so synthetic samples and scaling/PCA
# statistics never leak into the held-out part. Scores from earlier runs that
# cross-validated already-resampled data are optimistic and not comparable.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
kappa_scorer = make_scorer(cohen_kappa_score)

svm_kappa_scores = cross_val_score(
    svm_pipeline, X_train_combined, Y_train_combined, cv=cv, scoring=kappa_scorer
)
print(f"SVM Classifier 5-Fold Cross-Validated Kappa: {svm_kappa_scores.mean():.4f} ± {svm_kappa_scores.std():.4f}")

# Train the final SVM model on full data (resampling is applied during fit).
svm_pipeline.fit(X_train_combined, Y_train_combined)

# Test on the testing features (the sampler step is skipped at predict time).
y_pred = svm_pipeline.predict(X_test)
print("SVM predicted labels shape:", y_pred.shape)

# Save SVM predictions to CSV without a header
output_path = "E:/7th year/Girona courses/CAD/Project/predictions/multiclass_svm_predictions.csv"
pd.DataFrame(y_pred).to_csv(output_path, index=False, header=False)


SVM Classifier 5-Fold Cross-Validated Kappa: 0.8456 ± 0.0070
SVM predicted labels shape: (2121,)


In [None]:
# Hold-out evaluation: fit on the training split, score on the validation split.
X_train = features
X_test = features_val  # NOTE: validation features, not the unlabeled test split
Y_train = Labels
Y_test = Labels_val

# Oversample the training split only; the validation split is left untouched.
borderline_smote = BorderlineSMOTE(random_state=42)
X_train_resampled, Y_train_resampled = borderline_smote.fit_resample(X_train, Y_train)

# Scaler and PCA are fitted on training data and only applied to validation data.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_resampled)
X_test_scaled = scaler.transform(X_test)

pca = PCA(n_components=40)
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)

# rfe = RFE(estimator=RandomForestClassifier(random_state=42), n_features_to_select=35, step=1)
# X_train_rfe = rfe.fit_transform(X_train_scaled, Y_train_resampled)
# X_test_rfe = rfe.transform(X_test_scaled)

svm_params = {'C': 10, 'gamma': 'auto', 'kernel': 'rbf'}

# Candidate models, evaluated in this order. The forest is seeded so its
# metrics are reproducible, like every other stochastic step in this notebook.
# class_weight={'0': 50, '1': 50, '2': 200}
candidates = [
    ("SVM", SVC(**svm_params)),
    ("Random Forest", RandomForestClassifier(random_state=42)),
]

for display_name, model in candidates:
    model.fit(X_train_pca, Y_train_resampled)
    y_pred = model.predict(X_test_pca)

    accuracy = accuracy_score(Y_test, y_pred)
    kappa = cohen_kappa_score(Y_test, y_pred)
    f1 = f1_score(Y_test, y_pred, average='weighted')
    print(f"{display_name} Classifier Accuracy: {accuracy:.2f}")
    print(f"{display_name} Classifier Kappa: {kappa:.2f}")
    print(f"{display_name} Classifier F1 Score: {f1:.2f}")

SVM Classifier Accuracy: 0.83
SVM Classifier Kappa: 0.70
SVM Classifier F1 Score: 0.83
Random Forest Classifier Accuracy: 0.79
Random Forest Classifier Kappa: 0.62
Random Forest Classifier F1 Score: 0.79


### SVM Grid search (Don't run again)

In [None]:
# from imblearn.over_sampling import BorderlineSMOTE
# from sklearn.preprocessing import StandardScaler
# from sklearn.decomposition import PCA
# from sklearn.svm import SVC
# from sklearn.metrics import accuracy_score, cohen_kappa_score, f1_score
# from sklearn.model_selection import GridSearchCV

# # Define training and test sets
# X_train = features
# X_test = features_val
# Y_train = Labels
# Y_test = Labels_val

# # Apply Borderline-SMOTE for oversampling
# borderline_smote = BorderlineSMOTE(random_state=42)
# X_train_resampled, Y_train_resampled = borderline_smote.fit_resample(X_train, Y_train)

# # Standard scaling of features
# scaler = StandardScaler()
# X_train_scaled = scaler.fit_transform(X_train_resampled)
# X_test_scaled = scaler.transform(X_test)

# # PCA for dimensionality reduction
# pca = PCA(n_components=70)
# X_train_pca = pca.fit_transform(X_train_scaled)
# X_test_pca = pca.transform(X_test_scaled)

# # Define the parameter grid for SVM
# param_grid = {
#     'C': [0.1, 1, 10, 100],
#     'kernel': ['linear', 'rbf'],
#     'gamma': ['scale', 'auto']
# }

# # Initialize the SVM model and grid search
# svm = SVC()
# grid_search = GridSearchCV(svm, param_grid, cv=5, scoring='accuracy', n_jobs=-1)

# # Fit grid search
# grid_search.fit(X_train_pca, Y_train_resampled)

# # Get the best model and make predictions
# best_svm = grid_search.best_estimator_
# y_pred = best_svm.predict(X_test_pca)

# # Evaluate the model
# accuracy = accuracy_score(Y_test, y_pred)
# kappa = cohen_kappa_score(Y_test, y_pred)
# f1 = f1_score(Y_test, y_pred, average='weighted')
# print(f"Best SVM Parameters: {grid_search.best_params_}")
# print(f"SVM Classifier Accuracy: {accuracy:.2f}")
# print(f"SVM Classifier Kappa: {kappa:.2f}")
# print(f"SVM Classifier F1 Score: {f1:.2f}")

Best SVM Parameters: {'C': 10, 'gamma': 'auto', 'kernel': 'rbf'}
SVM Classifier Accuracy: 0.80
SVM Classifier Kappa: 0.64
SVM Classifier F1 Score: 0.80


### Boosting techniques (deprecated)

In [None]:
# from imblearn.over_sampling import BorderlineSMOTE
# from sklearn.preprocessing import StandardScaler
# from sklearn.decomposition import PCA
# from sklearn.metrics import accuracy_score, cohen_kappa_score, f1_score
# import numpy as np
# import xgboost as xgb
# from catboost import CatBoostClassifier

# # Prepare training and test sets
# X_train = features
# X_test = features_val
# Y_train = Labels
# Y_test = Labels_val

# # Ensure Y_train and Y_test are integer arrays
# Y_train = np.array(Y_train, dtype=int)
# Y_test = np.array(Y_test, dtype=int)

# # Apply Borderline-SMOTE for oversampling
# borderline_smote = BorderlineSMOTE(random_state=42)
# X_train_resampled, Y_train_resampled = borderline_smote.fit_resample(X_train, Y_train)

# # Standard scaling of features
# scaler = StandardScaler()
# X_train_scaled = scaler.fit_transform(X_train_resampled)
# X_test_scaled = scaler.transform(X_test)

# # PCA for dimensionality reduction
# pca = PCA(n_components=35)
# X_train_pca = pca.fit_transform(X_train_scaled)
# X_test_pca = pca.transform(X_test_scaled)

# # 1. XGBoost Model
# xgb_model = xgb.XGBClassifier(max_depth=5, n_estimators=100, learning_rate=0.1, random_state=42)
# xgb_model.fit(X_train_pca, Y_train_resampled)
# y_pred_xgb = xgb_model.predict(X_test_pca)

# accuracy_xgb = accuracy_score(Y_test, y_pred_xgb)
# kappa_xgb = cohen_kappa_score(Y_test, y_pred_xgb)
# f1_xgb = f1_score(Y_test, y_pred_xgb, average='weighted')
# print("XGBoost Results:")
# print(f"Accuracy: {accuracy_xgb:.2f}")
# print(f"Kappa: {kappa_xgb:.2f}")
# print(f"F1 Score: {f1_xgb:.2f}\n")

# # 2. CatBoost Model
# cat_model = CatBoostClassifier(max_depth=5, n_estimators=100, learning_rate=0.1, random_state=42, verbose=0)
# cat_model.fit(X_train_pca, Y_train_resampled)
# y_pred_cat = cat_model.predict(X_test_pca)

# accuracy_cat = accuracy_score(Y_test, y_pred_cat)
# kappa_cat = cohen_kappa_score(Y_test, y_pred_cat)
# f1_cat = f1_score(Y_test, y_pred_cat, average='weighted')
# print("CatBoost Results:")
# print(f"Accuracy: {accuracy_cat:.2f}")
# print(f"Kappa: {kappa_cat:.2f}")
# print(f"F1 Score: {f1_cat:.2f}")

XGBoost Results:
Accuracy: 0.76
Kappa: 0.58
F1 Score: 0.77

CatBoost Results:
Accuracy: 0.71
Kappa: 0.51
F1 Score: 0.73


## Cascaded Classifier: (One vs all --> Binary) (DEPRECATED)

### Grid Search (don't run again)

In [None]:
# from sklearn.preprocessing import StandardScaler
# from sklearn.decomposition import PCA
# from sklearn.ensemble import RandomForestClassifier
# from sklearn.metrics import accuracy_score, cohen_kappa_score, f1_score
# from sklearn.model_selection import GridSearchCV
# import numpy as np

# # Prepare training and test sets
# X_train = features
# X_test = features_val
# Y_train = Labels
# Y_test = Labels_val

# # Ensure Y_train and Y_test are integer arrays
# Y_train = np.array(Y_train, dtype=int)
# Y_test = np.array(Y_test, dtype=int)

# # Standard scaling of features
# scaler = StandardScaler()
# X_train_scaled = scaler.fit_transform(X_train)
# X_test_scaled = scaler.transform(X_test)

# # PCA for dimensionality reduction
# pca = PCA(n_components=35)
# X_train_pca = pca.fit_transform(X_train_scaled)
# X_test_pca = pca.transform(X_test_scaled)

# # Stage 1: Random Forest Model - Class 2 vs. All with Grid Search
# Y_train_stage1 = (Y_train == 2).astype(int)
# Y_test_stage1 = (Y_test == 2).astype(int)

# # Define the parameter grid for Grid Search
# param_grid = {
#     'n_estimators': [50, 100, 200],
#     'max_depth': [None, 10, 20, 30],
#     'min_samples_split': [2, 5, 10]
# }

# # Perform Grid Search with Cross-Validation
# model_stage1 = RandomForestClassifier(random_state=42)
# grid_search = GridSearchCV(estimator=model_stage1, param_grid=param_grid, cv=5, scoring='accuracy', n_jobs=-1)
# grid_search.fit(X_train_pca, Y_train_stage1)

# # Use the best estimator found by Grid Search
# best_model_stage1 = grid_search.best_estimator_
# y_pred_stage1 = best_model_stage1.predict(X_test_pca)

# # Evaluate Stage 1
# accuracy_stage1 = accuracy_score(Y_test_stage1, y_pred_stage1)
# kappa_stage1 = cohen_kappa_score(Y_test_stage1, y_pred_stage1)
# f1_stage1 = f1_score(Y_test_stage1, y_pred_stage1)
# print(f"Stage 1 (Class 2 vs All) Accuracy: {accuracy_stage1:.2f}")
# print(f"Stage 1 (Class 2 vs All) Kappa: {kappa_stage1:.2f}")
# print(f"Stage 1 (Class 2 vs All) F1 Score: {f1_stage1:.2f}")

# # Stage 2: Random Forest Model - Class 0 vs. Class 1
# X_test_stage2 = X_test_pca[y_pred_stage1 == 0]
# Y_test_stage2 = Y_test[y_pred_stage1 == 0]

# Y_train_stage2 = Y_train[Y_train != 2]
# X_train_stage2 = X_train_pca[Y_train != 2]

# model_stage2 = RandomForestClassifier(random_state=42)
# model_stage2.fit(X_train_stage2, Y_train_stage2)
# y_pred_stage2 = model_stage2.predict(X_test_stage2)

# # Evaluate Stage 2
# accuracy_stage2 = accuracy_score(Y_test_stage2, y_pred_stage2)
# kappa_stage2 = cohen_kappa_score(Y_test_stage2, y_pred_stage2)
# f1_stage2 = f1_score(Y_test_stage2, y_pred_stage2, average='weighted')
# print(f"Stage 2 (Class 0 vs Class 1) Accuracy: {accuracy_stage2:.2f}")
# print(f"Stage 2 (Class 0 vs Class 1) Kappa: {kappa_stage2:.2f}")
# print(f"Stage 2 (Class 0 vs Class 1) F1 Score: {f1_stage2:.2f}")

# # Combine predictions for overall evaluation
# overall_predictions = np.where(y_pred_stage1 == 1, 2, -1)  # Assign Class 2 for predictions from Stage 1
# overall_predictions[y_pred_stage1 == 0] = y_pred_stage2  # Assign Class 0 or 1 from Stage 2 predictions

# # Calculate overall metrics
# overall_accuracy = accuracy_score(Y_test, overall_predictions)
# overall_kappa = cohen_kappa_score(Y_test, overall_predictions)
# overall_f1 = f1_score(Y_test, overall_predictions, average='weighted')
# print(f"Overall Accuracy: {overall_accuracy:.2f}")
# print(f"Overall Kappa: {overall_kappa:.2f}")
# print(f"Overall F1 Score: {overall_f1:.2f}")

# # Print the best parameters found by Grid Search
# print("Best parameters found by Grid Search for Stage 1:", grid_search.best_params_)


Stage 1 (Class 2 vs All) Accuracy: 0.93
Stage 1 (Class 2 vs All) Kappa: 0.06
Stage 1 (Class 2 vs All) F1 Score: 0.06
Stage 2 (Class 0 vs Class 1) Accuracy: 0.80
Stage 2 (Class 0 vs Class 1) Kappa: 0.62
Stage 2 (Class 0 vs Class 1) F1 Score: 0.77
Overall Accuracy: 0.80
Overall Kappa: 0.63
Overall F1 Score: 0.78
Best parameters found by Grid Search for Stage 1: {'max_depth': 20, 'min_samples_split': 2, 'n_estimators': 50}


### Cascaded model with best params (deprecated)

In [None]:
# from sklearn.preprocessing import StandardScaler
# from sklearn.decomposition import PCA
# from sklearn.ensemble import RandomForestClassifier
# from sklearn.metrics import accuracy_score, cohen_kappa_score, f1_score
# import numpy as np

# # Prepare training and test sets
# X_train = features
# X_test = features_val
# Y_train = Labels
# Y_test = Labels_val

# # Ensure Y_train and Y_test are integer arrays
# Y_train = np.array(Y_train, dtype=int)
# Y_test = np.array(Y_test, dtype=int)

# # Standard scaling of features
# scaler = StandardScaler()
# X_train_scaled = scaler.fit_transform(X_train)
# X_test_scaled = scaler.transform(X_test)

# # PCA for dimensionality reduction
# pca = PCA(n_components=35)
# X_train_pca = pca.fit_transform(X_train_scaled)
# X_test_pca = pca.transform(X_test_scaled)

# # Parameters for Random Forest models
# rf_params = {
#     'max_depth': 20,
#     'min_samples_split': 2,
#     'n_estimators': 50,
#     'random_state': 42
# }

# # Stage 1: Random Forest Model - Class 2 vs. All
# Y_train_stage1 = (Y_train == 2).astype(int)
# Y_test_stage1 = (Y_test == 2).astype(int)

# # Train first-stage classifier
# model_stage1 = RandomForestClassifier(**rf_params)
# model_stage1.fit(X_train_pca, Y_train_stage1)
# y_pred_stage1 = model_stage1.predict(X_test_pca)

# # Evaluate Stage 1
# accuracy_stage1 = accuracy_score(Y_test_stage1, y_pred_stage1)
# kappa_stage1 = cohen_kappa_score(Y_test_stage1, y_pred_stage1)
# f1_stage1 = f1_score(Y_test_stage1, y_pred_stage1)
# print(f"Stage 1 (Class 2 vs All) Accuracy: {accuracy_stage1:.2f}")
# print(f"Stage 1 (Class 2 vs All) Kappa: {kappa_stage1:.2f}")
# print(f"Stage 1 (Class 2 vs All) F1 Score: {f1_stage1:.2f}")

# # Stage 2: Random Forest Model - Class 0 vs. Class 1
# X_test_stage2 = X_test_pca[y_pred_stage1 == 0]
# Y_test_stage2 = Y_test[y_pred_stage1 == 0]

# Y_train_stage2 = Y_train[Y_train != 2]
# X_train_stage2 = X_train_pca[Y_train != 2]

# model_stage2 = RandomForestClassifier(**rf_params)
# model_stage2.fit(X_train_stage2, Y_train_stage2)
# y_pred_stage2 = model_stage2.predict(X_test_stage2)

# # Evaluate Stage 2
# accuracy_stage2 = accuracy_score(Y_test_stage2, y_pred_stage2)
# kappa_stage2 = cohen_kappa_score(Y_test_stage2, y_pred_stage2)
# f1_stage2 = f1_score(Y_test_stage2, y_pred_stage2, average='weighted')
# print(f"Stage 2 (Class 0 vs Class 1) Accuracy: {accuracy_stage2:.2f}")
# print(f"Stage 2 (Class 0 vs Class 1) Kappa: {kappa_stage2:.2f}")
# print(f"Stage 2 (Class 0 vs Class 1) F1 Score: {f1_stage2:.2f}")

# # Combine predictions for overall evaluation
# overall_predictions = np.where(y_pred_stage1 == 1, 2, -1)  # Assign Class 2 for predictions from Stage 1
# overall_predictions[y_pred_stage1 == 0] = y_pred_stage2  # Assign Class 0 or 1 from Stage 2 predictions

# # Calculate overall metrics
# overall_accuracy = accuracy_score(Y_test, overall_predictions)
# overall_kappa = cohen_kappa_score(Y_test, overall_predictions)
# overall_f1 = f1_score(Y_test, overall_predictions, average='weighted')
# print(f"Overall Accuracy: {overall_accuracy:.2f}")
# print(f"Overall Kappa: {overall_kappa:.2f}")
# print(f"Overall F1 Score: {overall_f1:.2f}")


Stage 1 (Class 2 vs All) Accuracy: 0.93
Stage 1 (Class 2 vs All) Kappa: 0.08
Stage 1 (Class 2 vs All) F1 Score: 0.08
Stage 2 (Class 0 vs Class 1) Accuracy: 0.79
Stage 2 (Class 0 vs Class 1) Kappa: 0.61
Stage 2 (Class 0 vs Class 1) F1 Score: 0.77
Overall Accuracy: 0.80
Overall Kappa: 0.61
Overall F1 Score: 0.77
