### Testing Features ###

In [2]:
import cv2
import numpy as np
import os
from skimage.feature import graycomatrix, graycoprops
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier


In [3]:
def extract_glcm_features(image):
    # Convert to grayscale
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # Compute the GLCM (Gray Level Co-occurrence Matrix)
    glcm = greycomatrix(gray_image, distances=[1], angles=[0], symmetric=True, normed=True)
    
    # Extract texture properties: Contrast, Correlation, Energy, and Homogeneity
    contrast = greycoprops(glcm, 'contrast')[0, 0]
    correlation = greycoprops(glcm, 'correlation')[0, 0]
    energy = greycoprops(glcm, 'energy')[0, 0]
    homogeneity = greycoprops(glcm, 'homogeneity')[0, 0]
    
    return [contrast, correlation, energy, homogeneity]


In [4]:
def extract_hog_features(image):
    # Convert to grayscale
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    
    # Compute HOG features
    hog_features, hog_image = hog(gray_image, 
                                  pixels_per_cell=(16, 16), 
                                  cells_per_block=(2, 2), 
                                  block_norm='L2-Hys', 
                                  visualize=True, 
                                  multichannel=False)
    
    return hog_features


In [5]:
def load_images_from_folder(folder):
    images = []
    labels = []
    
    # Loop through subfolders, where each folder represents a class (e.g., Benign or Malignant)
    for label_folder in os.listdir(folder):
        label_path = os.path.join(folder, label_folder)
        if os.path.isdir(label_path):
            for filename in os.listdir(label_path):
                img_path = os.path.join(label_path, filename)
                img = cv2.imread(img_path)  # Read the image
                if img is not None:
                    images.append(img)
                    labels.append(label_folder)  # The folder name becomes the class label
    return images, labels


In [6]:
def prepare_data(X_images):
    X_features = []
    
    for img in X_images:
        # Extract GLCM (texture) features
        glcm_features = extract_glcm_features(img)
        
        # Extract HOG (shape) features
        hog_features = extract_hog_features(img)
        
        # Combine both sets of features
        combined_features = np.concatenate((glcm_features, hog_features))
        
        # Add the combined features to the feature list
        X_features.append(combined_features)
        
    return np.array(X_features)


In [8]:
# Load dataset (assuming images are stored in 'data' folder)
X_images, y_labels = load_images_from_folder('ISIC-images')

# Prepare feature set by extracting features from the images
X_features = prepare_data(X_images)

# Split dataset into training and test sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X_features, y_labels, test_size=0.2, random_state=42)

# Scale the feature data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


ValueError: With n_samples=0, test_size=0.2 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.

In [9]:
import cv2
import numpy as np
from skimage.feature import local_binary_pattern
from skimage.feature import graycomatrix, graycoprops
from scipy.stats import skew

def preprocess_image(image_path, target_size=(300, 300)):
    # Load image
    img = cv2.imread(image_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    
    # Resize
    img = cv2.resize(img, target_size)
    
    # Extract the circular region of interest
    mask = np.zeros(img.shape[:2], dtype=np.uint8)
    cv2.circle(mask, (img.shape[1]//2, img.shape[0]//2), min(img.shape[0], img.shape[1])//2 - 10, (255), -1)
    img = cv2.bitwise_and(img, img, mask=mask)
    
    # Enhance contrast
    lab = cv2.cvtColor(img, cv2.COLOR_RGB2LAB)
    l, a, b = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
    cl = clahe.apply(l)
    enhanced_lab = cv2.merge((cl,a,b))
    img = cv2.cvtColor(enhanced_lab, cv2.COLOR_LAB2RGB)
    
    # Normalize
    img = img.astype(np.float32) / 255.0
    
    return img

def extract_color_features(img):
    features = []
    
    # Color moments (mean, std, skewness) for each channel
    for i in range(3):
        channel = img[:,:,i]
        features.extend([channel[channel != 0].mean(), channel[channel != 0].std(), skew(channel[channel != 0].ravel())])
    
    # Color histograms
    for i in range(3):
        hist = cv2.calcHist([img], [i], None, [32], [0, 1])
        features.extend(hist.flatten())
    
    return features

def extract_texture_features(img):
    features = []
    
    gray = cv2.cvtColor((img * 255).astype(np.uint8), cv2.COLOR_RGB2GRAY)
    
    # Local Binary Patterns
    radius = 3
    n_points = 8 * radius
    lbp = local_binary_pattern(gray, n_points, radius, method='uniform')
    hist, _ = np.histogram(lbp.ravel(), bins=np.arange(0, n_points + 3), range=(0, n_points + 2))
    features.extend(hist)
    
    # Haralick texture features
    glcm = graycomatrix(gray, [5], [0], 256, symmetric=True, normed=True)
    props = ['contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation']
    for prop in props:
        features.extend(graycoprops(glcm, prop).flatten())
    
    return features

def extract_shape_features(img):
    features = []
    
    gray = cv2.cvtColor((img * 255).astype(np.uint8), cv2.COLOR_RGB2GRAY)
    _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    
    if contours:
        cnt = max(contours, key=cv2.contourArea)
        
        # Area and Perimeter
        area = cv2.contourArea(cnt)
        perimeter = cv2.arcLength(cnt, True)
        features.extend([area, perimeter])
        
        # Circularity
        circularity = 4 * np.pi * area / (perimeter ** 2)
        features.append(circularity)
        
        # Asymmetry
        (x, y), (MA, ma), angle = cv2.fitEllipse(cnt)
        asymmetry = MA / ma
        features.append(asymmetry)
        
        # Border irregularity
        hull = cv2.convexHull(cnt)
        hull_area = cv2.contourArea(hull)
        solidity = float(area) / hull_area
        features.append(solidity)
    else:
        features.extend([0] * 5)  # Placeholder if no contour found
    
    return features

def analyze_lesion(image_path):
    img = preprocess_image(image_path)
    
    color_features = extract_color_features(img)
    texture_features = extract_texture_features(img)
    shape_features = extract_shape_features(img)
    
    all_features = np.concatenate([color_features, texture_features, shape_features])
    
    return all_features

# Usage
image_path = "path_to_your_image.jpg"
features = analyze_lesion(image_path)
print(f"Extracted {len(features)} features from the image.")

[ WARN:0@400.466] global loadsave.cpp:241 findDecoder imread_('path_to_your_image.jpg'): can't open/read file: check file path/integrity


error: OpenCV(4.10.0) /Users/xperience/GHA-Actions-OpenCV/_work/opencv-python/opencv-python/opencv/modules/imgproc/src/color.cpp:196: error: (-215:Assertion failed) !_src.empty() in function 'cvtColor'


In [1]:
import os
import cv2
import numpy as np
import pandas as pd
from skimage.feature import local_binary_pattern
from skimage.feature import graycomatrix, graycoprops
from scipy.stats import skew
from tqdm import tqdm

def preprocess_image(img, target_size=(300, 300)):
    # Resize
    img = cv2.resize(img, target_size)
    
    # Extract the circular region of interest
    mask = np.zeros(img.shape[:2], dtype=np.uint8)
    cv2.circle(mask, (img.shape[1]//2, img.shape[0]//2), min(img.shape[0], img.shape[1])//2 - 10, (255), -1)
    img = cv2.bitwise_and(img, img, mask=mask)
    
    # Enhance contrast
    lab = cv2.cvtColor(img, cv2.COLOR_RGB2LAB)
    l, a, b = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
    cl = clahe.apply(l)
    enhanced_lab = cv2.merge((cl,a,b))
    img = cv2.cvtColor(enhanced_lab, cv2.COLOR_LAB2RGB)
    
    # Normalize
    img = img.astype(np.float32) / 255.0
    
    return img

def extract_color_features(img):
    features = []
    
    # Color moments (mean, std, skewness) for each channel
    for i in range(3):
        channel = img[:,:,i]
        features.extend([channel[channel != 0].mean(), channel[channel != 0].std(), skew(channel[channel != 0].ravel())])
    
    # Color histograms
    for i in range(3):
        hist = cv2.calcHist([img], [i], None, [32], [0, 1])
        features.extend(hist.flatten())
    
    return features

def gabor_features(img, frequencies=[0.1, 0.2, 0.3], orientations=[0, 45, 90, 135]):
    features = []
    for theta in orientations:
        for frequency in frequencies:
            # Create Gabor filter
            kernel = cv2.getGaborKernel((21, 21), 8.0, np.radians(theta), frequency, 0.5, 0, ktype=cv2.CV_32F)
            filtered = cv2.filter2D(img, cv2.CV_8UC3, kernel)

            # Calculate mean and standard deviation of the filtered image
            features.append(filtered.mean())
            features.append(filtered.std())
    
    return features

def extract_texture_features(img):
    features = []
    
    gray = cv2.cvtColor((img * 255).astype(np.uint8), cv2.COLOR_RGB2GRAY)
    
    # Local Binary Patterns
    radius = 3
    n_points = 8 * radius
    lbp = local_binary_pattern(gray, n_points, radius, method='uniform')
    hist, _ = np.histogram(lbp.ravel(), bins=np.arange(0, n_points + 3), range=(0, n_points + 2))
    features.extend(hist)
    
    # Haralick texture features
    glcm = graycomatrix(gray, [5], [0], 256, symmetric=True, normed=True)
    props = ['contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation']
    for prop in props:
        features.extend(graycoprops(glcm, prop).flatten())
    
    # Gabor filter features
    features.extend(gabor_features(gray))  # Add Gabor features
    
    return features

def extract_shape_features(img):
    features = []
    
    gray = cv2.cvtColor((img * 255).astype(np.uint8), cv2.COLOR_RGB2GRAY)
    _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    
    if contours:
        cnt = max(contours, key=cv2.contourArea)
        
        # Area and Perimeter
        area = cv2.contourArea(cnt)
        perimeter = cv2.arcLength(cnt, True)
        features.extend([area, perimeter])
        
        # Circularity
        circularity = 4 * np.pi * area / (perimeter ** 2)
        features.append(circularity)
        
        # Asymmetry
        (x, y), (MA, ma), angle = cv2.fitEllipse(cnt)
        asymmetry = MA / ma
        features.append(asymmetry)
        
        # Border irregularity
        hull = cv2.convexHull(cnt)
        hull_area = cv2.contourArea(hull)
        solidity = float(area) / hull_area
        features.append(solidity)
    else:
        features.extend([0] * 5)  # Placeholder if no contour found
    
    return features

def analyze_lesion(img):
    img = preprocess_image(img)
    
    color_features = extract_color_features(img)
    texture_features = extract_texture_features(img)
    shape_features = extract_shape_features(img)
    
    all_features = np.concatenate([color_features, texture_features, shape_features])
    
    return all_features

def process_image_folder(folder_path):
    all_features = []
    image_names = []
    
    # Get all image files
    image_files = [f for f in os.listdir(folder_path) if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
    
    for image_file in tqdm(image_files, desc="Processing images"):
        image_path = os.path.join(folder_path, image_file)
        img = cv2.imread(image_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        
        features = analyze_lesion(img)
        all_features.append(features)
        image_names.append(image_file)
    
    # Create a DataFrame with the features
    feature_names = [f'feature_{i}' for i in range(len(all_features[0]))]
    df = pd.DataFrame(all_features, columns=feature_names)
    df['image_name'] = image_names
    
    return df

# Usage
folder_path = "ISIC-images"
results_df = process_image_folder(folder_path)

# Save the results
results_df.to_csv("skin_lesion_features.csv", index=False)
print(f"Processed {len(results_df)} images. Results saved to 'skin_lesion_features.csv'")


Processing images: 100%|██████████| 25431/25431 [1:22:39<00:00,  5.13it/s]


Processed 25431 images. Results saved to 'skin_lesion_features.csv'


In [4]:
import os
import cv2
import numpy as np
import pandas as pd
from skimage.feature import local_binary_pattern
from skimage.feature import graycomatrix, graycoprops
from scipy.stats import skew
from tqdm import tqdm

def preprocess_image(img, target_size=(300, 300)):
    # Resize
    img = cv2.resize(img, target_size)
    
    # Extract the circular region of interest
    mask = np.zeros(img.shape[:2], dtype=np.uint8)
    cv2.circle(mask, (img.shape[1]//2, img.shape[0]//2), min(img.shape[0], img.shape[1])//2 - 10, (255), -1)
    img = cv2.bitwise_and(img, img, mask=mask)
    
    # Enhance contrast
    lab = cv2.cvtColor(img, cv2.COLOR_RGB2LAB)
    l, a, b = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
    cl = clahe.apply(l)
    enhanced_lab = cv2.merge((cl,a,b))
    img = cv2.cvtColor(enhanced_lab, cv2.COLOR_LAB2RGB)
    
    # Normalize
    img = img.astype(np.float32) / 255.0
    
    return img

def extract_color_features(img):
    features = {}
    channels = ['R', 'G', 'B']
    
    # Color moments (mean, std, skewness) for each channel
    for i, channel in enumerate(channels):
        channel_data = img[:,:,i]
        features[f'color_mean_{channel}'] = channel_data[channel_data != 0].mean()
        features[f'color_std_{channel}'] = channel_data[channel_data != 0].std()
        features[f'color_skew_{channel}'] = skew(channel_data[channel_data != 0].ravel())
    
    # Color histograms
    for i, channel in enumerate(channels):
        hist = cv2.calcHist([img], [i], None, [32], [0, 1])
        for j, value in enumerate(hist.flatten()):
            features[f'color_hist_{channel}_{j}'] = value
    
    return features

def extract_texture_features(img):
    features = {}
    
    gray = cv2.cvtColor((img * 255).astype(np.uint8), cv2.COLOR_RGB2GRAY)
    
    # Local Binary Patterns
    radius = 3
    n_points = 8 * radius
    lbp = local_binary_pattern(gray, n_points, radius, method='uniform')
    hist, _ = np.histogram(lbp.ravel(), bins=np.arange(0, n_points + 3), range=(0, n_points + 2))
    for i, value in enumerate(hist):
        features[f'lbp_{i}'] = value
    
    # Haralick texture features
    glcm = graycomatrix(gray, [5], [0], 256, symmetric=True, normed=True)
    props = ['contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation']
    for prop in props:
        value = graycoprops(glcm, prop)[0, 0]
        features[f'haralick_{prop}'] = value
    
    return features

def extract_shape_features(img):
    features = {}
    
    gray = cv2.cvtColor((img * 255).astype(np.uint8), cv2.COLOR_RGB2GRAY)
    _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    
    if contours:
        cnt = max(contours, key=cv2.contourArea)
        
        # Area and Perimeter
        area = cv2.contourArea(cnt)
        perimeter = cv2.arcLength(cnt, True)
        features['shape_area'] = area
        features['shape_perimeter'] = perimeter
        
        # Circularity
        circularity = 4 * np.pi * area / (perimeter ** 2)
        features['shape_circularity'] = circularity
        
        # Asymmetry
        (x, y), (MA, ma), angle = cv2.fitEllipse(cnt)
        asymmetry = MA / ma
        features['shape_asymmetry'] = asymmetry
        
        # Border irregularity
        hull = cv2.convexHull(cnt)
        hull_area = cv2.contourArea(hull)
        solidity = float(area) / hull_area
        features['shape_border_irregularity'] = solidity
    else:
        for feature in ['shape_area', 'shape_perimeter', 'shape_circularity', 'shape_asymmetry', 'shape_border_irregularity']:
            features[feature] = 0
    
    return features

def analyze_lesion(img):
    img = preprocess_image(img)
    
    features = {}
    features.update(extract_color_features(img))
    features.update(extract_texture_features(img))
    features.update(extract_shape_features(img))
    
    return features

def process_image_folder(folder_path):
    all_features = []
    image_names = []
    
    # Get all image files
    image_files = [f for f in os.listdir(folder_path) if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
    
    for image_file in tqdm(image_files, desc="Processing images"):
        image_path = os.path.join(folder_path, image_file)
        img = cv2.imread(image_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        
        features = analyze_lesion(img)
        all_features.append(features)
        image_names.append(image_file)
    
    # Create a DataFrame with the features
    df = pd.DataFrame(all_features)
    df['image_name'] = image_names
    
    return df

# Usage
folder_path = "ISIC-images"
results_df = process_image_folder(folder_path)

# Save the results
results_df.to_csv("skin_lesion_features.csv", index=False)
print(f"Processed {len(results_df)} images. Results saved to 'skin_lesion_features.csv'")
print(f"Total number of features: {len(results_df.columns) - 1}")  # -1 for the image_name column

Processing images: 100%|██████████| 100/100 [00:02<00:00, 36.93it/s]

Processed 100 images. Results saved to 'skin_lesion_features.csv'
Total number of features: 141





In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Step 1: Load the newly generated data after improving feature extraction
df = pd.read_csv("skin_lesion_features.csv")

# Step 2: Assume we have a target variable called 'label'
# You should replace this with the actual labels after loading the proper dataset
df['label'] = np.random.choice(['benign', 'malignant'], size=len(df))

# Step 3: Separate features and target
X = df.drop(['image_name', 'label'], axis=1)
y = df['label']

# Step 4: Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 5: Scale the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Step 6: SVM Model - Perform Grid Search for hyperparameter tuning
svm_param_grid = {
    'C': [0.1, 1, 10, 100],
    'kernel': ['rbf', 'poly', 'linear'],  # Added 'linear' kernel for feature importance
    'gamma': ['scale', 'auto', 0.1, 1]
}

svm_grid = GridSearchCV(SVC(random_state=42), svm_param_grid, cv=5, n_jobs=-1)
svm_grid.fit(X_train_scaled, y_train)

# Step 7: Evaluate SVM Model
print("SVM Best Parameters:", svm_grid.best_params_)
print("SVM Best Score:", svm_grid.best_score_)

svm_pred = svm_grid.predict(X_test_scaled)
print("\nSVM Classification Report:")
print(classification_report(y_test, svm_pred))

# Step 8: MLP Model - Perform Grid Search for hyperparameter tuning
mlp_param_grid = {
    'hidden_layer_sizes': [(50,), (100,), (50, 50), (100, 50)],
    'activation': ['relu', 'tanh'],
    'alpha': [0.0001, 0.001, 0.01],
    'learning_rate': ['constant', 'adaptive']
}

mlp_grid = GridSearchCV(MLPClassifier(random_state=42, max_iter=1000), mlp_param_grid, cv=5, n_jobs=-1)
mlp_grid.fit(X_train_scaled, y_train)

# Step 9: Evaluate MLP Model
print("\nMLP Best Parameters:", mlp_grid.best_params_)
print("MLP Best Score:", mlp_grid.best_score_)

mlp_pred = mlp_grid.predict(X_test_scaled)
print("\nMLP Classification Report:")
print(classification_report(y_test, mlp_pred))

# Step 10: Plot Confusion Matrices for both SVM and MLP
def plot_confusion_matrix(y_true, y_pred, title):
    cm = confusion_matrix(y_true, y_pred)
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
    plt.title(title)
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.show()

plot_confusion_matrix(y_test, svm_pred, "SVM Confusion Matrix")
plot_confusion_matrix(y_test, mlp_pred, "MLP Confusion Matrix")

# Step 11: Feature Importance (for SVM with linear kernel)
if svm_grid.best_estimator_.kernel == 'linear':
    feature_importance = pd.DataFrame({
        'feature': X.columns,
        'importance': abs(svm_grid.best_estimator_.coef_[0])
    }).sort_values('importance', ascending=False)
    
    plt.figure(figsize=(10, 6))
    sns.barplot(x='importance', y='feature', data=feature_importance.head(20))
    plt.title('Top 20 Important Features (SVM)')
    plt.show()

# Step 12: Predictions on New Data
new_data = X_test_scaled[:5]  # Example: First 5 samples of the test set
svm_new_pred = svm_grid.predict(new_data)
mlp_new_pred = mlp_grid.predict(new_data)

print("\nSVM Predictions on new data:", svm_new_pred)
print("MLP Predictions on new data:", mlp_new_pred)


In [1]:
# FROM HERE ON IS MY PART THAT IS WORKING .....
import os
import cv2
import numpy as np
import pandas as pd
from tqdm import tqdm

def load_dataset(folder_path):
    data = []
    labels = []

    for label in ['malignant', 'benign']:
        folder = os.path.join(folder_path, label)
        for img_name in tqdm(os.listdir(folder), desc=f"Loading {label} images"):
            img_path = os.path.join(folder, img_name)
            img = cv2.imread(img_path)
            if img is not None:
                if len(img.shape) == 2:  # Grayscale image (2D)
                    img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)  # Convert grayscale to RGB
                else:
                    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Convert to RGB from BGR
                data.append(img)
                labels.append(label)
    
    return np.array(data), np.array(labels)


# Load train and test sets
train_data, train_labels = load_dataset("train")
test_data, test_labels = load_dataset("test")

# Convert labels to binary format (0 for benign, 1 for malignant)
train_labels = np.where(train_labels == 'benign', 0, 1)
test_labels = np.where(test_labels == 'benign', 0, 1)


Loading malignant images: 100%|██████████| 1197/1197 [00:06<00:00, 172.47it/s]
Loading benign images: 100%|██████████| 1440/1440 [00:04<00:00, 347.13it/s]
Loading malignant images: 100%|██████████| 300/300 [00:01<00:00, 238.16it/s]
Loading benign images: 100%|██████████| 360/360 [00:02<00:00, 136.50it/s]


In [16]:
def extract_shape_features(img):
    features = []

    gray = cv2.cvtColor((img * 255).astype(np.uint8), cv2.COLOR_RGB2GRAY)
    _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    if contours:
        cnt = max(contours, key=cv2.contourArea)

        # Area and Perimeter
        area = cv2.contourArea(cnt)
        perimeter = cv2.arcLength(cnt, True)
        features.extend([area, perimeter])

        # Circularity
        circularity = 4 * np.pi * area / (perimeter ** 2)
        features.append(circularity)

        # Check if contour has at least 5 points before fitting an ellipse
        if len(cnt) >= 5:
            (x, y), (MA, ma), angle = cv2.fitEllipse(cnt)
            asymmetry = MA / ma
            features.append(asymmetry)
        else:
            # Add a placeholder value if ellipse fitting is not possible
            features.append(0)

        # Border irregularity
        hull = cv2.convexHull(cnt)
        hull_area = cv2.contourArea(hull)
        solidity = float(area) / hull_area
        features.append(solidity)
    else:
        # Placeholder if no contour is found
        features.extend([0] * 5)

    return features

def extract_texture_features(img):
    features = {}
    
    gray = cv2.cvtColor((img * 255).astype(np.uint8), cv2.COLOR_RGB2GRAY)
    
    # Local Binary Patterns
    radius = 3
    n_points = 8 * radius
    lbp = local_binary_pattern(gray, n_points, radius, method='uniform')
    hist, _ = np.histogram(lbp.ravel(), bins=np.arange(0, n_points + 3), range=(0, n_points + 2))
    for i, value in enumerate(hist):
        features[f'lbp_{i}'] = value
    
    # Haralick texture features
    glcm = graycomatrix(gray, [5], [0], 256, symmetric=True, normed=True)
    props = ['contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation']
    for prop in props:
        value = graycoprops(glcm, prop)[0, 0]
        features[f'haralick_{prop}'] = value
    
    return features

def extract_color_features(img):
    features = []
    
    # Color moments (mean, std, skewness) for each channel
    for i in range(3):
        channel = img[:,:,i]
        features.extend([channel[channel != 0].mean(), channel[channel != 0].std(), skew(channel[channel != 0].ravel())])
    
    # Color histograms
    for i in range(3):
        hist = cv2.calcHist([img], [i], None, [32], [0, 1])
        features.extend(hist.flatten())
    
    return features

def preprocess_image(img):    
    # Check if the image is RGB (3 channels)
    if len(img.shape) == 3 and img.shape[2] == 3:  # RGB image
        gray_img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)  # Convert to grayscale
    else:  # Image is already grayscale (2D)
        gray_img = img
    
    # Apply CLAHE to the grayscale image
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    clahe_img = clahe.apply(gray_img)
    
    return clahe_img

def analyze_lesion(img):
    # Check if the image is grayscale (2D) and convert it to RGB (3D) if necessary
    if len(img.shape) == 2:  # If the image is grayscale
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)  # Convert grayscale to RGB

    color_features = extract_color_features(img)
    texture_features = extract_texture_features(img)
    shape_features = extract_shape_features(img)

    return np.concatenate([color_features, texture_features, shape_features])


def process_dataset(data, labels):
    features = []
    for img in tqdm(data, desc="Processing images"):
        processed_img = preprocess_image(img)  # Preprocess the image
        img_features = analyze_lesion(processed_img)  # Extract color, texture, shape features
        features.append(img_features)
    
    features = np.array(features)
    return features, labels

# Process train and test sets
X_train, y_train = process_dataset(train_data, train_labels)
X_test, y_test = process_dataset(test_data, test_labels)

Processing images: 100%|██████████| 2637/2637 [02:48<00:00, 15.68it/s]
Processing images: 100%|██████████| 660/660 [00:40<00:00, 16.29it/s]


In [17]:

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Split and scale the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# SVM Model
svm_param_grid = {
    'C': [0.1, 1, 10, 100],
    'kernel': ['rbf', 'poly'],
    'gamma': ['scale', 'auto', 0.1, 1]
}

svm_grid = GridSearchCV(SVC(random_state=42), svm_param_grid, cv=5, n_jobs=-1)
svm_grid.fit(X_train_scaled, y_train)

print("SVM Best Parameters:", svm_grid.best_params_)
print("SVM Best Score:", svm_grid.best_score_)

svm_pred = svm_grid.predict(X_test_scaled)
print("\nSVM Classification Report:")
print(classification_report(y_test, svm_pred))

# MLP Model
mlp_param_grid = {
    'hidden_layer_sizes': [(50,), (100,), (50, 50), (100, 50)],
    'activation': ['relu', 'tanh'],
    'alpha': [0.0001, 0.001, 0.01],
    'learning_rate': ['constant', 'adaptive']
}

mlp_grid = GridSearchCV(MLPClassifier(random_state=42, max_iter=1000), mlp_param_grid, cv=5, n_jobs=-1)
mlp_grid.fit(X_train_scaled, y_train)

print("\nMLP Best Parameters:", mlp_grid.best_params_)
print("MLP Best Score:", mlp_grid.best_score_)

mlp_pred = mlp_grid.predict(X_test_scaled)
print("\nMLP Classification Report:")
print(classification_report(y_test, mlp_pred))


SVM Best Parameters: {'C': 10, 'gamma': 'scale', 'kernel': 'rbf'}
SVM Best Score: 0.8028082053935943

SVM Classification Report:
              precision    recall  f1-score   support

           0       0.84      0.85      0.85       360
           1       0.82      0.80      0.81       300

    accuracy                           0.83       660
   macro avg       0.83      0.83      0.83       660
weighted avg       0.83      0.83      0.83       660


MLP Best Parameters: {'activation': 'relu', 'alpha': 0.0001, 'hidden_layer_sizes': (50,), 'learning_rate': 'constant'}
MLP Best Score: 0.8035672186763267

MLP Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.82      0.83       360
           1       0.79      0.80      0.79       300

    accuracy                           0.81       660
   macro avg       0.81      0.81      0.81       660
weighted avg       0.81      0.81      0.81       660



### Arnav Karnik ###

In [6]:
import cv2
import numpy as np
from skimage import feature, exposure, filters, segmentation, measure
from scipy.stats import skew, kurtosis
from skimage.feature import graycomatrix, graycoprops, local_binary_pattern, hog
from skimage.filters import gabor, frangi, sato
from scipy.spatial import distance
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
import os
from tqdm import tqdm
import pandas as pd

class SkinLesionAnalyzer:
    def __init__(self):
        self.results = {}
        
    def preprocess_image(self, image):
        """
        Enhanced preprocessing pipeline with advanced CV techniques
        """
        image = image.astype(np.float32) / 255.0
        original = image.copy()
        
        # Multi-scale segmentation
        mask = self.create_lesion_mask_multiscale(image)
        
        # Enhanced preprocessing pipeline
        preprocessed = self.apply_preprocessing_pipeline(image, mask)
        
        self.results['original'] = original
        self.results['mask'] = mask
        self.results['preprocessed'] = preprocessed
        
        return preprocessed
    
    def create_lesion_mask_multiscale(self, image):
        """
        Create binary mask using multi-scale analysis
        """
        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        
        # Multi-scale Gaussian pyramid
        masks = []
        current = gray.copy()
        for _ in range(3):  # Analysis at 3 scales
            # Apply SLIC superpixels
            segments = segmentation.slic(current, n_segments=100, compactness=10)
            
            # Graph-based segmentation
            graph_mask = segmentation.felzenszwalb(current, scale=100, sigma=0.5, min_size=50)
            
            # Watershed segmentation
            gradient = filters.sobel(current)
            markers = measure.label(gradient < gradient.mean())
            watershed_mask = segmentation.watershed(gradient, markers)
            
            # Combine segmentations
            combined = (segments > 0) & (graph_mask > 0) & (watershed_mask > 0)
            masks.append(cv2.resize(combined.astype(float), (gray.shape[1], gray.shape[0])))
            
            # Downsample for next scale
            current = cv2.pyrDown(current)
        
        # Combine masks from different scales
        final_mask = np.mean(masks, axis=0) > 0.5
        
        # Post-process mask
        kernel = np.ones((5,5), np.uint8)
        final_mask = cv2.morphologyEx(final_mask.astype(np.uint8), cv2.MORPH_CLOSE, kernel)
        final_mask = cv2.morphologyEx(final_mask, cv2.MORPH_OPEN, kernel)
        
        return final_mask
    
    def apply_preprocessing_pipeline(self, image, mask):
        """
        Enhanced preprocessing pipeline with advanced filtering
        """
        # Non-local means denoising
        processed = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21)
        
        # Hair removal with enhanced detection
        processed = self.remove_hair_advanced(processed)
        
        # Illumination correction using DoG
        processed = self.correct_illumination(processed)
        
        # Advanced color normalization
        processed = self.normalize_color_advanced(processed)
        
        # Multi-scale contrast enhancement
        processed = self.enhance_contrast_multiscale(processed)
        
        # Apply mask
        processed = processed * np.stack([mask]*3, axis=-1)
        
        return processed
    
    def remove_hair_advanced(self, image):
        """
        Advanced hair removal using multi-scale line detection
        """
        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        
        # Multi-scale line detection
        hair_mask = np.zeros_like(gray)
        for sigma in [1, 2, 3]:
            # Frangi filter for line detection
            line_mask = frangi(gray, sigmas=range(1, sigma + 1), black_ridges=True)
            hair_mask = np.maximum(hair_mask, line_mask)
        
        # Threshold and clean up
        hair_mask = (hair_mask > 0.1).astype(np.uint8)
        
        # Dilate to ensure complete hair coverage
        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5,5))
        hair_mask = cv2.dilate(hair_mask, kernel, iterations=1)
        
        # Inpaint using Navier-Stokes method
        hair_removed = cv2.inpaint(image, hair_mask, 5, cv2.INPAINT_NS)
        
        return hair_removed
    
    def correct_illumination(self, image):
        """
        Correct uneven illumination using Difference of Gaussians
        """
        # Convert to LAB
        lab = cv2.cvtColor(image, cv2.COLOR_RGB2LAB)
        l, a, b = cv2.split(lab)
        
        # Apply DoG
        gaussian1 = cv2.GaussianBlur(l, (0,0), 15)
        gaussian2 = cv2.GaussianBlur(l, (0,0), 2)
        dog = gaussian1 - gaussian2
        
        # Normalize and enhance L channel
        l = exposure.rescale_intensity(l - dog)
        
        # Merge channels
        corrected = cv2.merge([l, a, b])
        return cv2.cvtColor(corrected, cv2.COLOR_LAB2RGB)
    
    def normalize_color_advanced(self, image):
        """
        Advanced color normalization using color transfer and statistics
        """
        # Convert to LAB
        lab = cv2.cvtColor(image, cv2.COLOR_RGB2LAB)
        
        # Color clustering
        pixels = lab.reshape(-1, 3)
        kmeans = KMeans(n_clusters=5, random_state=42)
        labels = kmeans.fit_predict(pixels)
        
        # Find dominant colors
        dominant_colors = kmeans.cluster_centers_
        
        # Normalize based on dominant colors
        normalized = np.zeros_like(lab, dtype=np.float32)
        for i in range(len(dominant_colors)):
            mask = (labels == i).reshape(lab.shape[:2])
            color_mean = dominant_colors[i]
            color_std = np.std(pixels[labels == i], axis=0)
            
            # Apply color transfer
            for c in range(3):
                channel = lab[:,:,c]
                normalized[:,:,c][mask] = ((channel[mask] - np.mean(channel[mask])) / 
                                         (np.std(channel[mask]) + 1e-6) * color_std[c] + 
                                         color_mean[c])
        
        # Convert back to RGB
        return cv2.cvtColor(normalized.astype(np.uint8), cv2.COLOR_LAB2RGB)
    
    def enhance_contrast_multiscale(self, image):
        """
        Multi-scale contrast enhancement using pyramid decomposition
        """
        # Convert to LAB
        lab = cv2.cvtColor(image, cv2.COLOR_RGB2LAB)
        l, a, b = cv2.split(lab)
        
        # Build Gaussian pyramid
        pyramids = []
        current = l.copy()
        for _ in range(3):
            pyramids.append(current)
            current = cv2.pyrDown(current)
        
        # Enhance each level
        enhanced_pyramids = []
        for level in pyramids:
            clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
            enhanced = clahe.apply(np.uint8(level * 255)) / 255.0
            enhanced_pyramids.append(enhanced)
        
        # Reconstruct image
        enhanced_l = enhanced_pyramids[0]
        for i in range(1, len(enhanced_pyramids)):
            enhanced = cv2.resize(enhanced_pyramids[i], 
                                (enhanced_l.shape[1], enhanced_l.shape[0]))
            enhanced_l = cv2.addWeighted(enhanced_l, 0.7, enhanced, 0.3, 0)
        
        # Merge channels
        enhanced = cv2.merge([enhanced_l, a, b])
        return cv2.cvtColor(enhanced, cv2.COLOR_LAB2RGB)
    
    def extract_features(self, image):
        """
        Extract enhanced feature set using advanced CV techniques
        """
        features = {}
        
        # Basic features
        features['shape'] = self.extract_shape_features(image)
        features['color'] = self.extract_color_features(image)
        features['texture'] = self.extract_texture_features(image)
        features['border'] = self.extract_border_features(image)
        
        # Advanced features
        features['hog'] = self.extract_hog_features(image)
        features['gabor'] = self.extract_gabor_features(image)
        features['fractal'] = self.extract_fractal_features(image)
        
        return features
    
    def extract_hog_features(self, image):
        """
        Extract Histogram of Oriented Gradients features
        """
        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        
        # Calculate HOG features
        features, hog_image = hog(gray, orientations=9, pixels_per_cell=(8, 8),
                                cells_per_block=(2, 2), visualize=True)
        
        return {
            'hog_mean': np.mean(features),
            'hog_std': np.std(features),
            'hog_max': np.max(features)
        }
    
    def extract_gabor_features(self, image):
        """
        Extract Gabor filter features
        """
        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        
        features = {}
        for theta in range(4):  # 4 orientations
            for sigma in [1, 2]:  # 2 scales
                filt_real, filt_imag = gabor(gray, frequency=0.6,
                                           theta=theta * np.pi / 4,
                                           sigma_x=sigma, sigma_y=sigma)
                
                features[f'gabor_mean_{theta}_{sigma}'] = np.mean(filt_real)
                features[f'gabor_var_{theta}_{sigma}'] = np.var(filt_real)
                
        return features
    
    def extract_fractal_features(self, image):
        """
        Extract fractal dimension features using box-counting
        """
        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        
        # Calculate edges using Canny
        edges = cv2.Canny(np.uint8(gray * 255), 100, 200)
        
        # Box counting
        box_sizes = np.array([2, 3, 4, 6, 8, 12, 16, 32, 64])
        counts = []
        
        for size in box_sizes:
            count = np.sum(measure.block_reduce(edges, (size, size),
                                              func=np.max) > 0)
            counts.append(count)
            
        coeffs = np.polyfit(np.log(box_sizes), np.log(counts), 1)
        
        return {
            'fractal_dimension': -coeffs[0],
            'fractal_intercept': coeffs[1]
        }


In [7]:
# [Previous code remains the same until the visualization methods]

def visualize_analysis(self):
    """
    Enhanced visualization of the analysis results with multiple plots
    """
    # Create a larger figure with subplots
    fig = plt.figure(figsize=(20, 15))
    gs = fig.add_gridspec(3, 3)
    
    # Original and basic processing
    ax1 = fig.add_subplot(gs[0, 0])
    ax1.imshow(self.results['original'])
    ax1.set_title('Original Image')
    ax1.axis('off')
    
    ax2 = fig.add_subplot(gs[0, 1])
    ax2.imshow(self.results['mask'], cmap='gray')
    ax2.set_title('Lesion Mask')
    ax2.axis('off')
    
    ax3 = fig.add_subplot(gs[0, 2])
    ax3.imshow(self.results['preprocessed'])
    ax3.set_title('Preprocessed Image')
    ax3.axis('off')
    
    # Advanced visualizations
    if 'mask' in self.results:
        # Border detection
        ax4 = fig.add_subplot(gs[1, 0])
        contours, _ = cv2.findContours(self.results['mask'].astype(np.uint8), 
                                       cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        border_img = self.results['original'].copy()
        cv2.drawContours(border_img, contours, -1, (0, 1, 0), 2)
        ax4.imshow(border_img)
        ax4.set_title('Border Detection')
        ax4.axis('off')
        
        # HOG visualization
        ax5 = fig.add_subplot(gs[1, 1])
        gray = cv2.cvtColor(self.results['preprocessed'], cv2.COLOR_RGB2GRAY)
        features, hog_image = hog(gray, orientations=9, pixels_per_cell=(8, 8),
                                  cells_per_block=(2, 2), visualize=True)
        ax5.imshow(hog_image, cmap='gray')
        ax5.set_title('HOG Features')
        ax5.axis('off')
        
        # Gabor filter visualization
        ax6 = fig.add_subplot(gs[1, 2])
        filt_real, _ = gabor(gray, frequency=0.6, theta=0, sigma_x=1, sigma_y=1)
        ax6.imshow(filt_real, cmap='gray')
        ax6.set_title('Gabor Filter Response')
        ax6.axis('off')
        
        # Multi-scale visualization
        ax7 = fig.add_subplot(gs[2, 0])
        gradient = filters.sobel(gray)
        ax7.imshow(gradient, cmap='gray')
        ax7.set_title('Gradient Magnitude')
        ax7.axis('off')
        
        # Color clustering visualization
        ax8 = fig.add_subplot(gs[2, 1])
        lab = cv2.cvtColor(self.results['preprocessed'], cv2.COLOR_RGB2LAB)
        pixels = lab.reshape(-1, 3)
        kmeans = KMeans(n_clusters=5, random_state=42)
        labels = kmeans.fit_predict(pixels)
        segmented = kmeans.cluster_centers_[labels].reshape(lab.shape)
        segmented = cv2.cvtColor(segmented.astype(np.uint8), cv2.COLOR_LAB2RGB)
        ax8.imshow(segmented)
        ax8.set_title('Color Clusters')
        ax8.axis('off')
        
        # Fractal visualization
        ax9 = fig.add_subplot(gs[2, 2])
        edges = cv2.Canny(np.uint8(gray * 255), 100, 200)
        ax9.imshow(edges, cmap='gray')
        ax9.set_title('Edge Detection (Fractal Analysis)')
        ax9.axis('off')
    
    plt.tight_layout()
    plt.show()

def plot_feature_distributions(self, features):
    """
    Plot distributions of extracted features
    """
    # Create color distribution plot
    plt.figure(figsize=(15, 5))
    
    # RGB distribution
    plt.subplot(131)
    for i, channel in enumerate(['rgb_1_mean', 'rgb_2_mean', 'rgb_3_mean']):
        if channel in features['color']:
            plt.bar(i, features['color'][channel], color=['red', 'green', 'blue'][i])
    plt.title('RGB Channel Distribution')
    plt.xticks(range(3), ['R', 'G', 'B'])
    
    # Texture features
    plt.subplot(132)
    texture_features = [f for f in features['texture'].keys() if f.startswith('glcm')]
    plt.bar(range(len(texture_features)), [features['texture'][f] for f in texture_features])
    plt.title('GLCM Texture Features')
    plt.xticks(range(len(texture_features)), texture_features, rotation=45)
    
    # Shape features
    plt.subplot(133)
    shape_features = features['shape'].keys()
    plt.bar(range(len(shape_features)), [features['shape'][f] for f in shape_features])
    plt.title('Shape Features')
    plt.xticks(range(len(shape_features)), shape_features, rotation=45)
    
    plt.tight_layout()
    plt.show()

def generate_report(self, features):
    """
    Generate a comprehensive analysis report
    """
    print("=== SKIN LESION ANALYSIS REPORT ===\n")
    
    # Shape analysis
    print("SHAPE ANALYSIS:")
    print(f"Circularity: {features['shape']['circularity']:.3f}")
    print(f"Asymmetry: {1 - features['shape']['solidity']:.3f}")
    print(f"Border Irregularity: {1 - features['shape']['circularity']:.3f}\n")
    
    # Color analysis
    print("COLOR ANALYSIS:")
    color_variation = np.std([features['color'][f] for f in features['color'] if f.startswith('rgb')])
    print(f"Color Variation: {color_variation:.3f}")
    
    # Texture analysis
    print("\nTEXTURE ANALYSIS:")
    print(f"Homogeneity: {features['texture']['glcm_homogeneity']:.3f}")
    print(f"Contrast: {features['texture']['glcm_contrast']:.3f}")
    
    # Advanced features
    print("\nADVANCED FEATURES:")
    print(f"Fractal Dimension: {features['fractal']['fractal_dimension']:.3f}")
    print(f"HOG Feature Mean: {features['hog']['hog_mean']:.3f}")
    
    # Border features
    print("\nBORDER FEATURES:")
    print(f"Border Smoothness: {features['border']['smoothness']:.3f}")
    print(f"Gradient Magnitude: {features['border']['gradient_mean']:.3f}")

def main():
    """
    Example usage of the enhanced skin lesion analyzer
    """
    # Initialize analyzer
    analyzer = SkinLesionAnalyzer()
    
    # Example usage with sample image
    """
    # Load image
    image_path = 'skin_lesion.jpg'
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Could not load image from {image_path}")
        return
        
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    
    # Process image
    print("Processing image...")
    preprocessed = analyzer.preprocess_image(image)
    
    # Extract features
    print("Extracting features...")
    features = analyzer.extract_features(preprocessed)
    
    # Visualize results
    print("Generating visualizations...")
    analyzer.visualize_analysis()
    analyzer.plot_feature_distributions(features)
    
    # Generate report
    print("\nGenerating analysis report...")
    analyzer.generate_report(features)
    """

if __name__ == "__main__":
    main()


In [8]:
def analyze_lesion(self, image):
    """
    Complete analysis pipeline for a single image
    """
    # Preprocess
    preprocessed = self.preprocess_image(image)
    
    # Extract all features
    features = self.extract_features(preprocessed)
    
    # Flatten the nested dictionary of features
    flattened_features = {}
    for category, feature_dict in features.items():
        if feature_dict is not None:  # Check for None values
            for feature_name, value in feature_dict.items():
                flattened_features[f"{category}_{feature_name}"] = value
                
    return flattened_features

def process_image_folder(self, folder_path, save_visualizations=False, visualization_folder=None):
    """
    Process all images in a folder and compile results
    
    Parameters:
    folder_path (str): Path to folder containing images
    save_visualizations (bool): Whether to save visualization plots
    visualization_folder (str): Path to save visualizations (if save_visualizations is True)
    
    Returns:
    pandas.DataFrame: DataFrame containing features for all images
    """
    
    # Create visualization folder if needed
    if save_visualizations and visualization_folder:
        os.makedirs(visualization_folder, exist_ok=True)
    
    all_features = []
    image_names = []
    failed_images = []
    
    # Get all image files
    image_files = [f for f in os.listdir(folder_path) 
                   if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
    
    # Process each image
    for image_file in tqdm(image_files, desc="Processing images"):
        try:
            # Load and process image
            image_path = os.path.join(folder_path, image_file)
            img = cv2.imread(image_path)
            
            if img is None:
                raise ValueError(f"Could not load image: {image_file}")
                
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            
            # Analyze image
            features = self.analyze_lesion(img)
            
            # Save visualizations if requested
            if save_visualizations and visualization_folder:
                self.save_analysis_visualizations(
                    os.path.join(visualization_folder, f"{os.path.splitext(image_file)[0]}_analysis.png")
                )
            
            all_features.append(features)
            image_names.append(image_file)
            
        except Exception as e:
            print(f"\nError processing {image_file}: {str(e)}")
            failed_images.append((image_file, str(e)))
            continue
    
    # Create DataFrame
    df = pd.DataFrame(all_features)
    df['image_name'] = image_names
    
    # Generate batch analysis report
    self.generate_batch_analysis_report(df, failed_images)
    
    return df

def save_analysis_visualizations(self, output_path):
    """
    Save current analysis visualizations to file
    """
    # Create figure
    fig = plt.figure(figsize=(20, 15))
    
    # Add visualizations (same as visualize_analysis method)
    self.visualize_analysis()
    
    # Save to file
    plt.savefig(output_path, bbox_inches='tight', dpi=300)
    plt.close()

def generate_batch_analysis_report(self, df, failed_images):
    """
    Generate comprehensive report for batch processing results
    """
    print("\n=== BATCH PROCESSING REPORT ===")
    print(f"\nTotal images processed: {len(df)}")
    print(f"Number of features extracted: {len(df.columns) - 1}")  # -1 for image_name
    
    if failed_images:
        print("\nFailed images:")
        for img, error in failed_images:
            print(f"- {img}: {error}")
    
    print("\nFeature Statistics:")
    # Remove image_name column for statistics
    feature_df = df.drop('image_name', axis=1)
    
    # Calculate basic statistics
    stats = feature_df.describe()
    print("\nSummary Statistics:")
    print(stats)
    
    # Save statistics to file
    stats.to_csv("batch_processing_statistics.csv")
    
    # Generate feature distribution plots
    self.plot_batch_feature_distributions(feature_df)

def plot_batch_feature_distributions(self, feature_df):
    """
    Plot distribution of features across all processed images
    """
    # Create directory for plots
    os.makedirs("feature_distributions", exist_ok=True)
    
    # Plot distributions for different feature categories
    feature_categories = {
        'shape': [col for col in feature_df.columns if col.startswith('shape_')],
        'color': [col for col in feature_df.columns if col.startswith('color_')],
        'texture': [col for col in feature_df.columns if col.startswith('texture_')],
        'border': [col for col in feature_df.columns if col.startswith('border_')],
        'advanced': [col for col in feature_df.columns if any(x in col for x in ['hog_', 'gabor_', 'fractal_'])]
    }
    
    for category, features in feature_categories.items():
        if features:
            plt.figure(figsize=(15, 5))
            
            # Create box plots for features in this category
            feature_df[features].boxplot()
            plt.xticks(rotation=45, ha='right')
            plt.title(f'{category.capitalize()} Feature Distributions')
            plt.tight_layout()
            
            # Save plot
            plt.savefig(f"feature_distributions/{category}_distributions.png")
            plt.close()

def main():
    """
    Example usage of the enhanced skin lesion analyzer with batch processing
    """
    # Initialize analyzer
    analyzer = SkinLesionAnalyzer()
    
    # Example usage for batch processing
    
    # Process folder of images
    folder_path = "ISIC-images"
    visualization_folder = "analysis_visualizations"
    
    # Process images and save results
    results_df = analyzer.process_image_folder(
        folder_path,
        save_visualizations=True,
        visualization_folder=visualization_folder
    )
    
    # Save results to CSV
    results_df.to_csv("skin_lesion_features.csv", index=False)
    print(f"\nResults saved to 'skin_lesion_features.csv'")
    
    # Example of loading and analyzing a single image
    image_path = 'sample_lesion.jpg'
    image = cv2.imread(image_path)
    if image is not None:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        
        # Process single image
        preprocessed = analyzer.preprocess_image(image)
        features = analyzer.extract_features(preprocessed)
        
        # Visualize results
        analyzer.visualize_analysis()
        analyzer.plot_feature_distributions(features)
        analyzer.generate_report(features)

if __name__ == "__main__":
    main()


AttributeError: 'SkinLesionAnalyzer' object has no attribute 'process_image_folder'