In [1]:
import os
import numpy as np
import cv2
import pywt
from PIL import Image
from skimage.feature import local_binary_pattern
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
from joblib import Parallel, delayed


In [2]:
# Define dataset paths
folder_benign_train = r'data/test/benign'
folder_malignant_train = r'data/train/malignant'
folder_benign_test = r'data/test/benign'
folder_malignant_test = r'data/test/malignant'
folder_healthy=r'data\healthy'
folder_not_skin=r'data\not_skin'

In [3]:
# Load and preprocess images
def read_image(imname):
    img = Image.open(imname).convert("L")  # Convert to grayscale
    return np.asarray(img, dtype=np.uint8)
def extract_contourlet_features(image):
    coeffs = pywt.wavedec2(image, 'db1', level=3)
    features = []
    for coeff in coeffs:
        if isinstance(coeff, tuple):
            for subband in coeff:
                features.append(np.mean(subband))
                features.append(np.var(subband))
        else:
            features.append(np.mean(coeff))
            features.append(np.var(coeff))
    return features

def extract_lbp_features(image, num_points=24, radius=3):
    lbp = local_binary_pattern(image, num_points, radius, method="uniform")
    hist, _ = np.histogram(lbp.ravel(), bins=np.arange(0, num_points + 3), range=(0, num_points + 2))
    return hist.astype("float") / hist.sum()



In [4]:
def load_images_from_folder(folder):
    images = []
    for filename in os.listdir(folder):
        img_path = os.path.join(folder, filename)
        try:
            img = read_image(img_path)
            images.append(extract_features(img))
        except Exception as e:
            print(f"Error loading {img_path}: {e}")
    return np.array(images, dtype="float")


In [5]:
from PIL import Image
import numpy as np
import os

def load_images_from_folder(folder, target_size=(100, 100)):
    images = []
    valid_extensions = (".jpg", ".jpeg", ".png", ".bmp", ".tiff")  # Supported formats
    
    for filename in os.listdir(folder):
        img_path = os.path.join(folder, filename)
        
        if not filename.lower().endswith(valid_extensions):
            print(f"Skipping non-image file: {filename}")
            continue  # Skip non-image files
        
        try:
            img = Image.open(img_path).convert("L")  # Convert to grayscale
            img = img.resize(target_size)  # Resize
            img = np.array(img, dtype=np.uint8)  # Convert to NumPy array
            images.append(img)
        except Exception as e:
            print(f"Error loading {img_path}: {e}")

    return np.array(images) if images else np.array([])

In [6]:
def extract_contourlet_features(image):
    if len(image.shape) == 3:  # Convert RGB to Grayscale if needed
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    elif len(image.shape) != 2:  # If image is not 2D, raise an error
        raise ValueError(f"Invalid image shape: {image.shape}. Expected 2D grayscale.")
    
    coeffs = pywt.wavedec2(image, 'db1', level=3)  # Apply wavelet decomposition
    features = []
    for coeff in coeffs:
        if isinstance(coeff, tuple):
            for subband in coeff:
                features.append(np.mean(subband))
                features.append(np.var(subband))
        else:
            features.append(np.mean(coeff))
            features.append(np.var(coeff))
    return features


In [7]:
# Local Binary Pattern Feature Extraction
def extract_lbp_features(image, num_points=24, radius=3):
    lbp = local_binary_pattern(image, num_points, radius, method="uniform")
    hist, _ = np.histogram(lbp.ravel(), bins=np.arange(0, num_points + 3), range=(0, num_points + 2))
    return hist.astype("float") / hist.sum()

# Combined Feature Extraction
def extract_features(image):
    contourlet_features = extract_contourlet_features(image)
    lbp_features = extract_lbp_features(image)
    return np.concatenate((contourlet_features, lbp_features))

# Parallel Processing for Feature Extraction
def process_images_in_parallel(images):
    return Parallel(n_jobs=-1)(delayed(extract_features)(img) for img in images)

In [9]:
# Load dataset
X_benign = load_images_from_folder(folder_benign_train)
X_malignant = load_images_from_folder(folder_malignant_train)
X_benign_test = load_images_from_folder(folder_benign_test)
X_malignant_test = load_images_from_folder(folder_malignant_test)
X_healthy=load_images_from_folder(folder_healthy)
X_not_skin=load_images_from_folder(folder_not_skin)
from sklearn.model_selection import train_test_split

# Assuming X_healthy and X_not_skin are small datasets
X_healthy_train, X_healthy_test, y_healthy_train, y_healthy_test = train_test_split(
    X_healthy, np.full(len(X_healthy), 2), test_size=0.2, random_state=42
)

X_not_skin_train, X_not_skin_test, y_not_skin_train, y_not_skin_test = train_test_split(
    X_not_skin, np.full(len(X_not_skin), 3), test_size=0.2, random_state=42
)

In [10]:
# Create labels
y_benign = np.zeros(X_benign.shape[0])  # 0 for benign
y_malignant = np.ones(X_malignant.shape[0])  # 1 for malignant
y_benign_test = np.zeros(X_benign_test.shape[0])
y_malignant_test = np.ones(X_malignant_test.shape[0])
y_healthy = np.full(X_healthy.shape[0], 2)
y_not_skin = np.full(X_not_skin.shape[0], 3) 
y_healthy_test = np.full(X_healthy.shape[0], 2)
y_not_skin_test = np.full(X_not_skin.shape[0], 3) 

In [11]:
print("y_benign_test shape:", y_benign_test.shape)
print("y_malignant_test shape:", y_malignant_test.shape)
print("y_healthy_test shape:", y_healthy_test.shape)
print("y_not_skin_test shape:", y_not_skin_test.shape)

y_benign_test shape: (360,)
y_malignant_test shape: (300,)
y_healthy_test shape: (8,)
y_not_skin_test shape: (8,)


In [12]:
# Concatenate
X_train = np.concatenate((X_benign, X_malignant, X_healthy, X_not_skin), axis=0)
y_train = np.concatenate((y_benign, y_malignant, y_healthy, y_not_skin), axis=0)

X_test = np.concatenate((X_benign_test, X_malignant_test, X_healthy_test, X_not_skin_test), axis=0)
y_test = np.concatenate((y_benign_test, y_malignant_test, y_healthy_test, y_not_skin_test), axis=0)



In [13]:
print("X_train Shape:", X_train.shape)
print("First Image Shape:", X_train[0].shape)

X_train Shape: (1573, 100, 100)
First Image Shape: (100, 100)


In [14]:
X_train = X_train.reshape(X_train.shape[0], -1)  # Flatten images
X_test = X_test.reshape(X_test.shape[0], -1)

In [15]:
y_test = y_test[:X_test.shape[0]]  # Keep only the first 664 labels


In [16]:
print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)
print("X_test shape:", X_test.shape)
print("y_test shape:", y_test.shape)

X_train shape: (1573, 10000)
y_train shape: (1573,)
X_test shape: (664, 10000)
y_test shape: (664,)


In [17]:
# Train SVM Classifier
svm_model = SVC(kernel='linear')
svm_model.fit(X_train, y_train)
y_pred_svm = svm_model.predict(X_test)
svm_acc = accuracy_score(y_test, y_pred_svm)
print("SVM Accuracy:", svm_acc)

SVM Accuracy: 0.8765060240963856


In [18]:
# Train Random Forest Classifier
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)
rf_acc = accuracy_score(y_test, y_pred_rf)
print("Random Forest Accuracy:", rf_acc)

Random Forest Accuracy: 0.9864457831325302


In [19]:
from tensorflow.keras.applications import VGG19
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import img_to_array, load_img
import numpy as np

# Load VGG19 model without the top layer
vgg19_model = VGG19(weights="imagenet", include_top=False, pooling="avg")

# Function to extract deep features
def extract_vgg19_features(img_path):
    img = load_img(img_path, target_size=(224, 224))  # Resize image
    img_array = img_to_array(img)
    img_array = np.expand_dims(img_array, axis=0)  # Add batch dimension
    img_array /= 255.0  # Normalize pixel values

    features = vgg19_model.predict(img_array)
    return features.flatten()  # Flatten the output


In [20]:
# Choose Best Model
best_model = svm_model if svm_acc > rf_acc else rf_model
print("Best Model Selected:", "SVM" if best_model == svm_model else "Random Forest")


Best Model Selected: Random Forest


In [21]:
from PIL import Image
import numpy as np
import cv2

def read_image(image_path, target_size=(100, 100)):
    """Reads an image, converts to grayscale, and resizes it."""
    img = Image.open(image_path).convert("L")  # Convert to grayscale
    img = img.resize(target_size)  # Resize to match training size
    return np.array(img, dtype=np.uint8)  # Convert to NumPy array

def extract_features(img):
    """Extracts features by flattening the image (100x100 → 10,000 features)."""
    return img.flatten()  # Ensures correct feature vector shape

def is_skin_image(image_path):
    """Checks if the given image is a skin image using color and texture analysis."""
    img = cv2.imread(image_path)
    if img is None:
        return False  # Invalid image file

    # Convert to HSV color space
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    
    # Define skin color range
    lower_skin = np.array([0, 20, 70], dtype=np.uint8)
    upper_skin = np.array([25, 255, 255], dtype=np.uint8)

    # Create skin mask
    mask = cv2.inRange(hsv, lower_skin, upper_skin)

    # Calculate the percentage of skin pixels
    skin_ratio = (np.sum(mask > 0) / mask.size) * 100

    return skin_ratio > 5  # Returns True if enough skin pixels are detected

def predict_skin_cancer(image_path, model):
    """Predicts if an image is a skin image and classifies it as Benign, Malignant, Healthy Skin, or Not a Skin Image."""
    
    if not is_skin_image(image_path):
        return "Not a Skin Image"

    img = read_image(image_path)  # Load and preprocess image
    features = extract_features(img)  # Extract features

    # Ensure `features` is a 2D array (1 sample, many features)
    features = np.array(features).reshape(1, -1)  

    # Get prediction probabilities
    pred_prob = model.predict_proba(features)[0]  

    # Unpack probability values (assuming 4-class classification)
    benign_prob, malignant_prob, healthy_prob, not_skin_prob = pred_prob

    # Classify as Not a Skin Image if probability is high
    if not_skin_prob > 0.5:
        return "Not a Skin Image"
    
    # Classify as Healthy if probability is high
    if healthy_prob > 0.5:
        return "Healthy Skin"

    # Otherwise, classify as Benign or Malignant
    pred_label = model.predict(features)[0]  
    return "Benign" if pred_label == 0 else "Malignant"



In [22]:
print("X_train shape:", X_train.shape)  # Should match model input
print("X_test shape:", X_test.shape)  # Should match X_train

X_train shape: (1573, 10000)
X_test shape: (664, 10000)


In [23]:
print("Before prediction, X shape:", X_test.shape)

Before prediction, X shape: (664, 10000)


In [24]:
# Example Prediction
sample_image = r'data\not_skin\a1.jpg'
print("Prediction:", predict_skin_cancer(sample_image, best_model))

Prediction: Not a Skin Image


In [25]:
from joblib import dump
dump(best_model, "best_model.pkl")

['best_model.pkl']