In [11]:
import os
import numpy as np
import cv2
import pywt
from PIL import Image
from skimage.feature import local_binary_pattern
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
from joblib import Parallel, delayed


In [10]:
!pip install scikit-learn

Collecting scikit-learn
  Using cached scikit_learn-1.6.1-cp310-cp310-win_amd64.whl.metadata (15 kB)
Collecting joblib>=1.2.0 (from scikit-learn)
  Using cached joblib-1.4.2-py3-none-any.whl.metadata (5.4 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn)
  Using cached threadpoolctl-3.6.0-py3-none-any.whl.metadata (13 kB)
Using cached scikit_learn-1.6.1-cp310-cp310-win_amd64.whl (11.1 MB)
Using cached joblib-1.4.2-py3-none-any.whl (301 kB)
Using cached threadpoolctl-3.6.0-py3-none-any.whl (18 kB)
Installing collected packages: threadpoolctl, joblib, scikit-learn
Successfully installed joblib-1.4.2 scikit-learn-1.6.1 threadpoolctl-3.6.0


In [None]:
# Define dataset paths
# All four class folders live under the same project data directory.
# The benign training folder must point at data/train: it previously pointed at
# 'data/test/benign', which would have trained on test images.
folder_benign_train = r'Desktop/Skin cancer detection_mini project/data/train/benign'
folder_malignant_train = r'Desktop/Skin cancer detection_mini project/data/train/malignant'
folder_benign_test = r'Desktop/Skin cancer detection_mini project/data/test/benign'
folder_malignant_test = r'Desktop/Skin cancer detection_mini project/data/test/malignant'

In [6]:
# Load and preprocess images
def read_image(imname):
    """Load an image file as an 8-bit grayscale array.

    Args:
        imname: Path of the image, passed to ``PIL.Image.open``.

    Returns:
        ``numpy.ndarray`` of dtype ``uint8`` built from the image after
        conversion to PIL mode "L" (grayscale).
    """
    # Open inside a context manager so the underlying file is closed
    # deterministically rather than whenever the Image object is collected.
    with Image.open(imname) as img:
        gray = img.convert("L")  # Convert to grayscale; returns a new, loaded image
    return np.asarray(gray, dtype=np.uint8)
def extract_contourlet_features(image):
    """Collect mean/variance statistics from a 3-level 'db1' wavelet decomposition.

    Calls ``pywt.wavedec2`` on ``image`` and, for every coefficient array it
    returns (tuple entries are expanded into their individual sub-bands),
    records the array's mean followed by its variance.

    Note: a later cell in this notebook redefines this function name.

    Returns:
        list: the statistics, in the order the decomposition yields its arrays.
    """
    stats = []
    for entry in pywt.wavedec2(image, 'db1', level=3):
        # Tuple entries carry several sub-bands; a bare array is a single band.
        bands = entry if isinstance(entry, tuple) else (entry,)
        for band in bands:
            stats.extend((np.mean(band), np.var(band)))
    return stats

def extract_lbp_features(image, num_points=24, radius=3):
    """Return a normalised histogram of "uniform" local binary pattern codes.

    Args:
        image: Image handed to ``local_binary_pattern``.
        num_points: Number of neighbour points for the LBP operator.
        radius: Radius of the LBP neighbourhood.

    Returns:
        Float array with ``num_points + 2`` bins (integer bin edges
        ``0 .. num_points + 2``), divided by the total count.
    """
    pattern_map = local_binary_pattern(image, num_points, radius, method="uniform")
    bin_edges = np.arange(0, num_points + 3)
    counts, _ = np.histogram(pattern_map.ravel(), bins=bin_edges, range=(0, num_points + 2))
    total = counts.sum()
    return counts.astype("float") / total



In [7]:
def load_images_from_folder(folder):
    """Read every entry of ``folder`` and return the extracted feature vectors.

    Each entry is loaded with ``read_image`` and passed through
    ``extract_features``. Entries that raise are reported on stdout and
    skipped, so one unreadable file does not abort the whole scan.

    Note: a later cell in this notebook redefines this function name.

    Returns:
        Float ``numpy.ndarray`` with one row per successfully processed entry.
    """
    feature_rows = []
    for entry in os.listdir(folder):
        full_path = os.path.join(folder, entry)
        try:
            feature_rows.append(extract_features(read_image(full_path)))
        except Exception as err:
            print(f"Error loading {full_path}: {err}")
    return np.array(feature_rows, dtype="float")


In [8]:
from PIL import Image
import numpy as np
import os

def load_images_from_folder(folder, target_size=(100, 100)):
    """Load every supported image in ``folder`` as a resized grayscale array.

    Files whose extension is not in the supported list are reported and
    skipped; files that fail to load are reported and skipped as well.

    Args:
        folder: Directory to scan (not recursive).
        target_size: ``(width, height)`` passed to ``PIL.Image.resize``.

    Returns:
        ``uint8`` array of shape ``(n_images, height, width)``. When nothing
        could be loaded the result has shape ``(0, height, width)`` so it can
        still be concatenated with non-empty image stacks.
    """
    valid_extensions = (".jpg", ".jpeg", ".png", ".bmp", ".tiff")  # Supported formats
    images = []

    # os.listdir() returns entries in arbitrary order; sort for a reproducible row order.
    for filename in sorted(os.listdir(folder)):
        if not filename.lower().endswith(valid_extensions):
            print(f"Skipping non-image file: {filename}")
            continue  # Skip non-image files

        img_path = os.path.join(folder, filename)
        try:
            # Context manager closes the file handle as soon as the pixels are read.
            with Image.open(img_path) as raw:
                gray = raw.convert("L").resize(target_size)  # Grayscale, then resize
            images.append(np.array(gray, dtype=np.uint8))
        except Exception as e:
            print(f"Error loading {img_path}: {e}")

    if not images:
        # Keep the trailing dimensions so downstream np.concatenate calls still work.
        width, height = target_size
        return np.empty((0, height, width), dtype=np.uint8)
    return np.array(images)

In [9]:
def extract_contourlet_features(image):
    """Summarise a 3-level 'db1' wavelet decomposition as mean/variance pairs.

    Despite its name, this uses ``pywt.wavedec2`` (a 2-D wavelet
    decomposition), not a contourlet transform.

    Args:
        image: 2-D grayscale array, or a 3-D colour array in RGB channel
            order (the order produced by the PIL-based loaders in this notebook).

    Returns:
        list: for every coefficient array returned by ``pywt.wavedec2``
        (tuple entries expanded into their sub-bands), its mean followed by
        its variance.

    Raises:
        ValueError: If ``image`` is neither 2-D nor 3-D.
    """
    if len(image.shape) == 3:  # Convert RGB to Grayscale if needed
        # Arrays here come from PIL/NumPy and are RGB-ordered, so use the RGB
        # conversion code; COLOR_BGR2GRAY would swap the red and blue weights.
        image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    elif len(image.shape) != 2:  # If image is not 2D, raise an error
        raise ValueError(f"Invalid image shape: {image.shape}. Expected 2D grayscale.")

    coeffs = pywt.wavedec2(image, 'db1', level=3)  # Apply wavelet decomposition
    features = []
    for coeff in coeffs:
        # Tuple entries hold several sub-bands; a bare array is a single band.
        subbands = coeff if isinstance(coeff, tuple) else (coeff,)
        for subband in subbands:
            features.append(np.mean(subband))
            features.append(np.var(subband))
    return features


In [10]:
# Local Binary Pattern Feature Extraction
def extract_lbp_features(image, num_points=24, radius=3):
    """Compute the normalised histogram of "uniform" LBP codes for ``image``.

    Args:
        image: Image handed to ``local_binary_pattern``.
        num_points: Number of neighbour points for the LBP operator.
        radius: Radius of the LBP neighbourhood.

    Returns:
        Float array of ``num_points + 2`` bin frequencies that sum to 1.
    """
    codes = local_binary_pattern(image, num_points, radius, method="uniform").ravel()
    # Integer bin edges 0, 1, ..., num_points + 2 give num_points + 2 bins.
    histogram = np.histogram(
        codes,
        bins=np.arange(0, num_points + 3),
        range=(0, num_points + 2),
    )[0]
    as_float = histogram.astype("float")
    return as_float / histogram.sum()

# Combined Feature Extraction
def extract_features(image):
    """Build one feature vector for ``image``.

    The vector is the output of ``extract_contourlet_features`` followed by
    the output of ``extract_lbp_features``, joined with ``np.concatenate``.
    """
    parts = (
        extract_contourlet_features(image),
        extract_lbp_features(image),
    )
    return np.concatenate(parts)

# Parallel Processing for Feature Extraction
def process_images_in_parallel(images):
    """Run ``extract_features`` over ``images`` with joblib.

    ``n_jobs=-1`` is passed to ``Parallel``. Returns whatever the ``Parallel``
    call returns for the sequence of delayed ``extract_features`` tasks.
    """
    runner = Parallel(n_jobs=-1)
    tasks = (delayed(extract_features)(picture) for picture in images)
    return runner(tasks)

In [11]:
# Load dataset
# Training stacks first, then test stacks; each call scans one class folder.
X_benign, X_malignant = (
    load_images_from_folder(path)
    for path in (folder_benign_train, folder_malignant_train)
)
X_benign_test, X_malignant_test = (
    load_images_from_folder(path)
    for path in (folder_benign_test, folder_malignant_test)
)

FileNotFoundError: [WinError 3] The system cannot find the path specified: 'Desktop/Skin cancer detection_mini project/data/train/benign'

In [35]:
# Create labels
# Class encoding: 0.0 = benign, 1.0 = malignant; one label per row of each stack.
y_benign = np.full(X_benign.shape[0], 0.0)
y_malignant = np.full(X_malignant.shape[0], 1.0)
y_benign_test = np.full(X_benign_test.shape[0], 0.0)
y_malignant_test = np.full(X_malignant_test.shape[0], 1.0)

In [36]:
# Merge Data
# Benign rows come first, then malignant, for both samples and labels so the
# two arrays stay aligned row by row.
X_train = np.concatenate([X_benign, X_malignant])
y_train = np.concatenate([y_benign, y_malignant])
X_test = np.concatenate([X_benign_test, X_malignant_test])
y_test = np.concatenate([y_benign_test, y_malignant_test])

In [37]:
# Sanity check: shape of the merged training stack and of a single sample.
print("X_train Shape:", np.shape(X_train))
print("First Image Shape:", np.shape(X_train[0]))

X_train Shape: (2637, 100, 100)
First Image Shape: (100, 100)


In [39]:
# Train SVM Classifier
# No visible cell defines X_train_features / X_test_features, so on a fresh kernel
# this cell fails with NameError. Build them here from the raw image stacks.
# NOTE(review): reconstructed with process_images_in_parallel; confirm this matches
# the preprocessing that produced the recorded accuracy.
X_train_features = np.array(process_images_in_parallel(X_train))
X_test_features = np.array(process_images_in_parallel(X_test))

svm_model = SVC(kernel='linear')
svm_model.fit(X_train_features, y_train)
y_pred_svm = svm_model.predict(X_test_features)
svm_acc = accuracy_score(y_test, y_pred_svm)
print("SVM Accuracy:", svm_acc)

SVM Accuracy: 0.7818181818181819


In [44]:
# Train Random Forest Classifier
# 100 trees with a fixed random_state; fit() returns the estimator itself,
# so construction and training can be chained.
rf_model = RandomForestClassifier(random_state=42, n_estimators=100).fit(
    X_train_features, y_train
)
y_pred_rf = rf_model.predict(X_test_features)
rf_acc = accuracy_score(y_true=y_test, y_pred=y_pred_rf)
print("Random Forest Accuracy:", rf_acc)

Random Forest Accuracy: 0.8227272727272728


In [53]:
from tensorflow.keras.applications import VGG19
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import img_to_array, load_img
import numpy as np

# Load VGG19 model without the top layer
# ImageNet weights, no classification head, and pooling="avg" on the model output.
vgg19_model = VGG19(
    include_top=False,
    weights="imagenet",
    pooling="avg",
)

# Function to extract deep features
def extract_vgg19_features(img_path):
    """Return the VGG19 feature vector for a single image file.

    Args:
        img_path: Path of the image, loaded at 224x224 via ``load_img``.

    Returns:
        1-D array obtained by flattening ``vgg19_model.predict`` for a batch
        containing just this image.
    """
    # Imported here so this function is self-contained. The ImageNet VGG19
    # weights expect this model-specific preprocessing, not a plain /255 rescale.
    from tensorflow.keras.applications.vgg19 import preprocess_input

    img = load_img(img_path, target_size=(224, 224))  # Resize image
    img_array = img_to_array(img)
    img_array = np.expand_dims(img_array, axis=0)  # Add batch dimension
    img_array = preprocess_input(img_array)  # VGG19-specific input normalisation

    features = vgg19_model.predict(img_array)
    return features.flatten()  # Flatten the output


ModuleNotFoundError: No module named 'tensorflow.keras'

In [45]:
# Choose Best Model
# The SVM wins only on strictly higher accuracy; ties fall to the Random Forest.
best_model = svm_model if svm_acc > rf_acc else rf_model
# Identity check: the question is which object was picked, not value equality.
print("Best Model Selected:", "SVM" if best_model is svm_model else "Random Forest")


Best Model Selected: Random Forest


In [46]:
from PIL import Image
import numpy as np

def read_image(image_path, target_size=(100, 100)):
    """Reads an image, converts to grayscale, and resizes it.

    Args:
        image_path: Path of the image file to open.
        target_size: ``(width, height)`` handed to ``PIL.Image.resize``. The
            default matches the 100x100 default of ``load_images_from_folder``.

    Returns:
        ``uint8`` NumPy array of the resized grayscale image.
    """
    # Context manager closes the file handle as soon as the pixels are read,
    # instead of leaving that to garbage collection.
    with Image.open(image_path) as raw:
        resized = raw.convert("L").resize(target_size)  # Grayscale, then resize
    return np.array(resized, dtype=np.uint8)  # Convert to NumPy array

def predict_skin_cancer(image_path):
    """Predicts skin cancer type (Benign/Malignant) for a given image.

    Args:
        image_path: Path of the image to classify.

    Returns:
        ``"Benign"`` when ``best_model`` predicts label 0, otherwise
        ``"Malignant"``.
    """
    img = read_image(image_path)  # Load and preprocess image

    # Ensure the features form a 2D array (1 sample, many features)
    features = np.asarray(extract_features(img)).reshape(1, -1)

    # Only the hard label is needed. The earlier predict_proba() call was unused
    # and raises for an SVC fitted without probability=True, which best_model
    # can be when the SVM scores higher.
    pred_label = best_model.predict(features)[0]

    return "Benign" if pred_label == 0 else "Malignant"




In [48]:
# Example Prediction
# Build the sample path from the configured test folder instead of a hard-coded,
# user-specific absolute path.
# NOTE(review): like the dataset folders, this is resolved relative to the current
# working directory; confirm the notebook is launched from the expected location.
sample_image = os.path.join(folder_benign_test, '1.jpg')
print("Prediction:", predict_skin_cancer(sample_image))

Prediction: Benign
