In [1]:
# Imports, library setup, and dataset paths.
import os
import time
import cv2
import numpy as np
import pywt
import joblib

from sklearn.metrics import (
    accuracy_score, confusion_matrix, roc_auc_score, f1_score,
    matthews_corrcoef, precision_score, recall_score
)
from skimage.feature import graycomatrix, graycoprops
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.svm import SVC
from sklearn.decomposition import PCA
from sklearn.ensemble import BaggingClassifier, VotingClassifier
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE
from xgboost import XGBClassifier

# Dataset locations. Each folder can be overridden with an environment variable
# so the notebook can run on machines that do not share this drive layout; when
# the variable is unset the original hard-coded path is used unchanged.
NORMAL_FOLDER = os.environ.get(
    "PANCREAS_NORMAL_DIR", r"G:\Image Clssification\Dataset_new\normal"
)
TUMOR_FOLDER = os.environ.get(
    "PANCREAS_TUMOR_DIR", r"G:\Image Clssification\Dataset_new\pancreatic_tumor"
)


In [None]:
# IMAGE LOADING & PREPROCESSING 

# Module-level CLAHE operator (clip limit 2.0, 8x8 tile grid) created once and
# applied to every resized image inside load_images.
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
def load_images(folder, label, size=(128, 128)):
    """Load every readable image in ``folder`` as a resized, equalized grayscale array.

    Directory entries are visited in sorted name order. ``os.listdir`` returns
    entries in arbitrary order, so without sorting the sample order (and hence
    any seeded train/test split built on it) could differ between runs or
    machines.

    Args:
        folder: Directory whose entries are read with ``cv2.imread``.
        label: Class label recorded once for every image that loads.
        size: ``(width, height)`` handed to ``cv2.resize``.

    Returns:
        Tuple ``(data, labels)``. ``data`` has shape ``(n, height, width)`` and
        ``labels`` has shape ``(n,)``. Entries that ``cv2.imread`` cannot
        decode are skipped. When nothing loads, correctly shaped empty arrays
        are returned so the result can still be stacked with other classes.
    """
    data, labels = [], []
    for file in sorted(os.listdir(folder)):
        path = os.path.join(folder, file)
        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # Not a decodable image (or not a file at all): skip it.
            continue
        img = cv2.resize(img, size)
        data.append(clahe.apply(img))
        labels.append(label)

    if not data:
        # np.array([]) would be shape (0,) / float64, which cannot be stacked
        # with (n, height, width) image arrays or integer label vectors.
        width, height = size
        empty_images = np.empty((0, height, width), dtype=np.uint8)
        empty_labels = np.empty((0,), dtype=np.asarray(label).dtype)
        return empty_images, empty_labels
    return np.array(data), np.array(labels)


In [None]:
# Image denoising & contour processing

# Morphological Denoising
def morph_denoise(img):
    """Return ``img`` after a morphological closing with a 5x5 all-ones kernel."""
    closing_kernel = np.ones((5, 5), dtype=np.uint8)
    closed = cv2.morphologyEx(img, cv2.MORPH_CLOSE, closing_kernel)
    return closed

# Boundary tracing + Freeman chain code (FCC)
def boundary_following(binary):
    """Return the external contours of ``binary`` with every boundary point kept.

    Args:
        binary: Single-channel binary image passed to ``cv2.findContours``.

    Returns:
        The contour sequence produced by ``cv2.findContours`` using
        ``RETR_EXTERNAL`` and ``CHAIN_APPROX_NONE``.
    """
    result = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    # OpenCV 3.x returns (image, contours, hierarchy) while 2.x and 4.x return
    # (contours, hierarchy); the contours are the second-to-last item in both
    # layouts, so index from the end instead of unpacking a fixed arity.
    return result[-2]

# 8-direction chain codes keyed by the (dx, dy) step between two consecutive
# contour points. Built once at import time instead of on every loop iteration.
_FREEMAN_CODES = {
    (1, 0): 0, (1, -1): 1, (0, -1): 2, (-1, -1): 3,
    (-1, 0): 4, (-1, 1): 5, (0, 1): 6, (1, 1): 7,
}


def freeman_chain_code(contour):
    """Encode the steps between consecutive contour points as chain codes.

    Args:
        contour: Sequence of points indexed as ``contour[i][0] == (x, y)``,
            the layout the code reads via ``contour[i][0][0]`` and
            ``contour[i][0][1]``.

    Returns:
        A list with one code per consecutive pair of points
        (``len(contour) - 1`` entries, or an empty list for fewer than two
        points). Steps that are not one of the eight unit moves are encoded
        as ``-1``. The step from the last point back to the first is not
        included.
    """
    return [
        _FREEMAN_CODES.get((cur[0][0] - prev[0][0], cur[0][1] - prev[0][1]), -1)
        for prev, cur in zip(contour[:-1], contour[1:])
    ]


In [None]:
# === FEATURE EXTRACTION ===
def extract_shape_features(images):
    """Compute area, perimeter and chain-code length of the largest contour per image.

    Each image is thresholded at 128, closed with ``morph_denoise`` and traced
    with ``boundary_following``; the contour with the largest area supplies the
    three features. Images with no contour contribute a row of zeros.

    Args:
        images: Iterable of single-channel images accepted by ``cv2.threshold``.

    Returns:
        Float array of shape ``(n_images, 3)`` with columns
        ``[area, perimeter, chain_code_length]``. The shape is ``(0, 3)`` for
        empty input so the result can always be column-stacked with the other
        feature blocks.
    """
    features = []
    for img in images:
        _, binary = cv2.threshold(img, 128, 255, cv2.THRESH_BINARY)
        contours = boundary_following(morph_denoise(binary))
        if contours:
            largest = max(contours, key=cv2.contourArea)
            features.append([
                cv2.contourArea(largest),
                cv2.arcLength(largest, True),
                len(freeman_chain_code(largest)),
            ])
        else:
            features.append([0, 0, 0])
    # reshape keeps the column count fixed even when no rows were produced;
    # dtype=float avoids an integer array when every row is the zero fallback.
    return np.array(features, dtype=float).reshape(-1, 3)

def extract_glcm_features(images):
    """Compute GLCM contrast, homogeneity and energy for each image.

    The co-occurrence matrix is built with distance 1, angle 0, 256 grey
    levels, and is symmetric and normalised.

    Args:
        images: Iterable of 2-D images accepted by ``graycomatrix`` with
            ``levels=256``.

    Returns:
        Float array of shape ``(n_images, 3)`` with columns
        ``[contrast, homogeneity, energy]``; shape ``(0, 3)`` for empty input
        so it can be column-stacked with the other feature blocks.
    """
    glcm_props = ['contrast', 'homogeneity', 'energy']
    features = []
    for img in images:
        glcm = graycomatrix(img, distances=[1], angles=[0], levels=256, symmetric=True, normed=True)
        # A single distance/angle pair was requested, so each property lives at [0, 0].
        features.append([graycoprops(glcm, prop)[0, 0] for prop in glcm_props])
    return np.array(features, dtype=float).reshape(-1, len(glcm_props))

def extract_wavelet_features(images):
    """Compute mean and variance of the coarsest 'sym4' wavelet sub-bands per image.

    A 2-level ``pywt.wavedec2`` decomposition is taken and only its first two
    entries are used: the approximation band and the first tuple of
    (horizontal, vertical, diagonal) detail bands.

    Args:
        images: Iterable of 2-D images accepted by ``pywt.wavedec2``.

    Returns:
        Float array of shape ``(n_images, 8)`` ordered as mean/variance of the
        approximation, horizontal, vertical and diagonal bands; shape
        ``(0, 8)`` for empty input so it can be column-stacked with the other
        feature blocks.
    """
    features = []
    for img in images:
        cA, (cH, cV, cD) = pywt.wavedec2(img, 'sym4', level=2)[0:2]
        features.append([
            stat(band)
            for band in (cA, cH, cV, cD)
            for stat in (np.mean, np.var)
        ])
    return np.array(features, dtype=float).reshape(-1, 8)


In [None]:
# === DATA LOADING ===
# Read both classes from disk: label 0 for normal scans, label 1 for tumor scans.
normal_imgs, normal_labels = load_images(NORMAL_FOLDER, 0)
tumor_imgs, tumor_labels = load_images(TUMOR_FOLDER, 1)

# Stack along the sample axis, normal samples first, labels in the same order.
images = np.concatenate((normal_imgs, tumor_imgs), axis=0)
labels = np.concatenate((normal_labels, tumor_labels), axis=0)


In [None]:
# The data-loading cell directly above has already read both folders and built
# `images` / `labels` in normal-then-tumor order. Reuse those arrays instead of
# decoding every file from disk a second time; the aliases keep the names this
# cell used to define.
normal_images, tumor_images = normal_imgs, tumor_imgs

# Feature extraction over the full dataset
shape_features = extract_shape_features(images)      # shape: area, perimeter, chain-code length
glcm_features = extract_glcm_features(images)        # texture: GLCM contrast, homogeneity, energy
wavelet_features = extract_wavelet_features(images)  # frequency: mean & variance of wavelet sub-bands

# Merge all feature blocks column-wise and persist them alongside the labels.
features = np.hstack((shape_features, glcm_features, wavelet_features))
np.save("features.npy", features)
np.save("labels.npy", labels)


In [None]:
# === SPLIT BEFORE FEATURE EXTRACTION (TO PREVENT LEAKAGE) ===
# Stratified hold-out split on the raw images: 26% of the samples go to the
# test set, with a fixed seed so the partition is repeatable.
X_train_imgs, X_test_imgs, y_train, y_test = train_test_split(
    images,
    labels,
    test_size=0.26,
    random_state=42,
    stratify=labels,
)

In [None]:
# === FEATURE EXTRACTION ===
def extract_features(images):
    """Return the shape, GLCM and wavelet features of ``images`` side by side.

    The three extractors are run in that order and their outputs are joined
    column-wise into a single feature matrix.
    """
    shape_block = extract_shape_features(images)
    texture_block = extract_glcm_features(images)
    wavelet_block = extract_wavelet_features(images)
    return np.hstack((shape_block, texture_block, wavelet_block))

# Features are computed independently for each partition, training set first.
X_train, X_test = (
    extract_features(subset) for subset in (X_train_imgs, X_test_imgs)
)


In [None]:
# === SCALING ===
# Standardise first, using statistics from the real training samples only.
# SMOTE picks neighbours by distance, so running it on raw features would let
# the large-magnitude columns (e.g. contour area) dominate the neighbour search,
# and fitting the scaler afterwards would mix synthetic points into its
# mean/variance estimates.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
joblib.dump(scaler, "scaler.pkl")

# === RESAMPLING (SMOTE) ===
# Training set only; the test set is never resampled.
X_train, y_train = SMOTE(sampling_strategy=0.7, random_state=42).fit_resample(X_train, y_train)

# === PCA ===
pca = PCA(n_components=10).fit(X_train)
X_train = pca.transform(X_train)
X_test = pca.transform(X_test)
joblib.dump(pca, "pca_model.pkl")


In [None]:
# === SVM HYPERPARAMETER SEARCH ===
svm = SVC(kernel='rbf', probability=True, class_weight='balanced', random_state=42)
param_dist = {'C': [1, 3, 5, 7, 10], 'gamma': ['scale', 0.01, 0.1, 1, 10]}
# Only 8 of the 25 grid combinations are sampled. Seeding the search makes the
# sampled candidates, and therefore best_params, repeatable across runs, in
# line with the fixed seeds used everywhere else in this notebook.
random_search = RandomizedSearchCV(
    svm,
    param_distributions=param_dist,
    n_iter=8,
    cv=3,
    scoring='accuracy',
    n_jobs=-1,
    verbose=1,
    random_state=42,
)
random_search.fit(X_train, y_train)
best_params = random_search.best_params_


In [None]:
# === FINAL MODEL ===
# RBF SVM rebuilt with the C / gamma values selected by the randomized search.
best_svm = SVC(
    kernel='rbf',
    probability=True,
    class_weight='balanced',
    C=best_params['C'],
    gamma=best_params['gamma'],
    random_state=42,
)

# Bag of three copies of the tuned SVM.
bagged_svm = BaggingClassifier(
    estimator=best_svm,
    n_estimators=3,
    n_jobs=-1,
    random_state=42,
)

# NOTE(review): use_label_encoder appears to be deprecated in recent XGBoost
# releases; confirm against the installed version before removing it.
xgb = XGBClassifier(
    n_estimators=100,
    learning_rate=0.05,
    max_depth=5,
    use_label_encoder=False,
    eval_metric='logloss',
    n_jobs=-1,
    random_state=42,
)

# Soft voting averages the predicted probabilities of the two members.
ensemble = VotingClassifier(
    estimators=[('svm', bagged_svm), ('xgb', xgb)],
    voting='soft',
    n_jobs=-1,
)


In [None]:
# === TRAINING ===
# time.perf_counter() is a monotonic, high-resolution clock intended for
# measuring elapsed intervals; time.time() can jump if the system clock is
# adjusted while the model is fitting.
start_train = time.perf_counter()
ensemble.fit(X_train, y_train)
train_time = time.perf_counter() - start_train

# Persist the fitted ensemble next to the saved scaler and PCA objects.
joblib.dump(ensemble, "ensemble_model.pkl")


In [None]:
# === TESTING & METRICS ===
# Training-set predictions are needed only for the training-accuracy figure,
# so they are produced outside the timed region; otherwise the reported
# "Testing Time" would also include inference over the training set.
y_train_pred = ensemble.predict(X_train)

# Time only the work done on the held-out test set, with a monotonic clock.
start_test = time.perf_counter()
y_test_pred = ensemble.predict(X_test)
y_test_prob = ensemble.predict_proba(X_test)[:, 1]
test_time = time.perf_counter() - start_test


In [None]:
# === METRICS ===
# Accuracy on the training data and on the held-out test data.
train_acc = accuracy_score(y_train, y_train_pred)
test_acc = accuracy_score(y_test, y_test_pred)

# Unpack the 2x2 confusion matrix row by row: first row is the true-negative
# class (tn, fp), second row is the true-positive class (fn, tp).
(tn, fp), (fn, tp) = confusion_matrix(y_test, y_test_pred)
sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)

# Remaining test-set scores from scikit-learn.
precision = precision_score(y_test, y_test_pred)
recall = recall_score(y_test, y_test_pred)
f1 = f1_score(y_test, y_test_pred)
mcc = matthews_corrcoef(y_test, y_test_pred)
auc = roc_auc_score(y_test, y_test_prob)


In [None]:
# === OUTPUT ===
# Emit the five summary lines in one call; sep="\n" puts each on its own line.
print(
    f"Training Time: {train_time:.2f}s | Testing Time: {test_time:.2f}s",
    f"Train Acc: {train_acc*100:.2f}% | Test Acc: {test_acc*100:.2f}%",
    f"Sensitivity (Recall): {sensitivity*100:.2f}% | Specificity: {specificity*100:.2f}%",
    f"Precision: {precision*100:.2f}% | F1 Score: {f1:.2f}",
    f"MCC: {mcc:.2f} | AUC: {auc:.2f}",
    sep="\n",
)
