In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import cv2
from skimage.feature import graycomatrix, graycoprops
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import joblib
from sklearn.metrics import classification_report, accuracy_score

def get_haralick_descriptors(image_pil_color, distances=(1, 2, 4, 8, 16, 31), angle=0):
    """Compute GLCM texture descriptors (contrast, homogeneity, entropy).

    The image is converted to 8-bit grayscale, quantized to 16 gray levels,
    and a symmetric, normalized gray-level co-occurrence matrix (GLCM) is
    built for every pixel offset in ``distances`` along direction ``angle``.

    Parameters
    ----------
    image_pil_color : PIL.Image.Image
        Input image. It is normalized to RGB with ``convert("RGB")`` first,
        so RGBA, palette and grayscale images are handled uniformly.
    distances : sequence of int, optional
        Pixel pair distances passed to ``graycomatrix``.
    angle : float, optional
        Pixel pair direction passed to ``graycomatrix`` (0 by default).

    Returns
    -------
    list
        ``3 * len(distances)`` values, ordered as
        ``[contrast, homogeneity, entropy]`` for each distance in turn.
    """
    # Arrays built from PIL images are already in RGB channel order (OpenCV's
    # BGR order only applies to images read by cv2 itself), so no channel swap
    # must happen before the grayscale conversion; swapping would apply the
    # red luminance weight to the blue channel and vice versa.
    rgb = np.array(image_pil_color.convert("RGB"))
    image_gray = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)

    # Reduce the 256 possible intensities to 16 gray levels (values 0..15)
    image_gray = image_gray // 16

    # One call builds the co-occurrence matrices for every distance;
    # the result has shape (16, 16, len(distances), 1) and each
    # (distance, angle) slice is normalized independently.
    glcm = graycomatrix(
        image_gray,
        distances=list(distances),
        angles=[angle],
        levels=16,
        symmetric=True,
        normed=True,
    )

    contrast = graycoprops(glcm, 'contrast')
    homogeneity = graycoprops(glcm, 'homogeneity')
    # Shannon entropy per distance; eps avoids log2(0) on empty GLCM cells
    entropy = -np.sum(glcm * np.log2(glcm + np.finfo(float).eps), axis=(0, 1))

    # Interleave the three descriptors distance by distance
    features = []
    for i in range(len(distances)):
        features.extend([contrast[i, 0], homogeneity[i, 0], entropy[i, 0]])

    return features

# Base directory containing one subdirectory of images per class
base_dir = "sub_images"

# Feature vectors and their labels, filled in the same order
features = []
labels = []

# os.listdir returns entries in arbitrary order; sorting keeps the sample
# order, and therefore the seeded train/test split below, reproducible.
for subdir in sorted(os.listdir(base_dir)):
    subdir_path = os.path.join(base_dir, subdir)

    # Skip stray files sitting next to the class directories
    if not os.path.isdir(subdir_path):
        continue

    # Binary labelling: one directory is "without disease", all others "with"
    if subdir == "Negative for intraepithelial lesion":
        label = "sem_doenca"
    else:
        label = "com_doenca"

    for filename in sorted(os.listdir(subdir_path)):
        # Match the extension case-insensitively so ".PNG" files are not dropped
        if not filename.lower().endswith(".png"):
            continue

        file_path = os.path.join(subdir_path, filename)
        # The context manager closes the file as soon as features are extracted
        with Image.open(file_path) as img:
            features.append(get_haralick_descriptors(img))
        labels.append(label)

# Fail early with a clear message instead of a cryptic error further down
if not features:
    raise ValueError("No .png images found under %r" % base_dir)

# Convert lists to NumPy arrays for machine learning processing
features = np.array(features)
labels = np.array(labels)

# Split data into 80% train and 20% test subsets
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.20, random_state=42)

# Standardize features; the scaler is fit on the training subset only
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Create the SVM model with balanced class weights
model = svm.SVC(kernel='rbf', gamma='scale', class_weight='balanced')
model.fit(X_train_scaled, y_train)

# Save the model
joblib.dump(model, 'svm_haralick_model_binario.joblib')
# The SVM was fit on standardized features, so the fitted scaler is saved too;
# without it the stored model cannot be applied to new images.
joblib.dump(scaler, 'svm_haralick_scaler_binario.joblib')

# Load the model back to verify the saved artifact is usable
loaded_model = joblib.load('svm_haralick_model_binario.joblib')

# Use the loaded model to make predictions
y_pred = loaded_model.predict(X_test_scaled)

# Print classification report for the estimator that produced y_pred
print("Classification report for classifier %s:\n%s\n"
      % (loaded_model, classification_report(y_test, y_pred)))


In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report

# Candidate hyper-parameters explored for the RBF-kernel SVM
param_grid = dict(
    C=[0.1, 1, 10, 100],                    # regularization parameter
    gamma=['scale', 0.001, 0.01, 0.1, 1],   # kernel coefficient for 'rbf'
)

# Exhaustive search over the grid, scored by accuracy with 5-fold CV
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
    verbose=3,
)
grid_search.fit(X_train_scaled, y_train)

# Report the winning combination and its mean cross-validation score
best_cv_score = grid_search.best_score_
print("Best parameters:", grid_search.best_params_)
print("Best cross-validation score: {:.2f}".format(best_cv_score))

# Take the best estimator found by the search and predict the held-out set
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test_scaled)

# Per-class precision / recall / F1 on the test subset
test_report = classification_report(y_test, y_pred)
print(test_report)

In [None]:
# Persist the best estimator selected by the grid search.
# The estimator was fit on features transformed by `scaler`, so inputs must be
# transformed with that same fitted scaler before calling predict().
joblib.dump(best_model, 'best_svm_model_binary.joblib')

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import cv2
from skimage.feature import graycomatrix, graycoprops
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import joblib

def get_haralick_descriptors(image_pil_color, distances=(1, 2, 4, 8, 16, 31), angle=0):
    """Compute GLCM texture descriptors (contrast, homogeneity, entropy).

    The image is converted to 8-bit grayscale, quantized to 16 gray levels,
    and a symmetric, normalized gray-level co-occurrence matrix (GLCM) is
    built for every pixel offset in ``distances`` along direction ``angle``.

    Parameters
    ----------
    image_pil_color : PIL.Image.Image
        Input image. It is normalized to RGB with ``convert("RGB")`` first,
        so RGBA, palette and grayscale images are handled uniformly.
    distances : sequence of int, optional
        Pixel pair distances passed to ``graycomatrix``.
    angle : float, optional
        Pixel pair direction passed to ``graycomatrix`` (0 by default).

    Returns
    -------
    list
        ``3 * len(distances)`` values, ordered as
        ``[contrast, homogeneity, entropy]`` for each distance in turn.
    """
    # Arrays built from PIL images are already in RGB channel order (OpenCV's
    # BGR order only applies to images read by cv2 itself), so no channel swap
    # must happen before the grayscale conversion; swapping would apply the
    # red luminance weight to the blue channel and vice versa.
    rgb = np.array(image_pil_color.convert("RGB"))
    image_gray = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)

    # Reduce the 256 possible intensities to 16 gray levels (values 0..15)
    image_gray = image_gray // 16

    # One call builds the co-occurrence matrices for every distance;
    # the result has shape (16, 16, len(distances), 1) and each
    # (distance, angle) slice is normalized independently.
    glcm = graycomatrix(
        image_gray,
        distances=list(distances),
        angles=[angle],
        levels=16,
        symmetric=True,
        normed=True,
    )

    contrast = graycoprops(glcm, 'contrast')
    homogeneity = graycoprops(glcm, 'homogeneity')
    # Shannon entropy per distance; eps avoids log2(0) on empty GLCM cells
    entropy = -np.sum(glcm * np.log2(glcm + np.finfo(float).eps), axis=(0, 1))

    # Interleave the three descriptors distance by distance
    features = []
    for i in range(len(distances)):
        features.extend([contrast[i, 0], homogeneity[i, 0], entropy[i, 0]])

    return features

# This cell's import list does not bring in classification_report; import it
# here so the cell also runs on its own in a fresh kernel.
from sklearn.metrics import classification_report

# Base directory containing one subdirectory of images per class
base_dir = "sub_images"

# Feature vectors and their labels, filled in the same order
features = []
labels = []

# os.listdir returns entries in arbitrary order; sorting keeps the sample
# order, and therefore the seeded train/test split below, reproducible.
for subdir in sorted(os.listdir(base_dir)):
    subdir_path = os.path.join(base_dir, subdir)

    # Skip stray files sitting next to the class directories
    if not os.path.isdir(subdir_path):
        continue

    for filename in sorted(os.listdir(subdir_path)):
        # Match the extension case-insensitively so ".PNG" files are not dropped
        if not filename.lower().endswith(".png"):
            continue

        file_path = os.path.join(subdir_path, filename)
        # The context manager closes the file as soon as features are extracted
        with Image.open(file_path) as img:
            features.append(get_haralick_descriptors(img))
        labels.append(subdir)  # The subdirectory name is the class label

# Fail early with a clear message instead of a cryptic error further down
if not features:
    raise ValueError("No .png images found under %r" % base_dir)

# Convert lists to NumPy arrays for machine learning processing
features = np.array(features)
labels = np.array(labels)

# Split data into 80% train and 20% test subsets
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.20, random_state=42)

# Standardize features; the scaler is fit on the training subset only
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Create the SVM model with balanced class weights
model = svm.SVC(kernel='rbf', gamma='scale', class_weight='balanced')
model.fit(X_train_scaled, y_train)

# Save the model
joblib.dump(model, 'svm_haralick_model_six_categories.joblib')
# The SVM was fit on standardized features, so the fitted scaler is saved too;
# without it the stored model cannot be applied to new images.
joblib.dump(scaler, 'svm_haralick_scaler_six_categories.joblib')

# Load the model back to verify the saved artifact is usable
loaded_model = joblib.load('svm_haralick_model_six_categories.joblib')

# Use the loaded model to make predictions
y_pred = loaded_model.predict(X_test_scaled)

# Print classification report for the estimator that produced y_pred
print("Classification report for classifier %s:\n%s\n"
      % (loaded_model, classification_report(y_test, y_pred)))


In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report

# Candidate hyper-parameters explored for the RBF-kernel SVM
param_grid = dict(
    C=[0.1, 1, 10, 100],                    # regularization parameter
    gamma=['scale', 0.001, 0.01, 0.1, 1],   # kernel coefficient for 'rbf'
)

# Exhaustive search over the grid, scored by accuracy with 5-fold CV
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
    verbose=3,
)
grid_search.fit(X_train_scaled, y_train)

# Report the winning combination and its mean cross-validation score
best_cv_score = grid_search.best_score_
print("Best parameters:", grid_search.best_params_)
print("Best cross-validation score: {:.2f}".format(best_cv_score))

# Take the best estimator found by the search and predict the held-out set
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test_scaled)

# Per-class precision / recall / F1 on the test subset
test_report = classification_report(y_test, y_pred)
print(test_report)

In [None]:
# Persist the best estimator selected by the grid search (labels are the
# subdirectory names). The estimator was fit on features transformed by
# `scaler`, so inputs must be transformed with that same fitted scaler
# before calling predict().
# NOTE(review): the file name spells "catgoties" (presumably "categories");
# left unchanged because other code may load this exact path - confirm before renaming.
joblib.dump(best_model, 'best_svm_model_6_catgoties.joblib')